-
Notifications
You must be signed in to change notification settings - Fork 57
157 lines (146 loc) · 6.54 KB
/
pr-path-detection.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Check Paths and Hyperlinks
on:
pull_request:
branches: [main]
types: [opened, reopened, ready_for_review, synchronize]
jobs:
check-the-validity-of-hyperlinks-in-README:
runs-on: ubuntu-latest
steps:
- name: Clean Up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo docs
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Check the Validity of Hyperlinks
run: |
cd ${{github.workspace}}
fail="FALSE"
# url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'linkedin')
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
if [ -n "$changed_files" ]; then
for changed_file in $changed_files; do
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'linkedin') || true
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, status code: $response, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successful*****"
else
urls_line+=("$url_line")
echo "Status code: $response_retry, Link $url validation failed, will retry later."
fi
fi
done
fi
done
fi
echo "**************Start Retry**************"
for link in "${urls_line[@]}"; do
url=$(echo "$link"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$link"|cut -d':' -f1 | cut -d'/' -f2-)
attempt_num=1
while [ $attempt_num -le 5 ]; do
do_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
if [ "$do_retry" -eq 200 ]; then
echo "$url Retry successful"
break
else
echo "$url Validation failed, retrying..."
((attempt_num++))
sleep 10
fi
done
if [ $attempt_num -gt 5 ]; then
echo "Invalid link from ${{github.workspace}}/$path: $url status code: $do_retry"
fail="TRUE"
fi
done
if [[ "$fail" == "TRUE" ]]; then
exit 1
else
echo "All hyperlinks are valid."
fi
shell: bash
check-the-validity-of-relative-path:
runs-on: ubuntu-latest
steps:
- name: Clean up Working Directory
run: sudo rm -rf ${{github.workspace}}/*
- name: Checkout Repo docs
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checking Relative Path Validity
run: |
cd ${{github.workspace}}
fail="FALSE"
repo_name=${{ github.event.pull_request.head.repo.full_name }}
if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then
owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1)
branch="https://github.com/$owner/docs/tree/${{ github.event.pull_request.head.ref }}"
else
branch="https://github.com/opea-project/docs/blob/${{ github.event.pull_request.head.ref }}"
fi
link_head="https://github.com/opea-project/docs/blob/main"
merged_commit=$(git log -1 --format='%H')
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http'|grep -Ev 'mailto'|grep -Ev 'portal.azure.com')
if [ -n "$png_lines" ]; then
for png_line in $png_lines; do
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1)
if [[ "${png_path:0:1}" == "/" ]]; then
check_path="."$(echo "$png_path" | cut -d '#' -f1)
elif [[ "$png_path" == *#* ]]; then
relative_path=$(echo "$png_path" | cut -d '#' -f1)
if [ -n "$relative_path" ]; then
check_path=$(dirname "$refer_path")/$relative_path
png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}')
else
check_path=$refer_path
fi
else
check_path=$(dirname "$refer_path")/$png_path
fi
if [ -e "$check_path" ]; then
real_path=$(realpath $check_path)
if [[ "$png_line" == *#* ]]; then
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
url_dev=$branch$(echo "$real_path" | sed 's|.*/docs||')$png_path
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev")
if [ "$response_retry" -eq 200 ]; then
echo "*****Retry successfully*****"
else
echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev"
fail="TRUE"
fi
else
echo "Validation succeed $png_line"
fi
fi
fi
else
echo "$check_path does not exist $png_line"
fail="TRUE"
fi
done
fi
if [[ "$fail" == "TRUE" ]]; then
exit 1
else
echo "All relative links valid."
fi
shell: bash