-
Notifications
You must be signed in to change notification settings - Fork 2
/
license_checker.sh
134 lines (103 loc) · 4.21 KB
/
license_checker.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/bash
#
# Checks which repositories of the cloudfoundry and cloudfoundry-incubator
# GitHub organisations carry a license file containing the Apache 2.0 text.
# Results are written to license_table.csv, progress messages to checker.log.
#
# WARNING: the script first deletes every *.yml, *.txt, *.log and *.csv file
# and the ./license directory in the current working directory.

# Start from a clean slate: remove the outputs of any previous run.
# ("./*.yml" with the dot, so that only real .yml files are matched.)
rm -rf ./*.yml ./*.txt ./*.log ./*.csv license
mkdir license

LOG_FILE=./checker.log
now=$(date +"%T")
echo "$now : start to check license of cf components" >> "$LOG_FILE"

# Text patterns searched for inside a downloaded license file.
# we use apache license V2 currently.
APACHE_LICENSE="Apache License"
LICENSE_VERSION="Version 2.0"

# Values written to the "License file" / "License info" columns of the CSV.
MESSAGE_LICENSE_TEXT_FOUND="found"
MESSAGE_LICENSE_FILE_NOT_FOUND="-"
MESSAGE_LICENSE_TEXT_NOT_FOUND="-"
MESSAGE_LICENSE_VERSION_NOT_CORRECT="wrong version"

# Counters for the summary lines at the end of the CSV.
NUM_SUCC=0
NUM_FAIL=0

CSV_FILE="license_table.csv"
# Space-separated list of candidate license file names, tried in this order.
FILE_PATTERN="LICENSE license LICENSE.TXT LICENSE.txt License.txt license.txt LICENSE.MD LICENSE.md"

# CSV preamble: the patterns in use, followed by the column header row.
{
  echo "file pattern,$FILE_PATTERN"
  echo "license pattern,$APACHE_LICENSE"
  echo "version pattern,$LICENSE_VERSION"
  echo "Repository name,License file,License info,URL"
} >> "$CSV_FILE"

# Download the repository lists of both organisations.
# The URLs are quoted so the shell does not interpret "?" and "[1-6]" as glob
# patterns; curl itself expands the [1-6] range into six requests (pages 1-6)
# whose responses are appended to the same file.
# NOTE(review): the page count is hard-coded; repositories beyond page 6 are
# not fetched.
curl "https://api.github.com/orgs/cloudfoundry/repos?page=[1-6]" >> repos.yml
curl "https://api.github.com/orgs/cloudfoundry-incubator/repos?page=[1-6]" >> incubator_repos.yml
# Collect the git URL of every repository found in the downloaded repository
# lists and store them in git.txt, one URL per line.
echo "get gits url"
# The URL is taken as the 4th double-quote-delimited field of every line that
# mentions git_url (expected shape: "git_url": "git://github.com/org/repo.git",).
# -h keeps grep from prefixing matches with the file name.
REPOS=$(grep -h git_url repos* *incu* | cut -f 4 -d'"')
# Write the whole list at once and without unquoted expansions, so a value can
# never be altered by word splitting or pathname expansion.
# git.txt is only created when at least one URL was found.
if [ -n "$REPOS" ]; then
  printf '%s\n' "$REPOS" >> git.txt
fi
# check if a license file exists
# For every repository URL in git.txt: try each candidate license file name,
# keep the first download that looks like real content, then classify it and
# append one row to the CSV report plus one line to the log.
echo "check license files"

# Print the "owner/repo" part of a git://github.com/owner/repo.git URL.
component_from_git_url() {
  local url=$1
  url=${url#git://github.com/}
  printf '%s\n' "${url%.git}"
}

# Succeed only if file $1 exists and is larger than 25 bytes.
# An error body such as {"error":"Not Found"} is 21 bytes long, so anything
# up to 25 bytes is treated as "no license file".
has_license_payload() {
  local file=$1
  local -r min_bytes=25
  [ -f "$file" ] || return 1
  # wc -c instead of "stat -c%s": the latter is GNU-only.
  (( $(wc -c < "$file") > min_bytes ))
}

COMPONENT=""
LICENSE=""
FILE_NAME=""
if [ -r git.txt ]; then
  # The list is read on fd 3 so commands inside the loop keep their own stdin.
  while IFS= read -r GIT <&3 || [ -n "$GIT" ]; do
    [ -n "$GIT" ] || continue
    COMPONENT=$(component_from_git_url "$GIT")
    COMPONENT_PATH="https://github.com/$COMPONENT"
    now=$(date +"%T")
    echo "$now"
    echo "start to check component: $COMPONENT"
    # NOTE(review): assumes the license lives on a branch named "master".
    LICENSE="https://github.com/$COMPONENT/blob/master"
    # Local copy of the download: ./license/<owner>_<repo>
    FILE_NAME="./license/${COMPONENT//\//_}"

    # FILE_PATTERN is deliberately unquoted: it is a space-separated list.
    for LICENSE_NAME in $FILE_PATTERN; do
      LICENSE_URL="$LICENSE/$LICENSE_NAME"
      echo "curl from $LICENSE_URL and store it to $FILE_NAME"
      # ">" truncates the result of the previous attempt.
      # -f: write no body on HTTP errors (e.g. 404), so a missing file yields
      #     an empty download instead of an error page; -L: follow redirects.
      curl -fL "$LICENSE_URL" > "$FILE_NAME"
      if has_license_payload "$FILE_NAME"; then
        break
      fi
    done
    # It seems to be enough to provide functions above to check the license
    # but if there is any license file with other names, we can add new name
    # patterns in FILE_PATTERN to support that in the future.
    # After the loop LICENSE_NAME holds the bare name of the last file tried,
    # i.e. the matching one when a license file was found.

    if ! has_license_payload "$FILE_NAME"; then
      # No candidate produced usable content.
      rm -f -- "$FILE_NAME"
      echo "for component $COMPONENT  $LICENSE  doesn't exist!" >> "$LOG_FILE"
      echo "$COMPONENT,$MESSAGE_LICENSE_FILE_NOT_FOUND,$MESSAGE_LICENSE_TEXT_NOT_FOUND,$COMPONENT_PATH" >> "$CSV_FILE"
      NUM_FAIL=$((NUM_FAIL + 1))
    elif ! grep -q -- "$APACHE_LICENSE" "$FILE_NAME"; then
      # A file exists but it does not mention the Apache license.
      echo "for component $COMPONENT  $LICENSE  is found. But it doesn't include Apache license!" >> "$LOG_FILE"
      echo "$COMPONENT,$LICENSE_NAME,$MESSAGE_LICENSE_TEXT_NOT_FOUND,$COMPONENT_PATH" >> "$CSV_FILE"
      NUM_FAIL=$((NUM_FAIL + 1))
    elif ! grep -q -- "$LICENSE_VERSION" "$FILE_NAME"; then
      # Apache license, but not the expected version.
      echo "for component $COMPONENT  $LICENSE  is found. it contains Apache license but with incorrect version!" >> "$LOG_FILE"
      echo "$COMPONENT,$LICENSE_NAME,$MESSAGE_LICENSE_VERSION_NOT_CORRECT,$COMPONENT_PATH" >> "$CSV_FILE"
      NUM_FAIL=$((NUM_FAIL + 1))
    else
      # Apache license with the expected version.
      echo "for component $COMPONENT  $LICENSE  is found. it contains Apache license with correct version." >> "$LOG_FILE"
      echo "$COMPONENT,$LICENSE_NAME,$MESSAGE_LICENSE_TEXT_FOUND,$COMPONENT_PATH" >> "$CSV_FILE"
      NUM_SUCC=$((NUM_SUCC + 1))
    fi
  done 3< git.txt
else
  echo "git.txt not found or not readable: no component to check" >&2
fi
# Record the end time in the log and append the totals to the CSV report.
now=$(date +"%T")
echo "$now : end of checking license of cf components" >> "$LOG_FILE"
# One separator row (a single space), then the success and failure counts.
# The redirect targets are quoted and the CSV is opened only once.
{
  echo " "
  echo "succ cases,$NUM_SUCC"
  echo "fail cases,$NUM_FAIL"
} >> "$CSV_FILE"