train_object_detector_cars.py
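"""Train and run a dlib HOG + SVM object detector for cars.

The script wraps dlib's simple_object_detector pipeline: training from an
imglab-style XML annotation file, evaluating on a folder of test images,
merging several annotation XMLs with the imglab tool, and running the trained
detector frame by frame over a video with moviepy.
"""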
import os
import sys
import glob
import dlib
from skimage import io
import numpy as np
import cv2
def train(training_xml_path, model_file="detector.svm"):
    assert os.path.isfile(training_xml_path)
    assert not os.path.isfile(model_file)
    # Now let's do the training. The train_simple_object_detector() function has a
    # bunch of options, all of which come with reasonable default values. The next
    # few lines go over some of these options.
    options = dlib.simple_object_detector_training_options()
    # Since cars are roughly left/right symmetric we can tell the trainer to train a
    # symmetric detector. This helps it get the most value out of the training
    # data.
    options.add_left_right_image_flips = True
    # The trainer is a kind of support vector machine and therefore has the usual
    # SVM C parameter. In general, a bigger C encourages it to fit the training
    # data better but might lead to overfitting. You must find the best C value
    # empirically by checking how well the trained detector works on a test set of
    # images you haven't trained on. Don't just leave the value at the default.
    # Try a few different C values and see what works best for your data.
    options.C = 10
    # Tell the code how many CPU cores your computer has for the fastest training.
    options.num_threads = 6
    options.epsilon = 0.001
    options.be_verbose = True
    options.detection_window_size = 4096  # window area in pixels, roughly 64x64
    # options.upsample_limit = 8
    # This function does the actual training. It will save the final detector to
    # model_file. The input is an XML file that lists the images in the training
    # dataset and also contains the positions of the object boxes. To create your
    # own XML files you can use the imglab tool which can be found in the
    # tools/imglab folder. It is a simple graphical tool for labeling objects in
    # images with boxes. To see how to use it read the tools/imglab/README.txt
    # file.
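    # For reference, a typical imglab session looks roughly like this (the
    # dataset name and image folder below are placeholders):
    #   ./imglab -c my_dataset.xml /path/to/images   # create an XML listing the images
    #   ./imglab my_dataset.xml                      # open the GUI and draw the boxes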
print("Goingt to train ...")
dlib.train_simple_object_detector(training_xml_path, model_file, options)
# Now that we have a face detector we can test it. The first statement tests
# it on the training data. It will print(the precision, recall, and then)
# average precision.
print("") # Print blank line to create gap from previous output
print("Training accuracy: {}".format(
dlib.test_simple_object_detector(training_xml_path, model_file)))
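# A minimal sketch of the C sweep suggested in train() above. It assumes a
# separate testing XML (the testing_xml_path argument is hypothetical and not
# part of the original script) that was not used for training; the best C is
# whichever value scores highest on that held-out set.
def sweep_C(training_xml_path, testing_xml_path, c_values=(1, 5, 10, 20, 50)):
    for c in c_values:
        options = dlib.simple_object_detector_training_options()
        options.add_left_right_image_flips = True
        options.num_threads = 6
        options.C = c
        model_file = "detector_C{}.svm".format(c)
        dlib.train_simple_object_detector(training_xml_path, model_file, options)
        # test_simple_object_detector reports precision, recall and average precision
        print("C = {}".format(c))
        print("  train: {}".format(dlib.test_simple_object_detector(training_xml_path, model_file)))
        print("  test:  {}".format(dlib.test_simple_object_detector(testing_xml_path, model_file)))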
def dlib_test(test_folder, model_file="detector.svm"):
    from utils.preprocess import generate_all_abs_filenames
    # Now let's use the detector as you would in a normal application. First we
    # will load it from disk.
    detector = dlib.simple_object_detector(model_file)
    # We can look at the HOG filter we learned. It should look like a car. Neat!
    win_det = dlib.image_window()
    win_det.set_image(detector)
    # Now let's run the detector over the images in the testing folder and display
    # the results.
    print("Showing detections on the images in the testing folder...")
    win = dlib.image_window()
    files = generate_all_abs_filenames(test_folder)
    for f in files:
        # print("Processing file: {}".format(f))
        img = cv2.imread(f)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img = cv2.resize(img, None, fx=0.5, fy=0.5)
        # img = io.imread(f)
        # img = cv2.resize(img, (640, 640))
        # if np.mean(img) > 2.0:
        #     print("Going to divide by 255")
        #     img = img / 255.0
        dets = detector(img)
        # print("Number of cars detected: {}".format(len(dets)))
        # for k, d in enumerate(dets):
        #     print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
        #         k, d.left(), d.top(), d.right(), d.bottom()))
        win.clear_overlay()
        win.set_image(img)
        win.add_overlay(dets)
        # dlib.hit_enter_to_continue()
        # import time
        # time.sleep(0.001)
def show_with_cv_one(img):
    # Draw the detections from the module-level detector on one frame and
    # stamp the processing FPS on it.
    # win_det = dlib.image_window()
    # win_det.set_image(detector)
    import time
    start_time = time.perf_counter()
    img = cv2.resize(img, None, fx=0.5, fy=0.5)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    dets = detector(img)
    # print("Number of cars detected: {}".format(len(dets)))
    for k, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))
        rect = [[d.left(), d.top()], [d.right(), d.top()],
                [d.right(), d.bottom()], [d.left(), d.bottom()]]
        print(rect)
        img = cv2.rectangle(img, (d.left(), d.top()), (d.right(), d.bottom()), (0, 0, 255))
    end_time = time.perf_counter()
    # compute the detection fps
    current_fps = 1.0 / (end_time - start_time)
    cv2.putText(img, "FPS:{:5.2f}".format(current_fps), (5, 15),
                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 255))
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    return img
def pipeline_inference(img):
    global detector
    img = show_with_cv_one(img)
    cv2.imshow("Dlib detection result", img)
    cv2.waitKey(1)
    output = img
    return output
def combine_dlib_xml(files, combine_f='dlib_xml_total.xml'):
    # Merge several dlib annotation XML files together with the imglab tool, e.g.
    # ./imglab --add /home/miao/dataset/armer_video/v001/dlib_conf/manual_label_armer_half_size.xml /home/miao/dataset/armer_video/v002/dlib_conf/manual_label_armer_half_size.xml
    # --add writes its output to merged.xml, and that name cannot easily be changed.
    # Then shuffle the merged annotations:
    # ./imglab merged.xml --shuffle
    import shutil
    import subprocess
    from subprocess import TimeoutExpired, PIPE
    for f in files:
        assert os.path.isfile(f)
    tmp_dir = '/tmp/combine_dlib_xml/'
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)
    file1 = tmp_dir + '/file1.xml'
    file2 = tmp_dir + '/file2.xml'
    dlib_imglab_dir = '/home/miao/icra2018_dji/dlib_198/tools/imglab/build/'
    shutil.copyfile(files[0], file1)
    for f in files[1:]:
        # Repeatedly merge the accumulated result (file1) with the next file.
        shutil.copyfile(f, file2)
        proc = subprocess.Popen([dlib_imglab_dir + '/imglab', '--add', file1, file2], stdout=PIPE)
        try:
            outs, errs = proc.communicate(timeout=15)
        except TimeoutExpired:
            proc.kill()
            outs, errs = proc.communicate()
        shutil.copyfile('merged.xml', file1)
    shutil.copyfile('merged.xml', combine_f)
    proc = subprocess.Popen([dlib_imglab_dir + '/imglab', '--shuffle', combine_f], stdout=PIPE)
    try:
        outs, errs = proc.communicate(timeout=15)
    except TimeoutExpired:
        proc.kill()
        outs, errs = proc.communicate()
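# A minimal, commented-out usage sketch for combine_dlib_xml (the XML paths are
# placeholders): merge several per-video annotation files into one shuffled
# training file, then point train() at the result.
#   xml_files = ['/path/to/v001/dlib_conf/labels.xml',
#                '/path/to/v002/dlib_conf/labels.xml']
#   combine_dlib_xml(files=xml_files, combine_f='dlib_merge.xml')
#   train('dlib_merge.xml', 'dlib_merge.svm')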
if __name__ == '__main__':
    from utils.preprocess import split_the_abs_filename
    """
    dlib_f = 'manual_label_armer.xml'
    d_f = lambda x: "/home/rm/icra18_dji/dataset/rm_car/train/{}/dlib_conf/{}".format(x, dlib_f)
    list_d = ['v001', 'v002', 'v003', 'v004', 'v005']
    xml_files = [d_f(d) for d in list_d]
    print(xml_files)
    combine_f = 'dlib_merge.xml'
    # combine_dlib_xml(files=xml_files, combine_f=combine_f)
    training_xml_path = combine_f
    save_model_file = combine_f[:-4] + '.svm'
    """
    from conf.conf_loader import dlib_dir_conf, video_file_conf
    training_xml_path = dlib_dir_conf + '/dlib_armer_600_half_size.xml'
    save_model_file = training_xml_path[:-4] + '.svm'
    # train(training_xml_path, save_model_file)
    # dlib_test(frame_dir_conf, model_file=save_model_file)
    test_v_f = '/home/rm/icra18_dji/dataset/rm_car/test/t001.mp4'
    # test_v_f = video_file_conf
    video_output = test_v_f[:-4] + '_dlib_detection.mp4'
    # Now let's use the detector as you would in a normal application. First we
    # will load it from disk.
    detector = dlib.simple_object_detector(save_model_file)
    from moviepy.editor import VideoFileClip
    clip1 = VideoFileClip(test_v_f)
    clip = clip1.fl_image(pipeline_inference)
    clip.write_videofile(video_output, audio=False)
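    # An alternative, commented-out sketch: run the same pipeline over a camera
    # or video stream with OpenCV instead of moviepy (the device index 0 is an
    # assumption; any cv2.VideoCapture source works).
    # cap = cv2.VideoCapture(0)
    # while cap.isOpened():
    #     ret, frame = cap.read()
    #     if not ret:
    #         break
    #     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # match the RGB frames moviepy feeds in
    #     pipeline_inference(frame)
    # cap.release()
    # cv2.destroyAllWindows()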