recognize_gesture.py
import cv2, pickle
import numpy as np
import tensorflow as tf
from cnn_tf import cnn_model_fn
import os
import sqlite3
from keras.models import load_model

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # silence TensorFlow's C++ logging
tf.logging.set_verbosity(tf.logging.ERROR)

classifier = tf.estimator.Estimator(model_dir="tmp/cnn_model2", model_fn=cnn_model_fn)
prediction = None
model = load_model('cnn_model_keras2.h5')
def get_image_size():
    # Infer the expected input size from any stored gesture sample.
    img = cv2.imread('gestures/0/100.jpg', 0)
    return img.shape

image_x, image_y = get_image_size()
def tf_process_image(img):
    img = cv2.resize(img, (image_x, image_y))
    return np.array(img, dtype=np.float32)
def tf_predict(classifier, image):
    """Predict with the TensorFlow Estimator.

    Unfinished per the original author's note ("need help with prediction
    using tensorflow"). One likely issue: the processed array is 2-D, so
    numpy_input_fn treats each row as a separate batch element; it probably
    needs a leading batch dimension (e.g. a reshape to
    (1, image_x * image_y)), depending on what cnn_model_fn expects.
    """
    global prediction
    processed_array = tf_process_image(image)
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": processed_array}, shuffle=False)
    pred = classifier.predict(input_fn=pred_input_fn)
    prediction = next(pred)
    print(prediction)
def keras_process_image(img):
    img = cv2.resize(img, (image_x, image_y))
    img = np.array(img, dtype=np.float32)
    img = np.reshape(img, (1, image_x, image_y, 1))  # batch of one grayscale image
    return img
def keras_predict(model, image):
    processed = keras_process_image(image)
    pred_probab = model.predict(processed)[0]
    pred_class = np.argmax(pred_probab)
    return max(pred_probab), pred_class
def get_pred_text_from_db(pred_class):
    # Parameterised query instead of string concatenation (avoids quoting
    # issues and SQL injection); empty string keeps the caller's display
    # logic working when the id has no row.
    conn = sqlite3.connect("gesture_db.db")
    cursor = conn.execute("SELECT g_name FROM gesture WHERE g_id=?", (int(pred_class),))
    row = cursor.fetchone()
    conn.close()
    return row[0] if row else ""
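# The lookup above assumes a SQLite database with a table gesture(g_id,
# g_name); the table and column names are taken from the query itself. A
# minimal sketch of seeding it (the gesture names here are illustrative
# only, not the repo's actual labels):
#
#   conn = sqlite3.connect("gesture_db.db")
#   conn.execute("CREATE TABLE IF NOT EXISTS gesture "
#                "(g_id INTEGER PRIMARY KEY, g_name TEXT NOT NULL)")
#   conn.executemany("INSERT INTO gesture (g_id, g_name) VALUES (?, ?)",
#                    [(0, "hello"), (1, "thank you")])
#   conn.commit()
#   conn.close()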
def split_sentence(text, num_of_words):
    """Split text into chunks of num_of_words words each."""
    list_words = text.split(" ")
    length = len(list_words)
    splitted_sentence = []
    b_index = 0
    e_index = num_of_words
    while length > 0:
        # join() avoids the stray leading space that per-word concatenation
        # would otherwise render on the blackboard
        part = " ".join(list_words[b_index:e_index])
        splitted_sentence.append(part)
        b_index += num_of_words
        e_index += num_of_words
        length -= num_of_words
    return splitted_sentence
def put_splitted_text_in_blackboard(blackboard, splitted_text):
    y = 200
    for text in splitted_text:
        cv2.putText(blackboard, text, (4, y), cv2.FONT_HERSHEY_TRIPLEX, 2, (255, 255, 255))
        y += 50
def get_hand_hist():
    with open("hist", "rb") as f:
        hist = pickle.load(f)
    return hist
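# "hist" is expected to be a pickled hue-saturation histogram produced
# elsewhere in the repo: calcBackProject below reads channels [0, 1] with
# ranges [0, 180, 0, 256], so a compatible histogram could be built roughly
# like this (skin_sample is a hypothetical BGR crop of the user's hand):
#
#   roi_hsv = cv2.cvtColor(skin_sample, cv2.COLOR_BGR2HSV)
#   hist = cv2.calcHist([roi_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
#   cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX)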
def recognize():
    global prediction
    # Prefer an external camera (index 1); fall back to the default device.
    cam = cv2.VideoCapture(1)
    if not cam.read()[0]:
        cam = cv2.VideoCapture(0)
    hist = get_hand_hist()
    x, y, w, h = 300, 100, 300, 300  # region of interest for the hand
    while True:
        text = ""
        img = cam.read()[1]
        img = cv2.flip(img, 1)
        img = cv2.resize(img, (640, 480))
        imgCrop = img[y:y+h, x:x+w]
        # Back-project the stored skin histogram to isolate the hand,
        # then smooth and threshold the response into a binary mask.
        imgHSV = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        dst = cv2.calcBackProject([imgHSV], [0, 1], hist, [0, 180, 0, 256], 1)
        disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
        cv2.filter2D(dst, -1, disc, dst)
        blur = cv2.GaussianBlur(dst, (11, 11), 0)
        blur = cv2.medianBlur(blur, 15)
        thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        thresh = cv2.merge((thresh, thresh, thresh))
        thresh = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY)
        thresh = thresh[y:y+h, x:x+w]
        # findContours returns (image, contours, hierarchy) in OpenCV 3 but
        # (contours, hierarchy) in OpenCV 2 and 4+; indexing on the major
        # version keeps both working.
        openCV_ver = cv2.__version__.split(".")[0]
        if openCV_ver == '3':
            contours = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[1]
        else:
            contours = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]
        if len(contours) > 0:
            contour = max(contours, key=cv2.contourArea)
            if cv2.contourArea(contour) > 10000:
                x1, y1, w1, h1 = cv2.boundingRect(contour)
                save_img = thresh[y1:y1+h1, x1:x1+w1]
                # Pad the crop to a square so the resize does not distort it.
                if w1 > h1:
                    save_img = cv2.copyMakeBorder(save_img, int((w1-h1)/2), int((w1-h1)/2), 0, 0,
                                                  cv2.BORDER_CONSTANT, value=(0, 0, 0))
                elif h1 > w1:
                    save_img = cv2.copyMakeBorder(save_img, 0, 0, int((h1-w1)/2), int((h1-w1)/2),
                                                  cv2.BORDER_CONSTANT, value=(0, 0, 0))
                pred_probab, pred_class = keras_predict(model, save_img)
                if pred_probab*100 > 80:  # only accept confident predictions
                    text = get_pred_text_from_db(pred_class)
                    print(text)
        blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
        splitted_text = split_sentence(text, 2)
        put_splitted_text_in_blackboard(blackboard, splitted_text)
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
        res = np.hstack((img, blackboard))
        cv2.imshow("Recognizing gesture", res)
        cv2.imshow("thresh", thresh)
        if cv2.waitKey(1) == ord('q'):
            break
# Warm-up call: the first Keras prediction is slow, so run one on a dummy
# image before the camera loop starts.
keras_predict(model, np.zeros((50, 50), dtype=np.uint8))
recognize()
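# Usage, assuming the trained models ('cnn_model_keras2.h5' and the
# Estimator checkpoint under tmp/cnn_model2), the 'hist' pickle and
# gesture_db.db are all present in the working directory:
#
#   python recognize_gesture.py
#
# The script tries camera index 1 first and falls back to 0; press 'q' in
# the preview window to quit.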