-
Notifications
You must be signed in to change notification settings - Fork 8
/
v1like_funcs.py
475 lines (384 loc) · 13.9 KB
/
v1like_funcs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" v1like_funcs module
Key sub-operations performed in a simple V1-like model
(normalization, linear filtering, downsampling, etc.)
"""
import Image
import scipy as N
import scipy.signal
fftconv = scipy.signal.fftconvolve
conv = scipy.signal.convolve
from npclockit import clockit_onprofile
import time
PROFILE = False
# -------------------------------------------------------------------------
@clockit_onprofile(PROFILE)
def v1like_norm(hin, conv_mode, kshape, threshold):
""" V1S local normalization
Each pixel in the input image is divisively normalized by the L2 norm
of the pixels in a local neighborhood around it, and the result of this
division is placed in the output image.
Inputs:
hin -- a 3-dimensional array (width X height X rgb)
kshape -- kernel shape (tuple) ex: (3,3) for a 3x3 normalization
neighborhood
threshold -- magnitude threshold, if the vector's length is below
it doesn't get resized ex: 1.
Outputs:
hout -- a normalized 3-dimensional array (width X height X rgb)
"""
eps = 1e-5
kh, kw = kshape
dtype = hin.dtype
hsrc = hin[:].copy()
# -- prepare hout
hin_h, hin_w, hin_d = hin.shape
hout_h = hin_h - kh + 1
hout_w = hin_w - kw + 1
hout_d = hin_d
hout = N.empty((hout_h, hout_w, hout_d), 'f')
# -- compute numerator (hnum) and divisor (hdiv)
# sum kernel
hin_d = hin.shape[-1]
kshape3d = list(kshape) + [hin_d]
ker = N.ones(kshape3d, dtype=dtype)
size = ker.size
# compute sum-of-square
hsq = hsrc ** 2.
hssq = conv(hsq, ker, conv_mode).astype(dtype)
# compute hnum and hdiv
ys = kh / 2
xs = kw / 2
hout_h, hout_w, hout_d = hout.shape[-3:]
hs = hout_h
ws = hout_w
hsum = conv(hsrc, ker, conv_mode).astype(dtype)
hnum = hsrc[ys:ys+hs, xs:xs+ws] - (hsum/size)
val = (hssq - (hsum**2.)/size)
N.putmask(val, val<0, 0) # to avoid negative sqrt
hdiv = val ** (1./2) + eps
# -- apply normalization
# 'volume' threshold
N.putmask(hdiv, hdiv < (threshold+eps), 1.)
result = (hnum / hdiv)
hout[:] = result
return hout
@clockit_onprofile(PROFILE)
def v1like_norm2(hin, conv_mode, kshape, threshold):
""" V1LIKE local normalization
Each pixel in the input image is divisively normalized by the L2 norm
of the pixels in a local neighborhood around it, and the result of this
division is placed in the output image.
Inputs:
hin -- a 3-dimensional array (width X height X rgb)
kshape -- kernel shape (tuple) ex: (3,3) for a 3x3 normalization
neighborhood
threshold -- magnitude threshold, if the vector's length is below
it doesn't get resized ex: 1.
Outputs:
hout -- a normalized 3-dimensional array (width X height X rgb)
"""
eps = 1e-5
kh, kw = kshape
dtype = hin.dtype
hsrc = hin[:].copy()
# -- prepare hout
hin_h, hin_w, hin_d = hin.shape
hout_h = hin_h# - kh + 1
hout_w = hin_w# - kw + 1
if conv_mode != "same":
hout_h = hout_h - kh + 1
hout_w = hout_w - kw + 1
hout_d = hin_d
hout = N.empty((hout_h, hout_w, hout_d), 'float32')
# -- compute numerator (hnum) and divisor (hdiv)
# sum kernel
hin_d = hin.shape[-1]
kshape3d = list(kshape) + [hin_d]
ker = N.ones(kshape3d, dtype=dtype)
size = ker.size
# compute sum-of-square
hsq = hsrc ** 2.
#hssq = conv(hsq, ker, conv_mode).astype(dtype)
kerH = ker[:,0,0][:, None]#, None]
kerW = ker[0,:,0][None, :]#, None]
kerD = ker[0,0,:][None, None, :]
#s = time.time()
#r = conv(hsq, kerD, 'valid')[:,:,0]
#print time.time()-s
#s = time.time()
hssq = conv(
conv(
conv(hsq, kerD, 'valid')[:,:,0].astype(dtype),
kerW,
conv_mode),
kerH,
conv_mode).astype(dtype)
#hssq = conv(kerH,
#conv(kerW,
#conv(hsq, kerD, 'valid')[:,:,0].astype(dtype),
#conv_mode),
#conv_mode).astype(dtype)
hssq = hssq[:,:,None]
#print time.time()-s
# compute hnum and hdiv
ys = kh / 2
xs = kw / 2
hout_h, hout_w, hout_d = hout.shape[-3:]
hs = hout_h
ws = hout_w
#hsum = conv(hsrc, ker, conv_mode).astype(dtype)
hsum = conv(
conv(
conv(hsrc,
kerD, 'valid')[:,:,0].astype(dtype),
kerW,
conv_mode),
kerH,
conv_mode).astype(dtype)
#hsum = conv(kerH,
#conv(kerW,
#conv(hsrc,
#kerD, 'valid')[:,:,0].astype(dtype),
#conv_mode),
#conv_mode).astype(dtype)
hsum = hsum[:,:,None]
if conv_mode == 'same':
hnum = hsrc - (hsum/size)
else:
hnum = hsrc[ys:ys+hs, xs:xs+ws] - (hsum/size)
val = (hssq - (hsum**2.)/size)
val[val<0] = 0
hdiv = val ** (1./2) + eps
# -- apply normalization
# 'volume' threshold
N.putmask(hdiv, hdiv < (threshold+eps), 1.)
result = (hnum / hdiv)
#print result.shape
hout[:] = result
#print hout.shape, hout.dtype
return hout
v1like_norm = v1like_norm2
# -------------------------------------------------------------------------
fft_cache = {}
@clockit_onprofile(PROFILE)
def v1like_filter(hin, conv_mode, filterbank, use_cache=False):
""" V1LIKE linear filtering
Perform separable convolutions on an image with a set of filters
Inputs:
hin -- input image (a 2-dimensional array)
filterbank -- TODO list of tuples with 1d filters (row, col)
used to perform separable convolution
use_cache -- Boolean, use internal fft_cache (works _well_ if the input
shapes don't vary much, otherwise you'll blow away the memory)
Outputs:
hout -- a 3-dimensional array with outputs of the filters
(width X height X n_filters)
"""
nfilters = len(filterbank)
filt0 = filterbank[0]
fft_shape = N.array(hin.shape) + N.array(filt0.shape) - 1
hin_fft = scipy.signal.fftn(hin, fft_shape)
if conv_mode == "valid":
hout_shape = list( N.array(hin.shape[:2]) - N.array(filt0.shape[:2]) + 1 ) + [nfilters]
hout_new = N.empty(hout_shape, 'f')
begy = filt0.shape[0]
endy = begy + hout_shape[0]
begx = filt0.shape[1]
endx = begx + hout_shape[1]
elif conv_mode == "same":
hout_shape = hin.shape[:2] + (nfilters,)
hout_new = N.empty(hout_shape, 'f')
begy = filt0.shape[0] / 2
endy = begy + hout_shape[0]
begx = filt0.shape[1] / 2
endx = begx + hout_shape[1]
else:
raise NotImplementedError
for i in xrange(nfilters):
filt = filterbank[i]
if use_cache:
key = (filt.tostring(), tuple(fft_shape))
if key in fft_cache:
filt_fft = fft_cache[key]
else:
filt_fft = scipy.signal.fftn(filt, fft_shape)
fft_cache[key] = filt_fft
else:
filt_fft = scipy.signal.fftn(filt, fft_shape)
res_fft = scipy.signal.ifftn(hin_fft*filt_fft)
res_fft = res_fft[begy:endy, begx:endx]
hout_new[:,:,i] = N.real(res_fft)
hout = hout_new
return hout
# -------------------------------------------------------------------------
@clockit_onprofile(PROFILE)
#@profile
def v1like_pool(hin, conv_mode, lsum_ksize, outshape=None, order=1):
""" V1LIKE Pooling
Boxcar Low-pass filter featuremap-wise
Inputs:
hin -- a 3-dimensional array (width X height X n_channels)
lsum_ksize -- kernel size of the local sum ex: 17
outshape -- fixed output shape (2d slices)
order -- XXX
Outputs:
hout -- resulting 3-dimensional array
"""
order = float(order)
assert(order >= 1)
# -- local sum
if lsum_ksize is not None:
hin_h, hin_w, hin_d = hin.shape
dtype = hin.dtype
if conv_mode == "valid":
aux_shape = auxh, auxw, auxd = hin_h-lsum_ksize+1, hin_w-lsum_ksize+1, hin_d
aux = N.empty(aux_shape, dtype)
else:
aux = N.empty(hin.shape, dtype)
k1d = N.ones((lsum_ksize), 'f')
k2d = N.ones((lsum_ksize, lsum_ksize), 'f')
krow = k1d[None,:]
kcol = k1d[:,None]
for d in xrange(aux.shape[2]):
if order == 1:
aux[:,:,d] = conv(conv(hin[:,:,d], krow, conv_mode), kcol, conv_mode)
else:
aux[:,:,d] = conv(conv(hin[:,:,d]**order, krow, conv_mode), kcol, conv_mode)**(1./order)
else:
aux = hin
# -- resample output
if outshape is None or outshape == aux.shape:
hout = aux
else:
hout = sresample(aux, outshape)
return hout
# -------------------------------------------------------------------------
@clockit_onprofile(PROFILE)
def sresample(src, outshape):
""" Simple 3d array resampling
Inputs:
src -- a ndimensional array (dim>2)
outshape -- fixed output shape for the first 2 dimensions
Outputs:
hout -- resulting n-dimensional array
"""
inh, inw = inshape = src.shape[:2]
outh, outw = outshape
hslice = (N.arange(outh) * (inh-1.)/(outh-1.)).round().astype(int)
wslice = (N.arange(outw) * (inw-1.)/(outw-1.)).round().astype(int)
hout = src[hslice, :][:, wslice]
return hout.copy()
# -------------------------------------------------------------------------
@clockit_onprofile(PROFILE)
def get_image(img_fname, max_edge=None, min_edge=None,
resize_method='bicubic'):
""" Return a resized image as a numpy array
Inputs:
img_fname -- image filename
max_edge -- maximum edge length (None = no resize)
min_edge -- minimum edge length (None = no resize)
resize_method -- 'antialias' or 'bicubic'
Outputs:
imga -- result
"""
# -- open image
img = Image.open(img_fname)#.convert("RGB")
if max_edge is not None:
# -- resize so that the biggest edge is max_edge (keep aspect ratio)
iw, ih = img.size
if iw > ih:
new_iw = max_edge
new_ih = int(round(1.* max_edge * ih/iw))
else:
new_iw = int(round(1.* max_edge * iw/ih))
new_ih = max_edge
if resize_method.lower() == 'bicubic':
img = img.resize((new_iw, new_ih), Image.BICUBIC)
elif resize_method.lower() == 'antialias':
img = img.resize((new_iw, new_ih), Image.ANTIALIAS)
else:
raise ValueError("resize_method '%s' not understood", resize_method)
# -- convert to a numpy array
imga = N.misc.fromimage(img)#/255.
return imga
# -------------------------------------------------------------------------
@clockit_onprofile(PROFILE)
def get_image2(img_fname, resize=None):
""" Return a resized image as a numpy array
Inputs:
img_fname -- image filename
resize -- tuple of (type, size) where type='min_edge' or 'max_edge'
if None = no resize
Outputs:
imga -- result
"""
# -- open image
img = Image.open(img_fname)
# -- resize image if needed
if resize is not None:
rtype, rsize = resize
if rtype == 'min_edge':
# -- resize so that the smallest edge is rsize (keep aspect ratio)
iw, ih = img.size
if iw < ih:
new_iw = rsize
new_ih = int(round(1.* rsize * ih/iw))
else:
new_iw = int(round(1.* rsize * iw/ih))
new_ih = rsize
elif rtype == 'max_edge':
# -- resize so that the biggest edge is rszie (keep aspect ratio)
iw, ih = img.size
if iw > ih:
new_iw = rsize
new_ih = int(round(1.* rsize * ih/iw))
else:
new_iw = int(round(1.* rsize * iw/ih))
new_ih = rsize
else:
raise ValueError, "resize parameter not understood"
if resize_method.lower() == 'bicubic':
img = img.resize((new_iw, new_ih), Image.BICUBIC)
elif resize_method.lower() == 'antialias':
img = img.resize((new_iw, new_ih), Image.ANTIALIAS)
else:
raise ValueError("resize_method '%s' not understood", resize_method)
# -- convert to a numpy array
imga = N.misc.fromimage(img)#/255.
return imga
# -------------------------------------------------------------------------
@clockit_onprofile(PROFILE)
def rephists(hin, division, nfeatures):
""" Compute local feature histograms from a given 3d (width X height X
n_channels) image.
These histograms are intended to serve as easy-to-compute additional
features that can be concatenated onto the V1-like output vector to
increase performance with little additional complexity. These additional
features are only used in the V1LIKE+ (i.e. + 'easy tricks') version of
the model.
Inputs:
hin -- 3d image (width X height X n_channels)
division -- granularity of the local histograms (e.g. 2 corresponds
to computing feature histograms in each quadrant)
nfeatures -- desired number of resulting features
Outputs:
fvector -- feature vector
"""
hin_h, hin_w, hin_d = hin.shape
nzones = hin_d * division**2
nbins = nfeatures / nzones
sx = (hin_w-1.)/division
sy = (hin_h-1.)/division
fvector = N.zeros((nfeatures), 'f')
hists = []
for d in xrange(hin_d):
h = [N.histogram(hin[j*sy:(j+1)*sy,i*sx:(i+1)*sx,d], bins=nbins)[0].ravel()
for i in xrange(division)
for j in xrange(division)
]
hists += [h]
hists = N.array(hists, 'f').ravel()
fvector[:hists.size] = hists
return fvector