2c2d57c7
Chunk
ILSVRC datapath h...
|
1
2
3
|
__author__ = 'chunk'
from . import *
|
84648488
Chunk
reverted.
|
4
|
from ..mfeat import HOG, IntraBlockDiff
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
from ..mspark import SC
from ..common import *
import os, sys
from PIL import Image
from hashlib import md5
import csv
import shutil
import json
import collections
import happybase
from ..mjpeg import *
from ..msteg import *
|
9ff70cf4
Chunk
capacity engeneer...
|
19
|
from ..msteg.steganography import LSB, F3, F4, F5
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
20
|
|
d1042d03
Chunk
staged.
|
21
22
23
24
|
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
|
ec755e37
Chunk
cropping.
|
25
|
import random
|
d1042d03
Chunk
staged.
|
26
|
|
080c30c2
Chunk
F5 lib updated. I...
|
27
28
|
from subprocess import Popen, PIPE, STDOUT
|
84648488
Chunk
reverted.
|
29
|
|
d1042d03
Chunk
staged.
|
30
31
|
# Seed NumPy's global random generator at import time with a fixed value
# (the sum of the character codes of "whoami").
np.random.seed(sum(map(ord, "whoami")))
|
080c30c2
Chunk
F5 lib updated. I...
|
32
33
|
# Absolute path of the directory that contains this module; the embedding
# routines join it with '../res' and '../libs' to locate bundled resources.
package_dir = os.path.dirname(os.path.abspath(__file__))
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
34
35
|
class DataILSVRC(DataDumperBase):
    """Data dumper for a directory of ILSVRC images.

    The methods of this class copy images into a hash-addressed folder tree,
    maintain a tab-separated index file, embed payloads with F5, extract
    feature descriptors, and store/load the results through HBase (happybase)
    or Spark.
    """
|
84648488
Chunk
reverted.
|
36
|
def __init__(self, base_dir='/media/chunk/Elements/D/data/ImageNet/img/ILSVRC2013_DET_val', category='Train'):
    """Set up the directory layout and bookkeeping for one dataset split.

    :param base_dir: root directory of the image collection.
    :param category: name of the sub-directory of ``base_dir`` that holds
        the raw images; also used for the output folder and the table name.
    """
    DataDumperBase.__init__(self, base_dir, category)

    self.base_dir = base_dir
    self.category = category

    # Raw input is read from <base_dir>/<category>; everything produced by
    # this class is written below <base_dir>/dst/<category>.
    self.data_dir = os.path.join(base_dir, category)
    dst_root = os.path.join(base_dir, 'dst', category)
    self.dst_dir = dst_root
    self.list_file = os.path.join(dst_root, 'file-tag.tsv')
    self.feat_dir = os.path.join(dst_root, 'Feat')
    self.img_dir = os.path.join(dst_root, 'Img')

    # md5 hex digest -> list of per-image attributes (filled on ingestion).
    self.dict_data = {}

    # "<last path component of base_dir>-<category>"
    leaf = base_dir.strip('/').split('/')[-1]
    self.table_name = leaf + '-' + category

    # Created on first use by load_data(mode='spark').
    self.sparker = None
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
52
53
|
def format(self):
    """Announce the step on stdout and delegate to :meth:`extract`."""
    print("formatting...")
    self.extract()
def _hash_copy(self, image):
    """Register one image in ``self.dict_data`` and copy it into the image tree.

    The image is keyed by the md5 hex digest of its file content and copied
    to ``<img_dir>/<digest[:3]>/<digest[3:]>.jpg`` unless that file already
    exists.

    :param image: path of the source image. A path that does not end in
        'jpg' is first re-saved as JPEG to '../res/tmp.jpg' (relative to the
        current working directory) and that copy is used instead.
    """
    if not image.endswith('jpg'):
        converted = Image.open(image)
        converted.save('../res/tmp.jpg', format='JPEG')
        image = '../res/tmp.jpg'

    with open(image, 'rb') as src:
        digest = md5(src.read()).hexdigest()

    jpeg = Jpeg(image, key=sample_key)
    width = jpeg.image_width
    height = jpeg.image_height
    # Row layout: width, height, pixel count, capacity, quality.
    self.dict_data[digest] = [width, height, width * height, jpeg.getCapacity(), jpeg.getQuality()]

    # The first three hex digits select the sub-folder, the rest the name.
    bucket = os.path.join(self.img_dir, digest[:3])
    if not os.path.exists(bucket):
        os.makedirs(bucket)

    target = os.path.join(bucket, digest[3:] + '.jpg')
    if not os.path.exists(target):
        shutil.copy(image, target)
|
554a7b9a
Chunk
staged.
|
85
|
def get_feat(self, image, feattype='ibd', **kwargs):
    """Compute and return the feature descriptor of a single image.

    :param image: image handed to the extractor's ``feat`` method.
    :param feattype: 'hog' or 'ibd'.
    :param kwargs: optional ``size`` (default ``(48, 48)``), used only by
        the 'hog' extractor.
    :raises Exception: if ``feattype`` is neither 'hog' nor 'ibd'.
    """
    size = kwargs.get('size', (48, 48))

    if feattype not in ('hog', 'ibd'):
        raise Exception("Unknown feature type!")

    if feattype == 'hog':
        extractor = HOG.FeatHOG(size=size)
    else:
        extractor = IntraBlockDiff.FeatIntraBlockDiff()
    return extractor.feat(image)
|
84648488
Chunk
reverted.
|
99
|
|
554a7b9a
Chunk
staged.
|
100
|
def extract_feat(self, feattype='ibd'):
    """Compute a descriptor for every listed image and save it as JSON.

    The first column of ``self.list_file`` names the images. For each one
    the descriptor of ``<img_dir>/<id[:3]>/<id[3:]>.jpg`` is written to
    ``<feat_dir>/<id[:3]>/<id[3:]>.<feattype>``.

    Each descriptor is written as soon as it is computed, so memory use does
    not grow with the size of the dataset and finished work survives a
    failure on a later image.

    :param feattype: 'hog' or 'ibd'.
    :raises Exception: if ``feattype`` is neither 'hog' nor 'ibd'.
    """
    print("extracting feat...")
    if feattype == 'hog':
        feater = HOG.FeatHOG(size=(48, 48))
    elif feattype == 'ibd':
        feater = IntraBlockDiff.FeatIntraBlockDiff()
    else:
        raise Exception("Unknown feature type!")

    with open(self.list_file, 'rb') as listing:
        image_ids = [row[0] for row in csv.reader(listing, delimiter='\t')]

    done = set()  # an id that is listed twice is only processed once
    for image_id in image_ids:
        if image_id in done:
            continue
        done.add(image_id)

        image = os.path.join(self.img_dir, image_id[:3], image_id[3:] + '.jpg')
        desc = feater.feat(image)

        out_dir = os.path.join(self.feat_dir, image_id[:3])
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        featpath = os.path.join(out_dir, image_id[3:].split('.')[0] + '.' + feattype)
        with open(featpath, 'wb') as featfile:
            featfile.write(json.dumps(desc.tolist()))
|
080c30c2
Chunk
F5 lib updated. I...
|
131
132
133
134
|
def _build_list(self, list_file=None):
    """Dump ``self.dict_data`` to a tab-separated file, sorted by key.

    Each row is the key followed by the values stored under it.

    :param list_file: destination path; ``self.list_file`` when omitted.
    """
    target = self.list_file if list_file is None else list_file
    assert target is not None

    ordered_keys = sorted(self.dict_data)

    with open(target, 'w') as out:
        writer = csv.writer(out, delimiter='\t')
        for key in ordered_keys:
            writer.writerow([key] + self.dict_data[key])
|
080c30c2
Chunk
F5 lib updated. I...
|
143
144
145
146
147
|
def _anaylis(self, list_file=None):
    """Sort the index file and append the 'rate', 'chosen' and 'class' columns.

    The file is read as hash/width/height/size/capacity/quality, sorted by
    (capacity, size, quality) ascending and rewritten in place, without a
    header, with three extra columns:

    * ``rate``   -- 0.0 for every row,
    * ``chosen`` -- a Bernoulli(0.8) draw per row,
    * ``class``  -- 0 for every row.

    :param list_file: file to rewrite; ``self.list_file`` when omitted.
    """
    if list_file is None:
        list_file = self.list_file
    assert list_file is not None

    df_ILS = pd.read_csv(list_file, names=['hash', 'width', 'height', 'size', 'capacity', 'quality'], sep='\t')
    length = df_ILS.shape[0]

    # DataFrame.sort() no longer exists in current pandas; use sort_values()
    # when it is available and fall back to sort() on old installations.
    sort_rows = getattr(df_ILS, 'sort_values', None) or df_ILS.sort
    df_ILS = sort_rows(['capacity', 'size', 'quality'], ascending=True)
    rand_class = stats.bernoulli.rvs(0.8, size=length)

    df_ILS['rate'] = np.zeros(length, np.float64)
    df_ILS['chosen'] = rand_class
    df_ILS['class'] = np.zeros(length, np.int32)

    df_ILS.to_csv(list_file, header=False, index=False, sep='\t')
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
158
159
|
def extract(self):
    """Ingest every file below ``self.data_dir`` and rebuild the index file.

    Each file is passed to :meth:`_hash_copy`. A file that cannot be
    processed is reported on stdout and skipped, so one unreadable image
    does not abort the run. Afterwards the index is written by
    :meth:`_build_list` and post-processed by :meth:`_anaylis`.
    """
    print("extracting data...")
    for path, subdirs, files in os.walk(self.data_dir):
        for name in files:
            imagepath = os.path.join(path, name)
            try:
                self._hash_copy(imagepath)
            except Exception as e:
                # Best effort: report and continue. Catching Exception (not
                # everything) lets Ctrl-C and SystemExit stop the walk.
                print('[SKIP] %s: %s' % (imagepath, e))

    self._build_list()
    self._anaylis()
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
172
|
|
84648488
Chunk
reverted.
|
173
|
|
9ff70cf4
Chunk
capacity engeneer...
|
174
|
def _embed_outer(self):
    """Embed a payload into every chosen image with the external Java F5 tool.

    For each row of ``self.list_file`` whose ``chosen`` column is 1 the
    command ``java Embed ...`` is run, writing its result to 'res/tmp.jpg'
    (relative to the current working directory); that file is then
    registered through :meth:`_hash_copy`. The tool's output is saved to
    ``<list_file>.embed.log``, the new images are listed in
    ``<list_file>.embed`` and finally appended to ``self.list_file`` with
    ``chosen`` = 0 and ``class`` = 1.

    NOTE(review): the column names used here (8 for the index, 5 for the
    '.embed' file) do not match what ``_anaylis`` (9 columns) and
    ``_hash_copy`` (hash + 5 values) write -- confirm before relying on
    this code path.
    """
    self.dict_data = {}
    embed_log = {}
    os.environ["CLASSPATH"] = os.path.join(package_dir, "../libs/F5/")
    # NOTE(review): the command line is built by string formatting and run
    # through the shell; paths containing spaces or shell metacharacters
    # would break it.
    cmd = 'java Embed %s %s -e %s -p password -c "stegan by chunk " -q %d'

    df_ILS = pd.read_csv(self.list_file,
                         names=['hash', 'width', 'height', 'size', 'capacity', 'quality', 'chosen', 'class'],
                         sep='\t')
    df_ILS_TARGET = df_ILS[df_ILS['chosen'] == 1]

    for img_hash, quality in zip(df_ILS_TARGET['hash'], df_ILS_TARGET['quality']):
        path_img = os.path.join(self.img_dir, img_hash[:3], img_hash[3:] + '.jpg')
        print(path_img)

        p = Popen(cmd % (path_img, 'res/tmp.jpg', 'res/toembed', quality), shell=True, stdout=PIPE,
                  stderr=STDOUT)
        embed_log[img_hash] = [line.strip('\n') for line in p.stdout.readlines()]
        # Reap the child and release the pipe once its output is consumed.
        p.stdout.close()
        p.wait()

        try:
            self._hash_copy('res/tmp.jpg')
        except Exception as e:
            # Best effort: report and go on with the next image.
            print('[SKIP] %s: %s' % (path_img, e))

    with open(self.list_file + '.embed.log', 'wb') as f:
        tsvfile = csv.writer(f, delimiter='\t')
        for key, value in embed_log.items():
            tsvfile.writerow([key] + value)

    self._build_list(self.list_file + '.embed')

    # merge
    df_ILS_EMBED = pd.read_csv(self.list_file + '.embed', names=['hash', 'width', 'height', 'size', 'quality'],
                               sep='\t')
    length = df_ILS_EMBED.shape[0]
    # DataFrame.sort()/append() no longer exist in current pandas.
    sort_rows = getattr(df_ILS_EMBED, 'sort_values', None) or df_ILS_EMBED.sort
    df_ILS_EMBED = sort_rows(['size', 'quality'], ascending=True)
    df_ILS_EMBED['chosen'] = np.zeros(length, np.int32)
    df_ILS_EMBED['class'] = np.ones(length, np.int32)
    df_ILS = pd.concat([df_ILS, df_ILS_EMBED], ignore_index=True)
    df_ILS.to_csv(self.list_file, header=False, index=False, sep='\t')
|
080c30c2
Chunk
F5 lib updated. I...
|
214
|
|
9ff70cf4
Chunk
capacity engeneer...
|
215
216
217
218
219
|
def _embed_inner(self, rate=None):
    """Embed a payload into every chosen image with the in-process F5 coder.

    For each row of ``self.list_file`` whose ``chosen`` column is 1 the
    stego image is written to a temporary file, keyed by the md5 of its
    content, recorded in ``self.dict_data`` and copied into the image tree.
    The new images are listed in ``<list_file>.embed`` and appended to
    ``self.list_file`` with ``chosen`` = 0 and ``class`` = 1.

    :param rate: when ``None`` the payload is the bundled '../res/toembed'
        resource; otherwise ``int(capacity * rate) // 8`` random bytes are
        embedded, and ``rate`` must satisfy ``0 <= rate < 1``.
    """
    self.dict_data = {}
    f5 = F5.F5(sample_key, 1)
    tmp_img = os.path.join(package_dir, '../res/tmp.jpg')
    df_ILS = pd.read_csv(self.list_file,
                         names=['hash', 'width', 'height', 'size', 'capacity', 'quality', 'rate', 'chosen',
                                'class'],
                         sep='\t')
    df_ILS_TARGET = df_ILS[df_ILS['chosen'] == 1]

    for img_hash, capacity in zip(df_ILS_TARGET['hash'], df_ILS_TARGET['capacity']):
        path_img = os.path.join(self.img_dir, img_hash[:3], img_hash[3:] + '.jpg')
        print(path_img)

        if rate is None:
            embed_rate = f5.embed_raw_data(path_img, os.path.join(package_dir, '../res/toembed'), tmp_img)
        else:
            assert (rate >= 0 and rate < 1)
            # Explicit floor division: same result as the Python 2 int '/'.
            hidden = np.random.bytes(int(capacity * rate) // 8)
            embed_rate = f5.embed_raw_data(path_img, hidden, tmp_img, frommem=True)

        try:
            with open(tmp_img, 'rb') as f:
                index = md5(f.read()).hexdigest()
            im = Jpeg(tmp_img, key=sample_key)
            self.dict_data[index] = [im.image_width, im.image_height, im.image_width * im.image_height,
                                     im.getCapacity(),
                                     im.getQuality(), embed_rate]

            bucket = os.path.join(self.img_dir, index[:3])
            if not os.path.exists(bucket):
                os.makedirs(bucket)
            image_path = os.path.join(bucket, index[3:] + '.jpg')
            if not os.path.exists(image_path):
                shutil.copy(tmp_img, image_path)
        except Exception as e:
            # Best effort: report and go on with the next image.
            print('[SKIP] %s: %s' % (path_img, e))

    self._build_list(self.list_file + '.embed')

    # merge
    df_ILS_EMBED = pd.read_csv(self.list_file + '.embed',
                               names=['hash', 'width', 'height', 'size', 'capacity', 'quality', 'rate'],
                               sep='\t')
    # DataFrame.sort()/append() no longer exist in current pandas.
    sort_rows = getattr(df_ILS_EMBED, 'sort_values', None) or df_ILS_EMBED.sort
    df_ILS_EMBED = sort_rows(['rate', 'capacity', 'size', 'quality'], ascending=True)
    df_ILS_EMBED['chosen'] = np.zeros(df_ILS_EMBED.shape[0], np.int32)
    df_ILS_EMBED['class'] = np.ones(df_ILS_EMBED.shape[0], np.int32)
    df_ILS = pd.concat([df_ILS, df_ILS_EMBED], ignore_index=True)
    df_ILS.to_csv(self.list_file, header=False, index=False, sep='\t')
|
554a7b9a
Chunk
staged.
|
272
|
def embed(self, rate=None):
    """Announce the step on stdout and delegate to :meth:`_embed_inner`.

    :param rate: passed through unchanged to ``_embed_inner``.
    """
    print("embedding data...")
    self._embed_inner(rate)
|
9ff70cf4
Chunk
capacity engeneer...
|
275
|
|
84648488
Chunk
reverted.
|
276
|
|
ec755e37
Chunk
cropping.
|
277
|
def crop(self, size=(300, 300)):
    """Save a randomly positioned crop of every image below ``self.data_dir``.

    Crops are written to ``<data_dir>_crop_pil/<file name>`` using the
    quantization tables of the source image. Images smaller than the
    requested size are skipped; any error on a file is printed and the walk
    continues.

    :param size: ``(width, height)`` of the crop.
    """
    out_dir = self.data_dir + '_crop_pil'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for root, _dirs, filenames in os.walk(self.data_dir):
        for filename in filenames:
            src_path = os.path.join(root, filename)
            print(src_path)

            W, H = size
            try:
                picture = Image.open(src_path)
                tables = picture.quantization
                w, h = picture.size
                if w < W or h < H:
                    continue
                left = random.randint(0, w - W)
                upper = random.randint(0, h - H)
                box = (left, upper, left + W, upper + H)
                picture.crop(box).save(os.path.join(out_dir, filename), qtables=tables)
            except Exception as e:
                print('[EXCPT] %s' % (e,))
# try:
# img = cv2.imread(image, cv2.CV_LOAD_IMAGE_UNCHANGED)
|
b9990e77
Chunk
staged.
|
302
303
|
# h, w = img.shape[:2]
# if w < 300 or h < 300:
|
25c0c9c9
Chunk
feat.ravel()[[i*3...
|
304
|
# continue
|
e6be6b61
Chunk
import caffe.
|
305
|
# left, upper = random.randint(0, w - 300), random.randint(0, h - 300)
|
bde8352b
Chunk
shuffling.
|
306
307
|
# img_crop = img[upper:upper + 300, left:left + 300]
# cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop)
|
ec755e37
Chunk
cropping.
|
308
|
# except Exception as e:
|
bde8352b
Chunk
shuffling.
|
309
|
# print '[EXCPT]', e
|
ec755e37
Chunk
cropping.
|
310
311
|
# pass
|
84648488
Chunk
reverted.
|
312
|
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
313
|
def get_table(self):
    """Return the HBase table handle for ``self.table_name``.

    A cached handle is returned when present. Otherwise a connection to
    'HPC-server' is opened if needed, the table is created with
    LZO-compressed column families when it does not exist yet, and the
    handle is cached in ``self.table``.
    """
    print("getting table...")
    if self.table is not None:
        return self.table

    if self.connection is None:
        self.connection = happybase.Connection('HPC-server')

    if self.table_name not in self.connection.tables():
        lzo_families = {
            'cf_pic': {'compression': 'LZO'},
            'cf_info': {'max_versions': 10, 'compression': 'LZO'},
            'cf_tag': {'compression': 'LZO'},
            'cf_feat': {'compression': 'LZO'},
        }
        self.connection.create_table(name=self.table_name, families=lzo_families)

    self.table = self.connection.table(name=self.table_name)
    return self.table
|
d47ae6ce
Chunk
staged.
|
342
|
def delete_table(self, table_name=None, disable=True):
    """Delete an HBase table if it exists.

    :param table_name: table to delete; ``self.table_name`` when omitted.
    :param disable: forwarded to the connection's ``delete_table``.
    :return: ``True`` if the table was deleted, ``False`` if no table of
        that name exists.
    :raises: whatever the connection raises while deleting (after a short
        message has been printed).
    """
    print("deleting table...")
    if table_name is None:
        table_name = self.table_name

    if self.connection is None:
        self.connection = happybase.Connection('HPC-server')

    if table_name not in self.connection.tables():
        return False

    try:
        self.connection.delete_table(table_name, disable)
    except Exception:
        print('Exception when deleting table.')
        raise

    # Drop the cached handle of the table that was just removed; otherwise
    # get_table() would keep returning it instead of re-creating the table.
    if table_name == self.table_name:
        self.table = None
    return True
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
361
|
|
ad70caf6
Chunk
staged.
|
362
|
def store_img(self):
    """Upload every listed image to the 'cf_pic:data' column of the table.

    The first column of ``self.list_file`` names the images; each file
    ``<img_dir>/<id[:3]>/<id[3:]>.jpg`` is stored under the row key
    ``<id>.jpg``. Images are read one at a time and handed straight to the
    batch, so the whole image set is never held in memory at once.
    """
    if self.table is None:
        self.table = self.get_table()

    with open(self.list_file, 'rb') as listing:
        with self.table.batch(batch_size=2000) as b:
            for row in csv.reader(listing, delimiter='\t'):
                image_id = row[0]
                path_img = os.path.join(self.img_dir, image_id[:3], image_id[3:] + '.jpg')
                with open(path_img, 'rb') as fpic:
                    b.put(image_id + '.jpg', {'cf_pic:data': fpic.read()})
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
382
|
|
84648488
Chunk
reverted.
|
383
|
|
cb798a7f
Chunk
libs & scripts in...
|
384
385
386
387
388
389
390
391
392
|
def store_info(self, infotype='all'):
    """Upload the per-image attributes from the index file to 'cf_info'.

    For each row of ``self.list_file`` the fields between the first column
    and the last two columns are taken; the first five of them are stored as
    width, height, size, capacity and quality under the row key
    ``<first column>.jpg``.

    :param infotype: only 'all' is supported.
    :raises Exception: for any other ``infotype`` (after the file was read).
    """
    if self.table == None:
        self.table = self.get_table()

    with open(self.list_file, 'rb') as listing:
        info_by_row = {}
        for row in csv.reader(listing, delimiter='\t'):
            info_by_row[row[0] + '.jpg'] = row[1:-2]

    if infotype != 'all':
        raise Exception("Unknown infotype!")

    columns = ('cf_info:width', 'cf_info:height', 'cf_info:size', 'cf_info:capacity', 'cf_info:quality')
    with self.table.batch(batch_size=5000) as b:
        for rowkey, info in info_by_row.items():
            b.put(rowkey, dict((column, info[pos]) for pos, column in enumerate(columns)))
|
cb798a7f
Chunk
libs & scripts in...
|
407
|
|
84648488
Chunk
reverted.
|
408
|
|
080c30c2
Chunk
F5 lib updated. I...
|
409
|
def store_tag(self, tagtype='all'):
    """Upload the last two index columns to 'cf_tag:chosen' / 'cf_tag:class'.

    Row keys are ``<first column>.jpg``.

    :param tagtype: only 'all' is supported.
    :raises Exception: for any other ``tagtype`` (after the file was read).
    """
    if self.table == None:
        self.table = self.get_table()

    with open(self.list_file, 'rb') as listing:
        tags_by_row = {}
        for row in csv.reader(listing, delimiter='\t'):
            tags_by_row[row[0] + '.jpg'] = row[-2:]

    if tagtype != 'all':
        raise Exception("Unknown tagtype!")

    with self.table.batch(batch_size=5000) as b:
        for rowkey, tags in tags_by_row.items():
            b.put(rowkey, {'cf_tag:chosen': tags[0], 'cf_tag:class': tags[1]})
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
429
|
|
84648488
Chunk
reverted.
|
430
|
|
2c2d57c7
Chunk
ILSVRC datapath h...
|
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
|
def store_feat(self, feattype='ibd'):
    """Upload the saved ``feattype`` descriptors to the 'cf_feat' family.

    Every file below ``self.feat_dir`` whose name ends in ``.<feattype>`` is
    stored, byte for byte, in column ``cf_feat:<feattype>`` under the row
    key ``<parent folder name><file stem>.jpg``. Files with any other
    extension (for example descriptors of a different feature type kept in
    the same tree) are ignored.

    :param feattype: feature type / file extension to upload.
    """
    if self.table is None:
        self.table = self.get_table()

    suffix = '.' + feattype
    with self.table.batch(batch_size=5000) as b:
        for path, subdirs, files in os.walk(self.feat_dir):
            folder = path.split('/')[-1]
            for name in files:
                if not name.endswith(suffix):
                    continue
                imgname = folder + name[:-len(suffix)] + '.jpg'
                with open(os.path.join(path, name), 'rb') as featfile:
                    b.put(imgname, {'cf_feat:' + feattype: featfile.read()})
|
84648488
Chunk
reverted.
|
452
|
|
bde8352b
Chunk
shuffling.
|
453
|
def load_data(self, mode='local', feattype='ibd', tagtype='class', shuffle=False):
    """Load feature vectors and their labels.

    :param mode: 'local' (index file plus files on disk), 'hbase' (scan of
        the table) or 'spark' (read through the Sparker helper).
    :param feattype: feature type; in local mode 'coef' reads the Y-channel
        coefficient matrix of each JPEG instead of a saved descriptor file.
    :param tagtype: qualifier of the 'cf_tag' column used as label in
        'hbase' mode.
    :param shuffle: when true, return a shuffled list of ``(x, y)`` pairs
        instead of the two parallel lists.
    :return: ``(X, Y)``, or a list of ``(x, y)`` pairs when ``shuffle``.
    :raises Exception: for an unknown ``mode``.
    """
    print("loading data...")
    X, Y = [], []

    if mode == "local":
        samples = {}  # hash -> (tag, feature); last index column is the tag

        if feattype == 'coef':  # raw
            with open(self.list_file, 'rb') as listing:
                for row in csv.reader(listing, delimiter='\t'):
                    img_hash, tag = row[0], row[-1]
                    image = os.path.join(self.img_dir, img_hash[:3], img_hash[3:] + '.jpg')
                    jpeg = Jpeg(image, key=sample_key)
                    samples[img_hash] = (tag, jpeg.getCoefMatrix(channel='Y'))

            # Flat positions i * 200 + j for i, j in 0, 8, 16, ... < 200.
            # Presumably the first coefficient of each 8x8 block of a
            # 200x200 matrix -- TODO confirm.
            block_origin_idx = [i * 200 + j for i in range(0, 200, 8) for j in range(0, 200, 8)]
            for tag, coef in samples.values():
                coef.ravel()[block_origin_idx] = 0
                # Keep only the lowest bit of each absolute coefficient.
                low_bits = np.bitwise_and(np.absolute(coef), 1)
                X.append(low_bits.ravel())
                Y.append(int(tag))
        else:
            with open(self.list_file, 'rb') as listing:
                for row in csv.reader(listing, delimiter='\t'):
                    img_hash, tag = row[0], row[-1]
                    path_feat = os.path.join(self.feat_dir, img_hash[:3], img_hash[3:] + '.' + feattype)
                    with open(path_feat, 'rb') as featfile:
                        samples[img_hash] = (tag, json.loads(featfile.read()))

            for tag, feat in samples.values():
                X.append(np.array(feat).ravel().tolist())
                Y.append(int(tag))

    elif mode == "hbase":  # remote
        if self.table == None:
            self.table = self.get_table()

        col_feat = 'cf_feat:' + feattype
        col_tag = 'cf_tag:' + tagtype
        for key, data in self.table.scan(columns=[col_feat, col_tag]):
            # Flatten the three-level nested list into one flat list.
            flat = []
            for plane in json.loads(data[col_feat]):
                for line in plane:
                    for item in line:
                        flat.append(item)
            X.append(flat)
            Y.append(int(data[col_tag]))

    elif mode == "spark":  # cluster
        if self.sparker == None:
            self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077')

        result = self.sparker.read_hbase(self.table_name)  # result = {key:[feat,tag],...}
        for feat, tag in result:
            X.append(feat)
            Y.append(tag)

    else:
        raise Exception("Unknown mode!")

    if shuffle:
        # shuffling
        Z = list(zip(X, Y))
        np.random.shuffle(Z)
        return Z

    return X, Y
|
84648488
Chunk
reverted.
|
|
|