Commit 0bd44a28a8510a0496ca6271fe2a362dd152c584 (1 parent: 66c71f34)
Exists in master and in 1 other branch (staged).
Showing 2 changed files with 46 additions and 12 deletions.
Show diff stats
mdata/ILSVRC_S.py
@@ -244,7 +244,7 @@ class DataILSVRC_S(DataDumperBase): | @@ -244,7 +244,7 @@ class DataILSVRC_S(DataDumperBase): | ||
244 | elif mode == 'spark': | 244 | elif mode == 'spark': |
245 | if self.sparker == None: | 245 | if self.sparker == None: |
246 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | 246 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', |
247 | - master='spark://HPC-server:7077') | 247 | + master='spark://HPC-server:7077') |
248 | 248 | ||
249 | cols = [ | 249 | cols = [ |
250 | 'cf_pic:data', | 250 | 'cf_pic:data', |
@@ -267,14 +267,14 @@ class DataILSVRC_S(DataDumperBase): | @@ -267,14 +267,14 @@ class DataILSVRC_S(DataDumperBase): | ||
267 | 267 | ||
268 | 268 | ||
269 | self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS, | 269 | self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS, |
270 | - collect=False).mapValues( | 270 | + collect=False).mapValues( |
271 | lambda data: [data] + SC.rddinfo_ILS(data)) | 271 | lambda data: [data] + SC.rddinfo_ILS(data)) |
272 | 272 | ||
273 | if not writeback: | 273 | if not writeback: |
274 | return self.rdd_data | 274 | return self.rdd_data |
275 | else: | 275 | else: |
276 | self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, | 276 | self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, |
277 | - withdata=withdata) | 277 | + withdata=withdata) |
278 | 278 | ||
279 | else: | 279 | else: |
280 | raise Exception("Unknown mode!") | 280 | raise Exception("Unknown mode!") |
@@ -367,7 +367,7 @@ class DataILSVRC_S(DataDumperBase): | @@ -367,7 +367,7 @@ class DataILSVRC_S(DataDumperBase): | ||
367 | elif mode == 'spark': | 367 | elif mode == 'spark': |
368 | if self.sparker == None: | 368 | if self.sparker == None: |
369 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | 369 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', |
370 | - master='spark://HPC-server:7077') | 370 | + master='spark://HPC-server:7077') |
371 | 371 | ||
372 | cols = [ | 372 | cols = [ |
373 | 'cf_pic:data', | 373 | 'cf_pic:data', |
@@ -392,7 +392,7 @@ class DataILSVRC_S(DataDumperBase): | @@ -392,7 +392,7 @@ class DataILSVRC_S(DataDumperBase): | ||
392 | return self.rdd_data | 392 | return self.rdd_data |
393 | else: | 393 | else: |
394 | self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, | 394 | self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, |
395 | - withdata=withdata) | 395 | + withdata=withdata) |
396 | 396 | ||
397 | else: | 397 | else: |
398 | raise Exception("Unknown mode!") | 398 | raise Exception("Unknown mode!") |
@@ -461,7 +461,7 @@ class DataILSVRC_S(DataDumperBase): | @@ -461,7 +461,7 @@ class DataILSVRC_S(DataDumperBase): | ||
461 | elif mode == 'spark': | 461 | elif mode == 'spark': |
462 | if self.sparker == None: | 462 | if self.sparker == None: |
463 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | 463 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', |
464 | - master='spark://HPC-server:7077') | 464 | + master='spark://HPC-server:7077') |
465 | 465 | ||
466 | cols = [ | 466 | cols = [ |
467 | 'cf_pic:data', | 467 | 'cf_pic:data', |
@@ -488,7 +488,7 @@ class DataILSVRC_S(DataDumperBase): | @@ -488,7 +488,7 @@ class DataILSVRC_S(DataDumperBase): | ||
488 | return self.rdd_data | 488 | return self.rdd_data |
489 | else: | 489 | else: |
490 | self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, | 490 | self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, |
491 | - withdata=withdata) | 491 | + withdata=withdata) |
492 | 492 | ||
493 | 493 | ||
494 | else: | 494 | else: |
@@ -505,21 +505,55 @@ class DataILSVRC_S(DataDumperBase): | @@ -505,21 +505,55 @@ class DataILSVRC_S(DataDumperBase): | ||
505 | INDEX = [] | 505 | INDEX = [] |
506 | X = [] | 506 | X = [] |
507 | Y = [] | 507 | Y = [] |
508 | + if mode == "local": | ||
509 | + dict_dataset = {} | ||
510 | + | ||
511 | + if feattype == 'coef': # raw | ||
512 | + with open(self.list_file, 'rb') as tsvfile: | ||
513 | + tsvfile = csv.reader(tsvfile, delimiter='\t') | ||
514 | + for line in tsvfile: | ||
515 | + hash = line[0] | ||
516 | + tag = line[-1] | ||
517 | + image = os.path.join(self.img_dir, hash[:3], hash[3:] + '.jpg') | ||
518 | + if image: | ||
519 | + im = Jpeg(image, key=sample_key) | ||
520 | + dict_dataset[hash] = (tag, im.getCoefMatrix(channel='Y')) | ||
521 | + | ||
522 | + for tag, feat in dict_dataset.values(): | ||
523 | + X.append(feat.tolist()) | ||
524 | + Y.append(int(tag)) | ||
508 | 525 | ||
509 | - if mode == "hbase": | 526 | + else: |
527 | + with open(self.list_file, 'rb') as tsvfile: | ||
528 | + tsvfile = csv.reader(tsvfile, delimiter='\t') | ||
529 | + for line in tsvfile: | ||
530 | + hash = line[0] | ||
531 | + tag = line[-1] | ||
532 | + path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype) | ||
533 | + if path_feat: | ||
534 | + with open(path_feat, 'rb') as featfile: | ||
535 | + dict_dataset[hash] = (tag, json.loads(featfile.read())) | ||
536 | + | ||
537 | + for tag, feat in dict_dataset.values(): | ||
538 | + # X.append([item for sublist in feat for subsublist in sublist for item in subsublist]) | ||
539 | + X.append(np.array(feat).ravel().tolist()) | ||
540 | + Y.append(int(tag)) | ||
541 | + | ||
542 | + elif mode == "hbase": | ||
510 | if self.table == None: | 543 | if self.table == None: |
511 | self.table = self.get_table() | 544 | self.table = self.get_table() |
512 | 545 | ||
513 | col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype | 546 | col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype |
514 | for key, data in self.table.scan(columns=[col_feat, col_tag]): | 547 | for key, data in self.table.scan(columns=[col_feat, col_tag]): |
515 | - X.append( | ||
516 | - [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) | 548 | + # X.append( |
549 | + # [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) | ||
550 | + X.append(np.array(json.loads(data[col_feat])).ravel().tolist()) | ||
517 | Y.append(int(data[col_tag])) | 551 | Y.append(int(data[col_tag])) |
518 | 552 | ||
519 | elif mode == "spark" or mode == "cluster": | 553 | elif mode == "spark" or mode == "cluster": |
520 | if self.sparker == None: | 554 | if self.sparker == None: |
521 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', | 555 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', |
522 | - master='spark://HPC-server:7077') | 556 | + master='spark://HPC-server:7077') |
523 | 557 | ||
524 | rdd_dataset = self.sparker.read_hbase(self.table_name, func=SC.rddparse_dataset_ILS, collect=False) | 558 | rdd_dataset = self.sparker.read_hbase(self.table_name, func=SC.rddparse_dataset_ILS, collect=False) |
525 | if not collect: | 559 | if not collect: |
test/test_model.py
@@ -148,7 +148,7 @@ def test_SVM_ILSVRC_S(): | @@ -148,7 +148,7 @@ def test_SVM_ILSVRC_S(): | ||
148 | def test_THEANO_crop(): | 148 | def test_THEANO_crop(): |
149 | 149 | ||
150 | timer.mark() | 150 | timer.mark() |
151 | - dilc = ILSVRC.DataILSVRC(base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil') | 151 | + dilc = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil') |
152 | X, Y = dilc.load_data(mode='local', feattype='coef') | 152 | X, Y = dilc.load_data(mode='local', feattype='coef') |
153 | timer.report() | 153 | timer.report() |
154 | 154 |