Commit 0bd44a28a8510a0496ca6271fe2a362dd152c584
Parent: 66c71f34
Exists in master and in 1 other branch: staged
Showing 2 changed files with 46 additions and 12 deletions
mdata/ILSVRC_S.py
@@ -244,7 +244,7 @@ class DataILSVRC_S(DataDumperBase):
         elif mode == 'spark':
             if self.sparker == None:
                 self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
-                                          master='spark://HPC-server:7077')
+                                          master='spark://HPC-server:7077')
 
             cols = [
                 'cf_pic:data',
@@ -267,14 +267,14 @@ class DataILSVRC_S(DataDumperBase):
 
 
             self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS,
-                                                    collect=False).mapValues(
+                                                    collect=False).mapValues(
                 lambda data: [data] + SC.rddinfo_ILS(data))
 
             if not writeback:
                 return self.rdd_data
             else:
                 self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols,
-                                         withdata=withdata)
+                                         withdata=withdata)
 
         else:
             raise Exception("Unknown mode!")
@@ -367,7 +367,7 @@ class DataILSVRC_S(DataDumperBase):
         elif mode == 'spark':
             if self.sparker == None:
                 self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
-                                          master='spark://HPC-server:7077')
+                                          master='spark://HPC-server:7077')
 
             cols = [
                 'cf_pic:data',
@@ -392,7 +392,7 @@ class DataILSVRC_S(DataDumperBase):
                 return self.rdd_data
             else:
                 self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols,
-                                         withdata=withdata)
+                                         withdata=withdata)
 
         else:
             raise Exception("Unknown mode!")
@@ -461,7 +461,7 @@ class DataILSVRC_S(DataDumperBase):
         elif mode == 'spark':
             if self.sparker == None:
                 self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
-                                          master='spark://HPC-server:7077')
+                                          master='spark://HPC-server:7077')
 
             cols = [
                 'cf_pic:data',
@@ -488,7 +488,7 @@ class DataILSVRC_S(DataDumperBase):
                 return self.rdd_data
             else:
                 self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols,
-                                         withdata=withdata)
+                                         withdata=withdata)
 
 
         else:
@@ -505,21 +505,55 @@ class DataILSVRC_S(DataDumperBase):
         INDEX = []
         X = []
         Y = []
+        if mode == "local":
+            dict_dataset = {}
+
+            if feattype == 'coef':  # raw
+                with open(self.list_file, 'rb') as tsvfile:
+                    tsvfile = csv.reader(tsvfile, delimiter='\t')
+                    for line in tsvfile:
+                        hash = line[0]
+                        tag = line[-1]
+                        image = os.path.join(self.img_dir, hash[:3], hash[3:] + '.jpg')
+                        if image:
+                            im = Jpeg(image, key=sample_key)
+                            dict_dataset[hash] = (tag, im.getCoefMatrix(channel='Y'))
+
+                for tag, feat in dict_dataset.values():
+                    X.append(feat.tolist())
+                    Y.append(int(tag))
 
-        if mode == "hbase":
+            else:
+                with open(self.list_file, 'rb') as tsvfile:
+                    tsvfile = csv.reader(tsvfile, delimiter='\t')
+                    for line in tsvfile:
+                        hash = line[0]
+                        tag = line[-1]
+                        path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype)
+                        if path_feat:
+                            with open(path_feat, 'rb') as featfile:
+                                dict_dataset[hash] = (tag, json.loads(featfile.read()))
+
+                for tag, feat in dict_dataset.values():
+                    # X.append([item for sublist in feat for subsublist in sublist for item in subsublist])
+                    X.append(np.array(feat).ravel().tolist())
+                    Y.append(int(tag))
+
+        elif mode == "hbase":
             if self.table == None:
                 self.table = self.get_table()
 
             col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype
             for key, data in self.table.scan(columns=[col_feat, col_tag]):
-                X.append(
-                    [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist])
+                # X.append(
+                #     [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist])
+                X.append(np.array(json.loads(data[col_feat])).ravel().tolist())
                 Y.append(int(data[col_tag]))
 
         elif mode == "spark" or mode == "cluster":
            if self.sparker == None:
                 self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
-                                          master='spark://HPC-server:7077')
+                                          master='spark://HPC-server:7077')
 
             rdd_dataset = self.sparker.read_hbase(self.table_name, func=SC.rddparse_dataset_ILS, collect=False)
             if not collect:
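Aside from the re-indented continuation lines, the substantive changes above are the new mode == "local" branch and the switch from a triple-nested list comprehension to a NumPy flatten when building X. A minimal sketch of the flattening equivalence, using a made-up 2x2x2 nested list in place of the real decoded feature matrix:

import numpy as np

# Made-up 2x2x2 nested list standing in for json.loads(data[col_feat]).
feat = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]

# Old form: triple-nested list comprehension over the 3-D nesting.
flat_old = [item for sublist in feat for subsublist in sublist for item in subsublist]

# New form: let NumPy flatten the array, independent of nesting depth.
flat_new = np.array(feat).ravel().tolist()

assert flat_old == flat_new  # both give [1, 2, 3, 4, 5, 6, 7, 8]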
test/test_model.py
@@ -148,7 +148,7 @@ def test_SVM_ILSVRC_S():
 def test_THEANO_crop():
 
     timer.mark()
-    dilc = ILSVRC.DataILSVRC(base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil')
+    dilc = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil')
     X, Y = dilc.load_data(mode='local', feattype='coef')
     timer.report()
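This test also exercises a mode='local' load. For reference, the new local branch added to DataILSVRC_S.load_data above expects a tab-separated list file whose first column is the image hash and whose last column is the tag, with per-image files sharded under a three-character hash prefix (whether DataILSVRC uses the same layout is not shown in this diff). A minimal sketch of that lookup; the list-file name and image root below are illustrative stand-ins for self.list_file and self.img_dir:

import csv
import os

list_file = 'ilsvrc_s_list.tsv'   # illustrative; the real path comes from self.list_file
img_root = '/data/img'            # illustrative; the real path comes from self.img_dir

with open(list_file, 'r') as tsvfile:
    for line in csv.reader(tsvfile, delimiter='\t'):
        img_hash, tag = line[0], line[-1]  # first column: hash, last column: tag
        image = os.path.join(img_root, img_hash[:3], img_hash[3:] + '.jpg')
        print(image, tag)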