Commit 0bd44a28a8510a0496ca6271fe2a362dd152c584 (1 parent: 66c71f34).
Exists in master and in 1 other branch (staged).
Showing 2 changed files with 46 additions and 12 deletions.
Show diff stats
mdata/ILSVRC_S.py
| ... | ... | @@ -244,7 +244,7 @@ class DataILSVRC_S(DataDumperBase): |
| 244 | 244 | elif mode == 'spark': |
| 245 | 245 | if self.sparker == None: |
| 246 | 246 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', |
| 247 | - master='spark://HPC-server:7077') | |
| 247 | + master='spark://HPC-server:7077') | |
| 248 | 248 | |
| 249 | 249 | cols = [ |
| 250 | 250 | 'cf_pic:data', |
| ... | ... | @@ -267,14 +267,14 @@ class DataILSVRC_S(DataDumperBase): |
| 267 | 267 | |
| 268 | 268 | |
| 269 | 269 | self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS, |
| 270 | - collect=False).mapValues( | |
| 270 | + collect=False).mapValues( | |
| 271 | 271 | lambda data: [data] + SC.rddinfo_ILS(data)) |
| 272 | 272 | |
| 273 | 273 | if not writeback: |
| 274 | 274 | return self.rdd_data |
| 275 | 275 | else: |
| 276 | 276 | self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, |
| 277 | - withdata=withdata) | |
| 277 | + withdata=withdata) | |
| 278 | 278 | |
| 279 | 279 | else: |
| 280 | 280 | raise Exception("Unknown mode!") |
| ... | ... | @@ -367,7 +367,7 @@ class DataILSVRC_S(DataDumperBase): |
| 367 | 367 | elif mode == 'spark': |
| 368 | 368 | if self.sparker == None: |
| 369 | 369 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', |
| 370 | - master='spark://HPC-server:7077') | |
| 370 | + master='spark://HPC-server:7077') | |
| 371 | 371 | |
| 372 | 372 | cols = [ |
| 373 | 373 | 'cf_pic:data', |
| ... | ... | @@ -392,7 +392,7 @@ class DataILSVRC_S(DataDumperBase): |
| 392 | 392 | return self.rdd_data |
| 393 | 393 | else: |
| 394 | 394 | self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, |
| 395 | - withdata=withdata) | |
| 395 | + withdata=withdata) | |
| 396 | 396 | |
| 397 | 397 | else: |
| 398 | 398 | raise Exception("Unknown mode!") |
| ... | ... | @@ -461,7 +461,7 @@ class DataILSVRC_S(DataDumperBase): |
| 461 | 461 | elif mode == 'spark': |
| 462 | 462 | if self.sparker == None: |
| 463 | 463 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', |
| 464 | - master='spark://HPC-server:7077') | |
| 464 | + master='spark://HPC-server:7077') | |
| 465 | 465 | |
| 466 | 466 | cols = [ |
| 467 | 467 | 'cf_pic:data', |
| ... | ... | @@ -488,7 +488,7 @@ class DataILSVRC_S(DataDumperBase): |
| 488 | 488 | return self.rdd_data |
| 489 | 489 | else: |
| 490 | 490 | self.sparker.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, |
| 491 | - withdata=withdata) | |
| 491 | + withdata=withdata) | |
| 492 | 492 | |
| 493 | 493 | |
| 494 | 494 | else: |
| ... | ... | @@ -505,21 +505,55 @@ class DataILSVRC_S(DataDumperBase): |
| 505 | 505 | INDEX = [] |
| 506 | 506 | X = [] |
| 507 | 507 | Y = [] |
| 508 | + if mode == "local": | |
| 509 | + dict_dataset = {} | |
| 510 | + | |
| 511 | + if feattype == 'coef': # raw | |
| 512 | + with open(self.list_file, 'rb') as tsvfile: | |
| 513 | + tsvfile = csv.reader(tsvfile, delimiter='\t') | |
| 514 | + for line in tsvfile: | |
| 515 | + hash = line[0] | |
| 516 | + tag = line[-1] | |
| 517 | + image = os.path.join(self.img_dir, hash[:3], hash[3:] + '.jpg') | |
| 518 | + if image: | |
| 519 | + im = Jpeg(image, key=sample_key) | |
| 520 | + dict_dataset[hash] = (tag, im.getCoefMatrix(channel='Y')) | |
| 521 | + | |
| 522 | + for tag, feat in dict_dataset.values(): | |
| 523 | + X.append(feat.tolist()) | |
| 524 | + Y.append(int(tag)) | |
| 508 | 525 | |
| 509 | - if mode == "hbase": | |
| 526 | + else: | |
| 527 | + with open(self.list_file, 'rb') as tsvfile: | |
| 528 | + tsvfile = csv.reader(tsvfile, delimiter='\t') | |
| 529 | + for line in tsvfile: | |
| 530 | + hash = line[0] | |
| 531 | + tag = line[-1] | |
| 532 | + path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype) | |
| 533 | + if path_feat: | |
| 534 | + with open(path_feat, 'rb') as featfile: | |
| 535 | + dict_dataset[hash] = (tag, json.loads(featfile.read())) | |
| 536 | + | |
| 537 | + for tag, feat in dict_dataset.values(): | |
| 538 | + # X.append([item for sublist in feat for subsublist in sublist for item in subsublist]) | |
| 539 | + X.append(np.array(feat).ravel().tolist()) | |
| 540 | + Y.append(int(tag)) | |
| 541 | + | |
| 542 | + elif mode == "hbase": | |
| 510 | 543 | if self.table == None: |
| 511 | 544 | self.table = self.get_table() |
| 512 | 545 | |
| 513 | 546 | col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype |
| 514 | 547 | for key, data in self.table.scan(columns=[col_feat, col_tag]): |
| 515 | - X.append( | |
| 516 | - [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) | |
| 548 | + # X.append( | |
| 549 | + # [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) | |
| 550 | + X.append(np.array(json.loads(data[col_feat])).ravel().tolist()) | |
| 517 | 551 | Y.append(int(data[col_tag])) |
| 518 | 552 | |
| 519 | 553 | elif mode == "spark" or mode == "cluster": |
| 520 | 554 | if self.sparker == None: |
| 521 | 555 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', |
| 522 | - master='spark://HPC-server:7077') | |
| 556 | + master='spark://HPC-server:7077') | |
| 523 | 557 | |
| 524 | 558 | rdd_dataset = self.sparker.read_hbase(self.table_name, func=SC.rddparse_dataset_ILS, collect=False) |
| 525 | 559 | if not collect: | ... | ... |
test/test_model.py
| ... | ... | @@ -148,7 +148,7 @@ def test_SVM_ILSVRC_S(): |
| 148 | 148 | def test_THEANO_crop(): |
| 149 | 149 | |
| 150 | 150 | timer.mark() |
| 151 | - dilc = ILSVRC.DataILSVRC(base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil') | |
| 151 | + dilc = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil') | |
| 152 | 152 | X, Y = dilc.load_data(mode='local', feattype='coef') |
| 153 | 153 | timer.report() |
| 154 | 154 | ... | ... |