Commit 5c9c44da22fd65b593e5b7b7cf5d788c5f11a930
1 parent: 54e2adda
Exists in master and in 1 other branch: staged.
Showing 1 changed file with 29 additions and 27 deletions.
Show diff stats
test/test_whole.py
| @@ -4,6 +4,7 @@ from ..mspark import SC | @@ -4,6 +4,7 @@ from ..mspark import SC | ||
| 4 | from pyspark.mllib.regression import LabeledPoint | 4 | from pyspark.mllib.regression import LabeledPoint |
| 5 | import happybase | 5 | import happybase |
| 6 | 6 | ||
| 7 | + | ||
| 7 | def test_whole(): | 8 | def test_whole(): |
| 8 | cols0 = [ | 9 | cols0 = [ |
| 9 | 'cf_pic:data', | 10 | 'cf_pic:data', |
| @@ -33,7 +34,7 @@ def test_whole(): | @@ -33,7 +34,7 @@ def test_whole(): | ||
| 33 | 34 | ||
| 34 | # rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False) \ | 35 | # rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False) \ |
| 35 | # .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \ | 36 | # .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \ |
| 36 | - # .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \ | 37 | + # .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \ |
| 37 | # .mapValues(lambda items: SC.rddfeat_ILS(items)) | 38 | # .mapValues(lambda items: SC.rddfeat_ILS(items)) |
| 38 | 39 | ||
| 39 | rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False).mapValues( | 40 | rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False).mapValues( |
| @@ -64,36 +65,37 @@ def test_whole_ext(): | @@ -64,36 +65,37 @@ def test_whole_ext(): | ||
| 64 | cols = ['cf_pic:data'] | 65 | cols = ['cf_pic:data'] |
| 65 | list_data = [] | 66 | list_data = [] |
| 66 | for key, data in table.scan(columns=cols): | 67 | for key, data in table.scan(columns=cols): |
| 67 | - data = data['cf_pic:data'] | ||
| 68 | - list_data.append((key,data)) | 68 | + data = data['cf_pic:data'] |
| 69 | + list_data.append((key, data)) | ||
| 69 | 70 | ||
| 70 | sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', master='spark://HPC-server:7077') | 71 | sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', master='spark://HPC-server:7077') |
| 71 | - rdd_data = sparker.sc.parallelize(list_data,20)\ | ||
| 72 | - .mapValues(lambda data: [data] + SC.rddinfo_ILS(data))\ | ||
| 73 | - .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2))\ | 72 | + rdd_data = sparker.sc.parallelize(list_data, 40) \ |
| 73 | + .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \ | ||
| 74 | + .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \ | ||
| 74 | .mapValues(lambda items: SC.rddfeat_ILS(items)) | 75 | .mapValues(lambda items: SC.rddfeat_ILS(items)) |
| 75 | 76 | ||
| 76 | - rrr = rdd_data.collect() | ||
| 77 | - print "-----------------",len(rrr),"====================" | ||
| 78 | - print "+++++++++++++++++",rrr[0],"**********************" | ||
| 79 | - # try: | ||
| 80 | - # with table.batch(batch_size=5000) as b: | ||
| 81 | - # for imgname, imginfo in rdd_data.collect().items(): | ||
| 82 | - # b.put(imgname, | ||
| 83 | - # { | ||
| 84 | - # 'cf_pic:data': imginfo[0], | ||
| 85 | - # 'cf_info:width': str(imginfo[1]), | ||
| 86 | - # 'cf_info:height': str(imginfo[2]), | ||
| 87 | - # 'cf_info:size': str(imginfo[3]), | ||
| 88 | - # 'cf_info:capacity': str(imginfo[4]), | ||
| 89 | - # 'cf_info:quality': str(imginfo[5]), | ||
| 90 | - # 'cf_info:rate': str(imginfo[6]), | ||
| 91 | - # 'cf_tag:chosen': str(imginfo[7]), | ||
| 92 | - # 'cf_tag:class': str(imginfo[8]), | ||
| 93 | - # 'cf_feat:' + feattype: imginfo[9], | ||
| 94 | - # }) | ||
| 95 | - # except ValueError: | ||
| 96 | - # raise | 77 | + # rrr = rdd_data.collect() |
| 78 | + # print "-----------------", len(rrr), "====================" | ||
| 79 | + # print "+++++++++++++++++", rrr[0], "**********************" | ||
| 80 | + try: | ||
| 81 | + with table.batch(batch_size=5000) as b: | ||
| 82 | + for item in rdd_data.collect(): | ||
| 83 | + imgname, imginfo = item[0], item[1] | ||
| 84 | + b.put(imgname, | ||
| 85 | + { | ||
| 86 | + 'cf_pic:data': imginfo[0], | ||
| 87 | + 'cf_info:width': str(imginfo[1]), | ||
| 88 | + 'cf_info:height': str(imginfo[2]), | ||
| 89 | + 'cf_info:size': str(imginfo[3]), | ||
| 90 | + 'cf_info:capacity': str(imginfo[4]), | ||
| 91 | + 'cf_info:quality': str(imginfo[5]), | ||
| 92 | + 'cf_info:rate': str(imginfo[6]), | ||
| 93 | + 'cf_tag:chosen': str(imginfo[7]), | ||
| 94 | + 'cf_tag:class': str(imginfo[8]), | ||
| 95 | + 'cf_feat:ibd' : imginfo[9], | ||
| 96 | + }) | ||
| 97 | + except ValueError: | ||
| 98 | + raise | ||
| 97 | 99 | ||
| 98 | 100 | ||
| 99 | 101 |