Commit 5c9c44da22fd65b593e5b7b7cf5d788c5f11a930 (1 parent: 54e2adda)
Exists in master and in 1 other branch: staged.
Showing 1 changed file with 29 additions and 27 deletions.
Show diff stats
test/test_whole.py
@@ -4,6 +4,7 @@ from ..mspark import SC | @@ -4,6 +4,7 @@ from ..mspark import SC | ||
4 | from pyspark.mllib.regression import LabeledPoint | 4 | from pyspark.mllib.regression import LabeledPoint |
5 | import happybase | 5 | import happybase |
6 | 6 | ||
7 | + | ||
7 | def test_whole(): | 8 | def test_whole(): |
8 | cols0 = [ | 9 | cols0 = [ |
9 | 'cf_pic:data', | 10 | 'cf_pic:data', |
@@ -33,7 +34,7 @@ def test_whole(): | @@ -33,7 +34,7 @@ def test_whole(): | ||
33 | 34 | ||
34 | # rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False) \ | 35 | # rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False) \ |
35 | # .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \ | 36 | # .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \ |
36 | - # .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \ | 37 | + # .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \ |
37 | # .mapValues(lambda items: SC.rddfeat_ILS(items)) | 38 | # .mapValues(lambda items: SC.rddfeat_ILS(items)) |
38 | 39 | ||
39 | rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False).mapValues( | 40 | rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False).mapValues( |
@@ -64,36 +65,37 @@ def test_whole_ext(): | @@ -64,36 +65,37 @@ def test_whole_ext(): | ||
64 | cols = ['cf_pic:data'] | 65 | cols = ['cf_pic:data'] |
65 | list_data = [] | 66 | list_data = [] |
66 | for key, data in table.scan(columns=cols): | 67 | for key, data in table.scan(columns=cols): |
67 | - data = data['cf_pic:data'] | ||
68 | - list_data.append((key,data)) | 68 | + data = data['cf_pic:data'] |
69 | + list_data.append((key, data)) | ||
69 | 70 | ||
70 | sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', master='spark://HPC-server:7077') | 71 | sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', master='spark://HPC-server:7077') |
71 | - rdd_data = sparker.sc.parallelize(list_data,20)\ | ||
72 | - .mapValues(lambda data: [data] + SC.rddinfo_ILS(data))\ | ||
73 | - .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2))\ | 72 | + rdd_data = sparker.sc.parallelize(list_data, 40) \ |
73 | + .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \ | ||
74 | + .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \ | ||
74 | .mapValues(lambda items: SC.rddfeat_ILS(items)) | 75 | .mapValues(lambda items: SC.rddfeat_ILS(items)) |
75 | 76 | ||
76 | - rrr = rdd_data.collect() | ||
77 | - print "-----------------",len(rrr),"====================" | ||
78 | - print "+++++++++++++++++",rrr[0],"**********************" | ||
79 | - # try: | ||
80 | - # with table.batch(batch_size=5000) as b: | ||
81 | - # for imgname, imginfo in rdd_data.collect().items(): | ||
82 | - # b.put(imgname, | ||
83 | - # { | ||
84 | - # 'cf_pic:data': imginfo[0], | ||
85 | - # 'cf_info:width': str(imginfo[1]), | ||
86 | - # 'cf_info:height': str(imginfo[2]), | ||
87 | - # 'cf_info:size': str(imginfo[3]), | ||
88 | - # 'cf_info:capacity': str(imginfo[4]), | ||
89 | - # 'cf_info:quality': str(imginfo[5]), | ||
90 | - # 'cf_info:rate': str(imginfo[6]), | ||
91 | - # 'cf_tag:chosen': str(imginfo[7]), | ||
92 | - # 'cf_tag:class': str(imginfo[8]), | ||
93 | - # 'cf_feat:' + feattype: imginfo[9], | ||
94 | - # }) | ||
95 | - # except ValueError: | ||
96 | - # raise | 77 | + # rrr = rdd_data.collect() |
78 | + # print "-----------------", len(rrr), "====================" | ||
79 | + # print "+++++++++++++++++", rrr[0], "**********************" | ||
80 | + try: | ||
81 | + with table.batch(batch_size=5000) as b: | ||
82 | + for item in rdd_data.collect(): | ||
83 | + imgname, imginfo = item[0], item[1] | ||
84 | + b.put(imgname, | ||
85 | + { | ||
86 | + 'cf_pic:data': imginfo[0], | ||
87 | + 'cf_info:width': str(imginfo[1]), | ||
88 | + 'cf_info:height': str(imginfo[2]), | ||
89 | + 'cf_info:size': str(imginfo[3]), | ||
90 | + 'cf_info:capacity': str(imginfo[4]), | ||
91 | + 'cf_info:quality': str(imginfo[5]), | ||
92 | + 'cf_info:rate': str(imginfo[6]), | ||
93 | + 'cf_tag:chosen': str(imginfo[7]), | ||
94 | + 'cf_tag:class': str(imginfo[8]), | ||
95 | + 'cf_feat:ibd' : imginfo[9], | ||
96 | + }) | ||
97 | + except ValueError: | ||
98 | + raise | ||
97 | 99 | ||
98 | 100 | ||
99 | 101 |