Commit d642d837fa065c133187e9c8f8dbc0283103bfe6
1 parent: 489c5608
Exists in master and in 1 other branch: staged.
Showing 2 changed files with 69 additions and 13 deletions.
Show diff stats
mspark/SC.py
... | ... | @@ -155,6 +155,7 @@ def rddembed_ILS(row, rate=None): |
155 | 155 | tmpf_src.close() |
156 | 156 | tmpf_dst.close() |
157 | 157 | |
158 | + | |
158 | 159 | def rddembed_ILS_EXT(row, rate=None): |
159 | 160 | """ |
160 | 161 | input: |
... | ... | @@ -188,7 +189,7 @@ def rddembed_ILS_EXT(row, rate=None): |
188 | 189 | raw = tmpf_dst.read() |
189 | 190 | index = md5(raw).hexdigest() |
190 | 191 | |
191 | - return [row,(index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))] | |
192 | + return [row, (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))] | |
192 | 193 | |
193 | 194 | except Exception as e: |
194 | 195 | print e |
... | ... | @@ -282,10 +283,10 @@ class Sparker(object): |
282 | 283 | """ |
283 | 284 | |
284 | 285 | hconf = { |
285 | - "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", | |
286 | - #"hbase.zookeeper.quorum": self.host, | |
287 | - "hbase.mapreduce.inputtable": table_name, | |
288 | - } | |
286 | + "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", | |
287 | + # "hbase.zookeeper.quorum": self.host, | |
288 | + "hbase.mapreduce.inputtable": table_name, | |
289 | + } | |
289 | 290 | |
290 | 291 | hbase_rdd = self.sc.newAPIHadoopRDD(inputFormatClass=hparams["inputFormatClass"], |
291 | 292 | keyClass=hparams["readKeyClass"], |
... | ... | @@ -315,14 +316,13 @@ class Sparker(object): |
315 | 316 | cols = ['cf_info:id', 'cf_info:size', 'cf_tag:desc'] |
316 | 317 | """ |
317 | 318 | hconf = { |
318 | - "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", | |
319 | - #"hbase.zookeeper.quorum": self.host, | |
320 | - "hbase.mapreduce.inputtable": table_name, | |
321 | - "hbase.mapred.outputtable": table_name, | |
322 | - "mapreduce.outputformat.class": hparams["outputFormatClass"], | |
323 | - "mapreduce.job.output.key.class": hparams["writeKeyClass"], | |
324 | - "mapreduce.job.output.value.class": hparams["writeValueClass"], | |
325 | - } | |
319 | + "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", # "hbase.zookeeper.quorum": self.host, | |
320 | + "hbase.mapreduce.inputtable": table_name, | |
321 | + "hbase.mapred.outputtable": table_name, | |
322 | + "mapreduce.outputformat.class": hparams["outputFormatClass"], | |
323 | + "mapreduce.job.output.key.class": hparams["writeKeyClass"], | |
324 | + "mapreduce.job.output.value.class": hparams["writeValueClass"], | |
325 | + } | |
326 | 326 | cols = [col.split(':') for col in columns] |
327 | 327 | if not fromrdd: |
328 | 328 | rdd_data = self.sc.parallelize(data) | ... | ... |
... | ... | @@ -0,0 +1,56 @@ |
1 | +__author__ = 'chunk' | |
2 | + | |
3 | +from ..mspark import SC | |
4 | +from pyspark.mllib.regression import LabeledPoint | |
5 | + | |
6 | + | |
7 | +cols0 = [ | |
8 | + 'cf_pic:data', | |
9 | + 'cf_info:width', | |
10 | + 'cf_info:height', | |
11 | + 'cf_info:size', | |
12 | + 'cf_info:capacity', | |
13 | + 'cf_info:quality', | |
14 | + 'cf_info:rate', | |
15 | + 'cf_tag:chosen', | |
16 | + 'cf_tag:class' | |
17 | +] | |
18 | +cols1 = [ | |
19 | + 'cf_pic:data', | |
20 | + 'cf_info:width', | |
21 | + 'cf_info:height', | |
22 | + 'cf_info:size', | |
23 | + 'cf_info:capacity', | |
24 | + 'cf_info:quality', | |
25 | + 'cf_info:rate', | |
26 | + 'cf_tag:chosen', | |
27 | + 'cf_tag:class', | |
28 | + 'cf_feat:bid', | |
29 | +] | |
30 | + | |
31 | +sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', master='spark://HPC-server:7077') | |
32 | + | |
33 | +rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False) \ | |
34 | + .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \ | |
35 | + .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \ | |
36 | + .mapValues(lambda items: SC.rddfeat_ILS(items)) | |
37 | + | |
38 | +sparker.write_hbase("ILSVRC2013_DET_val-Test_1", rdd_data, fromrdd=True, columns=cols1, | |
39 | + withdata=True) | |
40 | + | |
41 | + | |
42 | + | |
43 | + | |
44 | + | |
45 | + | |
46 | + | |
47 | + | |
48 | + | |
49 | + | |
50 | + | |
51 | + | |
52 | + | |
53 | + | |
54 | + | |
55 | + | |
56 | + | ... | ... |