Commit d642d837fa065c133187e9c8f8dbc0283103bfe6
1 parent: 489c5608
Exists in master and in 1 other branch staged.
Showing 2 changed files with 69 additions and 13 deletions
Show diff stats
mspark/SC.py
| ... | ... | @@ -155,6 +155,7 @@ def rddembed_ILS(row, rate=None): |
| 155 | 155 | tmpf_src.close() |
| 156 | 156 | tmpf_dst.close() |
| 157 | 157 | |
| 158 | + | |
| 158 | 159 | def rddembed_ILS_EXT(row, rate=None): |
| 159 | 160 | """ |
| 160 | 161 | input: |
| ... | ... | @@ -188,7 +189,7 @@ def rddembed_ILS_EXT(row, rate=None): |
| 188 | 189 | raw = tmpf_dst.read() |
| 189 | 190 | index = md5(raw).hexdigest() |
| 190 | 191 | |
| 191 | - return [row,(index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))] | |
| 192 | + return [row, (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))] | |
| 192 | 193 | |
| 193 | 194 | except Exception as e: |
| 194 | 195 | print e |
| ... | ... | @@ -282,10 +283,10 @@ class Sparker(object): |
| 282 | 283 | """ |
| 283 | 284 | |
| 284 | 285 | hconf = { |
| 285 | - "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", | |
| 286 | - #"hbase.zookeeper.quorum": self.host, | |
| 287 | - "hbase.mapreduce.inputtable": table_name, | |
| 288 | - } | |
| 286 | + "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", | |
| 287 | + # "hbase.zookeeper.quorum": self.host, | |
| 288 | + "hbase.mapreduce.inputtable": table_name, | |
| 289 | + } | |
| 289 | 290 | |
| 290 | 291 | hbase_rdd = self.sc.newAPIHadoopRDD(inputFormatClass=hparams["inputFormatClass"], |
| 291 | 292 | keyClass=hparams["readKeyClass"], |
| ... | ... | @@ -315,14 +316,13 @@ class Sparker(object): |
| 315 | 316 | cols = ['cf_info:id', 'cf_info:size', 'cf_tag:desc'] |
| 316 | 317 | """ |
| 317 | 318 | hconf = { |
| 318 | - "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", | |
| 319 | - #"hbase.zookeeper.quorum": self.host, | |
| 320 | - "hbase.mapreduce.inputtable": table_name, | |
| 321 | - "hbase.mapred.outputtable": table_name, | |
| 322 | - "mapreduce.outputformat.class": hparams["outputFormatClass"], | |
| 323 | - "mapreduce.job.output.key.class": hparams["writeKeyClass"], | |
| 324 | - "mapreduce.job.output.value.class": hparams["writeValueClass"], | |
| 325 | - } | |
| 319 | + "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2", # "hbase.zookeeper.quorum": self.host, | |
| 320 | + "hbase.mapreduce.inputtable": table_name, | |
| 321 | + "hbase.mapred.outputtable": table_name, | |
| 322 | + "mapreduce.outputformat.class": hparams["outputFormatClass"], | |
| 323 | + "mapreduce.job.output.key.class": hparams["writeKeyClass"], | |
| 324 | + "mapreduce.job.output.value.class": hparams["writeValueClass"], | |
| 325 | + } | |
| 326 | 326 | cols = [col.split(':') for col in columns] |
| 327 | 327 | if not fromrdd: |
| 328 | 328 | rdd_data = self.sc.parallelize(data) | ... | ... |
| ... | ... | @@ -0,0 +1,56 @@ |
| 1 | +__author__ = 'chunk' | |
| 2 | + | |
| 3 | +from ..mspark import SC | |
| 4 | +from pyspark.mllib.regression import LabeledPoint | |
| 5 | + | |
| 6 | + | |
| 7 | +cols0 = [ | |
| 8 | + 'cf_pic:data', | |
| 9 | + 'cf_info:width', | |
| 10 | + 'cf_info:height', | |
| 11 | + 'cf_info:size', | |
| 12 | + 'cf_info:capacity', | |
| 13 | + 'cf_info:quality', | |
| 14 | + 'cf_info:rate', | |
| 15 | + 'cf_tag:chosen', | |
| 16 | + 'cf_tag:class' | |
| 17 | +] | |
| 18 | +cols1 = [ | |
| 19 | + 'cf_pic:data', | |
| 20 | + 'cf_info:width', | |
| 21 | + 'cf_info:height', | |
| 22 | + 'cf_info:size', | |
| 23 | + 'cf_info:capacity', | |
| 24 | + 'cf_info:quality', | |
| 25 | + 'cf_info:rate', | |
| 26 | + 'cf_tag:chosen', | |
| 27 | + 'cf_tag:class', | |
| 28 | + 'cf_feat:bid', | |
| 29 | +] | |
| 30 | + | |
| 31 | +sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', master='spark://HPC-server:7077') | |
| 32 | + | |
| 33 | +rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False) \ | |
| 34 | + .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \ | |
| 35 | + .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \ | |
| 36 | + .mapValues(lambda items: SC.rddfeat_ILS(items)) | |
| 37 | + | |
| 38 | +sparker.write_hbase("ILSVRC2013_DET_val-Test_1", rdd_data, fromrdd=True, columns=cols1, | |
| 39 | + withdata=True) | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | ... | ... |