test_whole.py
1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
__author__ = 'chunk'
from ..mspark import SC
from pyspark.mllib.regression import LabeledPoint
cols0 = [
'cf_pic:data',
'cf_info:width',
'cf_info:height',
'cf_info:size',
'cf_info:capacity',
'cf_info:quality',
'cf_info:rate',
'cf_tag:chosen',
'cf_tag:class'
]
cols1 = [
'cf_pic:data',
'cf_info:width',
'cf_info:height',
'cf_info:size',
'cf_info:capacity',
'cf_info:quality',
'cf_info:rate',
'cf_tag:chosen',
'cf_tag:class',
'cf_feat:bid',
]
sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', master='spark://HPC-server:7077')
rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False) \
.mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \
.flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \
.mapValues(lambda items: SC.rddfeat_ILS(items))
sparker.write_hbase("ILSVRC2013_DET_val-Test_1", rdd_data, fromrdd=True, columns=cols1,
withdata=True)