# test_whole.py 1.01 KB
__author__ = 'chunk'

from ..mspark import SC
from pyspark.mllib.regression import LabeledPoint


# Column identifiers written as 'prefix:name' strings (presumably HBase
# 'family:qualifier' pairs -- confirm against the table schema).
# Holds the same entries as cols1 except for 'cf_feat:bid'.
cols0 = [
    # cf_pic
    'cf_pic:data',
    # cf_info
    'cf_info:width', 'cf_info:height', 'cf_info:size',
    'cf_info:capacity', 'cf_info:quality', 'cf_info:rate',
    # cf_tag
    'cf_tag:chosen', 'cf_tag:class',
]
# Column identifiers passed as `columns=` to sparker.write_hbase() below.
# Written as 'prefix:name' strings (presumably HBase 'family:qualifier'
# pairs -- confirm against the table schema).
cols1 = [
    # cf_pic
    'cf_pic:data',
    # cf_info
    'cf_info:width', 'cf_info:height', 'cf_info:size',
    'cf_info:capacity', 'cf_info:quality', 'cf_info:rate',
    # cf_tag
    'cf_tag:chosen', 'cf_tag:class',
    # cf_feat
    'cf_feat:bid',
]

# Build the project's Spark wrapper, pointed at the master on HPC-server.
sparker = SC.Sparker(
    host='HPC-server',
    appname='ImageILSVRC-S',
    master='spark://HPC-server:7077'
)

# Load the table through SC.rddparse_data_ILS. With collect=False the result
# is used as a key/value RDD below (presumably left uncollected -- confirm
# in SC.Sparker.read_hbase).
rdd_data = sparker.read_hbase(
    "ILSVRC2013_DET_val-Test_1",
    func=SC.rddparse_data_ILS,
    collect=False
)

# Each value becomes a list: the original value first, followed by whatever
# SC.rddinfo_ILS returns for it.
rdd_data = rdd_data.mapValues(lambda payload: [payload] + SC.rddinfo_ILS(payload))

# SC.rddembed_ILS_EXT is applied per record with rate=0.2; its iterable
# result is flattened into the output RDD.
rdd_data = rdd_data.flatMap(lambda pair: SC.rddembed_ILS_EXT(pair, rate=0.2))

# Final per-value transformation via SC.rddfeat_ILS.
rdd_data = rdd_data.mapValues(lambda fields: SC.rddfeat_ILS(fields))

# Write the processed RDD back to the same table it was read from, using
# the cols1 column list.
sparker.write_hbase(
    "ILSVRC2013_DET_val-Test_1",
    rdd_data,
    fromrdd=True,
    columns=cols1,
    withdata=True
)