Blame view

test/test_whole.py 3.53 KB
d642d837   Chunk   staged.
1
2
3
__author__ = 'chunk'

from ..mspark import SC
1821e0e3   Chunk   benchmarking...
4
5
6
from pyspark.mllib.regression import LabeledPoint
import happybase

d642d837   Chunk   staged.
7

54e2adda   Chunk   staged.
8
9
def test_whole():
    """Run the ILSVRC info/embed/feature pipeline on Spark and print the record count.

    Reads the HBase table "ILSVRC2013_DET_val-Test_1" through
    ``Sparker.read_hbase`` (parsed with ``SC.rddparse_data_ILS``), prepends the
    raw data to the output of ``SC.rddinfo_ILS``, builds a second RDD with
    ``SC.rddembed_ILS(rate=0.2)`` (dropping ``None`` results), unions both,
    applies ``SC.rddfeat_ILS`` to every value and prints how many records
    were collected. Nothing is written back to HBase (the write is disabled).
    """
    # Column lists are only referenced by the disabled write-back at the end
    # of this function; they are kept for when it is re-enabled.
    cols0 = [
        'cf_pic:data',
        'cf_info:width',
        'cf_info:height',
        'cf_info:size',
        'cf_info:capacity',
        'cf_info:quality',
        'cf_info:rate',
        'cf_tag:chosen',
        'cf_tag:class'
    ]
    cols1 = [
        'cf_pic:data',
        'cf_info:width',
        'cf_info:height',
        'cf_info:size',
        'cf_info:capacity',
        'cf_info:quality',
        'cf_info:rate',
        'cf_tag:chosen',
        'cf_tag:class',
        'cf_feat:bid',
    ]

    sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', master='spark://HPC-server:7077')

    # Disabled alternative pipeline (flatMap over SC.rddembed_ILS_EXT):
    # rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False) \
    # .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \
    # .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \
    #     .mapValues(lambda items: SC.rddfeat_ILS(items))

    rdd_data = sparker.read_hbase("ILSVRC2013_DET_val-Test_1", func=SC.rddparse_data_ILS, collect=False).mapValues(
        lambda data: [data] + SC.rddinfo_ILS(data))
    # Identity comparison: rddembed_ILS results are dropped only when they are None.
    rdd_data_ext = rdd_data.map(lambda x: SC.rddembed_ILS(x, rate=0.2)).filter(lambda x: x is not None)

    rdd_data = rdd_data.union(rdd_data_ext).mapValues(lambda items: SC.rddfeat_ILS(items))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(len(rdd_data.collect()))

    # sparker.write_hbase("ILSVRC2013_DET_val-Test_1", rdd_data, fromrdd=True, columns=cols1,
    #                     withdata=True)


f4fb4381   Chunk   staged.
52
def test_whole_ext():
    """Scan images from HBase, process them on Spark, and write the results back.

    Steps, all against the table "ILSVRC2013_DET_val-Test_1" on 'HPC-server':

    1. Create the table (families cf_pic, cf_info, cf_tag, cf_feat) if it is
       not listed by the connection.
    2. Scan the 'cf_pic:data' column into a list of ``(key, data)`` pairs.
    3. Parallelize the list into 40 partitions and apply ``SC.rddinfo_ILS``,
       ``SC.rddembed_ILS_EXT(rate=0.2)`` and ``SC.rddfeat_ILS``.
    4. Collect the result and put every record back through a batch of
       size 5000.

    Any exception raised along the way propagates to the caller; the HBase
    connection is closed in all cases.
    """
    table_name = "ILSVRC2013_DET_val-Test_1"
    connection = happybase.Connection('HPC-server')
    try:
        if table_name not in connection.tables():
            families = {'cf_pic': dict(),
                        'cf_info': dict(max_versions=10),
                        'cf_tag': dict(),
                        'cf_feat': dict(),
                        }
            connection.create_table(name=table_name, families=families)
        table = connection.table(name=table_name)

        cols = ['cf_pic:data']
        list_data = [(key, row['cf_pic:data']) for key, row in table.scan(columns=cols)]

        sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S', master='spark://HPC-server:7077')
        rdd_data = sparker.sc.parallelize(list_data, 40) \
            .mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) \
            .flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=0.2)) \
            .mapValues(lambda items: SC.rddfeat_ILS(items))

        # rrr = rdd_data.collect()
        # print "-----------------", len(rrr), "===================="
        # print "+++++++++++++++++", rrr[0], "**********************"
        with table.batch(batch_size=5000) as b:
            for item in rdd_data.collect():
                imgname, imginfo = item[0], item[1]
                b.put(imgname,
                      {
                          'cf_pic:data': imginfo[0],
                          'cf_info:width': str(imginfo[1]),
                          'cf_info:height': str(imginfo[2]),
                          'cf_info:size': str(imginfo[3]),
                          'cf_info:capacity': str(imginfo[4]),
                          'cf_info:quality': str(imginfo[5]),
                          'cf_info:rate': str(imginfo[6]),
                          'cf_tag:chosen': str(imginfo[7]),
                          'cf_tag:class': str(imginfo[8]),
                          # NOTE(review): test_whole lists this column as
                          # 'cf_feat:bid'; confirm which qualifier is intended.
                          'cf_feat:ibd': imginfo[9],
                      })
    finally:
        # Release the Thrift connection whether or not the run succeeded.
        connection.close()
51708346   Chunk   final experiments...

54e2adda   Chunk   staged.

f4fb4381   Chunk   staged.

1821e0e3   Chunk   benchmarking...

54e2adda   Chunk   staged.

1821e0e3   Chunk   benchmarking...

5c9c44da   Chunk   staged.

f4fb4381   Chunk   staged.

5c9c44da   Chunk   staged.

1821e0e3   Chunk   benchmarking...

5c9c44da   Chunk   staged.

1821e0e3   Chunk   benchmarking...

84648488   Chunk   reverted.

f4fb4381   Chunk   staged.

84648488   Chunk   reverted.