Blame view

test/test_model.py 4.44 KB
be12257b   Chunk   data-feat-model f...
1
2
__author__ = 'chunk'

9371f8fa   Chunk   SVM param engenee...
3
from ..common import *
f4fb4381   Chunk   staged.
4

2bf33465   Chunk   staged.
5
from ..mdata import MSR, CV, ILSVRC, ILSVRC_S
84648488   Chunk   reverted.
6
from ..mmodel import SVM
61e78eb3   Chunk   staged.
7
from ..mfeat import HOG
84648488   Chunk   reverted.
8

2bf33465   Chunk   staged.
9
from sklearn import cross_validation
5a469df5   Chunk   staged.
10
11
12

# Module-wide timer shared by every test below; each timed phase is wrapped
# in a timer.mark() / timer.report() pair.
# NOTE(review): Timer is presumably provided by `from ..common import *` -- confirm.
timer = Timer()

84648488   Chunk   reverted.
13

5a469df5   Chunk   staged.
14
15
def test_SVM_CV():
    """Train an SVM on the CV dataset, then predict with it and score it.

    Every phase is bracketed by ``timer.mark()`` / ``timer.report()``:

    1. load ``X, Y`` with ``CV.DataCV().load_data(mode='local')``;
    2. train ``SVM.ModelSVM(toolset='spark')`` on ``X, Y``;
    3. walk ``data/467/`` and, for every file, print its name followed by
       the model's prediction for the feature returned by
       ``dcv.get_feat(path, 'hog')``;
    4. print ``msvm.test(X, Y)`` -- note the model is scored on the very
       data it was trained on.

    The trailing timings/scores in the comments are measurements noted by the
    original author for the alternative load modes and toolsets.
    """
    # Phase 1: load the data set.
    timer.mark()
    dcv = CV.DataCV()
    X, Y = dcv.load_data(mode='local')  # 90.468586s ->  5.392520s
    # X, Y = dcv.load_data(mode='hbase') # 21.682754s
    # X, Y = dcv.load_data(mode='spark') # 29.549597s
    timer.report()

    # Phase 2: build and train the model.
    timer.mark()
    # msvm = SVM.ModelSVM(toolset='sklearn') # 3.030380s
    # msvm = SVM.ModelSVM(toolset='opencv') # 8.939880s
    # msvm = SVM.ModelSVM(toolset='libsvm') # 185.524023s
    msvm = SVM.ModelSVM(toolset='spark')

    msvm.train(X, Y)
    timer.report()

    # Phase 3: predict every file found under the sample directory.
    timer.mark()
    for path, subdirs, files in os.walk('data/467/'):
        for name in files:
            imgpath = os.path.join(path, name)
            feat = dcv.get_feat(imgpath, 'hog')
            # Single-argument print(): same "name prediction" output as the
            # Python 2 `print a, b` statement, and also valid on Python 3.
            print('%s %s' % (name, msvm.predict(feat)))
    timer.report()

    # Phase 4: score the model on the training data itself.
    timer.mark()
    print(msvm.test(X, Y))  # 0.948892561983 for svm_cv, 0.989024793388 for svm_sk, 0.9900826446280992 for svm_lib
    timer.report()  # 27.421949s for svm_lib


def test_SVM_ILSVRC():
    """Train and evaluate an SVM on locally loaded ILSVRC data.

    Every phase is bracketed by ``timer.mark()`` / ``timer.report()``:

    1. load ``X, Y`` with ``ILSVRC.DataILSVRC(...).load_data(mode='local')``
       (category ``'Test.0.2'``), split them with
       ``cross_validation.train_test_split(test_size=0.4, random_state=0)``
       and print the shapes of the full, training and test sets;
    2. train ``SVM.ModelSVM(toolset='sklearn')`` on the training split;
    3. print ``msvm.test(X_test, Y_test)`` for the held-out split.
    """
    # Phase 1: load and split the data.
    timer.mark()
    dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test.0.2')
    X, Y = dil.load_data(mode='local')  #
    # X, Y = dil.load_data(mode='hbase') #
    # X, Y = dil.load_data(mode='spark') #
    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.4, random_state=0)

    # Single-argument print() calls: identical output to the Python 2
    # `print a, b` statement, and also valid on Python 3.
    print('%s %s' % (np.array(Y).shape, np.array(X).shape))
    print('%s %s' % (np.array(X_train).shape, np.array(Y_train).shape))
    print('%s %s' % (np.array(X_test).shape, np.array(Y_test).shape))

    timer.report()

    # Phase 2: train on the training split.
    timer.mark()
    msvm = SVM.ModelSVM(toolset='sklearn')  # 4.884247s 0.777853030816
    # msvm = SVM.ModelSVM(toolset='opencv') #
    # msvm = SVM.ModelSVM(toolset='libsvm') #
    # msvm = SVM.ModelSVM(toolset='spark')
    msvm.train(X_train, Y_train)
    timer.report()

    # Phase 3: score on the held-out split.
    timer.mark()
    print(msvm.test(X_test, Y_test))  #
    timer.report()  #

    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()


def test_SVM_ILSVRC_HBASE():
    """Train an SVM on HBase-loaded ILSVRC data and score it twice.

    Every phase is bracketed by ``timer.mark()`` / ``timer.report()``:

    1. load ``X, Y`` with ``ILSVRC_S.DataILSVRC_S(...).load_data(mode='hbase')``
       and a second set ``X1, Y1`` with
       ``ILSVRC.DataILSVRC(...).load_data(mode='local')`` (both for category
       ``'Test_1'``); split ``X, Y`` with
       ``cross_validation.train_test_split(test_size=0.4, random_state=0)``
       and print the labels, the count of labels equal to 0 and to 1, and
       the shapes of the full, training and test sets;
    2. train ``SVM.ModelSVM(toolset='sklearn')`` on the training split;
    3. print ``msvm.test(X_test, Y_test)`` for the held-out split;
    4. print ``msvm.test(X1, Y1)`` for the locally loaded set.
    """
    # Phase 1: load both data sets and split the HBase one.
    timer.mark()

    # dil = ILSVRC.DataILSVRC(base_dir='ILSVRC2013_DET_val', category='Train_3')
    # X, Y = dil.load_data(mode='hbase') # pass

    dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Test_1')
    X, Y = dils.load_data(mode='hbase')  # pass

    dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_1')
    X1, Y1 = dil.load_data(mode='local')

    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.4, random_state=0)

    # Single-argument print() calls: identical output to the Python 2
    # `print a, b` statement, and also valid on Python 3.
    labels = np.array(Y)  # built once, reused for both class counts
    print('%s %s %s' % (Y, np.sum(labels == 0), np.sum(labels == 1)))
    print('%s %s' % (labels.shape, np.array(X).shape))
    print('%s %s' % (np.array(X_train).shape, np.array(Y_train).shape))
    print('%s %s' % (np.array(X_test).shape, np.array(Y_test).shape))

    timer.report()

    # Phase 2: train on the training split.
    timer.mark()
    msvm = SVM.ModelSVM(toolset='sklearn')  # 4.884247s 0.777853030816
    # msvm = SVM.ModelSVM(toolset='opencv') #
    # msvm = SVM.ModelSVM(toolset='libsvm') #
    # msvm = SVM.ModelSVM(toolset='spark')
    msvm.train(X_train, Y_train)
    timer.report()

    # Phase 3: score on the held-out split.
    timer.mark()
    print(msvm.test(X_test, Y_test))  #
    timer.report()  #

    # Phase 4: score on the locally loaded data.
    timer.mark()
    print(msvm.test(X1, Y1))  #
    timer.report()  #
    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()
e3e7e73a   Chunk   spider standalone...
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134


def test_SVM_ILSVRC_SPARK():
    """Train an SVM with the Spark toolset and score it on its own data.

    Timed phases (``timer.mark()`` / ``timer.report()``):

    1. load ``rdd_dataset`` with
       ``ILSVRC_S.DataILSVRC_S(...).load_data(mode='spark')``;
    2. train ``SVM.ModelSVM(toolset='spark', sc=dils.sparker)`` on it;
    3. print ``msvm.test(...)`` on the features/labels collected back from
       that same dataset, with the features re-parallelized through
       ``dils.sparker.sc``.

    The collect step between phases 2 and 3 is not timed.
    """
    # Phase 1: load the data set.
    timer.mark()
    dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Test_1')
    rdd_dataset = dils.load_data(mode='spark')  # pass

    timer.report()

    # Phase 2: train the Spark-backed model.
    timer.mark()
    # msvm = SVM.ModelSVM(toolset='sklearn')  #
    # msvm = SVM.ModelSVM(toolset='opencv') #
    # msvm = SVM.ModelSVM(toolset='libsvm') #
    msvm = SVM.ModelSVM(toolset='spark', sc=dils.sparker)
    msvm.train(rdd_dataset)
    timer.report()

    # Pull the records back and separate features from labels by iterating
    # the collected records directly instead of indexing through a range.
    dataset = rdd_dataset.collect()
    X_test = [point.features for point in dataset]
    Y_test = [point.label for point in dataset]

    # Phase 3: score on the training data itself.
    timer.mark()
    print(msvm.test(dils.sparker.sc.parallelize(X_test), Y_test))  #
    timer.report()  #


def test_SVM_ILSVRC_S():
    """Run the HBase-backed ILSVRC SVM test; the Spark variant is disabled."""
    test_SVM_ILSVRC_HBASE()
    # test_SVM_ILSVRC_SPARK()


if __name__ == '__main__':
    # Script entry point: only the local ILSVRC test is enabled.
    # test_SVM_CV()
    test_SVM_ILSVRC()
    print('helllo')
02528074   Chunk   staged.

9371f8fa   Chunk   SVM param engenee...

02528074   Chunk   staged.

84648488   Chunk   reverted.

02528074   Chunk   staged.

51708346   Chunk   final experiments...

02528074   Chunk   staged.

9371f8fa   Chunk   SVM param engenee...

84648488   Chunk   reverted.

be12257b   Chunk   data-feat-model f...

d0be60e7   Chunk   jpeg update.

be12257b   Chunk   data-feat-model f...

84648488   Chunk   reverted.