Blame view

test/test_model.py 6.06 KB
be12257b   Chunk   data-feat-model f...
1
2
__author__ = 'chunk'

9371f8fa   Chunk   SVM param engenee...
3
from sklearn import cross_validation
f4fb4381   Chunk   staged.
4

2bf33465   Chunk   staged.
5
from ..common import *
84648488   Chunk   reverted.
6
from ..mdata import CV, ILSVRC, ILSVRC_S
61e78eb3   Chunk   staged.
7
from ..mmodel.svm import SVM
84648488   Chunk   reverted.
8
from ..mmodel.theano import THEANO
2bf33465   Chunk   staged.
9

5a469df5   Chunk   staged.
10
11
12
import gzip
import cPickle

84648488   Chunk   reverted.
13

5a469df5   Chunk   staged.
14
15
# Module-level timer shared by the tests below through timer.mark() / timer.report().
# NOTE(review): Timer is not imported by name in this file; presumably it comes
# from `from ..common import *` -- confirm.
timer = Timer()
# Absolute directory containing this file; used below to locate files under '../res/'.
package_dir = os.path.dirname(os.path.abspath(__file__))
be12257b   Chunk   data-feat-model f...
16

d2603183   Chunk   staged.
17

84648488   Chunk   reverted.
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def test_SVM_CV():
    """Run the CV-dataset SVM experiment end to end.

    Loads data through CV.DataCV, trains SVM.ModelSVM(toolset='spark'),
    prints a prediction for the HOG feature of every file found under
    'data/467/', and finally prints msvm.test() on the same X, Y that were
    used for training. Every stage is bracketed by timer.mark() /
    timer.report().
    """
    # --- load -------------------------------------------------------------
    timer.mark()
    dcv = CV.DataCV()
    # Author's recorded load timings:
    #   mode='local': 90.468586s -> 5.392520s
    #   mode='hbase': 21.682754s
    #   mode='spark': 29.549597s
    X, Y = dcv.load_data(mode='local')
    timer.report()

    # --- train ------------------------------------------------------------
    timer.mark()
    # Author's recorded timings for the other toolsets:
    #   'sklearn': 3.030380s, 'opencv': 8.939880s, 'libsvm': 185.524023s
    msvm = SVM.ModelSVM(toolset='spark')
    msvm.train(X, Y)
    timer.report()

    # --- predict every image below data/467/ ------------------------------
    timer.mark()
    for dirpath, _dirnames, filenames in os.walk('data/467/'):
        for filename in filenames:
            feat = dcv.get_feat(os.path.join(dirpath, filename), 'hog')
            print('%s %s' % (filename, msvm.predict(feat)))
    timer.report()

    # --- score on the data the model was trained on -----------------------
    timer.mark()
    # Author's recorded scores: 0.948892561983 for svm_cv,
    # 0.989024793388 for svm_sk, 0.9900826446280992 for svm_lib
    print(msvm.test(X, Y))
    timer.report()  # 27.421949s for svm_lib


d0be60e7   Chunk   jpeg update.
48
49
def test_SVM_ILSVRC():
    """Train and score an SVM on locally loaded ILSVRC data.

    Loads X, Y through ILSVRC.DataILSVRC (mode='local'), splits them with
    cross_validation.train_test_split (test_size=0.4, random_state=0),
    trains SVM.ModelSVM(toolset='sklearn') on the training part and prints
    msvm.test() on the held-out part. Array shapes are printed along the
    way and every stage is bracketed by timer.mark() / timer.report().
    """
    timer.mark()
    dil = ILSVRC.DataILSVRC(
        base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val',
        category='Train_5000_0.05_orig')
    X, Y = dil.load_data(mode='local')
    # Alternative sources left by the author:
    #   X, Y = dil.load_data(mode='hbase')
    #   X, Y = dil.load_data(mode='spark')

    # Fixed random_state so repeated runs use the same split.
    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
        X, Y, test_size=0.4, random_state=0)

    # Shapes of the full set, then of the train and test parts.
    print('%s %s' % (np.array(Y).shape, np.array(X).shape))
    print('%s %s' % (np.array(X_train).shape, np.array(Y_train).shape))
    print('%s %s' % (np.array(X_test).shape, np.array(Y_test).shape))

    timer.report()

    timer.mark()
    # Author's recorded result for 'sklearn': 4.884247s 0.777853030816
    msvm = SVM.ModelSVM(toolset='sklearn')
    # Other toolsets the author tried here: 'opencv', 'libsvm', 'spark'.
    msvm.train(X_train, Y_train)
    timer.report()

    timer.mark()
    print(msvm.test(X_test, Y_test))
    timer.report()

    # Alternative evaluation kept for reference:
    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()


def test_SVM_ILSVRC_HBASE():
    """Train on HBase-loaded ILSVRC_S data and score on two test sets.

    X, Y come from ILSVRC_S.DataILSVRC_S(...).load_data(mode='hbase') and
    are split with cross_validation.train_test_split (test_size=0.4,
    random_state=0). A second set X1, Y1 is loaded with mode='local' from
    the 'Train_5000_0.1_orig' category. SVM.ModelSVM(toolset='sklearn') is
    trained on the training part, then msvm.test() is printed for the
    held-out part and for X1, Y1. Stages are bracketed by timer.mark() /
    timer.report().
    """
    timer.mark()

    # Earlier variant left by the author:
    #   dil = ILSVRC.DataILSVRC(base_dir='ILSVRC2013_DET_val', category='Train_3')
    #   X, Y = dil.load_data(mode='hbase') # pass

    dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Train_5000')
    X, Y = dils.load_data(mode='hbase')  # pass

    # Second data set, loaded from local disk and only used for scoring below.
    dil = ILSVRC_S.DataILSVRC_S(
        base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val/',
        category='Train_5000_0.1_orig')
    X1, Y1 = dil.load_data(mode='local')

    # Fixed random_state so repeated runs use the same split.
    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
        X, Y, test_size=0.4, random_state=0)

    # Labels, followed by how many of them equal 0 and how many equal 1.
    labels = np.array(Y)
    print('%s %s %s' % (Y, np.sum(labels == 0), np.sum(labels == 1)))
    print('%s %s' % (np.array(Y).shape, np.array(X).shape))
    print('%s %s' % (np.array(X_train).shape, np.array(Y_train).shape))
    print('%s %s' % (np.array(X_test).shape, np.array(Y_test).shape))

    timer.report()

    timer.mark()
    # Author's recorded result for 'sklearn': 4.884247s 0.777853030816
    msvm = SVM.ModelSVM(toolset='sklearn')
    # Other toolsets the author tried here: 'opencv', 'libsvm', 'spark'.
    msvm.train(X_train, Y_train)
    timer.report()

    timer.mark()
    print(msvm.test(X_test, Y_test))
    timer.report()

    timer.mark()
    # Author's recorded output:
    # (0.048868415782094936, 0.4924709948160948, 0.74568774878372401)
    print(msvm.test(X1, Y1))
    timer.report()

    # Alternative evaluation kept for reference:
    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()

def test_SVM_ILSVRC_TEST():
    """Score SVM.ModelSVM(toolset='sklearn') on the local 'Train_5000_0.1_orig' set.

    Three timed stages: load X1, Y1 with mode='local', construct the model,
    print msvm.test(X1, Y1).

    NOTE(review): no train() call happens in this function before test();
    presumably ModelSVM restores a previously fitted model -- confirm.
    """
    # Stage 1: load the evaluation data from local disk.
    timer.mark()
    loader = ILSVRC_S.DataILSVRC_S(
        base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val/',
        category='Train_5000_0.1_orig',
    )
    X1, Y1 = loader.load_data(mode='local')
    timer.report()

    # Stage 2: build the model (author's note: 4.884247s 0.777853030816).
    timer.mark()
    msvm = SVM.ModelSVM(toolset='sklearn')
    timer.report()

    # Stage 3: evaluate. Author's recorded output:
    # (0.048868415782094936, 0.4924709948160948, 0.74568774878372401)
    timer.mark()
    result = msvm.test(X1, Y1)
    print(result)
    timer.report()
2bd3da3e   Chunk   staged.
135
    # timer.mark()
02528074   Chunk   staged.
136
137
138
139
140
141
142
143
144
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()


def test_SVM_ILSVRC_SPARK():
    """Train and score the Spark-backed SVM on ILSVRC_S 'Test_1' data.

    Loads the data set with mode='spark', trains
    SVM.ModelSVM(toolset='spark', sc=dils.sparker) on it, then collects the
    same data set, separates each element's `.features` and `.label`, and
    prints msvm.test() on them. Timed stages are bracketed by
    timer.mark() / timer.report().
    """
    timer.mark()
    dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Test_1')
    rdd_dataset = dils.load_data(mode='spark')  # pass
    timer.report()

    timer.mark()
    # Other toolsets the author tried here: 'sklearn', 'opencv', 'libsvm'.
    msvm = SVM.ModelSVM(toolset='spark', sc=dils.sparker)
    msvm.train(rdd_dataset)
    timer.report()

    # Bring the data set back to the driver and split it into features
    # and labels; the model is scored on the data it was trained on.
    dataset = rdd_dataset.collect()
    X_test = [point.features for point in dataset]
    Y_test = [point.label for point in dataset]

    timer.mark()
    # Features are handed over as a parallelized collection, labels as a plain list.
    print(msvm.test(dils.sparker.sc.parallelize(X_test), Y_test))
    timer.report()


def test_SVM_ILSVRC_S():
    """Entry point for the ILSVRC_S SVM experiments; currently runs only the HBase variant."""
    test_SVM_ILSVRC_HBASE()
    # Spark variant is disabled: test_SVM_ILSVRC_SPARK()
51708346   Chunk   final experiments...
170
171


02528074   Chunk   staged.
172
def test_THEANO_mnist():
    """Train THEANO.ModelTHEANO(toolset='cnn') on the bundled MNIST pickle.

    The data set path is '<package_dir>/../res/mnist.pkl.gz'; the
    hyper-parameters are passed straight to mtheano._train_cnn().
    """
    mtheano = THEANO.ModelTHEANO(toolset='cnn')
    mtheano._train_cnn(
        learning_rate=0.1,
        n_epochs=200,
        dataset=os.path.join(package_dir, '../res/', 'mnist.pkl.gz'),
        nkerns=[20, 50],
        batch_size=500)


def test_THEANO_crop():
    """Train the THEANO CNN model on the 'Train_5000_crop_pil' ILSVRC data.

    Loads X, Y with mode='local' and feattype='coef', prints the first
    sample together with all labels, then calls mtheano._train_cnn(X, Y).
    Both stages are bracketed by timer.mark() / timer.report().
    """
    # Stage 1: load the data.
    timer.mark()
    dilc = ILSVRC.DataILSVRC(
        base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val',
        category='Train_5000_crop_pil',
    )
    X, Y = dilc.load_data(mode='local', feattype='coef')
    first_sample = X[0]
    print('%s %s' % (first_sample, Y))
    timer.report()

    # Disabled by the author: split 80/20 and pickle the result to
    # '../res/ils_crop.pkl'.
    # X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
    # with open(os.path.join(package_dir,'../res/','ils_crop.pkl'),'wb') as f:
    # cPickle.dump([(X_train,Y_train),(X_test,Y_test)], f)

    # Stage 2: train the CNN on the full X, Y.
    timer.mark()
    mtheano = THEANO.ModelTHEANO(toolset='cnn')
    mtheano._train_cnn(X, Y)
    timer.report()


if __name__ == '__main__':
    # Script entry point: runs the local ILSVRC SVM experiment only.
    # test_SVM_CV()
    test_SVM_ILSVRC()
    print('helllo')
be12257b   Chunk   data-feat-model f...

84648488   Chunk   reverted.