__author__ = 'chunk' from sklearn import cross_validation from pyspark.mllib.regression import LabeledPoint from ..common import * from ..mdata import ILSVRC, ILSVRC_S from ..mmodel.svm import SVM import gzip import cPickle timer = Timer() package_dir = os.path.dirname(os.path.abspath(__file__)) def test_SVM_ILSVRC(): timer.mark() dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_0.05_orig') X, Y = dil.load_data(mode='local') # # X, Y = dil.load_data(mode='hbase') # # X, Y = dil.load_data(mode='spark') # X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.4, random_state=0) print np.array(Y).shape, np.array(X).shape print np.array(X_train).shape, np.array(Y_train).shape print np.array(X_test).shape, np.array(Y_test).shape timer.report() timer.mark() msvm = SVM.ModelSVM(toolset='sklearn') # 4.884247s 0.777853030816 # msvm = SVM.ModelSVM(toolset='opencv') # # msvm = SVM.ModelSVM(toolset='libsvm') # # msvm = SVM.ModelSVM(toolset='spark') msvm.train(X_train, Y_train) timer.report() timer.mark() print msvm.test(X_test, Y_test) # timer.report() # # timer.mark() # print 'or like this:' # scores = cross_validation.cross_val_score(msvm.model, X, Y) # print scores # timer.report() def test_SVM_ILSVRC_HBASE(): timer.mark() # dil = ILSVRC.DataILSVRC(base_dir='ILSVRC2013_DET_val', category='Train_3') # X, Y = dil.load_data(mode='hbase') # pass dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Train_1000') X, Y = dils.load_data(mode='hbase') # pass # dil = ILSVRC_S.DataILSVRC_S(base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val/', category='Train_5000_0.1_orig') # X1, Y1 = dil.load_data(mode='local') X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.4, random_state=0) print Y, np.sum(np.array(Y) == 0), np.sum(np.array(Y) == 1) print np.array(Y).shape, np.array(X).shape print np.array(X_train).shape, np.array(Y_train).shape print np.array(X_test).shape, np.array(Y_test).shape timer.report() timer.mark() msvm = SVM.ModelSVM(toolset='sklearn') # 4.884247s 0.777853030816 # msvm = SVM.ModelSVM(toolset='opencv') # # msvm = SVM.ModelSVM(toolset='libsvm') # # msvm = SVM.ModelSVM(toolset='spark',sc=dils.sparker) msvm.train(X_train, Y_train) timer.report() timer.mark() print msvm.test(X_test, Y_test) # timer.report() # # timer.mark() # print msvm.test(X1, Y1) #(0.048868415782094936, 0.4924709948160948, 0.74568774878372401) # timer.report() # # timer.mark() # print 'or like this:' # scores = cross_validation.cross_val_score(msvm.model, X, Y) # print scores # timer.report() def test_SVM_ILSVRC_TEST(): timer.mark() dil = ILSVRC_S.DataILSVRC_S(base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val/', category='Train_5000_0.1_orig') X1, Y1 = dil.load_data(mode='local') timer.report() timer.mark() msvm = SVM.ModelSVM(toolset='sklearn') # 4.884247s 0.777853030816 timer.report() timer.mark() print msvm.test(X1, Y1) # (0.048868415782094936, 0.4924709948160948, 0.74568774878372401) timer.report() # # timer.mark() # print 'or like this:' # scores = cross_validation.cross_val_score(msvm.model, X, Y) # print scores # timer.report() def test_SVM_ILSVRC_SPARK(): timer.mark() dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Train_5000') # rdd_dataset = dils.load_data(mode='spark') # pass X, Y = dils.load_data(mode='hbase') # pass rdd_dataset = dils.sparker.sc.parallelize(zip(Y, X), 30).map(lambda x: LabeledPoint(x[0], x[1])) timer.report() timer.mark() # msvm = SVM.ModelSVM(toolset='sklearn') # # msvm = SVM.ModelSVM(toolset='opencv') # # msvm = SVM.ModelSVM(toolset='libsvm') # msvm = SVM.ModelSVM(toolset='spark', sc=dils.sparker) msvm.train(rdd_dataset) timer.report() dataset = rdd_dataset.collect() length = len(dataset) X_test, Y_test = [dataset[i].features for i in range(length)], [dataset[i].label for i in range(length)] timer.mark() print msvm.test(dils.sparker.sc.parallelize(X_test), Y_test) # timer.report() # def test_SVM_ILSVRC_S(): test_SVM_ILSVRC_HBASE() # test_SVM_ILSVRC_SPARK() if __name__ == '__main__': # test_SVM_CV() test_SVM_ILSVRC() print 'helllo'