# test_model.py 6.06 KB
__author__ = 'chunk'

from sklearn import cross_validation

from ..common import *
from ..mdata import CV, ILSVRC, ILSVRC_S
from ..mmodel.svm import SVM
from ..mmodel.theano import THEANO

import gzip
import cPickle


# Shared stopwatch: every test below brackets its phases with
# timer.mark() / timer.report().
# NOTE(review): Timer is not defined in this file; presumably it is provided
# by the `..common` star import -- confirm.
timer = Timer()
# Absolute path of the directory that contains this module; used below to
# locate resource files relative to the package (see the '../res/' join).
package_dir = os.path.dirname(os.path.abspath(__file__))


def test_SVM_CV():
    timer.mark()
    dcv = CV.DataCV()
    X, Y = dcv.load_data(mode='local')  # 90.468586s ->  5.392520s
    # X, Y = dcv.load_data(mode='hbase') # 21.682754s
    # X, Y = dcv.load_data(mode='spark') # 29.549597s
    timer.report()

    timer.mark()
    # msvm = SVM.ModelSVM(toolset='sklearn') # 3.030380s
    # msvm = SVM.ModelSVM(toolset='opencv') # 8.939880s
    # msvm = SVM.ModelSVM(toolset='libsvm') # 185.524023s
    msvm = SVM.ModelSVM(toolset='spark')

    msvm.train(X, Y)
    timer.report()

    timer.mark()
    for path, subdirs, files in os.walk('data/467/'):
        for name in files:
            imgpath = os.path.join(path, name)
            feat = dcv.get_feat(imgpath, 'hog')
            print name, msvm.predict(feat)
    timer.report()

    timer.mark()
    print msvm.test(X, Y)  # 0.948892561983 for svm_cv, 0.989024793388 for svm_sk, 0.9900826446280992 for svm_lib
    timer.report()  # 27.421949s for svm_lib


def test_SVM_ILSVRC():
    timer.mark()
    dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_0.05_orig')
    X, Y = dil.load_data(mode='local')  #
    # X, Y = dil.load_data(mode='hbase') #
    # X, Y = dil.load_data(mode='spark') #
    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.4, random_state=0)
    print np.array(Y).shape, np.array(X).shape
    print np.array(X_train).shape, np.array(Y_train).shape
    print np.array(X_test).shape, np.array(Y_test).shape

    timer.report()

    timer.mark()
    msvm = SVM.ModelSVM(toolset='sklearn')  # 4.884247s 0.777853030816
    # msvm = SVM.ModelSVM(toolset='opencv') #
    # msvm = SVM.ModelSVM(toolset='libsvm') #
    # msvm = SVM.ModelSVM(toolset='spark')
    msvm.train(X_train, Y_train)
    timer.report()

    timer.mark()
    print msvm.test(X_test, Y_test)  #
    timer.report()  #

    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()


def test_SVM_ILSVRC_HBASE():
    timer.mark()

    # dil = ILSVRC.DataILSVRC(base_dir='ILSVRC2013_DET_val', category='Train_3')
    # X, Y = dil.load_data(mode='hbase') # pass

    dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Train_5000')
    X, Y = dils.load_data(mode='hbase')  # pass

    dil = ILSVRC_S.DataILSVRC_S(base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val/', category='Train_5000_0.1_orig')
    X1, Y1 = dil.load_data(mode='local')

    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.4, random_state=0)
    print Y, np.sum(np.array(Y) == 0), np.sum(np.array(Y) == 1)
    print np.array(Y).shape, np.array(X).shape
    print np.array(X_train).shape, np.array(Y_train).shape
    print np.array(X_test).shape, np.array(Y_test).shape

    timer.report()

    timer.mark()
    msvm = SVM.ModelSVM(toolset='sklearn')  # 4.884247s 0.777853030816
    # msvm = SVM.ModelSVM(toolset='opencv') #
    # msvm = SVM.ModelSVM(toolset='libsvm') #
    # msvm = SVM.ModelSVM(toolset='spark')
    msvm.train(X_train, Y_train)
    timer.report()

    timer.mark()
    print msvm.test(X_test, Y_test)  #
    timer.report()  #

    timer.mark()
    print msvm.test(X1, Y1)  #(0.048868415782094936, 0.4924709948160948, 0.74568774878372401)
    timer.report()  #
    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()

def test_SVM_ILSVRC_TEST():
    timer.mark()

    dil = ILSVRC_S.DataILSVRC_S(base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val/', category='Train_5000_0.1_orig')
    X1, Y1 = dil.load_data(mode='local')
    timer.report()

    timer.mark()
    msvm = SVM.ModelSVM(toolset='sklearn')  # 4.884247s 0.777853030816
    timer.report()

    timer.mark()
    print msvm.test(X1, Y1)  #(0.048868415782094936, 0.4924709948160948, 0.74568774878372401)
    timer.report()  #
    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()


def test_SVM_ILSVRC_SPARK():
    timer.mark()
    dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Test_1')
    rdd_dataset = dils.load_data(mode='spark')  # pass

    timer.report()

    timer.mark()
    # msvm = SVM.ModelSVM(toolset='sklearn')  #
    # msvm = SVM.ModelSVM(toolset='opencv') #
    # msvm = SVM.ModelSVM(toolset='libsvm') #
    msvm = SVM.ModelSVM(toolset='spark', sc=dils.sparker)
    msvm.train(rdd_dataset)
    timer.report()

    dataset = rdd_dataset.collect()
    length = len(dataset)

    X_test, Y_test = [dataset[i].features for i in range(length)], [dataset[i].label for i in range(length)]

    timer.mark()
    print msvm.test(dils.sparker.sc.parallelize(X_test), Y_test)  #
    timer.report()  #


def test_SVM_ILSVRC_S():
    """Run the ILSVRC_S SVM scenario; currently the HBase variant only."""
    test_SVM_ILSVRC_HBASE()
    # The Spark variant is disabled; re-enable the call below to run it too.
    # test_SVM_ILSVRC_SPARK()


def test_THEANO_mnist():
    """Train the Theano CNN model on the MNIST pickle under ``../res/``.

    The dataset path is built relative to ``package_dir``; the training
    hyper-parameters are passed explicitly to ``_train_cnn``.
    """
    cnn_model = THEANO.ModelTHEANO(toolset='cnn')
    mnist_path = os.path.join(package_dir, '../res/', 'mnist.pkl.gz')
    cnn_model._train_cnn(
        learning_rate=0.1,
        n_epochs=200,
        dataset=mnist_path,
        nkerns=[20, 50],
        batch_size=500,
    )


def test_THEANO_crop():
    timer.mark()
    dilc = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_crop_pil')
    X, Y = dilc.load_data(mode='local', feattype='coef')
    print X[0],Y
    timer.report()

    # X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
    # with open(os.path.join(package_dir,'../res/','ils_crop.pkl'),'wb') as f:
    # cPickle.dump([(X_train,Y_train),(X_test,Y_test)], f)

    timer.mark()
    mtheano = THEANO.ModelTHEANO(toolset='cnn')
    mtheano._train_cnn(X, Y)
    timer.report()


# Script entry point: runs one hand-picked scenario when the module is
# executed directly.
if __name__ == '__main__':
    # Alternative scenario, currently disabled:
    # test_SVM_CV()
    test_SVM_ILSVRC()
    # Marker printed after the selected scenario returns.
    print 'helllo'