# test_data.py 5.28 KB
__author__ = 'chunk'

from ..common import *

from ..mdata import MSR, CV, ILSVRC, ILSVRC_S, crop

from ..mmodel.caffe.helper import *


def test_MSR():
    """Create an ``MSR.DataMSR`` object and call ``store_feat`` for 'ibd'.

    Other calls on the same object are currently left disabled:
    ``format()``, ``build_list()``, ``store_image()``, ``store_tag()`` and
    ``extract_feat(feattype='ibd')``. Re-add them ahead of ``store_feat``
    if a run needs them.
    """
    msr_data = MSR.DataMSR()
    msr_data.store_feat(feattype='ibd')


def test_CV():
    """Print one 'ibd' ``get_feat`` result from ``CV.DataCV``, then data sizes.

    The two values returned by ``load_data()`` are only measured with
    ``len`` and printed on a single space-separated line.
    """
    cv_data = CV.DataCV()

    # Calls left disabled on this object: format(), build_list(),
    # get_feat() without arguments, extract_feat().
    sample_image = "/home/hadoop/data/HeadShoulder/dst/Train/Img/132/7c5fe33bd194fc1ae7b0023956ebd.jpg"
    sample_feat = cv_data.get_feat(sample_image, 'ibd')
    print(sample_feat)

    data_x, data_y = cv_data.load_data()
    print('%s %s' % (len(data_x), len(data_y)))


def test_ILSVRC(category='Train_100'):
    """Run and time format, embed and extract_feat on ``ILSVRC.DataILSVRC``.

    Each stage is bracketed by ``Timer.mark()`` / ``Timer.report()`` and
    announced with a ``[time]`` banner line.

    :param category: category name passed to ``DataILSVRC``.
    """
    stopwatch = Timer()

    # The dataset root is fixed; only the category is configurable.
    dataset = ILSVRC.DataILSVRC(
        base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val',
        category=category)
    print('%s %s' % ('[time]category:', category))

    # (banner, action) pairs, executed strictly in this order.
    stages = (
        ('[time]formatting...', lambda: dataset.format()),
        ('[time]embedding...', lambda: dataset.embed(rate=0.2)),
        ('[time]extracting...', lambda: dataset.extract_feat(feattype='ibd')),
    )
    for banner, action in stages:
        stopwatch.mark()
        print(banner)
        action()
        stopwatch.report()

    # Further steps left disabled: extract_feat(feattype='hog') and the
    # individually timed store_img(), store_tag(), store_info(), store_feat().


def test_ILSVRC_S_LOCAL():
    """Reload 'Train_2' images, then run the ``DataILSVRC_S`` stages in 'hbase' mode.

    The first part (delete_table / format / store_img on ``DataILSVRC``) is
    timed with a mark/report pair. The second part runs ``_extract_data``,
    ``_embed_data`` and ``_extract_feat`` with only the last stage writing
    back.
    """
    root = '/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val'
    category = 'Train_2'
    clock = Timer()

    clock.mark()
    source = ILSVRC.DataILSVRC(base_dir=root, category=category)
    source.delete_table()
    source.format()
    source.store_img()
    clock.report()

    staged = ILSVRC_S.DataILSVRC_S(base=root, category=category)

    # Disabled alternative: the same three calls with writeback=True at every
    # stage and readforward=True for _embed_data / _extract_feat.
    staged._extract_data(mode='hbase', writeback=False)
    staged._embed_data(mode='hbase', rate=0.1, readforward=False,
                       writeback=False)
    staged._extract_feat(mode='hbase', feattype='ibd', readforward=False,
                         writeback=True)

    # NOTE(review): no mark() precedes this report(); what interval it covers
    # depends on Timer, which is defined elsewhere -- confirm it is intended.
    clock.report()


def test_ILSVRC_S_SPARK(category='Train_1000'):
    """Reload images for ``category``, then time each 'spark' stage of ``DataILSVRC_S``.

    :param category: category name passed to both ``DataILSVRC`` and
        ``DataILSVRC_S``.
    """
    clock = Timer()

    # Stage 0: rebuild the table contents from the image directory (timed).
    clock.mark()
    source = ILSVRC.DataILSVRC(
        base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val',
        category=category)
    source.delete_table()
    source.format()
    source.store_img()
    clock.report()

    staged = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val',
                                   category=category)

    # Each step below gets its own mark()/report() pair, in this order.
    # (A disabled debugging aid printed staged.rdd_data.count() after the
    # first step.)
    steps = (
        lambda: staged._extract_data(mode='spark', writeback=False),
        lambda: staged._embed_data(mode='spark', rate=0.2, readforward=False,
                                   writeback=False),
        lambda: staged._extract_feat(mode='spark', feattype='ibd',
                                     readforward=False, writeback=True,
                                     withdata=True),
    )
    for step in steps:
        clock.mark()
        step()
        clock.report()


def test_ILSVRC_S_ANALYSIS(category='Train_1000'):
    """Time ``_extract_data`` and ``_extract_feat`` of ``DataILSVRC_S`` in 'analysis' mode.

    The image reload through ``ILSVRC.DataILSVRC`` (delete_table / format /
    store_img) is left disabled in this variant.

    :param category: category name passed to ``DataILSVRC_S``.
    """
    clock = Timer()
    staged = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val',
                                   category=category)

    # Two separately timed steps, run in this order. (A disabled debugging
    # aid printed staged.rdd_data.count() between them.)
    steps = (
        lambda: staged._extract_data(mode='analysis', writeback=False),
        lambda: staged._extract_feat(mode='analysis', feattype='ibd',
                                     readforward=False, writeback=True,
                                     withdata=True),
    )
    for step in steps:
        clock.mark()
        step()
        clock.report()


def test_ILSVRC_S():
    """Entry point for the ``DataILSVRC_S`` checks; currently runs the Spark variant only."""
    # test_ILSVRC_S_LOCAL() is the disabled alternative.
    test_ILSVRC_S_SPARK()


def test_pipeline():
    """Run the three 'spark' stages on the 'MSPIDER' base under a single timing window.

    One ``mark()`` is taken before the ``DataILSVRC_S`` object is built and
    one ``report()`` after the last stage, so construction is inside the
    measured span.
    """
    clock = Timer()
    clock.mark()

    pipeline = ILSVRC_S.DataILSVRC_S(base='MSPIDER', category=None)
    pipeline._extract_data(mode='spark', writeback=False)
    pipeline._embed_data(mode='spark', rate=0.1, readforward=False,
                         writeback=False)
    pipeline._extract_feat(mode='spark', feattype='ibd', readforward=False,
                           writeback=True, withdata=True)

    clock.report()


def test_crop():
    """Crop 'Train_5000' to 200x200, then format, embed and load 'Train_5000_crop_pil'.

    After loading with ``feattype='coef'`` it prints the first element of
    the first returned value, the second returned value, and the numpy
    shapes of both.
    """
    root = '/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val'

    # crop.crop_Test() is a disabled alternative entry point.
    original = ILSVRC.DataILSVRC(base_dir=root, category='Train_5000')
    original.crop(size=(200, 200))

    cropped = ILSVRC.DataILSVRC(base_dir=root, category='Train_5000_crop_pil')
    cropped.format()
    cropped.embed(rate=0.2)

    data_x, data_y = cropped.load_data(mode='local', feattype='coef')
    print(data_x[0])
    print(data_y)

    # Convert in the same order as before (X first, then Y) and print both
    # shapes on a single space-separated line.
    x_shape = np.array(data_x).shape
    y_shape = np.array(data_y).shape
    print('%s %s' % (x_shape, y_shape))


def test_caffe():
    """Load shuffled 'coef' data for 'Train_5000_crop_pil' and pass the slice from index 7000 on to ``write_lmdb``.

    Prints the first element of the loaded value and its numpy shape before
    writing.
    """
    # Disabled alternative: read_lmdb(lmdb_name=os.path.join(caffe_root,
    # 'examples/imager/data_lmdb')) followed by an early return.
    dataset = ILSVRC.DataILSVRC(
        base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val',
        category='Train_5000_crop_pil')

    # NOTE(review): test_crop unpacks load_data() into two values, while the
    # result is bound to one name here -- confirm what load_data returns when
    # shuffle=True before relying on the [7000:] slice.
    loaded = dataset.load_data(mode='local', feattype='coef', shuffle=True)
    print(loaded[0])
    print(np.array(loaded).shape)

    tail = loaded[7000:]
    write_lmdb(tail)


if __name__ == '__main__':
    # Script entry point: only the pipeline check is enabled.
    # Disabled alternatives: test_MSR(), test_CV(), test_ILSVRC(),
    # test_ILSVRC_S().
    test_pipeline()

    # Completion marker printed once the pipeline call returns.
    print('helllo')