# SVM.py 3.5 KB
'''
SVM Model.

@author: chunk
chunkplus@gmail.com
2014 Dec
'''
from mfeat import *
from mmodel import *

from common import *
import numpy as np
import csv
import json
import pickle
import cv2
from sklearn import svm


# Module-level scratch dictionaries, all created empty at import time.
# Nothing in the visible part of this file reads them: ModelSVM.load builds
# its own local `dict_tagbuf`, which shadows the module-level one.
# NOTE(review): presumably kept for other modules or interactive sessions --
# confirm before removing.
dict_Train = dict()
dict_databuf = dict()
dict_tagbuf = dict()
dict_featbuf = dict()


class ModelSVM(ModelBase):
    '''
    Linear SVM classifier with two interchangeable back ends:
    scikit-learn (``*_sk`` methods) and OpenCV 2.x (``*_cv`` methods).

    Training data is read from a fixed directory on local disk (see ``load``);
    trained models are persisted under the relative ``res/`` directory.
    '''

    def __init__(self):
        ModelBase.__init__(self)

    def load(self, file, mode='local'):
        '''
        Load the training feature vectors and their binary labels.

        Reads ``Img/images_map_Train.tsv`` (tab-separated; column 0 is the
        image name without extension, column 1 is the tag) and every feature
        file found under ``Feat/Train/``, both below
        ``/home/hadoop/data/HeadShoulder/``. Each feature file is parsed as
        JSON.

        :param file: not used by this implementation; kept so the signature
            stays compatible with existing callers.
        :param mode: not used by this implementation.
        :return: ``(X, Y)`` -- ``X`` is a list of the parsed feature values,
            ``Y`` the parallel list of labels (1 when the tag is the string
            ``'True'``, 0 otherwise).
        :raises KeyError: if an image listed in the map has no feature file.
        '''
        timer = Timer()
        X = []
        Y = []

        base_dir = '/home/hadoop/data/HeadShoulder/'

        # image name -> raw tag string, as listed in the training map.
        img_dir = base_dir + 'Img/'
        maplst = img_dir + 'images_map_Train.tsv'
        dict_tagbuf = {}
        # 'rb' is what the Python 2 csv module expects.
        with open(maplst, 'rb') as tsvfile:
            for line in csv.reader(tsvfile, delimiter='\t'):
                imgname = line[0] + '.jpg'
                dict_tagbuf[imgname] = line[1]

        # image name -> parsed feature vector.
        feat_dir = base_dir + 'Feat/'
        dict_dataset = {}

        timer.mark()
        for path, subdirs, files in os.walk(feat_dir + 'Train/'):
            for name in files:
                featpath = os.path.join(path, name)
                with open(featpath, 'rb') as featfile:
                    # The key is the name of the containing directory (empty
                    # for files directly under 'Train/', because that path
                    # ends with '/') followed by the file name with '.hog'
                    # replaced by '.jpg'.
                    imgname = path.split('/')[-1] + name.replace('.hog', '.jpg')
                    dict_dataset[imgname] = json.loads(featfile.read())
        timer.report()  # 5.122354s (original author's measurement)

        timer.mark()
        for imgname, tag in dict_tagbuf.items():
            X.append(dict_dataset[imgname])
            Y.append(1 if tag == 'True' else 0)
        timer.report()  # 0.047625s (original author's measurement)

        return X, Y

    def model_svm_train_sk(self, X, Y):
        '''
        Fit a scikit-learn ``LinearSVC`` and pickle it to ``res/tmp.model``.

        :param X: training samples, passed straight to ``LinearSVC.fit``.
        :param Y: training labels, passed straight to ``LinearSVC.fit``.
        :return: the fitted ``LinearSVC`` instance.
        '''
        timer = Timer()
        timer.mark()
        lin_clf = svm.LinearSVC()
        lin_clf.fit(X, Y)
        with open('res/tmp.model', 'wb') as modelfile:
            pickle.dump(lin_clf, modelfile)

        timer.report()

        return lin_clf

    def model_svm_predict_sk(self, image, clf=None):
        '''
        Classify one image with a scikit-learn classifier.

        :param image: forwarded unchanged to ``feat_HOG``; the test methods
            of this class pass an image file path.
        :param clf: fitted classifier; when ``None`` the model pickled at
            ``res/tmp.model`` is loaded.
        :return: whatever ``clf.predict`` returns for the HOG descriptor.
        '''
        if clf is None:
            # NOTE: unpickling executes arbitrary code -- only load model
            # files that this program wrote itself.
            with open('res/tmp.model', 'rb') as modelfile:
                clf = pickle.load(modelfile)
        desc = feat_HOG(image, size=(48, 48))
        return clf.predict(desc)

    def model_svm_train_cv(self, X, Y):
        '''
        Train an OpenCV ``cv2.SVM`` (linear kernel, C-SVC) and save it to
        ``res/svm_data.model``.

        :param X: training samples; ``test_cv`` passes a float32 numpy array.
        :param Y: training labels; ``test_cv`` passes a float32 numpy array.
        :return: the trained ``cv2.SVM`` instance.
        '''
        svm_params = dict(kernel_type=cv2.SVM_LINEAR,
                          svm_type=cv2.SVM_C_SVC,
                          C=2.67, gamma=5.383)

        timer = Timer()
        timer.mark()
        # Named cv_svm so it does not shadow the module-level sklearn `svm`.
        cv_svm = cv2.SVM()
        cv_svm.train(X, Y, params=svm_params)
        cv_svm.save('res/svm_data.model')
        timer.report()

        return cv_svm

    def model_svm_predict_cv(self, image, svm=None):
        '''
        Classify one image with an OpenCV SVM.

        :param image: forwarded unchanged to ``feat_HOG``; the test methods
            of this class pass an image file path.
        :param svm: trained ``cv2.SVM``; when ``None`` the model saved at
            ``res/svm_data.model`` is loaded.
        :return: whatever ``svm.predict`` returns for the float32 descriptor.
        '''
        if svm is None:
            svm = cv2.SVM()
            svm.load('res/svm_data.model')

        desc = feat_HOG(image, size=(48, 48))
        desc = np.float32(np.asarray(desc))
        return svm.predict(desc)

    def test_sk(self):
        '''
        Train with scikit-learn, then print a prediction for every file
        under ``data/467/`` followed by the learned coefficients.
        '''
        # load() ignores its `file` argument, so None is passed.
        X, Y = self.load(None)

        clf = self.model_svm_train_sk(X, Y)
        for path, subdirs, files in os.walk('data/467/'):
            for name in files:
                imgpath = os.path.join(path, name)
                # A single pre-formatted argument prints the same text on
                # Python 2 and Python 3.
                print('%s %s' % (name, self.model_svm_predict_sk(imgpath, clf)))
        print('%s %s' % (clf.coef_.shape, clf.coef_))

    def test_cv(self):
        '''
        Train with OpenCV, then print a prediction for every file under
        ``data/467/``.
        '''
        # load() ignores its `file` argument, so None is passed.
        X, Y = self.load(None)
        X, Y = np.float32(np.asarray(X)), np.float32(np.asarray(Y))
        print('%s %s' % (X, Y))
        cv_svm = self.model_svm_train_cv(X, Y)
        for path, subdirs, files in os.walk('data/467/'):
            for name in files:
                imgpath = os.path.join(path, name)
                print('%s %s' % (name, self.model_svm_predict_cv(imgpath, cv_svm)))