''' SVM Model. @author: chunk chunkplus@gmail.com 2014 Dec ''' from mfeat import * from mmodel import * from common import * import numpy as np import csv import json import pickle import cv2 from sklearn import svm dict_Train = {} dict_databuf = {} dict_tagbuf = {} dict_featbuf = {} class ModelSVM(ModelBase): def __init__(self): ModelBase.__init__(self) def load(self, file, mode='local'): timer = Timer() INDEX = [] X = [] Y = [] base_dir = '/home/hadoop/data/HeadShoulder/' dir = base_dir + 'Img/' maplst = dir + 'images_map_Train.tsv' dict_tagbuf = {} with open(maplst, 'rb') as tsvfile: tsvfile = csv.reader(tsvfile, delimiter='\t') for line in tsvfile: imgname = line[0] + '.jpg' dict_tagbuf[imgname] = line[1] dir = base_dir + 'Feat/' dict_dataset = {} timer.mark() for path, subdirs, files in os.walk(dir + 'Train/'): for name in files: featpath = os.path.join(path, name) # print featpath with open(featpath, 'rb') as featfile: imgname = path.split('/')[-1] + name.replace('.hog', '.jpg') dict_dataset[imgname] = json.loads(featfile.read()) timer.report() # 5.122354s timer.mark() for imgname, tag in dict_tagbuf.items(): tag = 1 if tag == 'True' else 0 INDEX.append(imgname) X.append(dict_dataset[imgname]) Y.append(tag) timer.report() # 0.047625s return X, Y def model_svm_train_sk(self,X, Y): timer = Timer() timer.mark() lin_clf = svm.LinearSVC() lin_clf.fit(X, Y) with open('res/tmp.model', 'wb') as modelfile: model = pickle.dump(lin_clf, modelfile) timer.report() return lin_clf def model_svm_predict_sk(self,image, clf=None): if clf is None: with open('res/tmp.model', 'rb') as modelfile: clf = pickle.load(modelfile) desc = feat_HOG(image, size=(48, 48)) return clf.predict(desc) def model_svm_train_cv(self,X, Y): svm_params = dict(kernel_type=cv2.SVM_LINEAR, svm_type=cv2.SVM_C_SVC, C=2.67, gamma=5.383) timer = Timer() timer.mark() svm = cv2.SVM() svm.train(X, Y, params=svm_params) svm.save('res/svm_data.model') return svm def model_svm_predict_cv(self,image, svm=None): if svm is None: svm = cv2.SVM() svm.load('res/svm_data.model') desc = feat_HOG(image, size=(48, 48)) desc = np.float32(np.asarray(desc)) return svm.predict(desc) def test_sk(self): X, Y = load_data() clf = model_svm_train_sk(X, Y) for path, subdirs, files in os.walk('data/467/'): for name in files: imgpath = os.path.join(path, name) print name, model_svm_predict_sk(imgpath, clf) print clf.coef_.shape, clf.coef_ def test_cv(self): X, Y = load_data() X, Y = np.float32(np.asarray(X)), np.float32(np.asarray(Y)) print X, Y svm = model_svm_train_cv(X, Y) for path, subdirs, files in os.walk('data/467/'): for name in files: imgpath = os.path.join(path, name) print name, model_svm_predict_cv(imgpath, svm)