'''
SVM Model.

@author: chunk
chunkplus@gmail.com
2014 Dec
'''
import os, sys
from ...mfeat import *
from ...mmodel import *
from ...mspark import SC2
from ...common import *
import numpy as np
import csv
import json
import pickle

from sklearn import svm

package_dir = os.path.dirname(os.path.abspath(__file__))

dict_Train = {}
dict_databuf = {}
dict_tagbuf = {}
dict_featbuf = {}

# File name used to persist / reload the pickled scikit-learn classifier.
# It is a bare relative name, so it resolves against the current working
# directory of the process.
_SKLEARN_MODEL_FILE = 'svm_sklearn.model'


class ModelSVM(ModelBase):
    """SVM model that dispatches to a scikit-learn or a Spark backend.

    The backend is chosen by ``toolset`` ('sklearn' or 'spark'); the public
    entry points ``train``, ``predict`` and ``test`` raise for any other
    value.
    """

    def __init__(self, toolset='sklearn', sc=None):
        """Create the wrapper.

        Parameters
        ----------
        toolset : str
            Backend name, 'sklearn' (default) or 'spark'.
        sc : object or None
            Spark helper used by the 'spark' backend. When None, a default
            ``SC2.Sparker`` is created the first time it is needed.
        """
        ModelBase.__init__(self)
        self.toolset = toolset
        self.sparker = sc
        # ModelBase is defined elsewhere; only supply a default when it did
        # not already set ``model``, so nothing it stored is overwritten.
        if not hasattr(self, 'model'):
            self.model = None

    def _resolve_sklearn_model(self, model=None):
        """Return the classifier to use for a scikit-learn operation.

        Precedence: the explicit ``model`` argument, then the model trained
        on this instance, then the pickle stored in ``svm_sklearn.model``.
        A model loaded from disk is returned but not cached on the instance.
        """
        if model is not None:
            return model
        trained = getattr(self, 'model', None)
        if trained is not None:
            return trained
        print('loading model ...')
        # NOTE(security): unpickling can execute arbitrary code; only load
        # model files written by a trusted training run.
        with open(_SKLEARN_MODEL_FILE, 'rb') as modelfile:
            return pickle.load(modelfile)

    def _ensure_sparker(self):
        """Return the Spark helper, creating the default one if missing."""
        if self.sparker is None:
            self.sparker = SC2.Sparker(host='HPC-server', appname='ImageCV',
                                       master='spark://HPC-server:7077')
        return self.sparker

    def _train_sklearn(self, X, Y):
        """Fit a linear ``svm.SVC`` on (X, Y), pickle it and return it.

        The fitted classifier is written to ``svm_sklearn.model`` and also
        kept on ``self.model``.
        """
        clf = svm.SVC(C=4, kernel='linear', shrinking=False, verbose=True)
        clf.fit(X, Y)
        with open(_SKLEARN_MODEL_FILE, 'wb') as modelfile:
            pickle.dump(clf, modelfile)
        self.model = clf
        return clf

    def _predict_sklearn(self, feat, model=None):
        """Predict class labels for ``feat`` with the scikit-learn model.

        N.B. sklearn.svm.base.predict : Perform classification on samples
        in X.

        Parameters
        ----------
        feat : {array-like, sparse matrix}, shape = [n_samples, n_features]
        model : fitted classifier or None
            When None, the instance's model or the pickled model is used.

        Returns
        -------
        y_pred : array, shape = [n_samples]
            Class labels for samples in ``feat``.
        """
        return self._resolve_sklearn_model(model).predict(feat)

    def __test_sklearn(self, X, Y, model=None):
        """Evaluate the scikit-learn model on labels encoded as 0 / 1.

        Prints the number of positive and negative ground-truth samples.

        Returns
        -------
        tuple
            ``(false_positive_rate, true_positive_rate, accuracy)``. A rate
            is NaN when the corresponding class is absent from ``Y``.
        """
        model = self._resolve_sklearn_model(model)
        # Flatten both sides so the comparisons below are element-wise even
        # when the labels arrive as a column vector.
        y_true = np.asarray(Y).ravel()
        y_pred = np.asarray(self._predict_sklearn(X, model)).ravel()

        positive = int(np.sum(y_true == 1))
        negative = int(np.sum(y_true == 0))
        print('%s %s' % (positive, negative))

        predicted_positive = (y_pred == 1)
        fp = int(np.sum((y_true == 0) & predicted_positive))
        tp = int(np.sum((y_true == 1) & predicted_positive))

        # Guard the divisions: a test set may contain only one class.
        fp_rate = float(fp) / negative if negative else float('nan')
        tp_rate = float(tp) / positive if positive else float('nan')
        return fp_rate, tp_rate, np.mean(y_true == y_pred)

    def _test_sklearn(self, X, Y, model=None):
        """Return ``model.score(X, Y)`` for the resolved scikit-learn model."""
        return self._resolve_sklearn_model(model).score(X, Y)

    def _train_spark(self, X, Y=None):
        """Train through the Spark helper and return the resulting model.

        The value returned by ``train_svm`` is stored on ``self.model`` and
        returned (the exact type depends on the Spark helper).
        """
        self.model = self._ensure_sparker().train_svm(X, Y)
        return self.model

    def _predict_spark(self, feat, model=None):
        """Delegate prediction to the Spark helper's ``predict_svm``."""
        return self._ensure_sparker().predict_svm(feat, model)

    def _test_spark(self, X, Y, model=None):
        """Delegate evaluation to the Spark helper's ``test_svm``."""
        return self._ensure_sparker().test_svm(X, Y, model)

    def train(self, X, Y=None):
        """Train with the configured backend and return the trained model.

        Raises
        ------
        ValueError
            If ``toolset`` is neither 'sklearn' nor 'spark'.
        """
        if self.toolset == 'sklearn':
            return self._train_sklearn(X, Y)
        elif self.toolset == 'spark':
            return self._train_spark(X, Y)
        else:
            raise ValueError("Unknown toolset!")

    def predict(self, feat, model=None):
        """Predict labels for ``feat`` with the configured backend.

        Raises
        ------
        ValueError
            If ``toolset`` is neither 'sklearn' nor 'spark'.
        """
        if self.toolset == 'sklearn':
            return self._predict_sklearn(feat, model)
        elif self.toolset == 'spark':
            return self._predict_spark(feat, model)
        else:
            raise ValueError("Unknown toolset!")

    def test(self, X, Y=None, model=None):
        """Evaluate on (X, Y) with the configured backend.

        For 'sklearn' this returns the
        ``(false_positive_rate, true_positive_rate, accuracy)`` tuple; for
        'spark' it returns whatever the Spark helper's ``test_svm`` returns.

        Raises
        ------
        ValueError
            If ``toolset`` is neither 'sklearn' nor 'spark'.
        """
        if self.toolset == 'sklearn':
            return self.__test_sklearn(X, Y, model)
        elif self.toolset == 'spark':
            return self._test_spark(X, Y, model)
        else:
            raise ValueError("Unknown toolset!")