# SVM.py 5.44 KB
'''
SVM Model.

@author: chunk
chunkplus@gmail.com
2014 Dec
'''
from ...mfeat import *
from ...mmodel import *
from ...mmodel.svm.svmutil import *
from ...mspark import SC
from ...common import *

import numpy as np
import pickle
from sklearn import svm

# Module-level scratch dictionaries. Nothing in the code visible in this file
# reads or writes them; presumably they are shared buffers/caches populated by
# other modules -- TODO confirm against callers before removing or renaming.
dict_Train = {}
dict_databuf = {}
dict_tagbuf = {}
dict_featbuf = {}


class ModelSVM(ModelBase):
    """SVM classifier with three interchangeable backends.

    The backend is selected by ``toolset``:

    * ``'sklearn'`` -- ``sklearn.svm.SVC``
    * ``'libsvm'``  -- the ``svmutil`` bindings imported at the top of the file
    * ``'spark'``   -- delegated to a ``SC.Sparker`` instance

    ``train``/``predict``/``test`` dispatch to the matching backend method.
    The most recently trained model is kept on ``self.model``; the sklearn
    and libsvm backends additionally persist it under ``res/``.
    """

    # File the libsvm backend saves to and reloads from.
    LIBSVM_MODEL_PATH = 'res/svm_libsvm.model'
    # File the sklearn backend pickles the trained classifier to.
    SKLEARN_MODEL_PATH = 'res/svm_sk.model'
    # File name the sklearn loaders used to look for. It never matched what
    # training writes, so it is kept only as a fallback for files that were
    # placed there by hand.
    SKLEARN_LEGACY_MODEL_PATH = 'res/svm_sklearn.model'

    def __init__(self, toolset='sklearn', sc=None):
        """Create the model wrapper.

        Parameters
        ----------
        toolset : str
            One of ``'sklearn'``, ``'libsvm'`` or ``'spark'``.
        sc : object or None
            Object used by the spark backend (stored as ``self.sparker``).
            When None, one is created lazily on first spark use.
        """
        ModelBase.__init__(self)
        self.toolset = toolset
        self.sparker = sc

    # ------------------------------------------------------------------
    # shared helpers
    # ------------------------------------------------------------------
    def _cached_model(self):
        """Return the in-memory model, or None when there is none.

        ``getattr`` is used because this file does not show whether
        ``ModelBase.__init__`` defines ``self.model``.
        """
        return getattr(self, 'model', None)

    def _resolve_libsvm_model(self, model):
        """Pick the libsvm model: explicit argument, then cache, then disk."""
        if model is not None:
            return model
        cached = self._cached_model()
        if cached is not None:
            return cached
        print('loading model ...')
        return svm_load_model(self.LIBSVM_MODEL_PATH)

    def _resolve_sklearn_model(self, model):
        """Pick the sklearn model: explicit argument, then cache, then disk.

        Raises
        ------
        IOError
            If neither the current nor the legacy model file can be opened.
        """
        if model is not None:
            return model
        cached = self._cached_model()
        if cached is not None:
            return cached
        print('loading model ...')
        try:
            modelfile = open(self.SKLEARN_MODEL_PATH, 'rb')
        except IOError:
            # Fall back to the name older loaders expected.
            modelfile = open(self.SKLEARN_LEGACY_MODEL_PATH, 'rb')
        with modelfile:
            # NOTE: unpickling can execute arbitrary code; only load model
            # files that this project produced.
            return pickle.load(modelfile)

    def _ensure_sparker(self):
        """Return ``self.sparker``, creating the default one if unset."""
        if self.sparker is None:
            self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV',
                                      master='spark://HPC-server:7077')
        return self.sparker

    # ------------------------------------------------------------------
    # libsvm backend
    # ------------------------------------------------------------------
    def _train_libsvm(self, X, Y):
        """Train with libsvm, save the model to disk and return it."""
        X, Y = list(X), list(Y)
        prob = svm_problem(Y, X)
        # libsvm options: -t 0 linear kernel, -c 4 cost, -b 1 probability
        # estimates, -h 0 shrinking heuristics off.
        param = svm_parameter('-t 0 -c 4 -b 1 -h 0')
        m = svm_train(prob, param)
        svm_save_model(self.LIBSVM_MODEL_PATH, m)

        self.model = m
        return m

    def _predict_libsvm(self, feat, model=None):
        """Predict the label of one feature vector.

        Returns the label list produced by ``svm_predict`` for the single
        sample ``feat``.
        """
        model = self._resolve_libsvm_model(model)

        # svm_predict expects a list of samples plus a same-length list of
        # (here dummy) true labels.
        samples = [list(feat)]
        label, _, _ = svm_predict([0] * len(samples), samples, model)
        return label

    def _test_libsvm(self, X, Y, model=None):
        """Evaluate on (X, Y); return the accuracy item from ``svm_predict``."""
        model = self._resolve_libsvm_model(model)

        X, Y = list(X), list(Y)
        _, p_acc, _ = svm_predict(Y, X, model)
        return p_acc

    # ------------------------------------------------------------------
    # sklearn backend
    # ------------------------------------------------------------------
    def _train_sklearn(self, X, Y):
        """Fit a linear ``SVC``, pickle it to disk and return it."""
        clf = svm.SVC(C=4, kernel='linear', shrinking=False, verbose=True)
        clf.fit(X, Y)
        with open(self.SKLEARN_MODEL_PATH, 'wb') as modelfile:
            pickle.dump(clf, modelfile)

        self.model = clf
        return clf

    def _predict_sklearn(self, feat, model=None):
        """Classify the samples in ``feat``.

        Parameters
        ----------
        feat : {array-like, sparse matrix}, shape = [n_samples, n_features]

        Returns
        -------
        y_pred : array, shape = [n_samples]
            Class labels as returned by ``model.predict``.
        """
        model = self._resolve_sklearn_model(model)
        return model.predict(feat)

    def __test_sklearn(self, X, Y, model=None):
        """Evaluate on (X, Y) with 0/1 labels.

        Returns
        -------
        (fpr, tpr, accuracy) : tuple
            False-positive rate, true-positive rate and mean accuracy.
            A rate is NaN when ``Y`` contains no sample of the class it is
            normalised by.
        """
        model = self._resolve_sklearn_model(model)

        truth = np.asarray(Y)
        predicted = np.asarray(self._predict_sklearn(X, model))

        is_pos = truth == 1
        is_neg = truth == 0
        positive, negative = np.sum(is_pos), np.sum(is_neg)
        print('%s %s' % (positive, negative))

        flagged = predicted == 1
        fp = np.sum(flagged & is_neg)
        tp = np.sum(flagged & is_pos)

        nan = float('nan')
        fpr = float(fp) / float(negative) if negative else nan
        tpr = float(tp) / float(positive) if positive else nan
        return fpr, tpr, np.mean(truth == predicted)

    def _test_sklearn(self, X, Y, model=None):
        """Evaluate on (X, Y); return ``model.score(X, Y)``."""
        model = self._resolve_sklearn_model(model)
        return model.score(X, Y)

    # ------------------------------------------------------------------
    # spark backend
    # ------------------------------------------------------------------
    def _train_spark(self, X, Y=None):
        """Train through the sparker; store and return the resulting model."""
        self.model = self._ensure_sparker().train_svm(X, Y)
        return self.model

    def _predict_spark(self, feat, model=None):
        """Return ``sparker.predict_svm(feat, model)``."""
        return self._ensure_sparker().predict_svm(feat, model)

    def _test_spark(self, X, Y, model=None):
        """Return ``sparker.test_svm(X, Y, model)``."""
        return self._ensure_sparker().test_svm(X, Y, model)

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def train(self, X, Y=None):
        """Train with the configured toolset and return the trained model.

        Raises
        ------
        ValueError
            If ``self.toolset`` is not a known backend.
        """
        if self.toolset == 'sklearn':
            return self._train_sklearn(X, Y)
        elif self.toolset == 'libsvm':
            return self._train_libsvm(X, Y)
        elif self.toolset == 'spark':
            return self._train_spark(X, Y)
        else:
            raise ValueError("Unknown toolset!")

    def predict(self, feat, model=None):
        """Predict with the configured toolset.

        ``model`` overrides the cached/persisted model when given.

        Raises
        ------
        ValueError
            If ``self.toolset`` is not a known backend.
        """
        if self.toolset == 'sklearn':
            return self._predict_sklearn(feat, model)
        elif self.toolset == 'libsvm':
            return self._predict_libsvm(feat, model)
        elif self.toolset == 'spark':
            return self._predict_spark(feat, model)
        else:
            raise ValueError("Unknown toolset!")

    def test(self, X, Y=None, model=None):
        """Evaluate with the configured toolset.

        The result shape depends on the backend: sklearn yields
        ``(fpr, tpr, accuracy)``, libsvm yields the accuracy item returned by
        ``svm_predict``, spark yields whatever ``sparker.test_svm`` returns.

        Raises
        ------
        ValueError
            If ``self.toolset`` is not a known backend.
        """
        if self.toolset == 'sklearn':
            return self.__test_sklearn(X, Y, model)
        elif self.toolset == 'libsvm':
            return self._test_libsvm(X, Y, model)
        elif self.toolset == 'spark':
            return self._test_spark(X, Y, model)
        else:
            raise ValueError("Unknown toolset!")