# SVM.py 7.79 KB
'''
SVM Model.

@author: chunk
chunkplus@gmail.com
2014 Dec
'''
import os, sys
# from ...mfeat import *
from ...mmodel import *
# from ...mmodel.svm.svmutil import *
from ...mspark import SC
from ...common import *

import numpy as np
import csv
import json
import pickle
# import cv2
from sklearn import svm

# Absolute directory containing this module. The sklearn model file is
# located relative to it ('../../res/svm_sklearn.model').
package_dir = os.path.dirname(os.path.abspath(__file__))

# Module-level dictionaries. Nothing in the code visible in this file reads or
# writes them.
# NOTE(review): presumably shared buffers used by other modules that import
# this one -- confirm before removing.
dict_Train = {}
dict_databuf = {}
dict_tagbuf = {}
dict_featbuf = {}


class ModelSVM(ModelBase):
    """SVM classifier front-end that dispatches to a selectable backend.

    Two backends are active: ``'sklearn'`` (an in-process
    ``sklearn.svm.SVC``) and ``'spark'`` (delegating to an ``SC.Sparker``
    object). The public entry points are :meth:`train`, :meth:`predict` and
    :meth:`test`; they raise ``ValueError`` for any other ``toolset``.
    """

    def __init__(self, toolset='sklearn', sc=None):
        """Create the wrapper.

        Parameters
        ----------
        toolset : str
            Backend name, ``'sklearn'`` or ``'spark'``.
        sc : object, optional
            Existing Sparker object for the spark backend. When omitted, one
            is created the first time the spark backend is used.
        """
        ModelBase.__init__(self)
        self.toolset = toolset
        self.sparker = sc

    # ------------------------------------------------------------------
    # sklearn backend
    # ------------------------------------------------------------------

    def _sklearn_model_path(self):
        """Return the path of the pickled sklearn model file."""
        return os.path.join(package_dir, '../..', 'res/svm_sklearn.model')

    def _resolve_sklearn_model(self, model=None):
        """Pick the sklearn model to use.

        Order of preference: the explicit ``model`` argument, then the model
        cached on ``self.model``, then the model unpickled from disk. A model
        loaded from disk is returned but not cached on the instance.
        """
        if model is not None:
            return model

        # ModelBase is not visible from this file, so do not assume that
        # ``self.model`` already exists before a training call has set it.
        cached = getattr(self, 'model', None)
        if cached is not None:
            return cached

        print('loading model ...')
        # SECURITY: pickle.load can execute arbitrary code; the model file
        # must come from a trusted source.
        with open(self._sklearn_model_path(), 'rb') as modelfile:
            return pickle.load(modelfile)

    def _train_sklearn(self, X, Y):
        """Fit a linear ``SVC`` on ``(X, Y)``, persist it and cache it.

        The fitted classifier is pickled to the sklearn model file, stored on
        ``self.model`` and returned.
        """
        clf = svm.SVC(C=4, kernel='linear', shrinking=False, verbose=True)
        clf.fit(X, Y)
        with open(self._sklearn_model_path(), 'wb') as modelfile:
            pickle.dump(clf, modelfile)

        self.model = clf

        return clf

    def _predict_sklearn(self, feat, model=None):
        """Classify the samples in ``feat`` with the sklearn model.

        N.B. sklearn's ``predict`` expects ``feat`` to be array-like (or a
        sparse matrix) of shape ``[n_samples, n_features]`` and returns the
        class labels as an array of shape ``[n_samples]``.

        Parameters
        ----------
        feat : array-like
            Samples to classify.
        model : object, optional
            Model to use; see :meth:`_resolve_sklearn_model` for the fallback.
        """
        return self._resolve_sklearn_model(model).predict(feat)

    def __test_sklearn(self, X, Y, model=None):
        """Evaluate the sklearn model on binary labels (0 / 1).

        Prints the number of positive and negative samples in ``Y`` and
        returns ``(false_positive_rate, true_positive_rate, accuracy)``.
        Labels are flattened to 1-D before comparison. A rate whose
        denominator is zero (no negative, respectively no positive, samples)
        is returned as NaN.
        """
        model = self._resolve_sklearn_model(model)

        y_pred = np.asarray(self._predict_sklearn(X, model)).ravel()
        y_true = np.asarray(Y).ravel()

        positive = np.sum(y_true == 1)
        negative = np.sum(y_true == 0)
        print('%s %s' % (positive, negative))

        predicted_positive = (y_pred == 1)
        fp = np.sum((y_true == 0) & predicted_positive)
        tp = np.sum((y_true == 1) & predicted_positive)

        nan = float('nan')
        fpr = float(fp) / float(negative) if negative else nan
        tpr = float(tp) / float(positive) if positive else nan

        return fpr, tpr, np.mean(y_true == y_pred)

    def _test_sklearn(self, X, Y, model=None):
        """Return ``model.score(X, Y)`` for the sklearn model.

        Not used by :meth:`test`, which calls the rate-based evaluation
        instead.
        """
        return self._resolve_sklearn_model(model).score(X, Y)

    # ------------------------------------------------------------------
    # Disabled backends (libsvm / OpenCV), kept as commented-out reference.
    # ------------------------------------------------------------------

    # def _train_libsvm(self, X, Y):
    #     X, Y = list(X), list(Y)
    #     # X, Y = [float(i) for i in X], [float(i) for i in Y]
    #     prob = svm_problem(Y, X)
    #     param = svm_parameter('-t 0 -c 4 -b 1 -h 0')
    #     # param = svm_parameter(kernel_type=LINEAR, C=10)
    #     m = svm_train(prob, param)
    #     svm_save_model(os.path.join(package_dir, '../..', 'res/svm_libsvm.model'), m)
    #
    #     self.model = m
    #
    #     return m
    #
    # def _predict_libsvm(self, feat, model=None):
    #     if model is None:
    #         if self.model != None:
    #             model = self.model
    #         else:
    #             print 'loading model ...'
    #             model = svm_load_model(os.path.join(package_dir, '../..', 'res/svm_libsvm.model'))
    #
    #     feat = [list(feat)]
    #     # print len(feat),[0] * len(feat)
    #     label, _, _ = svm_predict([0] * len(feat), feat, model)
    #     return label
    #
    #
    # def _test_libsvm(self, X, Y, model=None):
    #     if model is None:
    #         if self.model != None:
    #             model = self.model
    #         else:
    #             print 'loading model ...'
    #             model = svm_load_model(os.path.join(package_dir, '../..', 'res/svm_libsvm.model'))
    #
    #     X, Y = list(X), list(Y)
    #     p_labs, p_acc, p_vals = svm_predict(Y, X, model)
    #     # ACC, MSE, SCC = evaluations(Y, p_labs)
    #
    #     return p_acc

    # def _train_opencv(self, X, Y):
    # svm_params = dict(kernel_type=cv2.SVM_LINEAR,
    #                       svm_type=cv2.SVM_C_SVC,
    #                       C=4)
    #
    #     X, Y = np.array(X, dtype=np.float32), np.array(Y, dtype=np.float32)
    #
    #     svm = cv2.SVM()
    #     svm.train(X, Y, params=svm_params)
    #     svm.save(os.path.join(package_dir, '../..', 'res/svm_opencv.model'))
    #
    #     self.model = svm
    #
    #     return svm
    #
    #
    # def _predict_opencv(self, feat, model=None):
    #     if model is None:
    #         if self.model != None:
    #             model = self.model
    #         else:
    #             print 'loading model ...'
    #             with open(os.path.join(package_dir, '../..', 'res/svm_opencv.model'), 'rb') as modelfile:
    #                 model = pickle.load(modelfile)
    #     feat = np.array(feat, dtype=np.float32)
    #
    #     return model.predict(feat)
    #
    #
    # def _test_opencv(self, X, Y, model=None):
    #     if model is None:
    #         if self.model != None:
    #             model = self.model
    #         else:
    #             print 'loading model ...'
    #             with open(os.path.join(package_dir, '../..', 'res/svm_opencv.model'), 'rb') as modelfile:
    #                 model = pickle.load(modelfile)
    #
    #     X, Y = np.array(X, dtype=np.float32), np.array(Y, dtype=np.float32)
    #
    #     # result_Y = np.array([self._predict_opencv(x, model) for x in X])
    #     result_Y = np.array(model.predict_all(X)).ravel()
    #     # print X[0]
    #     # print result_Y,Y
    #     return np.mean(Y == result_Y)

    # ------------------------------------------------------------------
    # spark backend
    # ------------------------------------------------------------------

    def _get_sparker(self):
        """Return the Sparker object, creating the default one on first use."""
        if self.sparker is None:
            self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077')
        return self.sparker

    def _train_spark(self, X, Y=None):
        """Train through the Sparker, cache the result on ``self.model`` and return it."""
        self.model = self._get_sparker().train_svm(X, Y)

        # Return the trained model, not the imported sklearn ``svm`` module.
        return self.model

    def _predict_spark(self, feat, model=None):
        """Forward ``feat`` and ``model`` to the Sparker's ``predict_svm``.

        ``model`` is passed through unchanged, including ``None``.
        NOTE(review): unlike the sklearn backend, ``self.model`` is not
        substituted when ``model`` is None -- confirm that Sparker handles a
        None model itself.
        """
        return self._get_sparker().predict_svm(feat, model)

    def _test_spark(self, X, Y, model=None):
        """Forward ``X``, ``Y`` and ``model`` to the Sparker's ``test_svm``.

        ``model`` is passed through unchanged, including ``None``.
        """
        return self._get_sparker().test_svm(X, Y, model)

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------

    def train(self, X, Y=None):
        """Train with the configured backend and return the trained model.

        Raises
        ------
        ValueError
            If ``self.toolset`` is not a supported backend name.
        """
        if self.toolset == 'sklearn':
            return self._train_sklearn(X, Y)
        # elif self.toolset == 'opencv':
        #     return self._train_opencv(X, Y)
        # elif self.toolset == 'libsvm':
        #     return self._train_libsvm(X, Y)
        elif self.toolset == 'spark':
            return self._train_spark(X, Y)
        else:
            raise ValueError("Unknown toolset!")

    def predict(self, feat, model=None):
        """Predict labels for ``feat`` with the configured backend.

        Raises
        ------
        ValueError
            If ``self.toolset`` is not a supported backend name.
        """
        if self.toolset == 'sklearn':
            return self._predict_sklearn(feat, model)
        # elif self.toolset == 'opencv':
        #     return self._predict_opencv(feat, model)
        # elif self.toolset == 'libsvm':
        #     return self._predict_libsvm(feat, model)
        elif self.toolset == 'spark':
            return self._predict_spark(feat, model)
        else:
            raise ValueError("Unknown toolset!")

    def test(self, X, Y=None, model=None):
        """Evaluate on ``(X, Y)`` with the configured backend.

        For ``'sklearn'`` the result is the tuple
        ``(false_positive_rate, true_positive_rate, accuracy)``; for
        ``'spark'`` it is whatever the Sparker's ``test_svm`` returns.

        Raises
        ------
        ValueError
            If ``self.toolset`` is not a supported backend name.
        """
        if self.toolset == 'sklearn':
            return self.__test_sklearn(X, Y, model)
        # elif self.toolset == 'opencv':
        #     return self._test_opencv(X, Y, model)
        # elif self.toolset == 'libsvm':
        #     return self._test_libsvm(X, Y, model)
        elif self.toolset == 'spark':
            return self._test_spark(X, Y, model)
        else:
            raise ValueError("Unknown toolset!")