Blame view

mmodel/svm/SVM2.py 4.14 KB
e3e7e73a   Chunk   spider standalone...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
'''
SVM Model.

@author: chunk
chunkplus@gmail.com
2014 Dec
'''
import os, sys
from ...mfeat import *
from ...mmodel import *
from ...mspark import SC2
from ...common import *

import numpy as np
import csv
import json
import pickle
from sklearn import svm

# Absolute path of the directory that contains this module; the pickled
# scikit-learn model path used by ModelSVM is built relative to it.
package_dir = os.path.split(os.path.abspath(__file__))[0]

# Module-level dictionaries, all created empty at import time.
# NOTE(review): nothing in the visible part of this module reads or writes
# them -- presumably they are shared buffers used by importers; verify.
dict_Train = dict()
dict_databuf = dict()
dict_tagbuf = dict()
dict_featbuf = dict()


class ModelSVM(ModelBase):
    """SVM model with interchangeable scikit-learn and Spark back-ends.

    ``toolset`` selects the back-end used by the public ``train``,
    ``predict`` and ``test`` entry points:

    * ``'sklearn'`` -- fits an ``sklearn.svm.SVC`` in-process and pickles it
      to ``res/svm_sklearn.model`` (resolved relative to ``package_dir``).
    * ``'spark'`` -- delegates every call to an ``SC2.Sparker`` instance.

    Any other value makes the public entry points raise.
    """

    def __init__(self, toolset='sklearn', sc=None):
        """Create the model wrapper.

        Parameters
        ----------
        toolset : str
            Back-end name, ``'sklearn'`` or ``'spark'``.
        sc : SC2.Sparker or None
            Existing Spark helper to reuse. When ``None`` one is created
            lazily the first time a Spark operation is requested.
        """
        ModelBase.__init__(self)
        self.toolset = toolset
        self.sparker = sc

    # ------------------------------------------------------------------
    # scikit-learn back-end
    # ------------------------------------------------------------------
    def _sklearn_model_path(self):
        """Return the location of the pickled scikit-learn model file."""
        return os.path.join(package_dir, '../..', 'res/svm_sklearn.model')

    def _resolve_sklearn_model(self, model=None):
        """Pick the scikit-learn model to use.

        Order of preference: the explicit ``model`` argument, then the model
        stored on this instance, then the model pickled on disk.

        Raises
        ------
        IOError / OSError
            If no model is available in memory and the model file cannot be
            opened.
        """
        if model is not None:
            return model

        # getattr: do not assume the base class defines ``model``.
        in_memory = getattr(self, 'model', None)
        if in_memory is not None:
            return in_memory

        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print('loading model ...')
        # NOTE(security): pickle.load can execute arbitrary code; the model
        # file must come from a trusted source.
        with open(self._sklearn_model_path(), 'rb') as modelfile:
            return pickle.load(modelfile)

    def _train_sklearn(self, X, Y):
        """Fit a linear ``SVC`` on ``(X, Y)``, pickle it and remember it.

        Returns
        -------
        The fitted ``sklearn.svm.SVC`` instance (also stored as
        ``self.model``).
        """
        clf = svm.SVC(C=4, kernel='linear', shrinking=False, verbose=True)
        clf.fit(X, Y)

        with open(self._sklearn_model_path(), 'wb') as modelfile:
            pickle.dump(clf, modelfile)

        self.model = clf
        return clf

    def _predict_sklearn(self, feat, model=None):
        """Classify the samples in ``feat``.

        N.B. ``sklearn.svm`` ``predict``:
            Perform classification on samples in X.

            Parameters
            ----------
            X : {array-like, sparse matrix}, shape = [n_samples, n_features]

            Returns
            -------
            y_pred : array, shape = [n_samples]
                Class labels for samples in X.

        When ``model`` is ``None`` the in-memory model is used, falling back
        to the pickled model on disk.
        """
        return self._resolve_sklearn_model(model).predict(feat)

    def __test_sklearn(self, X, Y, model=None):
        """Evaluate a 0/1 classifier on ``(X, Y)``.

        Parameters
        ----------
        X : array-like
            Samples passed to the model's ``predict``.
        Y : array-like
            True labels; ``1`` counts as positive and ``0`` as negative.
        model : fitted estimator or None
            Model to evaluate; resolved like in ``_predict_sklearn``.

        Returns
        -------
        tuple
            ``(false_positive_rate, true_positive_rate, accuracy)``. A rate
            whose denominator is zero (no negative / no positive samples in
            ``Y``) is reported as ``nan``.

        Raises
        ------
        TypeError
            If ``Y`` is ``None``.
        """
        if Y is None:
            raise TypeError("Y (true labels) is required to test a sklearn model")

        # Flatten both label vectors so a column-vector Y cannot broadcast
        # against the 1-D predictions.
        y_true = np.asarray(Y).ravel()
        y_pred = np.asarray(self._predict_sklearn(X, model)).ravel()

        actual_pos = (y_true == 1)
        actual_neg = (y_true == 0)
        positive = int(np.sum(actual_pos))
        negative = int(np.sum(actual_neg))
        # Same text as the Python 2 statement ``print positive, negative``.
        print('%s %s' % (positive, negative))

        predicted_pos = (y_pred == 1)
        fp = int(np.sum(actual_neg & predicted_pos))
        tp = int(np.sum(actual_pos & predicted_pos))

        undefined = float('nan')
        fp_rate = float(fp) / negative if negative else undefined
        tp_rate = float(tp) / positive if positive else undefined
        return fp_rate, tp_rate, np.mean(y_true == y_pred)

    def _test_sklearn(self, X, Y, model=None):
        """Return the model's own ``score(X, Y)``.

        ``model`` is resolved like in ``_predict_sklearn``.
        """
        return self._resolve_sklearn_model(model).score(X, Y)

    # ------------------------------------------------------------------
    # Spark back-end
    # ------------------------------------------------------------------
    def _ensure_sparker(self):
        """Return the Spark helper, creating the default one if needed."""
        if self.sparker is None:
            self.sparker = SC2.Sparker(host='HPC-server',
                                       appname='ImageCV',
                                       master='spark://HPC-server:7077')
        return self.sparker

    def _train_spark(self, X, Y=None):
        """Train through ``Sparker.train_svm`` and remember the result.

        Returns
        -------
        Whatever ``train_svm`` returned (also stored as ``self.model``).
        """
        self.model = self._ensure_sparker().train_svm(X, Y)
        return self.model

    def _predict_spark(self, feat, model=None):
        """Delegate prediction to ``Sparker.predict_svm``.

        ``model`` is forwarded unchanged, including ``None``.
        """
        return self._ensure_sparker().predict_svm(feat, model)

    def _test_spark(self, X, Y, model=None):
        """Delegate evaluation to ``Sparker.test_svm``.

        ``model`` is forwarded unchanged, including ``None``.
        """
        return self._ensure_sparker().test_svm(X, Y, model)

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------
    def train(self, X, Y=None):
        """Train with the configured toolset and return the trained model.

        Raises
        ------
        ValueError
            If ``self.toolset`` is neither ``'sklearn'`` nor ``'spark'``.
        """
        if self.toolset == 'sklearn':
            return self._train_sklearn(X, Y)
        elif self.toolset == 'spark':
            return self._train_spark(X, Y)
        else:
            raise ValueError("Unknown toolset!")

    def predict(self, feat, model=None):
        """Predict labels for ``feat`` with the configured toolset.

        Raises
        ------
        ValueError
            If ``self.toolset`` is neither ``'sklearn'`` nor ``'spark'``.
        """
        if self.toolset == 'sklearn':
            return self._predict_sklearn(feat, model)
        elif self.toolset == 'spark':
            return self._predict_spark(feat, model)
        else:
            raise ValueError("Unknown toolset!")

    def test(self, X, Y=None, model=None):
        """Evaluate on ``(X, Y)`` with the configured toolset.

        For ``'sklearn'`` this returns the tuple
        ``(false_positive_rate, true_positive_rate, accuracy)``; for
        ``'spark'`` it returns whatever ``Sparker.test_svm`` returns.

        Raises
        ------
        ValueError
            If ``self.toolset`` is neither ``'sklearn'`` nor ``'spark'``.
        """
        if self.toolset == 'sklearn':
            return self.__test_sklearn(X, Y, model)
        elif self.toolset == 'spark':
            return self._test_spark(X, Y, model)
        else:
            raise ValueError("Unknown toolset!")