e3e7e73a
Chunk
spider standalone...
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
'''
SVM Model.
@author: chunk
chunkplus@gmail.com
2014 Dec
'''
import os, sys
from ...mfeat import *
from ...mmodel import *
from ...mspark import SC2
from ...common import *
import numpy as np
import csv
import json
import pickle
from sklearn import svm
# Absolute path of the directory that contains this module; resource files
# are located relative to it.
package_dir = os.path.split(os.path.abspath(__file__))[0]

# Module-level dictionaries, each starting out empty and independent.
# NOTE(review): they are not referenced in the visible part of this file;
# presumably they are shared buffers used by other code -- confirm before
# removing.
dict_Train, dict_databuf, dict_tagbuf, dict_featbuf = {}, {}, {}, {}
class ModelSVM(ModelBase):
    """SVM model with two interchangeable back ends.

    The back end is chosen by ``toolset``:

    * ``'sklearn'`` -- a linear ``sklearn.svm.SVC`` that is pickled to
      ``res/svm_sklearn.model`` after training.
    * ``'spark'``   -- delegates to an ``SC2.Sparker`` instance.

    The public entry points are :meth:`train`, :meth:`predict` and
    :meth:`test`; each raises ``Exception("Unknown toolset!")`` for any other
    ``toolset`` value.
    """

    def __init__(self, toolset='sklearn', sc=None):
        """Create the model wrapper.

        Parameters
        ----------
        toolset : str
            Back end to use, ``'sklearn'`` or ``'spark'``.
        sc : object or None
            Spark helper to use for the ``'spark'`` back end.  When ``None``
            a default ``SC2.Sparker`` is created on first use.
        """
        ModelBase.__init__(self)
        self.toolset = toolset
        self.sparker = sc

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    def _sklearn_model_path(self):
        """Return the path of the pickled scikit-learn model file."""
        return os.path.join(package_dir, '../..', 'res/svm_sklearn.model')

    def _resolve_sklearn_model(self, model=None):
        """Return the model to use: the argument, the cached one, or the file.

        Lookup order is (1) the explicit ``model`` argument, (2)
        ``self.model`` when it is set, (3) the pickled model on disk.  A
        model loaded from disk is not stored on the instance.
        """
        if model is not None:
            return model
        # ``getattr`` because it is not visible here whether ModelBase
        # initialises ``self.model`` before the first training run.
        cached = getattr(self, 'model', None)
        if cached is not None:
            return cached
        print('loading model ...')
        with open(self._sklearn_model_path(), 'rb') as modelfile:
            # NOTE: unpickling can execute arbitrary code; the model file
            # must come from a trusted source.
            return pickle.load(modelfile)

    def _get_sparker(self):
        """Return the Spark helper, creating the default one if needed."""
        if self.sparker is None:
            self.sparker = SC2.Sparker(host='HPC-server', appname='ImageCV',
                                       master='spark://HPC-server:7077')
        return self.sparker

    # ------------------------------------------------------------------
    # scikit-learn back end
    # ------------------------------------------------------------------
    def _train_sklearn(self, X, Y):
        """Fit a linear SVC on ``(X, Y)``, pickle it to disk and return it.

        The fitted classifier is also stored in ``self.model``.
        """
        clf = svm.SVC(C=4, kernel='linear', shrinking=False, verbose=True)
        clf.fit(X, Y)
        with open(self._sklearn_model_path(), 'wb') as modelfile:
            pickle.dump(clf, modelfile)
        self.model = clf
        return clf

    def _predict_sklearn(self, feat, model=None):
        """Classify the samples in ``feat`` and return the model's output.

        N.B. for ``sklearn.svm`` classifiers ``predict`` takes an array-like
        or sparse matrix of shape ``[n_samples, n_features]`` and returns the
        class labels as an array of shape ``[n_samples]``.

        ``model`` defaults to ``self.model`` and then to the pickled model on
        disk.
        """
        return self._resolve_sklearn_model(model).predict(feat)

    def __test_sklearn(self, X, Y, model=None):
        """Evaluate the model on ``(X, Y)`` for labels in ``{0, 1}``.

        Returns
        -------
        tuple
            ``(false_positive_rate, true_positive_rate, accuracy)``.  A rate
            whose denominator is zero (no negative or no positive samples in
            ``Y``) is reported as ``nan`` instead of dividing by zero.
        """
        model = self._resolve_sklearn_model(model)
        y_true = np.asarray(Y)
        y_pred = np.asarray(model.predict(X))

        positive = int(np.sum(y_true == 1))
        negative = int(np.sum(y_true == 0))
        print('%s %s' % (positive, negative))

        predicted_pos = (y_pred == 1)
        fp = int(np.sum((y_true == 0) & predicted_pos))
        tp = int(np.sum((y_true == 1) & predicted_pos))

        undefined = float('nan')
        fp_rate = float(fp) / negative if negative else undefined
        tp_rate = float(tp) / positive if positive else undefined
        return fp_rate, tp_rate, np.mean(y_true == y_pred)

    def _test_sklearn(self, X, Y, model=None):
        """Return ``model.score(X, Y)`` for the resolved model.

        Not used by :meth:`test`, which reports the rate tuple from
        ``__test_sklearn`` instead; kept as an alternative metric.
        """
        return self._resolve_sklearn_model(model).score(X, Y)

    # ------------------------------------------------------------------
    # Spark back end
    # ------------------------------------------------------------------
    def _train_spark(self, X, Y=None):
        """Train through the Spark helper, cache and return the model."""
        self.model = self._get_sparker().train_svm(X, Y)
        # Return the trained model (previously the imported ``svm`` module
        # was returned by mistake).
        return self.model

    def _predict_spark(self, feat, model=None):
        """Predict through the Spark helper; arguments are passed as-is."""
        return self._get_sparker().predict_svm(feat, model)

    def _test_spark(self, X, Y, model=None):
        """Evaluate through the Spark helper; arguments are passed as-is."""
        return self._get_sparker().test_svm(X, Y, model)

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def train(self, X, Y=None):
        """Train with the configured back end and return the trained model.

        Raises
        ------
        Exception
            If ``self.toolset`` is neither ``'sklearn'`` nor ``'spark'``.
        """
        if self.toolset == 'sklearn':
            return self._train_sklearn(X, Y)
        elif self.toolset == 'spark':
            return self._train_spark(X, Y)
        else:
            raise Exception("Unknown toolset!")

    def predict(self, feat, model=None):
        """Predict labels for ``feat`` with the configured back end.

        Raises
        ------
        Exception
            If ``self.toolset`` is neither ``'sklearn'`` nor ``'spark'``.
        """
        if self.toolset == 'sklearn':
            return self._predict_sklearn(feat, model)
        elif self.toolset == 'spark':
            return self._predict_spark(feat, model)
        else:
            raise Exception("Unknown toolset!")

    def test(self, X, Y=None, model=None):
        """Evaluate on ``(X, Y)`` with the configured back end.

        For ``'sklearn'`` the result is the
        ``(false_positive_rate, true_positive_rate, accuracy)`` tuple; for
        ``'spark'`` it is whatever the Spark helper's ``test_svm`` returns.

        Raises
        ------
        Exception
            If ``self.toolset`` is neither ``'sklearn'`` nor ``'spark'``.
        """
        if self.toolset == 'sklearn':
            return self.__test_sklearn(X, Y, model)
        elif self.toolset == 'spark':
            return self._test_spark(X, Y, model)
        else:
            raise Exception("Unknown toolset!")
|