test_spider.py
1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/home/hadoop/.virtualenvs/env1/bin/python
__author__ = 'chunk'
from ..common import *
from ..mdata import CV
from ..mspark import SC, SSC
# Module-level timer used by test_spark_io (mark()/report()).
# NOTE(review): Timer is not imported by name here; presumably it comes in
# through the wildcard import from ..common -- confirm.
timer = Timer()
def test_spark_io():
    """Load the CV dataset in 'spark' mode between timer.mark() and timer.report().

    The loaded pair is not inspected; the call exists only so the module-level
    ``timer`` brackets it (presumably measuring elapsed time -- see Timer).
    """
    timer.mark()
    loader = CV.DataCV()
    # To exercise the default load path instead, call loader.load_data()
    # without the mode argument.
    features, labels = loader.load_data(mode='spark')
    timer.report()
def test_spark_ml():
    """Train through the Spark wrapper, then print a prediction per image file.

    Steps, in order:
      1. Build ``SC.Sparker`` pointed at master ``spark://HPC-server:7077``.
      2. Load ``X, Y`` with ``CV.DataCV().load_data()`` and pass them to
         ``sparker.train_svm``.
      3. Walk ``data/467/`` recursively; for every file, fetch its 'hog'
         feature with ``dcv.get_feat`` and print the file name followed by
         the value returned from ``sparker.predict_svm``.
    """
    # Imported locally so this function does not depend on ``os`` leaking in
    # through the wildcard import from ``..common``.
    import os

    sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077')
    dcv = CV.DataCV()
    X, Y = dcv.load_data()
    sparker.train_svm(X, Y)

    for path, _subdirs, files in os.walk('data/467/'):
        for name in files:
            imgpath = os.path.join(path, name)
            feat = dcv.get_feat(imgpath, 'hog')
            # Single pre-formatted argument: yields the same "name prediction"
            # line under the Python 2 print statement and the Python 3
            # print function.
            print('%s %s' % (name, sparker.predict_svm(feat)))
def test_spark_stream():
    """Point a StreamSparker at '192.168.2.118', 9999 and call its _word_count().

    The two arguments to ``set_datasource`` are presumably host and port --
    confirm against ``SSC.StreamSparker``.
    """
    stream = SSC.StreamSparker()
    stream.set_datasource('192.168.2.118', 9999)
    # _word_count is underscore-prefixed on StreamSparker but is invoked
    # directly here.
    stream._word_count()
if __name__ == '__main__':
    # Script entry point: only the I/O check runs; the ML test stays
    # disabled.
    test_spark_io()
    # test_spark_ml()
    print('helllo')