# test_model.py


__author__ = 'chunk'

from sklearn import cross_validation
from pyspark.mllib.regression import LabeledPoint
from ..common import *
from ..mdata import ILSVRC, ILSVRC_S
from ..mmodel.svm import SVM

import gzip
import cPickle


# Absolute directory that contains this module.
package_dir = os.path.dirname(os.path.abspath(__file__))

# Module-wide Timer shared by all tests below; each phase is bracketed with
# timer.mark() / timer.report().  (Timer is expected to come from the
# ``..common`` star import -- it is not defined in this file.)
timer = Timer()


def test_SVM_ILSVRC():
    timer.mark()
    dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_0.05_orig')
    X, Y = dil.load_data(mode='local')  #
    # X, Y = dil.load_data(mode='hbase') #
    # X, Y = dil.load_data(mode='spark') #
    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.4, random_state=0)
    print np.array(Y).shape, np.array(X).shape
    print np.array(X_train).shape, np.array(Y_train).shape
    print np.array(X_test).shape, np.array(Y_test).shape

    timer.report()

    timer.mark()
    msvm = SVM.ModelSVM(toolset='sklearn')  # 4.884247s 0.777853030816
    # msvm = SVM.ModelSVM(toolset='opencv') #
    # msvm = SVM.ModelSVM(toolset='libsvm') #
    # msvm = SVM.ModelSVM(toolset='spark')
    msvm.train(X_train, Y_train)
    timer.report()

    timer.mark()
    print msvm.test(X_test, Y_test)  #
    timer.report()  #

    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()


def test_SVM_ILSVRC_HBASE():
    timer.mark()

    # dil = ILSVRC.DataILSVRC(base_dir='ILSVRC2013_DET_val', category='Train_3')
    # X, Y = dil.load_data(mode='hbase') # pass

    dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Train_1000')
    X, Y = dils.load_data(mode='hbase')  # pass

    # dil = ILSVRC_S.DataILSVRC_S(base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val/', category='Train_5000_0.1_orig')
    # X1, Y1 = dil.load_data(mode='local')

    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.4, random_state=0)
    print Y, np.sum(np.array(Y) == 0), np.sum(np.array(Y) == 1)
    print np.array(Y).shape, np.array(X).shape
    print np.array(X_train).shape, np.array(Y_train).shape
    print np.array(X_test).shape, np.array(Y_test).shape

    timer.report()

    timer.mark()
    msvm = SVM.ModelSVM(toolset='sklearn')  # 4.884247s 0.777853030816
    # msvm = SVM.ModelSVM(toolset='opencv') #
    # msvm = SVM.ModelSVM(toolset='libsvm') #
    # msvm = SVM.ModelSVM(toolset='spark',sc=dils.sparker)
    msvm.train(X_train, Y_train)
    timer.report()

    timer.mark()
    print msvm.test(X_test, Y_test)  #
    timer.report()  #

    # timer.mark()
    # print msvm.test(X1, Y1)  #(0.048868415782094936, 0.4924709948160948, 0.74568774878372401)
    # timer.report()  #

    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()


def test_SVM_ILSVRC_TEST():
    timer.mark()

    dil = ILSVRC_S.DataILSVRC_S(base='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val/', category='Train_5000_0.1_orig')
    X1, Y1 = dil.load_data(mode='local')
    timer.report()

    timer.mark()
    msvm = SVM.ModelSVM(toolset='sklearn')  # 4.884247s 0.777853030816
    timer.report()

    timer.mark()
    print msvm.test(X1, Y1)  # (0.048868415782094936, 0.4924709948160948, 0.74568774878372401)
    timer.report()  #
    # timer.mark()
    # print 'or like this:'
    # scores = cross_validation.cross_val_score(msvm.model, X, Y)
    # print scores
    # timer.report()


def test_SVM_ILSVRC_SPARK():
    timer.mark()
    dils = ILSVRC_S.DataILSVRC_S(base='ILSVRC2013_DET_val', category='Train_5000')
    # rdd_dataset = dils.load_data(mode='spark')  # pass
    X, Y = dils.load_data(mode='hbase')  # pass
    rdd_dataset = dils.sparker.sc.parallelize(zip(Y, X), 30).map(lambda x: LabeledPoint(x[0], x[1]))
    timer.report()

    timer.mark()
    # msvm = SVM.ModelSVM(toolset='sklearn')  #
    # msvm = SVM.ModelSVM(toolset='opencv') #
    # msvm = SVM.ModelSVM(toolset='libsvm') #
    msvm = SVM.ModelSVM(toolset='spark', sc=dils.sparker)
    msvm.train(rdd_dataset)
    timer.report()

    dataset = rdd_dataset.collect()
    length = len(dataset)

    X_test, Y_test = [dataset[i].features for i in range(length)], [dataset[i].label for i in range(length)]

    timer.mark()
    print msvm.test(dils.sparker.sc.parallelize(X_test), Y_test)  #
    timer.report()  #


def test_SVM_ILSVRC_S():
    """Run the ILSVRC_S test variants; only the HBase one is currently enabled."""
    test_SVM_ILSVRC_HBASE()
    # Spark variant is disabled:
    # test_SVM_ILSVRC_SPARK()


if __name__ == '__main__':
    # Script entry point: runs only the local-mode sklearn SVM test.
    # NOTE(review): test_SVM_CV is not defined in the visible part of this file.
    # test_SVM_CV()
    test_SVM_ILSVRC()
    # NOTE(review): looks like a leftover end-of-run marker; the spelling is
    # runtime output and is left untouched.
    print 'helllo'