__init__.py 2.44 KB
Edit Raw Blame History



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110


# -*- coding: utf-8 -*-
# Module metadata: author tag of the original writer.
__author__ = 'chunk'

# Explicit public API: a star-import of this module exposes only DataDumperBase.
__all__ = ['DataDumperBase']


class DataDumperBase(object):
    """
    Base class for image data dumping & retrieving.

    A regular directory pattern would be like this:

        dst
        ├── Dev (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat (feat_dir)
            │   ├── 0a1
            │   └── 53e
            │   └── ...
            │
            └── Img (img_dir)
                ├── 0a1
                └── 53e
                └── ...
        ├── Train (category)
            ├── file-tag.tsv (list_file)
            │
            ├── Feat
            │   ├── 032
            │   └── a21
            │   └── ...
            │
            └── Img
                ├── 032
                └── a21
                └── ...
        .
        .
        .

    It can be refactored from the original pattern, which is supposed to be
    generated by web crawlers:

        ├── Neg
        │   ├── aaa.jpg
        │   └── bbb.jpg
        │   └── ...
        │
        └── Pos
            ├── ccc.jpg
            └── ddd.jpg
            └── ...

    Convention:

        'im' or 'img' is for image file data while 'image' or 'image_path'
        is for a file path.

    Every method other than ``__init__`` is a placeholder in this class: it
    does nothing and returns None.
    """

    def __init__(self, base_dir, category):
        """
        Derive the directory layout from ``base_dir`` and ``category``.

        base_dir: root data directory, e.g. '/home/hadoop/data/MSR-IRC2014/'.
            A trailing '/' is optional; it is added when deriving paths.
        category: name of the sub-directory of 'dst/' to work in
            (e.g. 'Train' or 'Dev'), or None to work in 'dst/' itself.

        Attributes set (all directory paths end with '/'):
            base       -- ``base_dir`` exactly as passed in
            category   -- ``category`` exactly as passed in
            dst_dir    -- '<base_dir>/dst/[<category>/]'
            list_file  -- '<dst_dir>file-tag.tsv' (the tag list file,
                          not a data file)
            feat_dir   -- '<dst_dir>Feat/'
            img_dir    -- '<dst_dir>Img/'
            table_name, table, connection -- initialised to None
                (NOTE(review): presumably filled in by subclasses backed by
                a table store -- confirm against the subclasses)

        The original notes also mention ``dict_data``, e.g.
        {'filename': rawdata} or {'filename': tag}; it is not used in this
        base class.
        """
        self.base = base_dir
        self.category = category

        # Paths are built by plain concatenation, so make sure the root ends
        # with a separator; otherwise '/data' + 'dst/' would silently become
        # '/datadst/'. An empty base is left alone so paths stay relative.
        root = base_dir
        if root and not root.endswith('/'):
            root += '/'

        dst_dir = root + 'dst/'
        # Identity test: None is a singleton and must not go through __ne__.
        if category is not None:
            dst_dir += category + '/'
        self.dst_dir = dst_dir

        self.list_file = dst_dir + 'file-tag.tsv'
        self.feat_dir = dst_dir + 'Feat/'
        self.img_dir = dst_dir + 'Img/'

        self.table_name = None
        self.table = None
        self.connection = None

    def format(self):
        """Placeholder; does nothing in the base class."""
        pass

    def get_table(self):
        """Placeholder; does nothing in the base class."""
        pass

    def store_img(self):
        """Placeholder; does nothing in the base class."""
        pass

    def store_tag(self, tagtype):
        """Placeholder taking a ``tagtype``; does nothing in the base class."""
        pass

    def store_feat(self, feattype):
        """Placeholder taking a ``feattype``; does nothing in the base class."""
        pass

    def get_feat(self, image, feattype):
        """
        Placeholder taking an ``image`` and a ``feattype``; does nothing in
        the base class. By the class convention ``image`` is a file path.
        """
        pass

    def extract_feat(self, feattype):
        """Placeholder taking a ``feattype``; does nothing in the base class."""
        pass

    def load_data(self, mode):
        """Placeholder taking a ``mode``; does nothing in the base class."""
        pass