Commit 712861f0014b007d5e6bf52ced1d57d2ff644ecc
1 parent: f005aa4a
Exists in: refactor

    extract rdd from SC.

Showing 5 changed files with 309 additions and 371 deletions
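In short, this commit moves the plain-function RDD helpers (rddparse_*, rddinfo_ILS, rddembed_ILS / rddembed_ILS_EXT, rddfeat_ILS, rddanalysis_ILS, format_out) out of mspark/SC.py into a new mspark/rdd.py, leaving SC.py with the Sparker class; callers switch from SC.<helper> to rdd.<helper>. A minimal caller-side sketch of the change, assembled from the call sites below (the table name 'ILSVRC_S_sample' is a placeholder, not from the diff):

    from ..mspark import rdd, SC

    sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
                         master='spark://HPC-server:7077')
    # Was: sparker.read_hbase(..., func=SC.rddparse_data_ILS, ...)
    rdd_data = sparker.read_hbase('ILSVRC_S_sample', func=rdd.rddparse_data_ILS,
                                  collect=False).mapValues(
        lambda data: [data] + rdd.rddinfo_ILS(data))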
mdata/ILSVRC.py

@@ -1,8 +1,8 @@
 __author__ = 'chunk'
 
 from . import *
-from ..mfeat import HOG, IntraBlockDiff
-from ..mspark import SC
+from ..mfeat import IntraBlockDiff
+from ..mspark import rdd, SC
 from ..common import *
 
 import os, sys
@@ -83,11 +83,11 @@ class DataILSVRC(DataDumperBase):
         pass
 
     def get_feat(self, image, feattype='ibd', **kwargs):
-        size = kwargs.get('size', (48, 48))
-
-        if feattype == 'hog':
-            feater = HOG.FeatHOG(size=size)
-        elif feattype == 'ibd':
+        # size = kwargs.get('size', (48, 48))
+        #
+        # if feattype == 'hog':
+        #     feater = HOG.FeatHOG(size=size)
+        if feattype == 'ibd':
             feater = IntraBlockDiff.FeatIntraBlockDiff()
         else:
             raise Exception("Unknown feature type!")
@@ -99,9 +99,9 @@ class DataILSVRC(DataDumperBase):
 
     def extract_feat(self, feattype='ibd'):
         print "extracting feat..."
-        if feattype == 'hog':
-            feater = HOG.FeatHOG(size=(48, 48))
-        elif feattype == 'ibd':
+        # if feattype == 'hog':
+        #     feater = HOG.FeatHOG(size=(48, 48))
+        if feattype == 'ibd':
             feater = IntraBlockDiff.FeatIntraBlockDiff()
         else:
             raise Exception("Unknown feature type!")
@@ -307,7 +307,7 @@ class DataILSVRC(DataDumperBase):
        # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop)
        # except Exception as e:
        #     print '[EXCPT]', e
-       # pass
+       #     pass
 
 
    def get_table(self):
@@ -322,10 +322,10 @@ class DataILSVRC(DataDumperBase):
         tables = self.connection.tables()
         if self.table_name not in tables:
             families_compressed = {'cf_pic': dict(compression='LZO'),
-                                   'cf_info': dict(max_versions=10,compression='LZO'),
-                                   'cf_tag': dict(compression='LZO'),
-                                   'cf_feat': dict(compression='LZO'),
-                                   }
+                                   'cf_info': dict(max_versions=10, compression='LZO'),
+                                   'cf_tag': dict(compression='LZO'),
+                                   'cf_feat': dict(compression='LZO'),
+                                   }
             families = {'cf_pic': dict(),
                         'cf_info': dict(max_versions=10),
                         'cf_tag': dict(),
mdata/ILSVRC_S.py

@@ -1,8 +1,8 @@
 __author__ = 'chunk'
 
 from . import *
-from ..mfeat import HOG, IntraBlockDiff
-from ..mspark import SC
+from ..mfeat import IntraBlockDiff
+from ..mspark import rdd, SC
 from pyspark.mllib.regression import LabeledPoint
 from ..common import *
 
@@ -135,11 +135,11 @@ class DataILSVRC_S(DataDumperBase):
             tmpf.close()
 
     def _get_feat(self, image, feattype='ibd', **kwargs):
-        size = kwargs.get('size', (48, 48))
-
-        if feattype == 'hog':
-            feater = HOG.FeatHOG(size=size)
-        elif feattype == 'ibd':
+        # size = kwargs.get('size', (48, 48))
+        #
+        # if feattype == 'hog':
+        #     feater = HOG.FeatHOG(size=size)
+        if feattype == 'ibd':
             feater = IntraBlockDiff.FeatIntraBlockDiff()
         else:
             raise Exception("Unknown feature type!")
@@ -267,16 +267,16 @@ class DataILSVRC_S(DataDumperBase):
         ]
 
         # # Debug
-        # tmp_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS,
+        # tmp_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_data_ILS,
         #                                    collect=False)
-        # # tmp_data = tmp_data.mapValues(lambda data: [data] + SC.rddinfo_ILS(data))
+        # # tmp_data = tmp_data.mapValues(lambda data: [data] + rdd.rddinfo_ILS(data))
         # print tmp_data.collect()[0][1]
         # return
 
 
-        self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS,
+        self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_data_ILS,
                                                 collect=False).mapValues(
-            lambda data: [data] + SC.rddinfo_ILS(data))
+            lambda data: [data] + rdd.rddinfo_ILS(data))
 
         if not writeback:
             return self.rdd_data
@@ -293,14 +293,14 @@ class DataILSVRC_S(DataDumperBase):
         ]
 
         # # Debug
-        # tmp_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS,
+        # tmp_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_data_ILS,
         #                                    collect=False)
-        # # tmp_data = tmp_data.mapValues(lambda data: [data] + SC.rddinfo_ILS(data))
+        # # tmp_data = tmp_data.mapValues(lambda data: [data] + rdd.rddinfo_ILS(data))
         # print tmp_data.collect()[0][1]
         # return
 
 
-        self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_data_ILS,
+        self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_data_ILS,
                                                 collect=False).mapValues(
             lambda data: [data])
 
@@ -417,12 +417,12 @@ class DataILSVRC_S(DataDumperBase):
         ]
 
         if readforward:
-            self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_all_ILS, collect=False)
+            self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_all_ILS, collect=False)
 
-        # rdd_data_ext = self.rdd_data.map(lambda x: SC.rddembed_ILS(x, rate=rate)).filter(lambda x: x != None)
+        # rdd_data_ext = self.rdd_data.map(lambda x: rdd.rddembed_ILS(x, rate=rate)).filter(lambda x: x != None)
         # self.rdd_data = self.rdd_data.union(rdd_data_ext)
 
-        self.rdd_data = self.rdd_data.flatMap(lambda x: SC.rddembed_ILS_EXT(x, rate=rate))
+        self.rdd_data = self.rdd_data.flatMap(lambda x: rdd.rddembed_ILS_EXT(x, rate=rate))
         if not writeback:
             return self.rdd_data
         else:
@@ -513,9 +513,9 @@ class DataILSVRC_S(DataDumperBase):
         ]
 
         if readforward:
-            self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_all_ILS, collect=False)
+            self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_all_ILS, collect=False)
 
-        self.rdd_data = self.rdd_data.mapValues(lambda items: SC.rddfeat_ILS(items, feattype))
+        self.rdd_data = self.rdd_data.mapValues(lambda items: rdd.rddfeat_ILS(items, feattype))
 
         # print self.rdd_data.collect()[0]
         # return
@@ -541,9 +541,9 @@ class DataILSVRC_S(DataDumperBase):
         ]
 
         if readforward:
-            self.rdd_data = self.sparker.read_hbase(self.table_name, func=SC.rddparse_all_ILS, collect=False)
+            self.rdd_data = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_all_ILS, collect=False)
 
-        self.rdd_data = self.rdd_data.mapValues(lambda items: SC.rddanalysis_ILS(items))
+        self.rdd_data = self.rdd_data.mapValues(lambda items: rdd.rddanalysis_ILS(items))
 
         # print self.rdd_data.collect()[0]
         # return
@@ -621,7 +621,7 @@ class DataILSVRC_S(DataDumperBase):
         self.sparker = SC.Sparker(host='HPC-server', appname='ImageILSVRC-S',
                                   master='spark://HPC-server:7077')
 
-        rdd_dataset = self.sparker.read_hbase(self.table_name, func=SC.rddparse_dataset_ILS, collect=False)
+        rdd_dataset = self.sparker.read_hbase(self.table_name, func=rdd.rddparse_dataset_ILS, collect=False)
         if not collect:
             rdd_dataset = rdd_dataset.map(lambda x: LabeledPoint(x[0], x[1]))
         return rdd_dataset
mmodel/svm/SVM.py

@@ -9,7 +9,7 @@ import os, sys
 from ...mfeat import *
 from ...mmodel import *
 from ...mmodel.svm.svmutil import *
-from ...mspark import SC2
+from ...mspark import SC
 from ...common import *
 
 import numpy as np
@@ -191,7 +191,7 @@ class ModelSVM(ModelBase):
 
     def _train_spark(self, X, Y=None):
         if self.sparker == None:
-            self.sparker = SC2.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077')
+            self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077')
 
         self.model = self.sparker.train_svm(X, Y)
mspark/SC.py

@@ -4,349 +4,20 @@ __author__ = 'chunk'
 from ..common import *
 from .dependencies import *
 from . import *
-# from ..mdata import MSR, CV, ILSVRC, ILSVRC_S
-
-from ..mjpeg import *
-from ..msteg import *
-from ..msteg.steganography import LSB, F3, F4, F5
-from ..mfeat import IntraBlockDiff
-from ..mmodel.svm import SVM2
+from .rdd import *
 
 import sys
 from pyspark import RDD
 from pyspark import SparkConf, SparkContext
 from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD
 from pyspark.mllib.regression import LabeledPoint
-from numpy import array
-import json
-import pickle
-import tempfile
+
 
 import numpy as np
-from scipy import stats
-from hashlib import md5
+
 
 np.random.seed(sum(map(ord, "whoami")))
 package_dir = os.path.dirname(os.path.abspath(__file__))
-classifier = SVM2.ModelSVM(toolset='sklearn')
-
-
-def rddparse_data_CV(raw_row):
-    """
-    input: (u'key0',u'cf_feat:hog:[0.056273,...]--%--cf_pic:data:\ufffd\ufffd\...--%--cf_tag:hog:True')
-    return: ([0.056273,...],1)
-    """
-    data = raw_row[1].split('--%--')
-    feat = json.loads(data[0].split(':')[-1])
-    tag = 1 if data[-1].split(':')[-1] == 'True' else 0
-    return (feat, tag)
-
-
-def rddparse_data_ILS(raw_row):
-    """
-    input: (u'key0',u'cf_feat:hog:[0.056273,...]--%--cf_pic:data:\ufffd\ufffd\...--%--cf_tag:hog:True')
-    return: ([0.056273,...],1)
-
-    In fact we can also use mapValues.
-    """
-    key = raw_row[0]
-    # if key == '04650c488a2b163ca8a1f52da6022f03.jpg':
-    #     with open('/tmp/hhhh','wb') as f:
-    #         f.write(raw_row[1].decode('unicode-escape')).encode('latin-1')
-    items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--')
-    data = items[0].split('cf_pic:data:')[-1]
-    return (key, data)
-
-
-def rddparse_all_ILS(raw_row):
-    """
-    Deprecated
-    """
-    key = raw_row[0]
-    items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--')
-
-    # @TODO
-    # N.B "ValueError: No JSON object could be decoded" Because the spark-hbase IO is based on strings.
-    # And the order of items is not as expected. See ../res/row-sample.txt or check in hbase shell for that.
-
-    data = [items[0].split('cf_pic:data:')[-1]] + [json.loads(item.split(':')[-1]) for item in
-                                                   items[1:]]
-
-    return (key, data)
-
-
-def rddparse_dataset_ILS(raw_row):
-    if raw_row[0] == '04650c488a2b163ca8a1f52da6022f03.jpg':
-        print raw_row
-    items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--')
-    # tag = int(items[-2].split('cf_tag:' + tagtype)[-1])
-    # feat = [item for sublist in json.loads(items[-1].split('cf_feat:' + feattype)[-1]) for subsublist in sublist for item in subsublist]
-    tag = int(items[-1].split(':')[-1])
-    feat = [item for sublist in json.loads(items[0].split(':')[-1]) for subsublist in sublist for
-            item in subsublist]
-
-    return (tag, feat)
-
-
-def rddinfo_ILS(img, info_rate=None, tag_chosen=None, tag_class=None):
-    """
-    Tempfile is our friend. (?)
-    """
-    info_rate = info_rate if info_rate != None else 0.0
-    tag_chosen = tag_chosen if tag_chosen != None else stats.bernoulli.rvs(0.8)
-    tag_class = tag_class if tag_class != None else 0
-    try:
-        tmpf = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b', delete=True)
-        tmpf.write(img)
-        tmpf.seek(0)
-        im = Jpeg(tmpf.name, key=sample_key)
-        info = [
-            im.image_width,
-            im.image_height,
-            im.image_width * im.image_height,
-            im.getCapacity(),
-            im.getQuality(),
-            info_rate,
-            tag_chosen,
-            tag_class
-        ]
-        return info
-    except Exception as e:
-        print e
-        raise
-    finally:
-        tmpf.close()
-
-
-def rddembed_ILS(row, rate=None):
-    """
-    input:
-        e.g. row =('row1',[1,3400,'hello'])
-    return:
-        newrow = ('row2',[34,5400,'embeded'])
-    """
-    items = row[1]
-    capacity, chosen = int(items[4]), int(items[7])
-    if chosen == 0:
-        return None
-    try:
-        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
-        tmpf_src.write(items[0])
-        tmpf_src.seek(0)
-        tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
-
-        steger = F5.F5(sample_key, 1)
-
-        if rate == None:
-            embed_rate = steger.embed_raw_data(tmpf_src.name,
-                                               os.path.join(package_dir, '../res/toembed'),
-                                               tmpf_dst.name)
-        else:
-            assert (rate >= 0 and rate < 1)
-            # print capacity
-            hidden = np.random.bytes(int(int(capacity) * rate) / 8)
-            embed_rate = steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True)
-
-        tmpf_dst.seek(0)
-        raw = tmpf_dst.read()
-        index = md5(raw).hexdigest()
-
-        return (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))
-
-    except Exception as e:
-        print e
-        raise
-    finally:
-        tmpf_src.close()
-        tmpf_dst.close()
-
-
-def rddembed_ILS_EXT(row, rate=None):
-    """
-    input:
-        e.g. row =('row1',[1,3400,'hello'])
-    return:
-        newrow = ('row2',[34,5400,'embeded']) or NULL
-        [row,newrow]
-    """
-    items = row[1]
-    capacity, chosen = int(items[4]), int(items[7])
-    if chosen == 0:
-        return [row]
-    try:
-        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
-        tmpf_src.write(items[0])
-        tmpf_src.seek(0)
-        tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
-
-        steger = F5.F5(sample_key, 2)
-
-        if rate == None:
-            embed_rate = steger.embed_raw_data(tmpf_src.name,
-                                               os.path.join(package_dir, '../res/toembed'),
-                                               tmpf_dst.name)
-        else:
-            assert (rate >= 0 and rate < 1)
-            # print capacity
-            hidden = np.random.bytes(int(int(capacity) * rate) / 8)
-            embed_rate = steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True)
-
-        tmpf_dst.seek(0)
-        raw = tmpf_dst.read()
-        index = md5(raw).hexdigest()
-
-        return [row, (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))]
-
-    except Exception as e:
-        print e
-        raise
-    finally:
-        tmpf_src.close()
-        tmpf_dst.close()
-
-
-def _get_feat(image, feattype='ibd', **kwargs):
-    if feattype == 'ibd':
-        feater = IntraBlockDiff.FeatIntraBlockDiff()
-    else:
-        raise Exception("Unknown feature type!")
-
-    desc = feater.feat(image)
-
-    return desc
-
-
-def rddfeat_ILS(items, feattype='ibd', **kwargs):
-    try:
-        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
-        tmpf_src.write(items[0])
-        tmpf_src.seek(0)
-
-        desc = json.dumps(_get_feat(tmpf_src.name, feattype=feattype).tolist())
-        # print 'desccccccccccccccccccc',desc
-        return items + [desc]
-
-    except Exception as e:
-        print e
-        raise
-    finally:
-        tmpf_src.close()
-
-
-def rddanalysis_ILS(items, feattype='ibd', **kwargs):
-    head = np.fromstring(items[0][:2], dtype=np.uint8)
-    if not np.array_equal(head, [255, 216]):
-        return items + [0]
-    try:
-        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
-        tmpf_src.write(items[0])
-        tmpf_src.seek(0)
-
-        desc = _get_feat(tmpf_src.name, feattype=feattype)
-        tag = classifier.predict(desc.ravel())[0]
-        # print 'desccccccccccccccccccc',desc
-        return items + [tag]
-
-    except Exception as e:
-        print e
-        raise
-    finally:
-        tmpf_src.close()
-
-    # return items + classifier.predict(items[-1])
-
-
-def format_out(row, cols, withdata=False):
-    """
-    input:
-        e.g. row =('row1',[1,3400,'hello'])
-             cols = [['cf_info', 'id'], ['cf_info', 'size'], ['cf_tag', 'desc']]
-    return:
-        [('row1',['row1', 'cf_info', 'id', '1']),('row1',['row1', 'cf_info', 'size', '3400']),('row1',['row1', 'cf_tag', 'desc', 'hello'])]
-    """
-    puts = []
-    key = row[0]
-    # if key == '04650c488a2b163ca8a1f52da6022f03.jpg':
-    #     print row
-    if not withdata:
-        for data, col in zip(row[1][1:], cols[1:]):
-            puts.append((key, [key] + col + [str(data)]))
-    else:
-        for data, col in zip(row[1], cols):
-            puts.append((key, [key] + col + [str(data)]))
-    return puts
-
-
-# scconf = SparkConf()
-# scconf.setSparkHome("HPC-server") \
-#     .setMaster("spark://HPC-server:7077") \
-#     .setAppName("example")
-# sc = SparkContext(conf=scconf)
-#
-#
-# def read_hbase(table_name, func=None, collect=False):
-#     """
-#     ref - http://happybase.readthedocs.org/en/latest/user.html#retrieving-data
-#
-#     Filter format:
-#         columns=['cf1:col1', 'cf1:col2']
-#         or
-#         columns=['cf1']
-#
-#     """
-#
-#     hconf = {
-#         "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2",
-#         # "hbase.zookeeper.quorum": self.host,
-#         "hbase.mapreduce.inputtable": table_name,
-#     }
-#
-#     hbase_rdd = sc.newAPIHadoopRDD(inputFormatClass=hparams["inputFormatClass"],
-#                                    keyClass=hparams["readKeyClass"],
-#                                    valueClass=hparams["readValueClass"],
-#                                    keyConverter=hparams["readKeyConverter"],
-#                                    valueConverter=hparams["readValueConverter"],
-#                                    conf=hconf)
-#
-#     parser = func if func != None else rddparse_data_CV
-#     hbase_rdd = hbase_rdd.map(lambda x: parser(x))
-#
-#     if collect:
-#         return hbase_rdd.collect()
-#     else:
-#         return hbase_rdd
-#
-#
-# def write_hbase(table_name, data, fromrdd=False, columns=None, withdata=False):
-#     """
-#     Data Format: (Deprecated)
-#         e.g. [["row8", "f1", "", "caocao cao"], ["row9", "f1", "c1", "asdfg hhhh"]]
-#
-#     Data(from dictionary):
-#         e.g. data ={'row1':[1,3400,'hello'], 'row2':[34,5000,'here in mine']},
-#              cols = ['cf_info:id', 'cf_info:size', 'cf_tag:desc']
-#     Data(from Rdd):
-#         e.g. data =[('row1',[1,3400,'hello']), ('row2',[34,5000,'here in mine'])],
-#              cols = ['cf_info:id', 'cf_info:size', 'cf_tag:desc']
-#     """
-#     hconf = {
-#         "hbase.zookeeper.quorum": "HPC-server, HPC, HPC2",  # "hbase.zookeeper.quorum": self.host,
-#         "hbase.mapreduce.inputtable": table_name,
-#         "hbase.mapred.outputtable": table_name,
-#         "mapreduce.outputformat.class": hparams["outputFormatClass"],
-#         "mapreduce.job.output.key.class": hparams["writeKeyClass"],
-#         "mapreduce.job.output.value.class": hparams["writeValueClass"],
-#     }
-#     cols = [col.split(':') for col in columns]
-#     if not fromrdd:
-#         rdd_data = sc.parallelize(data)
-#     else:
-#         rdd_data = data
-#
-#     rdd_data.flatMap(lambda x: format_out(x, cols, withdata=withdata)).saveAsNewAPIHadoopDataset(
-#         conf=hconf,
-#         keyConverter=hparams["writeKeyConverter"],
-#         valueConverter=hparams["writeValueConverter"])
 
 
 class Sparker(object):
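Note that SC.py now does `from .rdd import *`, so every public helper extracted into the new module below stays reachable under its old SC.<name> spelling (rdd.py defines no __all__, so the star import re-exports all non-underscore names; only the private _get_feat drops out). The per-call-site SC.* to rdd.* rewrites above are therefore a cleanup rather than a hard requirement. A quick sanity check under that assumption:

    from ..mspark import rdd, SC

    # Both spellings resolve to the same function object after the re-export.
    assert SC.rddparse_data_ILS is rdd.rddparse_data_ILS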
mspark/rdd.py (new file)

@@ -0,0 +1,267 @@
+__author__ = 'hadoop'
+
+from ..common import *
+
+from ..mjpeg import *
+from ..msteg import *
+from ..msteg.steganography import LSB, F3, F4, F5
+from ..mfeat import IntraBlockDiff
+from ..mmodel.svm import SVM
+
+from numpy import array
+import json
+import pickle
+import tempfile
+
+import numpy as np
+from scipy import stats
+from hashlib import md5
+
+np.random.seed(sum(map(ord, "whoami")))
+package_dir = os.path.dirname(os.path.abspath(__file__))
+classifier = SVM.ModelSVM(toolset='sklearn')
+
+def rddparse_data_CV(raw_row):
+    """
+    input: (u'key0',u'cf_feat:hog:[0.056273,...]--%--cf_pic:data:\ufffd\ufffd\...--%--cf_tag:hog:True')
+    return: ([0.056273,...],1)
+    """
+    data = raw_row[1].split('--%--')
+    feat = json.loads(data[0].split(':')[-1])
+    tag = 1 if data[-1].split(':')[-1] == 'True' else 0
+    return (feat, tag)
+
+
+def rddparse_data_ILS(raw_row):
+    """
+    input: (u'key0',u'cf_feat:hog:[0.056273,...]--%--cf_pic:data:\ufffd\ufffd\...--%--cf_tag:hog:True')
+    return: ([0.056273,...],1)
+
+    In fact we can also use mapValues.
+    """
+    key = raw_row[0]
+    # if key == '04650c488a2b163ca8a1f52da6022f03.jpg':
+    #     with open('/tmp/hhhh','wb') as f:
+    #         f.write(raw_row[1].decode('unicode-escape')).encode('latin-1')
+    items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--')
+    data = items[0].split('cf_pic:data:')[-1]
+    return (key, data)
+
+
+def rddparse_all_ILS(raw_row):
+    """
+    Deprecated
+    """
+    key = raw_row[0]
+    items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--')
+
+    # @TODO
+    # N.B "ValueError: No JSON object could be decoded" Because the spark-hbase IO is based on strings.
+    # And the order of items is not as expected. See ../res/row-sample.txt or check in hbase shell for that.
+
+    data = [items[0].split('cf_pic:data:')[-1]] + [json.loads(item.split(':')[-1]) for item in
+                                                   items[1:]]
+
+    return (key, data)
+
+
+def rddparse_dataset_ILS(raw_row):
+    if raw_row[0] == '04650c488a2b163ca8a1f52da6022f03.jpg':
+        print raw_row
+    items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--')
+    # tag = int(items[-2].split('cf_tag:' + tagtype)[-1])
+    # feat = [item for sublist in json.loads(items[-1].split('cf_feat:' + feattype)[-1]) for subsublist in sublist for item in subsublist]
+    tag = int(items[-1].split(':')[-1])
+    feat = [item for sublist in json.loads(items[0].split(':')[-1]) for subsublist in sublist for
+            item in subsublist]
+
+    return (tag, feat)
+
+
+def rddinfo_ILS(img, info_rate=None, tag_chosen=None, tag_class=None):
+    """
+    Tempfile is our friend. (?)
+    """
+    info_rate = info_rate if info_rate != None else 0.0
+    tag_chosen = tag_chosen if tag_chosen != None else stats.bernoulli.rvs(0.8)
+    tag_class = tag_class if tag_class != None else 0
+    try:
+        tmpf = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b', delete=True)
+        tmpf.write(img)
+        tmpf.seek(0)
+        im = Jpeg(tmpf.name, key=sample_key)
+        info = [
+            im.image_width,
+            im.image_height,
+            im.image_width * im.image_height,
+            im.getCapacity(),
+            im.getQuality(),
+            info_rate,
+            tag_chosen,
+            tag_class
+        ]
+        return info
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf.close()
+
+
+def rddembed_ILS(row, rate=None):
+    """
+    input:
+        e.g. row =('row1',[1,3400,'hello'])
+    return:
+        newrow = ('row2',[34,5400,'embeded'])
+    """
+    items = row[1]
+    capacity, chosen = int(items[4]), int(items[7])
+    if chosen == 0:
+        return None
+    try:
+        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+        tmpf_src.write(items[0])
+        tmpf_src.seek(0)
+        tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+
+        steger = F5.F5(sample_key, 1)
+
+        if rate == None:
+            embed_rate = steger.embed_raw_data(tmpf_src.name,
+                                               os.path.join(package_dir, '../res/toembed'),
+                                               tmpf_dst.name)
+        else:
+            assert (rate >= 0 and rate < 1)
+            # print capacity
+            hidden = np.random.bytes(int(int(capacity) * rate) / 8)
+            embed_rate = steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True)
+
+        tmpf_dst.seek(0)
+        raw = tmpf_dst.read()
+        index = md5(raw).hexdigest()
+
+        return (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))
+
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf_src.close()
+        tmpf_dst.close()
+
+
+def rddembed_ILS_EXT(row, rate=None):
+    """
+    input:
+        e.g. row =('row1',[1,3400,'hello'])
+    return:
+        newrow = ('row2',[34,5400,'embeded']) or NULL
+        [row,newrow]
+    """
+    items = row[1]
+    capacity, chosen = int(items[4]), int(items[7])
+    if chosen == 0:
+        return [row]
+    try:
+        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+        tmpf_src.write(items[0])
+        tmpf_src.seek(0)
+        tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+
+        steger = F5.F5(sample_key, 2)
+
+        if rate == None:
+            embed_rate = steger.embed_raw_data(tmpf_src.name,
+                                               os.path.join(package_dir, '../res/toembed'),
+                                               tmpf_dst.name)
+        else:
+            assert (rate >= 0 and rate < 1)
+            # print capacity
+            hidden = np.random.bytes(int(int(capacity) * rate) / 8)
+            embed_rate = steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True)
+
+        tmpf_dst.seek(0)
+        raw = tmpf_dst.read()
+        index = md5(raw).hexdigest()
+
+        return [row, (index + '.jpg', [raw] + rddinfo_ILS(raw, embed_rate, 0, 1))]
+
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf_src.close()
+        tmpf_dst.close()
+
+
+def _get_feat(image, feattype='ibd', **kwargs):
+    if feattype == 'ibd':
+        feater = IntraBlockDiff.FeatIntraBlockDiff()
+    else:
+        raise Exception("Unknown feature type!")
+
+    desc = feater.feat(image)
+
+    return desc
+
+
+def rddfeat_ILS(items, feattype='ibd', **kwargs):
+    try:
+        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+        tmpf_src.write(items[0])
+        tmpf_src.seek(0)
+
+        desc = json.dumps(_get_feat(tmpf_src.name, feattype=feattype).tolist())
+        # print 'desccccccccccccccccccc',desc
+        return items + [desc]
+
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf_src.close()
+
+
+def rddanalysis_ILS(items, feattype='ibd', **kwargs):
+    head = np.fromstring(items[0][:2], dtype=np.uint8)
+    if not np.array_equal(head, [255, 216]):
+        return items + [0]
+    try:
+        tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')
+        tmpf_src.write(items[0])
+        tmpf_src.seek(0)
+
+        desc = _get_feat(tmpf_src.name, feattype=feattype)
+        tag = classifier.predict(desc.ravel())[0]
+        # print 'desccccccccccccccccccc',desc
+        return items + [tag]
+
+    except Exception as e:
+        print e
+        raise
+    finally:
+        tmpf_src.close()
+
+    # return items + classifier.predict(items[-1])
+
+
+def format_out(row, cols, withdata=False):
+    """
+    input:
+        e.g. row =('row1',[1,3400,'hello'])
+             cols = [['cf_info', 'id'], ['cf_info', 'size'], ['cf_tag', 'desc']]
+    return:
+        [('row1',['row1', 'cf_info', 'id', '1']),('row1',['row1', 'cf_info', 'size', '3400']),('row1',['row1', 'cf_tag', 'desc', 'hello'])]
+    """
+    puts = []
+    key = row[0]
+    # if key == '04650c488a2b163ca8a1f52da6022f03.jpg':
+    #     print row
+    if not withdata:
+        for data, col in zip(row[1][1:], cols[1:]):
+            puts.append((key, [key] + col + [str(data)]))
+    else:
+        for data, col in zip(row[1], cols):
+            puts.append((key, [key] + col + [str(data)]))
+    return puts
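As a usage note on the extracted format_out: it flattens one (key, values) row into per-cell HBase puts, which the (still commented-out) write_hbase path would feed through flatMap into saveAsNewAPIHadoopDataset. A standalone check with the values from its docstring; the top-level package name is not shown in the diff, so the import path here is illustrative:

    from mspark.rdd import format_out

    row = ('row1', [1, 3400, 'hello'])
    cols = [['cf_info', 'id'], ['cf_info', 'size'], ['cf_tag', 'desc']]

    # withdata=True emits one put per column, including the first one.
    print format_out(row, cols, withdata=True)
    # [('row1', ['row1', 'cf_info', 'id', '1']),
    #  ('row1', ['row1', 'cf_info', 'size', '3400']),
    #  ('row1', ['row1', 'cf_tag', 'desc', 'hello'])]

    # withdata=False skips row[1][0] and cols[0], i.e. the raw image payload.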