Chunk / ImageR

Blame view

mdata/ILSVRC_S.py 19.4 KB

ea1eb31a Chunk spark is privileg...	1 2 3	`__author__ = 'chunk' from . import *`
84648488 Chunk reverted.	4	`from ..mfeat import HOG, IntraBlockDiff`
ea1eb31a Chunk spark is privileg...	5	`from ..mspark import SC`
02528074 Chunk staged.	6	`from ..common import *`
ea1eb31a Chunk spark is privileg...	7 8 9	`import os, sys from hashlib import md5`
ea1eb31a Chunk spark is privileg...	10 11	`import csv import json`
ea1eb31a Chunk spark is privileg...	12	`import happybase`
ea1eb31a Chunk spark is privileg...	13 14 15 16 17 18 19	`from ..mjpeg import * from ..msteg import * from ..msteg.steganography import LSB, F3, F4, F5 import numpy as np from scipy import stats`
ea1eb31a Chunk spark is privileg...	20 21	`import tempfile`
f25fd27c Chunk staged. 'hbase' m...	22
ea1eb31a Chunk spark is privileg...	23 24 25 26 27 28	`np.random.seed(sum(map(ord, "whoami"))) package_dir = os.path.dirname(os.path.abspath(__file__)) class DataILSVRC_S(DataDumperBase):`
24768a99 Chunk mode 'hbase' fini...	29	`"""`
f25fd27c Chunk staged. 'hbase' m...	30 31 32 33 34 35 36 37 38	This module is specifically for ILSVRC data processing under spark & hbase. We posit that the DB(e.g. HBase) has only the image data with md5 name as id. The task is to generate info(size,capacity,quality,etc.) and class & chosen tags, and then to perform embedding and finally to calculate ibd features. Each step includes reading from & writing to Hbase (though PC). And each step must have a 'spark' mode option, which means that the operation is performed by spark with reading & writing through RDDs. copyright(c) 2015 chunkplus@gmail.com
35cf2e3a Chunk staged.	39	`"""`
def __init__(self, base='ILSVRC2013_DET_val', category='Train_1'):
    """Set up the in-memory state for one base/category pair.

    Args:
        base: Dataset base path or name. Its last '/'-separated component
            (after stripping leading/trailing slashes) becomes the table
            name.
        category: Optional category. When it is not None it is appended to
            the table name as '-<category>'.
    """
    DataDumperBase.__init__(self, base, category)

    self.base = base
    self.category = category

    # Filled by the 'hbase' code paths (dict) and 'spark' code paths (RDD).
    self.dict_data = {}
    self.rdd_data = None

    self.table_name = self.base.strip('/').split('/')[-1]
    # None is a singleton, so test identity rather than equality.
    if category is not None:
        self.table_name += '-' + self.category

    # The Spark wrapper is created lazily by the methods that need it.
    self.sparkcontex = None
    self.steger = F5.F5(sample_key, 1)
1dc7c44b Chunk crawler-hbase-spa...	57
f4fb4381 Chunk staged.	58 59	`def get_table(self): if self.table != None:`
ea1eb31a Chunk spark is privileg...	60	`return self.table`
0fbc087e Chunk staged.	61
ea1eb31a Chunk spark is privileg...	62	`if self.connection is None:`
24768a99 Chunk mode 'hbase' fini...	63	`c = happybase.Connection('HPC-server')`
4f36b116 Chunk staged.	64 65	`self.connection = c`
ea1eb31a Chunk spark is privileg...	66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87	tables = self.connection.tables() if self.table_name not in tables: families = {'cf_pic': dict(), 'cf_info': dict(max_versions=10), 'cf_tag': dict(), 'cf_feat': dict(), } self.connection.create_table(name=self.table_name, families=families) table = self.connection.table(name=self.table_name) self.table = table return table def delete_table(self, table_name=None, disable=True): if table_name == None: table_name = self.table_name if self.connection is None: c = happybase.Connection('HPC-server') self.connection = c
d47ae6ce Chunk staged.	88
f1fa5b17 Chunk review & streaming.	89	`tables = self.connection.tables()`
d47ae6ce Chunk staged.	90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107	`if table_name not in tables: return False else: try: self.connection.delete_table(table_name, disable) except: print 'Exception when deleting table.' raise return True def _get_info(self, img, info_rate=None, tag_chosen=None, tag_class=None): """ Tempfile is our friend. (?) """ info_rate = info_rate if info_rate != None else 0.0 tag_chosen = tag_chosen if tag_chosen != None else stats.bernoulli.rvs(0.8) tag_class = tag_class if tag_class != None else 0`
f25fd27c Chunk staged. 'hbase' m...	108 109 110 111 112 113 114	`try: tmpf = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b') tmpf.write(img) tmpf.seek(0) im = Jpeg(tmpf.name, key=sample_key) info = [ im.image_width,`
1c2a3fa0 Chunk staged.	115	`im.image_height,`
f25fd27c Chunk staged. 'hbase' m...	116 117 118	`im.image_width * im.image_height, im.getCapacity(), im.getQuality(),`
24768a99 Chunk mode 'hbase' fini...	119	`info_rate,`
f25fd27c Chunk staged. 'hbase' m...	120	`tag_chosen,`
1c2a3fa0 Chunk staged.	121 122 123 124 125 126 127 128 129 130	`tag_class ] return info except Exception as e: print e finally: tmpf.close() def _get_feat(self, image, feattype='ibd', **kwargs): size = kwargs.get('size', (48, 48))`
f25fd27c Chunk staged. 'hbase' m...	131 132 133 134 135 136 137	`if feattype == 'hog': feater = HOG.FeatHOG(size=size) elif feattype == 'ibd': feater = IntraBlockDiff.FeatIntraBlockDiff() else: raise Exception("Unknown feature type!")`
84648488 Chunk reverted.	138 139 140 141 142	`desc = feater.feat(image) return desc`
f25fd27c Chunk staged. 'hbase' m...	143 144 145	`def _rddparse_data(raw_row): """`
ea1eb31a Chunk spark is privileg...	146	`input: (u'key0',u'cf_feat:hog:[0.056273,...]--%--cf_pic:data:\ufffd\ufffd\...--%--cf_tag:hog:True')`
f25fd27c Chunk staged. 'hbase' m...	147	`return: ([0.056273,...],1)`
ea1eb31a Chunk spark is privileg...	148
f25fd27c Chunk staged. 'hbase' m...	149	`In fact we can also use mapValues.`
ea1eb31a Chunk spark is privileg...	150	`"""`
84648488 Chunk reverted.	151	`key = raw_row[0]`
1c2a3fa0 Chunk staged.	152	`# if key == '04650c488a2b163ca8a1f52da6022f03.jpg':`
0fbc087e Chunk staged.	153 154 155 156 157 158	`# with open('/tmp/hhhh','wb') as f: # f.write(raw_row[1].decode('unicode-escape')).encode('latin-1') items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--') data = items[0].split('cf_pic:data:')[-1] return (key, data)`
0fbc087e Chunk staged.	159
1c2a3fa0 Chunk staged.	160 161 162 163 164	`def _rddparse_all(raw_row): key = raw_row[0] items = raw_row[1].decode('unicode-escape').encode('latin-1').split('--%--') data = [items[0].split('cf_pic:data:')[-1]] + [json.loads(item.split(':')[-1]) for item in items[1:]] return (key, data)`
0fbc087e Chunk staged.	165 166
84648488 Chunk reverted.	167	`def _rdd_embed(self, row):`
1c2a3fa0 Chunk staged.	168	`"""`
0fbc087e Chunk staged.	169	`input:`
1c2a3fa0 Chunk staged.	170	`e.g. row =('row1',[1,3400,'hello'])`
84648488 Chunk reverted.	171	`return:`
0fbc087e Chunk staged.	172 173	`newrow = ('row2',[34,5400,'embeded']) """`
84648488 Chunk reverted.	174	`items = row[1]`
0fbc087e Chunk staged.	175 176 177 178 179 180 181 182 183	`capacity, rate, chosen = items[4], items[6], items[7] if chosen == 0: return None try: tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b') tmpf_src.write(items[0]) tmpf_src.seek(0) tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')`
1c2a3fa0 Chunk staged.	184
0fbc087e Chunk staged.	185 186 187 188 189 190 191 192 193	`if rate == None: embed_rate = self.steger.embed_raw_data(tmpf_src.name, os.path.join(package_dir, '../res/toembed'), tmpf_dst.name) else: assert (rate >= 0 and rate < 1) # print capacity hidden = np.random.bytes(int(int(capacity) * rate) / 8) embed_rate = self.steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True)`
84648488 Chunk reverted.	194	`tmpf_dst.seek(0)`
0fbc087e Chunk staged.	195 196 197 198 199	`raw = tmpf_dst.read() index = md5(raw).hexdigest() return (index + '.jpg', [raw] + self._get_info(raw, embed_rate, 0, 1))`
84648488 Chunk reverted.	200	`except Exception as e:`
0fbc087e Chunk staged.	201 202 203 204 205 206 207 208 209 210 211 212 213 214	`print e raise finally: tmpf_src.close() tmpf_dst.close() def _extract_data(self, mode='hbase', writeback=False, withdata=True): """ Get info barely out of image data. """ if mode == 'hbase': if self.table == None: self.table = self.get_table()`
84648488 Chunk reverted.	215
1dc7c44b Chunk crawler-hbase-spa...	216	`cols = ['cf_pic:data']`
f25fd27c Chunk staged. 'hbase' m...	217 218 219	`for key, data in self.table.scan(columns=cols): data = data['cf_pic:data'] self.dict_data[key] = [data] + self._get_info(data)`
f1fa5b17 Chunk review & streaming.	220
f25fd27c Chunk staged. 'hbase' m...	221 222 223 224 225	`if not writeback: return self.dict_data else: try: with self.table.batch(batch_size=5000) as b:`
24768a99 Chunk mode 'hbase' fini...	226 227	`for imgname, imginfo in self.dict_data.items(): b.put(imgname,`
f25fd27c Chunk staged. 'hbase' m...	228 229 230 231 232 233 234 235 236 237 238	`{ # 'cf_pic:data': imginfo[0], 'cf_info:width': str(imginfo[1]), 'cf_info:height': str(imginfo[2]), 'cf_info:size': str(imginfo[3]), 'cf_info:capacity': str(imginfo[4]), 'cf_info:quality': str(imginfo[5]), 'cf_info:rate': str(imginfo[6]), 'cf_tag:chosen': str(imginfo[7]), 'cf_tag:class': str(imginfo[8]), })`
1c2a3fa0 Chunk staged.	239 240 241 242 243 244 245 246	`except ValueError: raise elif mode == 'spark': if self.sparkcontex == None: self.sparkcontex = SC.Sparker(host='HPC-server', appname='ImageILSVRC', master='spark://HPC-server:7077')`
24768a99 Chunk mode 'hbase' fini...	247
f25fd27c Chunk staged. 'hbase' m...	248 249 250 251 252	`cols = [ 'cf_pic:data', 'cf_info:width', 'cf_info:height', 'cf_info:size',`
02528074 Chunk staged.	253 254	`'cf_info:capacity', 'cf_info:quality',`
0bd44a28 Chunk staged.	255	`'cf_info:rate',`
0fbc087e Chunk staged.	256	`'cf_tag:chosen',`
1c2a3fa0 Chunk staged.	257 258 259 260 261 262 263 264 265 266 267	`'cf_tag:class' ] # # Debug # tmp_data = self.sparkcontex.read_hbase(self.table_name, func=SC.rddparse_data_ILS, # collect=False) # # tmp_data = tmp_data.mapValues(lambda data: [data] + SC.rddinfo_ILS(data)) # print tmp_data.collect()[0][1] # return`
0fbc087e Chunk staged.	268	`self.rdd_data = self.sparkcontex.read_hbase(self.table_name, func=SC.rddparse_data_ILS,`
3b4e250d Chunk staged.	269	`collect=False).mapValues(`
02528074 Chunk staged.	270	`lambda data: [data] + SC.rddinfo_ILS(data))`
1c2a3fa0 Chunk staged.	271 272 273 274	`if not writeback: return self.rdd_data else:`
3b4e250d Chunk staged.	275 276	`self.sparkcontex.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, withdata=withdata)`
02528074 Chunk staged.	277
0bd44a28 Chunk staged.	278	`else:`
1c2a3fa0 Chunk staged.	279	`raise Exception("Unknown mode!")`
3b4e250d Chunk staged.	280
0fbc087e Chunk staged.	281 282 283	`def _embed_data(self, mode='hbase', rate=None, readforward=False, writeback=False, withdata=True): if mode == 'hbase':`
02528074 Chunk staged.	284	`if self.table == None:`
0bd44a28 Chunk staged.	285	`self.table = self.get_table()`
e3e7e73a Chunk spider standalone...	286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311	`if readforward: self.dict_data = {} cols = [ 'cf_pic:data', 'cf_info:width', 'cf_info:height', 'cf_info:size', 'cf_info:capacity', 'cf_info:quality', 'cf_info:rate', 'cf_tag:chosen', 'cf_tag:class' ] for key, data in self.table.scan(columns=cols): data = [data[k] for k in cols] self.dict_data[key] = data dict_data_ext = {} for imgname, imgdata in self.dict_data.items(): capacity, chosen = int(imgdata[4]), int(imgdata[7]) if chosen == 0:`
0fbc087e Chunk staged.	312	`continue`
ea1eb31a Chunk spark is privileg...	313
f25fd27c Chunk staged. 'hbase' m...	314	`try:`
ea1eb31a Chunk spark is privileg...	315	`tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')`
84648488 Chunk reverted.	316 317	`tmpf_src.write(imgdata[0]) tmpf_src.seek(0)`
f1fa5b17 Chunk review & streaming.	318	`tmpf_dst = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')`
f25fd27c Chunk staged. 'hbase' m...	319 320 321 322 323 324	`if rate == None: embed_rate = self.steger.embed_raw_data(tmpf_src.name, os.path.join(package_dir, '../res/toembed'), tmpf_dst.name) else:`
1c2a3fa0 Chunk staged.	325 326 327 328 329 330 331 332 333 334 335 336 337	`assert (rate >= 0 and rate < 1) # print capacity hidden = np.random.bytes(int(capacity * rate) / 8) embed_rate = self.steger.embed_raw_data(tmpf_src.name, hidden, tmpf_dst.name, frommem=True) tmpf_dst.seek(0) raw = tmpf_dst.read() index = md5(raw).hexdigest() dict_data_ext[index + '.jpg'] = [raw] + self._get_info(raw, embed_rate, 0, 1) except Exception as e: print e`
24768a99 Chunk mode 'hbase' fini...	338 339	`raise finally:`
f25fd27c Chunk staged. 'hbase' m...	340 341 342 343 344	`tmpf_src.close() tmpf_dst.close() self.dict_data.update(dict_data_ext)`
1dc7c44b Chunk crawler-hbase-spa...	345 346 347 348 349	`if not writeback: return self.dict_data else: try: with self.table.batch(batch_size=5000) as b:`
f25fd27c Chunk staged. 'hbase' m...	350 351 352	`for imgname, imginfo in dict_data_ext.items(): b.put(imgname, {`
24768a99 Chunk mode 'hbase' fini...	353	`'cf_pic:data': imginfo[0],`
f25fd27c Chunk staged. 'hbase' m...	354 355 356	`'cf_info:width': str(imginfo[1]), 'cf_info:height': str(imginfo[2]), 'cf_info:size': str(imginfo[3]),`
0fbc087e Chunk staged.	357	`'cf_info:capacity': str(imginfo[4]),`
84648488 Chunk reverted.	358	`'cf_info:quality': str(imginfo[5]),`
0fbc087e Chunk staged.	359	`'cf_info:rate': str(imginfo[6]),`
f25fd27c Chunk staged. 'hbase' m...	360 361 362	`'cf_tag:chosen': str(imginfo[7]), 'cf_tag:class': str(imginfo[8]), })`
1dc7c44b Chunk crawler-hbase-spa...	363	`except ValueError:`
84648488 Chunk reverted.	364	`raise`
f25fd27c Chunk staged. 'hbase' m...	365 366 367 368 369	`elif mode == 'spark': if self.sparkcontex == None: self.sparkcontex = SC.Sparker(host='HPC-server', appname='ImageILSVRC', master='spark://HPC-server:7077')`
ea1eb31a Chunk spark is privileg...	370
ea1eb31a Chunk spark is privileg...	371	`cols = [`
f25fd27c Chunk staged. 'hbase' m...	372 373	`'cf_pic:data', 'cf_info:width',`
24768a99 Chunk mode 'hbase' fini...	374	`'cf_info:height',`
f25fd27c Chunk staged. 'hbase' m...	375 376 377	`'cf_info:size', 'cf_info:capacity', 'cf_info:quality',`
ea1eb31a Chunk spark is privileg...	378	`'cf_info:rate',`
f25fd27c Chunk staged. 'hbase' m...	379 380 381 382 383 384 385 386 387 388 389	`'cf_tag:chosen', 'cf_tag:class' ] if readforward: self.rdd_data = self.sparkcontex.read_hbase(self.table_name, func=SC.rddparse_all_ILS, collect=False) rdd_data_ext = self.rdd_data.map(lambda x: SC.rddembed_ILS(x, rate=rate)).filter(lambda x: x != None) self.rdd_data = self.rdd_data.union(rdd_data_ext) if not writeback:`
1c2a3fa0 Chunk staged.	390 391 392 393 394 395 396 397 398	`return self.rdd_data else: self.sparkcontex.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, withdata=withdata) else: raise Exception("Unknown mode!")`
f25fd27c Chunk staged. 'hbase' m...	399 400 401 402	`def _extract_feat(self, mode='hbase', feattype='ibd', readforward=False, writeback=False, withdata=False): if mode == 'hbase': if self.table == None: self.table = self.get_table()`
02528074 Chunk staged.	403 404	`if readforward:`
0bd44a28 Chunk staged.	405	`self.dict_data = {}`
0fbc087e Chunk staged.	406	`cols = [`
1c2a3fa0 Chunk staged.	407 408 409 410 411 412 413 414 415 416 417	`'cf_pic:data', 'cf_info:width', 'cf_info:height', 'cf_info:size', 'cf_info:capacity', 'cf_info:quality', 'cf_info:rate', 'cf_tag:chosen', 'cf_tag:class' ] for key, data in self.table.scan(columns=cols):`
0fbc087e Chunk staged.	418 419	`data = [data[k] for k in cols] self.dict_data[key] = data`
84648488 Chunk reverted.	420
0fbc087e Chunk staged.	421	`for imgname, imgdata in self.dict_data.items():`
489c5608 Chunk debugging...	422 423	`try: tmpf_src = tempfile.NamedTemporaryFile(suffix='.jpg', mode='w+b')`
0fbc087e Chunk staged.	424	`tmpf_src.write(imgdata[0])`
489c5608 Chunk debugging...	425	`tmpf_src.seek(0)`
0fbc087e Chunk staged.	426
1c2a3fa0 Chunk staged.	427	`desc = json.dumps(self._get_feat(tmpf_src.name, feattype=feattype).tolist())`
0fbc087e Chunk staged.	428
02528074 Chunk staged.	429	`self.dict_data[imgname].append(desc)`
0bd44a28 Chunk staged.	430
0fbc087e Chunk staged.	431	`except Exception as e:`
ea1eb31a Chunk spark is privileg...	432	`print e`
f25fd27c Chunk staged. 'hbase' m...	433 434	`raise finally:`
84648488 Chunk reverted.	435 436	`tmpf_src.close()`
f1fa5b17 Chunk review & streaming.	437	`if not writeback:`
f25fd27c Chunk staged. 'hbase' m...	438 439 440 441 442 443	`return self.dict_data else: try: with self.table.batch(batch_size=5000) as b: for imgname, imginfo in self.dict_data.items(): b.put(imgname,`
1c2a3fa0 Chunk staged.	444 445 446 447 448 449 450 451 452 453 454	`{ 'cf_pic:data': imginfo[0], 'cf_info:width': str(imginfo[1]), 'cf_info:height': str(imginfo[2]), 'cf_info:size': str(imginfo[3]), 'cf_info:capacity': str(imginfo[4]), 'cf_info:quality': str(imginfo[5]), 'cf_info:rate': str(imginfo[6]), 'cf_tag:chosen': str(imginfo[7]), 'cf_tag:class': str(imginfo[8]), 'cf_feat:' + feattype: imginfo[9],`
24768a99 Chunk mode 'hbase' fini...	455 456	`}) except ValueError:`
f25fd27c Chunk staged. 'hbase' m...	457 458 459 460 461 462	`raise elif mode == 'spark': if self.sparkcontex == None: self.sparkcontex = SC.Sparker(host='HPC-server', appname='ImageILSVRC', master='spark://HPC-server:7077')`
24768a99 Chunk mode 'hbase' fini...	463
ea1eb31a Chunk spark is privileg...	464	`cols = [`
24768a99 Chunk mode 'hbase' fini...	465	`'cf_pic:data',`
ea1eb31a Chunk spark is privileg...	466	`'cf_info:width',`
f25fd27c Chunk staged. 'hbase' m...	467	`'cf_info:height',`
ea1eb31a Chunk spark is privileg...	468	`'cf_info:size',`
f25fd27c Chunk staged. 'hbase' m...	469 470	`'cf_info:capacity', 'cf_info:quality',`
24768a99 Chunk mode 'hbase' fini...	471	`'cf_info:rate',`
f25fd27c Chunk staged. 'hbase' m...	472 473	`'cf_tag:chosen', 'cf_tag:class',`
ea1eb31a Chunk spark is privileg...	474	`'cf_feat:' + feattype,`
f25fd27c Chunk staged. 'hbase' m...	475 476 477 478 479 480 481 482 483	`] if readforward: self.rdd_data = self.sparkcontex.read_hbase(self.table_name, func=SC.rddparse_all_ILS, collect=False) self.rdd_data = self.rdd_data.mapValues(lambda items: SC.rddfeat_ILS(items)) # print self.rdd_data.collect()[0] # return`
1c2a3fa0 Chunk staged.	484 485 486 487 488 489 490 491 492 493	`if not writeback: return self.rdd_data else: self.sparkcontex.write_hbase(self.table_name, self.rdd_data, fromrdd=True, columns=cols, withdata=withdata) else: raise Exception("Unknown mode!")`
def format(self):
    """Run the full 'hbase' pipeline: info, embedding (rate 0.1), ibd features.

    The first two stages keep their results in memory only; the final
    feature stage is the one invoked with ``writeback=True``.
    """
    backend = 'hbase'
    self._extract_data(mode=backend, writeback=False)
    self._embed_data(mode=backend, rate=0.1, readforward=False, writeback=False)
    self._extract_feat(mode=backend, feattype='ibd', readforward=False, writeback=True)
0bd44a28 Chunk staged.	500
2c507774 Chunk staged.	501
def load_data(self, mode='local', feattype='ibd', tagtype='class'):
    """Load feature vectors and labels.

    Args:
        mode: 'local' reads ``self.list_file`` (tab-separated; first field is
            the image hash, last field the tag) and per-image feature files
            under ``self.feat_dir``. 'remote'/'hbase' scans the table.
            'spark'/'cluster' reads through the Spark wrapper.
        feattype: Feature type; the feature-file extension in 'local' mode
            and the 'cf_feat:<feattype>' column in 'hbase' mode.
        tagtype: Selects the 'cf_tag:<tagtype>' column in 'hbase' mode.

    Returns:
        ``(X, Y)``: a list of features and a parallel list of labels.

    Raises:
        Exception: If ``mode`` is not one of the supported values.
    """
    X = []
    Y = []

    if mode == "local":
        dict_dataset = {}

        with open(self.list_file, 'rb') as tsvfile:
            for line in csv.reader(tsvfile, delimiter='\t'):
                if not line:
                    # Skip blank rows instead of failing on line[0].
                    continue
                name, tag = line[0], line[-1]
                path_feat = os.path.join(self.feat_dir, name[:3], name[3:] + '.' + feattype)
                # A path string is always truthy; test that the file exists.
                if os.path.exists(path_feat):
                    with open(path_feat, 'rb') as featfile:
                        dict_dataset[name] = (tag, json.loads(featfile.read()))

        for tag, feat in dict_dataset.values():
            # Flatten the three-level nested feature into one flat list.
            X.append([item for sublist in feat for subsublist in sublist for item in subsublist])
            Y.append(int(tag))

    elif mode in ("remote", "hbase"):
        if self.table is None:
            self.table = self.get_table()

        col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype
        for key, data in self.table.scan(columns=[col_feat, col_tag]):
            X.append(json.loads(data[col_feat]))
            # Tags are stored via str(): accept 'True' as well as the '1'
            # written for integer tags.
            Y.append(1 if data[col_tag] in ('True', '1') else 0)

    elif mode in ("spark", "cluster"):
        if self.sparkcontex is None:
            self.sparkcontex = SC.Sparker(host='HPC-server', appname='ImageCV',
                                          master='spark://HPC-server:7077')

        # NOTE(review): the original comment says result is a dict
        # {key: [feat, tag], ...}, but the loop unpacks each element as a
        # (feat, tag) pair - confirm what read_hbase returns here.
        result = self.sparkcontex.read_hbase(self.table_name)
        for feat, tag in result:
            X.append(feat)
            Y.append(tag)

    else:
        raise Exception("Unknown mode!")

    return X, Y
84648488 Chunk reverted.
f25fd27c Chunk staged. 'hbase' m...
f1fa5b17 Chunk review & streaming.
f25fd27c Chunk staged. 'hbase' m...
ea1eb31a Chunk spark is privileg...
84648488 Chunk reverted.
02528074 Chunk staged.
f1fa5b17 Chunk review & streaming.
ea1eb31a Chunk spark is privileg...
0bd44a28 Chunk staged.
e3e7e73a Chunk spider standalone...
0bd44a28 Chunk staged.
ea1eb31a Chunk spark is privileg...
0bd44a28 Chunk staged.
ea1eb31a Chunk spark is privileg...
0bd44a28 Chunk staged.
02528074 Chunk staged.
ea1eb31a Chunk spark is privileg...
02528074 Chunk staged.
0bd44a28 Chunk staged.
02528074 Chunk staged.
84648488 Chunk reverted.
02528074 Chunk staged.
ea1eb31a Chunk spark is privileg...
02528074 Chunk staged.
ea1eb31a Chunk spark is privileg...
ea1eb31a Chunk spark is privileg...
84648488 Chunk reverted.