spider1.sh 945 Bytes
#!/usr/bin/env bash
# chunk @ 2014
########################################################################################
##
## Point the mspider project at a scrapy config file and a table name, then
## run the `larvae` spider.
##
## Usage: spider1.sh <scrapy-cfg-file> <new-table-name>
##   $1  file name, relative to the mspider directory, that scrapy.cfg is
##       symlinked to
##   $2  table name that replaces the one recorded in mspider/table_name
##
########################################################################################

# bash is required: `source` and the `workon` shell function used below are
# not available in a plain POSIX sh.

# Set the terminal type before the rc file is read.
# NOTE(review): presumably needed because this runs without a real terminal
# (cron or similar) - confirm.
export TERM=linux

# Load the hadoop user's shell setup. `workon` is not defined in this script,
# so it is expected to come from this file (presumably virtualenvwrapper).
source /home/hadoop/.bashrc

# Activate the Python environment the spider runs in; stop here rather than
# continuing with whatever interpreter happens to be on PATH.
workon envtmp || {
  printf 'error: could not activate environment "envtmp"\n' >&2
  exit 1
}

#SPIDER_DIR=/home/hadoop/workspace/pycharm/test/ImageR/mspider
# Root of the scrapy project, and the file whose first line records the table
# name currently written into the project's files.
SPIDER_DIR=/home/hadoop/Programs/newdist/mspider
FILE_PATH=/home/hadoop/Programs/newdist/mspider/mspider/table_name

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# --- validate input before touching anything --------------------------------
if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
  printf 'Usage: %s <scrapy-cfg-file> <new-table-name>\n' "${0##*/}" >&2
  exit 2
fi
cfg_name=$1
new_table=$2

[ "$cfg_name" != "scrapy.cfg" ] ||
  die "refusing to symlink scrapy.cfg to itself"
[ -f "$SPIDER_DIR/$cfg_name" ] ||
  die "config file not found: $SPIDER_DIR/$cfg_name"
[ -r "$FILE_PATH" ] ||
  die "cannot read $FILE_PATH"

PRE_TABLE=$(head -n 1 "$FILE_PATH") || die "cannot read $FILE_PATH"
# An empty old name would match every file in grep and is an invalid (empty)
# pattern for sed, so there is nothing sensible to replace.
[ -n "$PRE_TABLE" ] ||
  die "$FILE_PATH is empty; previous table name unknown"

# --- switch the active config -----------------------------------------------
ln -sf "$SPIDER_DIR/$cfg_name" "$SPIDER_DIR/scrapy.cfg" ||
  die "cannot link $SPIDER_DIR/scrapy.cfg"

# --- rename the table throughout the project --------------------------------
if [ "$PRE_TABLE" != "$new_table" ]; then
  # Both names are treated as literal text: escape regex metacharacters in
  # the old name and the characters special on the replacement side (/ & \)
  # in the new one before handing them to sed.
  old_re=$(printf '%s\n' "$PRE_TABLE" | sed -e 's/[]\/$*.^[]/\\&/g')
  new_re=$(printf '%s\n' "$new_table" | sed -e 's/[\/&]/\\&/g')

  # grep -F matches the old name as a fixed string; -Z / xargs -0 keep file
  # names with whitespace intact; xargs -r skips sed when nothing matched.
  # NOTE(review): as before, every matching file under SPIDER_DIR is edited,
  # including binary or VCS files if present - consider grep -I.
  grep -rlZF -- "$PRE_TABLE" "$SPIDER_DIR" |
    xargs -0 -r sed -i -e "s/$old_re/$new_re/g" -- ||
    die "failed to replace '$PRE_TABLE' with '$new_table'"
fi

# Record the new name so the next run knows what to replace.
printf '%s\n' "$new_table" > "$FILE_PATH" || die "cannot write $FILE_PATH"

# --- run the spider ---------------------------------------------------------
cd "$SPIDER_DIR" || die "cannot cd to $SPIDER_DIR"

# NOTE(review): "vailable" looks like a typo for "Available"; the message is
# left unchanged in case something parses this output.
echo "vailable spiders:"
scrapy list

# printf gives the intended blank line; bash's echo prints "\n" literally.
printf '\ncrawling:\n'
time scrapy crawl larvae

#scan 'MSPIDER2', { COLUMNS => 'cf_tag:class', LIMIT => 10, FILTER => "ValueFilter( =, 'binaryprefix:1' )" }
#scan 'MSPIDER2', { COLUMNS => 'cf_tag:class',  FILTER => "ValueFilter( =, 'binaryprefix:1' )" }