Blame view

mspider/scrapy.cfg 1.09 KB
4737e166   Chunk   staged.
1
2
3
4
5
6
7
8
9
10
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html

[settings]
default = mspider.settings

[deploy]
#url = http://localhost:6800/
85492ba2   Chunk   Larvae finished.
11

4737e166   Chunk   staged.
12
project = mspider
85492ba2   Chunk   Larvae finished.
13
14
15
16
17
18
19

[custom]
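## N.B.:
## Values use JSON-style list/object syntax: NO trailing commas are allowed,
## and strings must be delimited with double quotes, never single quotes.
## Single quotes inside a double-quoted string (as in the xpaths value) are fine.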
## e.g.
## start_urls = ["http://image.baidu.com/","http://v.baidu.com/"]
## allowed_domains = ["baidu.com", "bdstatic.com"]
1dc7c44b   Chunk   crawler-hbase-spa...
20
## cookies = {"userid" : "kitt232qsh", "BAIDUID":"3E4101AB69C6EDBDE3A677BC7B8F0794", "BAIDUPSID" : "3E4101AB69C6EDBDE3A677BC7B8F0794"}
85492ba2   Chunk   Larvae finished.
21
22
23
24
## regexes = ["http://image.baidu.com/channel\\S+", "http://image.baidu.com/channel\\S+"]

start_urls = ["http://image.baidu.com/","http://v.baidu.com/"]
allowed_domains = ["baidu.com", "bdstatic.com"]
1dc7c44b   Chunk   crawler-hbase-spa...
25
cookies = {"userid" : "kitt232qsh", "BAIDUID":"3E4101AB69C6EDBDE3A677BC7B8F0794", "BAIDUPSID" : "3E4101AB69C6EDBDE3A677BC7B8F0794"}
85492ba2   Chunk   Larvae finished.
26
27
regexes = ["http://image.baidu.com/channel\\S+", "http://image.baidu.com/channel\\S+"]
xpaths = ["//img[contains(@src,'jpg') and (contains(@src,'hiphotos') or contains(@src,'bdstatic'))]/@src"]