# WeiboSpider/config/spider.yaml

# coding:utf-8
# Request timing settings. The key names (including the "interal" spelling)
# are kept unchanged for compatibility with whatever reads this file.
time_out: 200                   # timeout for crawling and storing user info
min_crawl_interal: 10           # min interval between http requests
max_crawl_interal: 20           # max interval between http requests
# Time to sleep when crawling raises exceptions (5 * 60).
# YAML does not evaluate arithmetic: a plain scalar written as 5*60 is loaded
# as the string "5*60" rather than a number, so the product is written out.
# NOTE(review): the unit is presumably seconds - confirm against the consumer.
excp_interal: 300

# Upper limits for each kind of crawl task.
# TODO: set a default value for the max_* crawl limits
max_search_page: 50            # max number of search pages to crawl
max_home_page: 50              # max number of user home pages to crawl
max_comment_page: 2000         # max number of comment pages to crawl
max_repost_page: 2000          # max number of repost pages to crawl
max_dialogue_page: 2000        # max number of dialogue pages to crawl
max_retries: 5                 # number of retries when crawling

# Set the values below if you log in from an unusual location.
# They are used for identifying the verification code.
yundama_username: xxxxxx           # account for yundama
yundama_passwd: xxxxxx             # password for yundama

# Only crawl weibo posts (bowen) dated after this time.
# This only affects the home crawler.
time_after: '1970-01-01 08:00:00'

# Whether the account follows the uid below.
# If yes, rows in wbuser will have 1 in the isFan column.
samefollow_uid: ''

# The value of running_mode can be normal or quick.
# Normal mode is more stable; quick mode crawls much faster, but the weibo
# account will almost certainly be banned.
running_mode: normal

# The value of crawling_mode can be accurate or normal.
# In normal mode, the spider won't crawl the weibo content behind "展开全文"
# ("expand full text") when executing home crawl tasks or search crawl tasks,
# so the speed will be much quicker.
# In accurate mode, the spider will crawl the content behind "展开全文", which
# is slower but gives more details.
crawling_mode: normal


# The maximum count for sharing each cookie.
# NOTE(review): presumably the number of hosts, per the key name - confirm.
# If you choose quick mode, your cookie will be used until it's banned.
share_host_count: 5
# The expire time (hours) of each weibo cookie.
cookie_expire_time: 23

# 1 to allow downloading images, otherwise set it to 0
images_allow: 1

# the default image path is '${user.home}/weibospider/images'
# if you want to download images into another directory, set the path below
images_path: ''

# the value can be large or thumbnail
# with large, the large image is downloaded
# with thumbnail, the thumbnail image is downloaded
image_type: large

# Database connection settings.
db:
    host: 127.0.0.1
    port: 3306
    user: root
    # Quoted so the value stays a string: an unquoted all-digit password is
    # loaded as an integer (and one with a leading zero may change value
    # under YAML 1.1 octal rules).
    password: '123456'
    db_name: weibo
    db_type: mysql

# Redis connection settings.
# NOTE(review): the small integers below appear to be Redis database numbers
# (the expire_time comment refers to "db2", matching urls: 2) - confirm.
redis:
    host: 127.0.0.1
    port: 6379
    password: ''
    # store and fetch cookies
    cookies: 1
    # store fetched urls and results, so you can decide whether to retry
    # crawling the urls or not
    urls: 2
    # broker for celery
    broker: 5
    # backend for celery
    backend: 6
    # user ids and names, for repost info analysis;
    # can be safely deleted after repost tasks
    id_name: 8
    # expire_time (hours) for redis db2; if the data is useless to you,
    # you can set a smaller value
    expire_time: 48
    # Redis Sentinel for HA. If you need it, add each sentinel's host and
    # port under the sentinel key, like this:
    #
    #   sentinel:
    #       - host: 2.2.2.2
    #         port: 26379
    #       - host: 3.3.3.3
    #         port: 26379
    #
    sentinel: ''
    # redis sentinel master name; if you don't need it, just set master: ''
    master: ''
    # socket timeout for redis sentinel
    # NOTE(review): the unit is not stated here (presumably seconds) - confirm.
    socket_timeout: 5


# Settings for sending warnings by email.
email:
    # your email account must have the SMTP & POP3 services enabled
    server: smtp.sina.com
    port: 587
    # sending email account
    from: xxxxx@sina.com
    # your email password
    password: xxxx
    # bind a 139 email, so your phone will receive the warning message
    to: xxxxxxxx@139.com
    subject: Warning Of Weibo Spider
    warning_info: Please find out the reason why the spider stops working