7.30

5 years ago · 62d4136d59
16 changed files with 36 additions and 707 deletions
--- a/all_demo1.egg
+++ b/all_demo1.egg
--- a/demo1/__pycache__/middlewares.cpython-37.pyc
+++ b/demo1/__pycache__/middlewares.cpython-37.pyc
--- a/demo1/__pycache__/pipelines.cpython-37.pyc
+++ b/demo1/__pycache__/pipelines.cpython-37.pyc
--- a/demo1/__pycache__/settings.cpython-37.pyc
+++ b/demo1/__pycache__/settings.cpython-37.pyc
--- a/demo1/logs/2020_7_25.log
+++ b/demo1/logs/2020_7_25.log
@@ -1,342 +0,0 @@
-2020-07-25 09:09:10 [scrapy.utils.log] INFO: Scrapy 2.2.0 started (bot: demo1)
-2020-07-25 09:09:10 [scrapy.utils.log] INFO: Versions: lxml 4.5.1.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.7.8 (tags/v3.7.8:4b47a5b6ba, Jun 28 2020, 08:53:46) [MSC v.1916 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1g  21 Apr 2020), cryptography 2.9.2, Platform Windows-10-10.0.18362-SP0
-2020-07-25 09:09:10 [scrapy.crawler] INFO: Overridden settings:
-{'BOT_NAME': 'demo1',
- 'CONCURRENT_REQUESTS': 1,
- 'DOWNLOAD_DELAY': 8,
- 'LOG_ENABLED': False,
- 'LOG_FILE': 'logs/2020_7_25.log',
- 'LOG_LEVEL': 'INFO',
- 'LOG_STDOUT': True,
- 'NEWSPIDER_MODULE': 'demo1.spiders',
- 'RETRY_HTTP_CODES': [500, 502, 503, 504, 400, 403, 404, 408],
- 'RETRY_TIMES': True,
- 'SPIDER_MODULES': ['demo1.spiders']}
-2020-07-25 09:09:10 [scrapy.extensions.telnet] INFO: Telnet Password: 356dd55a959a2996
-2020-07-25 09:09:10 [scrapy.middleware] INFO: Enabled extensions:
-['scrapy.extensions.corestats.CoreStats',
- 'scrapy.extensions.telnet.TelnetConsole',
- 'scrapy.extensions.logstats.LogStats']
-2020-07-25 09:09:10 [stdout] INFO: 我是RundomUserAgentMiddleware
-2020-07-25 09:09:10 [scrapy.middleware] INFO: Enabled downloader middlewares:
-['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
- 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
- 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
- 'demo1.middlewares.RundomUserAgentMiddleware',
- 'demo1.middlewares.DingZhiCookieMiddleware',
- 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
- 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
- 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
- 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
- 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
- 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
- 'scrapy.downloadermiddlewares.stats.DownloaderStats']
-2020-07-25 09:09:10 [scrapy.middleware] INFO: Enabled spider middlewares:
-['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
- 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
- 'scrapy.spidermiddlewares.referer.RefererMiddleware',
- 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
- 'scrapy.spidermiddlewares.depth.DepthMiddleware']
-2020-07-25 09:09:10 [stdout] INFO: 我也不知道啊
-2020-07-25 09:09:10 [scrapy.middleware] INFO: Enabled item pipelines:
-['demo1.pipelines.MysqlYiBUPipeline']
-2020-07-25 09:09:10 [scrapy.core.engine] INFO: Spider opened
-2020-07-25 09:09:10 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
-2020-07-25 09:09:10 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66899
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66901
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66875
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66916
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66903
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66917
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66904
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66497
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66907
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66905
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66486
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66498
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66014
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65854
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65861
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65869
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65637
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65515
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65853
-2020-07-25 09:09:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65516
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65867
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65904
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65903
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65855
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65889
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65850
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64663
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65870
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65908
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/63857
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/63631
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/63242
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62685
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62398
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65888
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62107
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61458
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61395
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61389
-2020-07-25 09:09:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61396
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65396
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65517
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65518
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65864
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65873
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65872
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64665
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65907
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65875
-2020-07-25 09:09:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65876
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64246
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64243
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64244
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65877
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65399
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64242
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/63843
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/63632
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65879
-2020-07-25 09:09:29 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65884
-2020-07-25 09:09:36 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/59300
-2020-07-25 09:09:36 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58975
-2020-07-25 09:09:36 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/59298
-2020-07-25 09:09:36 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58832
-2020-07-25 09:09:36 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58833
-2020-07-25 09:09:36 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58091
-2020-07-25 09:09:36 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57600
-2020-07-25 09:09:36 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57589
-2020-07-25 09:09:36 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/56711
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/56521
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/56730
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/56513
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/56734
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55638
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55707
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55829
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55709
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/53756
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/53743
-2020-07-25 09:09:37 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55773
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65884
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65882
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64241
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64240
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62693
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65887
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62416
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64239
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62267
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62272
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64245
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61772
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61381
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61457
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61399
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60714
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60713
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60731
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/64174
-2020-07-25 09:09:46 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60745
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55856
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55773
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/53746
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55724
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/53174
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55781
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/50444
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/50445
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/50447
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/50448
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/49738
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/50451
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/49933
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/49737
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/49746
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/49739
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57523
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/49740
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/49741
-2020-07-25 09:09:57 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/49742
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66918
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/66500
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65858
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65890
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65909
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65678
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65295
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65862
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65893
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65896
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65898
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65900
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65865
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65902
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/63856
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65857
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/65866
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/63386
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62269
-2020-07-25 09:10:05 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62268
-2020-07-25 09:10:10 [scrapy.extensions.logstats] INFO: Crawled 7 pages (at 7 pages/min), scraped 0 items (at 0 items/min)
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55839
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55841
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55836
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/55831
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/46922
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/46923
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/46924
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/42979
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/42994
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40859
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/42997
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40330
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40327
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/39293
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40306
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40307
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40338
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40309
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40341
-2020-07-25 09:10:14 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40340
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62271
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62120
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62109
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/62115
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61388
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/61391
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60721
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60732
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60725
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60734
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60728
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60246
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60242
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60717
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60237
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60240
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/59695
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/60718
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/59303
-2020-07-25 09:10:26 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/59307
-2020-07-25 09:10:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40310
-2020-07-25 09:10:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40342
-2020-07-25 09:10:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40312
-2020-07-25 09:10:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40345
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40346
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40317
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40356
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/32286
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31963
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/40357
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31044
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31051
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31040
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31050
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31048
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31047
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31041
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31046
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31049
-2020-07-25 09:10:35 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31052
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58968
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58971
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58831
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58838
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58961
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58837
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58841
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58503
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58505
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58103
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/58258
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57828
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57461
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57598
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57593
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57465
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57156
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57601
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/57158
-2020-07-25 09:10:45 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/56926
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31041
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31055
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31056
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31011
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/29854
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/31402
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/29869
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/29871
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/28194
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/26630
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/25042
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/23115
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/23117
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/21870
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/20963
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/20721
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/19099
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/18143
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/17298
-2020-07-25 09:10:55 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/17297
-2020-07-25 09:11:00 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6188
-2020-07-25 09:11:00 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=1&citycode=RegisterArea_HBDQ_Shanxi_DaTongShi&more=False----这个就是一页啊
-2020-07-25 09:11:10 [scrapy.extensions.logstats] INFO: Crawled 13 pages (at 6 pages/min), scraped 0 items (at 0 items/min)
-2020-07-25 09:11:11 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/7275
-2020-07-25 09:11:11 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=2&citycode=RegisterArea_HBDQ_Shanxi_DaTongShi&more=False----这个就是一页啊
-2020-07-25 09:11:23 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=3&citycode=RegisterArea_HBDQ_Shanxi_DaTongShi&more=False----没有发布过内容
-2020-07-25 09:11:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6193
-2020-07-25 09:11:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6192
-2020-07-25 09:11:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6190
-2020-07-25 09:11:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6194
-2020-07-25 09:11:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6189
-2020-07-25 09:11:34 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6195
-2020-07-25 09:11:34 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=4&citycode=RegisterArea_HBDQ_Shanxi_DaTongShi&more=False----这个就是一页啊
-2020-07-25 09:11:44 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=1&citycode=RegisterArea_HBDQ_Shanxi_YangQuanShi&more=False----没有发布过内容
-2020-07-25 09:11:56 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6205
-2020-07-25 09:11:56 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=2&citycode=RegisterArea_HBDQ_Shanxi_YangQuanShi&more=False----这个就是一页啊
-2020-07-25 09:12:06 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=3&citycode=RegisterArea_HBDQ_Shanxi_YangQuanShi&more=False----没有发布过内容
-2020-07-25 09:12:10 [scrapy.extensions.logstats] INFO: Crawled 19 pages (at 6 pages/min), scraped 0 items (at 0 items/min)
-2020-07-25 09:12:16 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=4&citycode=RegisterArea_HBDQ_Shanxi_YangQuanShi&more=False----没有发布过内容
-2020-07-25 09:12:28 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/9357
-2020-07-25 09:12:28 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=1&citycode=RegisterArea_HBDQ_Shanxi_JinZhongShi&more=False----这个就是一页啊
-2020-07-25 09:12:38 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/7283
-2020-07-25 09:12:38 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/7279
-2020-07-25 09:12:38 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6523
-2020-07-25 09:12:38 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/6296
-2020-07-25 09:12:38 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=2&citycode=RegisterArea_HBDQ_Shanxi_JinZhongShi&more=False----这个就是一页啊
-2020-07-25 09:12:50 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=3&citycode=RegisterArea_HBDQ_Shanxi_JinZhongShi&more=False----没有发布过内容
-2020-07-25 09:12:58 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=4&citycode=RegisterArea_HBDQ_Shanxi_JinZhongShi&more=False----没有发布过内容
-2020-07-25 09:13:07 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=1&citycode=RegisterArea_HBDQ_Shanxi_XinZhouShi&more=False----没有发布过内容
-2020-07-25 09:13:10 [scrapy.extensions.logstats] INFO: Crawled 25 pages (at 6 pages/min), scraped 0 items (at 0 items/min)
-2020-07-25 09:13:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/7697
-2020-07-25 09:13:18 [stdout] INFO: 这个链接已经爬过了-----：https://www.chacewang.com/news/NewsDetail/5416
-2020-07-25 09:13:18 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=2&citycode=RegisterArea_HBDQ_Shanxi_XinZhouShi&more=False----这个就是一页啊
-2020-07-25 09:13:26 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=3&citycode=RegisterArea_HBDQ_Shanxi_XinZhouShi&more=False----没有发布过内容
-2020-07-25 09:13:36 [stdout] INFO: url:https://www.chacewang.com/news/PIndex_New?chaPlate=4&citycode=RegisterArea_HBDQ_Shanxi_XinZhouShi&more=False----没有发布过内容
-2020-07-25 09:13:36 [scrapy.core.engine] INFO: Closing spider (finished)
-2020-07-25 09:13:36 [stdout] INFO: 爬虫运行完毕了
-2020-07-25 09:13:36 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
-{'downloader/request_bytes': 12478,
- 'downloader/request_count': 28,
- 'downloader/request_method_count/GET': 28,
- 'downloader/response_bytes': 337611,
- 'downloader/response_count': 28,
- 'downloader/response_status_count/200': 28,
- 'elapsed_time_seconds': 265.967696,
- 'finish_reason': 'finished',
- 'finish_time': datetime.datetime(2020, 7, 25, 1, 13, 36, 488601),
- 'log_count/INFO': 289,
- 'request_depth_max': 2,
- 'response_received_count': 28,
- 'scheduler/dequeued': 28,
- 'scheduler/dequeued/memory': 28,
- 'scheduler/enqueued': 28,
- 'scheduler/enqueued/memory': 28,
- 'start_time': datetime.datetime(2020, 7, 25, 1, 9, 10, 520905)}
-2020-07-25 09:13:36 [scrapy.core.engine] INFO: Spider closed (finished)
--- a/demo1/main.py
+++ b/demo1/main.py
@ -3,5 +3,5 @@ from scrapy.utils.project import get_project_settings

 if __name__ == '__main__':
    process = CrawlerProcess(get_project_settings())
-    process.crawl('chacewang')    #  你需要将此处的spider_name替换为你自己的爬虫名称
+    process.crawl('chacewangSpider')    #  你需要将此处的spider_name替换为你自己的爬虫名称
    process.start()
--- a/demo1/middlewares.py
+++ b/demo1/middlewares.py
@ -6,7 +6,7 @@ from scrapy.http import HtmlResponse
 from selenium.webdriver import Chrome
 from selenium.webdriver.chrome.options import Options
 from scrapy import signals
-
+import logging
 # useful for handling different item types with a single interface
 from itemadapter import is_item, ItemAdapter
 from scrapy.utils.project import get_project_settings as settings
@ -107,7 +107,7 @@ class Demo1DownloaderMiddleware:
 class RundomUserAgentMiddleware(object):
    list=settings().get('USER_AGENT_LIST')
    def __init__(self):
-        print('我是RundomUserAgentMiddleware')
+        logging.info('我是RundomUserAgentMiddleware')
    def process_request(self,request,spider):
        random_head=random.choice(self.list)
        request.headers['User-Agent'] = random_head
@ -133,6 +133,8 @@ class RundomProxyIPMiddleware(object):
 #定制cookie
 class DingZhiCookieMiddleware(object):
    city=settings().get('CURRENT_CITY')
+    def __init__(self):
+        logging.info('我是DingZhiCookieMiddleware')
    def process_request(self,request,spider):
        url=str(request._get_url())
        if ('/news/NewsDetail/'.lower() not in str(url).lower()) and ('cityCode'.lower() in str(url).lower()):
--- a/demo1/pipelines.py
+++ b/demo1/pipelines.py
@ -147,7 +147,7 @@ class ProcessMysqlPipeline(object):

    def handle_error(self, failure):
        # 处理异步插入时的异常
-        print(failure)
+        logging.info(failure)

    def do_insert(self, cursor, item):
        # 执行具体的插入
@ -182,7 +182,6 @@ class MysqlYiBUPipeline(object):
        # 连接数据池ConnectionPool，使用pymysql或者Mysqldb连接
        dbpool = adbapi.ConnectionPool('pymysql', **adbparams)
        # 返回实例化参数
-        print('我也不知道啊')
        return cls(dbpool)
    def get_date(self,items):
        print(items)
@ -219,7 +218,7 @@ class MysqlYiBUPipeline(object):
        #      'name1','name2','name3','name1','name2','name3','name1','name2','name3'
        #  ]
        #  }
-        print(item)
+        logging.info(item)
        if item['leixing']=='申报通知':
            item['leixing']=str(1)
        elif item['leixing']=='公示公告':
@ -252,7 +251,7 @@ class MysqlYiBUPipeline(object):
        insert_sql2='''
                    insert into t_policy(title,title_url,img_url,publish_depart_id,publish_time,type,content,intro) 
                                values('%s','%s','%s','%s','%s','%s','%s','%s')
-                    ''' % (str(item["biaoti"]),str(item['lianjie']),str(random.randint(0,9)),str(laiyuan_id),item["shijian"],item["leixing"],item["xiangqing"],item["jianjie"])
+                    ''' % (str(item["biaoti"]),str(item['lianjie']),get_project_settings().get('TITLE_IMAGE')+str(random.randint(0,9))+'.png',str(laiyuan_id),item["shijian"],item["leixing"],item["xiangqing"],item["jianjie"])

        sel_sql2='''
        select id from t_policy where title_url='%s'
@ -271,10 +270,10 @@ class MysqlYiBUPipeline(object):
            insert_sql4 = 'insert into t_policy_file_crawl(policy_id,file_name,file_url,file_location) values' + \
                          str((('("' + str(xinwen_id) + '","{}","{}","{}"),') * down_list_num).rstrip(',')).format(*item.get('wenjian'))
            cursor.execute(insert_sql4)
-        print('插入完成')
+        logging.info('插入完成')
    def handle_error(self, failure):
        if failure:
            # 打印错误信息
-            print('数据库插入异常信息--------：'+failure)
+            logging.info('数据库插入异常信息--------：'+failure)
    def close_spider(self, spider):
-        print('爬虫运行完毕了')
+        logging.info('爬虫运行完毕了')
--- a/demo1/settings.py
+++ b/demo1/settings.py
@ -9,7 +9,8 @@
 import datetime
 import random
 #如果要整个网站续爬就，把参数设置为False ，如果想整个网站都遍历一次那么就把参数改为True
-RGODIC=False
+RGODIC=True
+#scrapyd-deploy -p chacewang -v 0.0.0 --build-egg=noall_demo1.egg
 BOT_NAME = 'demo1'
 #我们文件下载的位置，这里我们就一个要求，我们得带上路径中最后的 / ，因为在代码中我们已经已经添加上了这个符号
 DOWNLOAD_FILE_PATH='E:/file/'
@ -147,13 +148,16 @@ MYSQL_PASSWORD = "sdfe@#$QW"
 MYSQL_CHARSET = "utf8"
 #日志
 current_day = datetime.datetime.now()
-LOG_ENABLED = False  # 启用日志，默认不启用
+LOG_ENABLED = True  # 启用日志，默认不启用
 LOG_ENCODING = 'utf-8'
 LOG_FILE = "logs/{}_{}_{}.log".format(current_day.year, current_day.month, current_day.day)
-LOG_LEVEL = "INFO"
-LOG_STDOUT = True  # 输出重定向至log日志，比如print
-
-
+LOG_LEVEL = "DEBUG"
+LOG_STDOUT = False  # 输出重定向至log日志，比如print
+#文件地址前缀
+FILE_PATH="http://49.232.6.143/file/download/know?path=/home/enterprise/staticrec/policy"
+#图片地址前缀
+MESSAGE="http://49.232.6.143/staticrec/policy"
+TITLE_IMAGE="http://49.232.6.143/staticrec/policy/image/"
 # 这个设置项的意思是遇到这些错误码就重新发送请求，但是如果错误码不在这里就不会重新请求，所以一定要填写所有需要重新请求的情况。
 # 如果想要遇到错误就忽略掉，从来都不重新请求，就把它设成等于[]就好了。
 #这个框架中自己带的重试方法，我们先得开启重试功能
--- a/demo1/spiders/pycache/chacewangSpider.cpython-37.pyc
+++ b/demo1/spiders/pycache/chacewangSpider.cpython-37.pyc
--- a/demo1/spiders/chacewangSpider.py
+++ b/demo1/spiders/chacewangSpider.py
@ -7,8 +7,9 @@ import datetime
 from demo1.items import Shouyelianjie
 from scrapy.utils.project import get_project_settings
 import pymysql
-class kjtSpider(scrapy.Spider):
-    name = 'chacewang'
+import logging
+class ChacewangSpider(scrapy.Spider):
+    name = 'chacewangSpider'
    settings=get_project_settings()
    '''
    太原：currentCity :15DB1C93-DBD4-42B2-ACDC-09891EDD9F80
@ -17,7 +18,7 @@ class kjtSpider(scrapy.Spider):
    晋中：currentCity :ACAC83A3-6809-4532-98A3-4EDE0F4D5FC7
    忻州：currentCity :18B4119A-8390-4233-BDC5-F01F66CF8804
    '''
-    allowed_domains = ['www.chacewang.com']
+    allowed_domains = ['chacewang.com']

    def __init__(self, name=None, **kwargs):
        self.db = pymysql.connect(
@ -118,7 +119,7 @@ class kjtSpider(scrapy.Spider):
                                         method='GET'
                                        )
                else:
-                    print('这个链接已经爬过了-----：'+url)
+                    logging.info('这个链接已经爬过了-----：'+url)
            else:
                #全爬
                yield scrapy.FormRequest(url=item['lianjie'],
@ -129,11 +130,11 @@ class kjtSpider(scrapy.Spider):
        try:
            req_url = response.url
            if len(news_list)==0:
-                print('url:'+response.url+'----没有发布过内容')
+                logging.info('url:'+response.url+'----没有发布过内容')
            else:
                next_urls = response.xpath('//*[@class="page-item "]')
                if len(next_urls)==0:
-                    print('url:'+response.url+'----这个就是一页啊')
+                    logging.info('url:'+response.url+'----这个就是一页啊')
                else:
                    next_url=next_urls[-2].xpath('.//a/@href').extract_first()
                    if 'javascript:void(0)' not in next_url:
@ -152,8 +153,8 @@ class kjtSpider(scrapy.Spider):
                                    method='GET')

        except Exception as e:
-            print(e)
-            print('没有下一页或者是下一页出错,应该是没有下一页')
+            logging.info(e)
+            logging.info('没有下一页或者是下一页出错,应该是没有下一页')
    def parse_url(self,response):
        item=response.meta['item']
        code=response.encoding
@ -214,7 +215,9 @@ class kjtSpider(scrapy.Spider):
            new_url = '/'+year + '/' + mouth + '/' + self.short_uuid() + '_' + houzui
            dange.append(new_url)
            if old_url !='_':
-               txt=txt.replace(old_url,new_url)#这里替换了a标签
+                #href="/2020/07/ksCN7yDS_国家科学技术学术著作出版基金项目资助申请指南（2021年度）.doc">国家科学技术学术著作出版基金项目资助申请指南（2021年度）</a>
+
+               txt=txt.replace(old_url,self.settings.get('FILE_PATH')+new_url)#这里替换了a标签
            j=j+1
            try:
                item['wenjian']+=dange
@ -232,7 +235,7 @@ class kjtSpider(scrapy.Spider):
            new_src1= '/'+year + '/' + mouth + '/' + self.short_uuid() + '_' + houzui1
            dange_tu.append(new_src1)
            if  '_' !=str(old_src):
-                txt=txt.replace(old_src,new_src1)
+                txt=txt.replace(old_src,self.settings.get('MESSAGE')+new_src1)
            try:
                item['wenjian']+=dange_tu
            except:
--- a/demo1/spiders/kjtSxSpider.py
+++ b/demo1/spiders/kjtSxSpider.py
@ -1,44 +0,0 @@
-import scrapy
-import Scripts
-from demo1.items import kejitingItem
-import datetime
-import time
-import random
-class kjtSpider(scrapy.Spider):
-    name = 'kejitingSxSpider'
-    start_urls = ['http://kjt.shanxi.gov.cn/public/index.jhtml']
-    allowed_domains = ['kjt.shanxi.gov.cn']
-    def __init__(self):
-        print('我是kjtSpider')
-    def jiexi_tr(self,tr,items):
-        result_list=[]
-        tds=tr.xpath('.//td')
-        items['xuhao'] = tds[0].xpath('.//text()')[0].extract()
-        items['mingcheng'] = tds[1].xpath('.//a/text()')[0].extract()
-        items['lianjie'] = tds[1].xpath('.//a/@href')[0].extract()
-        items['pushtime'] = tds[2].xpath('.//text()')[0].extract()
-        items['messwhere'] = tds[3].xpath('.//text()')[0].extract()
-        try:
-           items['wenhao'] = tds[4].xpath('.//text()')[0].extract()
-        except:
-           items['wenhao'] ='-'
-        items['paqutime']=str( datetime.date.today())
-        return items
-    def parse(self, response):
-        items=kejitingItem()
-        div=response.xpath('//body//div[@class="main"]/div[last()]')
-        trs=div.xpath('.//table//tr[not(@class="bgtitle")]')
-        for tr in trs:
-            items=self.jiexi_tr(tr,items)
-            yield items
-        next_url=response.xpath('//div[@class="page"]')
-
-        try:
-            a='http://kjt.shanxi.gov.cn/public/'+next_url.xpath('.//a[last()-1]/@href')[0].extract()
-            time.sleep(random.randint(3,6))
-            yield scrapy.Request(url=a)
-        except:
-            print('爬取完毕了')
-
-
-
--- a/noall_demo1.egg
+++ b/noall_demo1.egg
--- a/scrapy.cfg
+++ b/scrapy.cfg
@ -6,6 +6,6 @@
 [settings]
 default = demo1.settings

-[deploy:chachewang]
+[deploy:demo1]
 url = http://49.232.6.143:6800/
-project = demo1
+project = chacewang
--- a/292
+++ b/292
@ -1,292 +0,0 @@
-#!c:\e\python3\python3.exe
-
-import sys
-import os
-import glob
-import tempfile
-import shutil
-import time
-from six.moves.urllib.request import (build_opener, install_opener,
-                                      HTTPRedirectHandler as UrllibHTTPRedirectHandler,
-                                      Request, urlopen)
-from six.moves.urllib.error import HTTPError, URLError
-import netrc
-import json
-from optparse import OptionParser
-from six.moves.urllib.parse import urlparse, urljoin
-from subprocess import Popen, PIPE, check_call
-
-from w3lib.form import encode_multipart
-import setuptools # not used in code but needed in runtime, don't remove!
-
-from scrapy.utils.project import inside_project
-from scrapy.utils.http import basic_auth_header
-from scrapy.utils.python import retry_on_eintr
-from scrapy.utils.conf import get_config, closest_scrapy_cfg
-
-_SETUP_PY_TEMPLATE = \
-"""# Automatically created by: scrapyd-deploy
-
-from setuptools import setup, find_packages
-
-setup(
-    name         = 'project',
-    version      = '1.0',
-    packages     = find_packages(),
-    entry_points = {'scrapy': ['settings = %(settings)s']},
-)
-"""
-
-def parse_opts():
-    parser = OptionParser(usage="%prog [options] [ [target] | -l | -L <target> ]",
-        description="Deploy Scrapy project to Scrapyd server")
-    parser.add_option("-p", "--project",
-        help="the project name in the target")
-    parser.add_option("-v", "--version",
-        help="the version to deploy. Defaults to current timestamp")
-    parser.add_option("-l", "--list-targets", action="store_true", \
-        help="list available targets")
-    parser.add_option("-a", "--deploy-all-targets",action="store_true", help="deploy all targets")
-    parser.add_option("-d", "--debug", action="store_true",
-        help="debug mode (do not remove build dir)")
-    parser.add_option("-L", "--list-projects", metavar="TARGET", \
-        help="list available projects on TARGET")
-    parser.add_option("--egg", metavar="FILE",
-        help="use the given egg, instead of building it")
-    parser.add_option("--build-egg", metavar="FILE",
-        help="only build the egg, don't deploy it")
-    return parser.parse_args()
-
-def main():
-    opts, args = parse_opts()
-    exitcode = 0
-    if not inside_project():
-        _log("Error: no Scrapy project found in this location")
-        sys.exit(1)
-
-    install_opener(
-        build_opener(HTTPRedirectHandler)
-    )
-
-    if opts.list_targets:
-        for name, target in _get_targets().items():
-            print("%-20s %s" % (name, target['url']))
-        return
-
-    if opts.list_projects:
-        target = _get_target(opts.list_projects)
-        req = Request(_url(target, 'listprojects.json'))
-        _add_auth_header(req, target)
-        f = urlopen(req)
-        projects = json.loads(f.read())['projects']
-        print(os.linesep.join(projects))
-        return
-
-    tmpdir = None
-
-    if opts.build_egg: # build egg only
-        egg, tmpdir = _build_egg()
-        _log("Writing egg to %s" % opts.build_egg)
-        shutil.copyfile(egg, opts.build_egg)
-    elif opts.deploy_all_targets:
-        version = None
-        for name, target in _get_targets().items():
-            if version is None:
-                version = _get_version(target, opts)
-            _build_egg_and_deploy_target(target, version, opts)
-    else: # buld egg and deploy
-        target_name = _get_target_name(args)
-        target = _get_target(target_name)
-        version = _get_version(target, opts)
-        exitcode, tmpdir = _build_egg_and_deploy_target(target, version, opts)
-
-    if tmpdir:
-        if opts.debug:
-            _log("Output dir not removed: %s" % tmpdir)
-        else:
-            shutil.rmtree(tmpdir)
-
-    sys.exit(exitcode)
-
-def _build_egg_and_deploy_target(target, version, opts):
-    exitcode = 0
-    tmpdir = None
-
-    project = _get_project(target, opts)
-    if opts.egg:
-        _log("Using egg: %s" % opts.egg)
-        egg = opts.egg
-    else:
-        _log("Packing version %s" % version)
-        egg, tmpdir = _build_egg()
-    if not _upload_egg(target, egg, project, version):
-        exitcode = 1
-    return exitcode, tmpdir
-
-def _log(message):
-    sys.stderr.write(message + os.linesep)
-
-def _fail(message, code=1):
-    _log(message)
-    sys.exit(code)
-
-def _get_target_name(args):
-    if len(args) > 1:
-        raise _fail("Error: Too many arguments: %s" % ' '.join(args))
-    elif args:
-        return args[0]
-    elif len(args) < 1:
-        return 'default'
-
-def _get_project(target, opts):
-    project = opts.project or target.get('project')
-    if not project:
-        raise _fail("Error: Missing project")
-    return project
-
-def _get_option(section, option, default=None):
-    cfg = get_config()
-    return cfg.get(section, option) if cfg.has_option(section, option) \
-        else default
-
-def _get_targets():
-    cfg = get_config()
-    baset = dict(cfg.items('deploy')) if cfg.has_section('deploy') else {}
-    targets = {}
-    if 'url' in baset:
-        targets['default'] = baset
-    for x in cfg.sections():
-        if x.startswith('deploy:'):
-            t = baset.copy()
-            t.update(cfg.items(x))
-            targets[x[7:]] = t
-    return targets
-
-def _get_target(name):
-    try:
-        return _get_targets()[name]
-    except KeyError:
-        raise _fail("Unknown target: %s" % name)
-
-def _url(target, action):
-    return urljoin(target['url'], action)
-
-def _get_version(target, opts):
-    version = opts.version or target.get('version')
-    if version == 'HG':
-        p = Popen(['hg', 'tip', '--template', '{rev}'], stdout=PIPE, universal_newlines=True)
-        d = 'r%s' % p.communicate()[0]
-        p = Popen(['hg', 'branch'], stdout=PIPE, universal_newlines=True)
-        b = p.communicate()[0].strip('\n')
-        return '%s-%s' % (d, b)
-    elif version == 'GIT':
-        p = Popen(['git', 'describe'], stdout=PIPE, universal_newlines=True)
-        d = p.communicate()[0].strip('\n')
-        if p.wait() != 0:
-            p = Popen(['git', 'rev-list', '--count', 'HEAD'], stdout=PIPE, universal_newlines=True)
-            d = 'r%s' % p.communicate()[0].strip('\n')
-
-        p = Popen(['git', 'rev-parse', '--abbrev-ref', 'HEAD'], stdout=PIPE, universal_newlines=True)
-        b = p.communicate()[0].strip('\n')
-        return '%s-%s' % (d, b)
-    elif version:
-        return version
-    else:
-        return str(int(time.time()))
-
-def _upload_egg(target, eggpath, project, version):
-    with open(eggpath, 'rb') as f:
-        eggdata = f.read()
-    data = {
-        'project': project,
-        'version': version,
-        'egg': ('project.egg', eggdata),
-    }
-    body, boundary = encode_multipart(data)
-    url = _url(target, 'addversion.json')
-    headers = {
-        'Content-Type': 'multipart/form-data; boundary=%s' % boundary,
-        'Content-Length': str(len(body)),
-    }
-    req = Request(url, body, headers)
-    _add_auth_header(req, target)
-    _log('Deploying to project "%s" in %s' % (project, url))
-    return _http_post(req)
-
-def _add_auth_header(request, target):
-    if 'username' in target:
-        u, p = target.get('username'), target.get('password', '')
-        request.add_header('Authorization', basic_auth_header(u, p))
-    else: # try netrc
-        try:
-            host = urlparse(target['url']).hostname
-            a = netrc.netrc().authenticators(host)
-            request.add_header('Authorization', basic_auth_header(a[0], a[2]))
-        except (netrc.NetrcParseError, IOError, TypeError):
-            pass
-
-def _http_post(request):
-    try:
-        f = urlopen(request)
-        _log("Server response (%s):" % f.code)
-        print(f.read().decode('utf-8'))
-        return True
-    except HTTPError as e:
-        _log("Deploy failed (%s):" % e.code)
-        resp = e.read().decode('utf-8')
-        try:
-            d = json.loads(resp)
-        except ValueError:
-            print(resp)
-        else:
-            if "status" in d and "message" in d:
-                print("Status: %(status)s" % d)
-                print("Message:\n%(message)s" % d)
-            else:
-                print(json.dumps(d, indent=3))
-    except URLError as e:
-        _log("Deploy failed: %s" % e)
-
-def _build_egg():
-    closest = closest_scrapy_cfg()
-    os.chdir(os.path.dirname(closest))
-    if not os.path.exists('setup.py'):
-        settings = get_config().get('settings', 'default')
-        _create_default_setup_py(settings=settings)
-    d = tempfile.mkdtemp(prefix="scrapydeploy-")
-    o = open(os.path.join(d, "stdout"), "wb")
-    e = open(os.path.join(d, "stderr"), "wb")
-    retry_on_eintr(check_call, [sys.executable, 'setup.py', 'clean', '-a', 'bdist_egg', '-d', d], stdout=o, stderr=e)
-    o.close()
-    e.close()
-    egg = glob.glob(os.path.join(d, '*.egg'))[0]
-    return egg, d
-
-def _create_default_setup_py(**kwargs):
-    with open('setup.py', 'w') as f:
-        f.write(_SETUP_PY_TEMPLATE % kwargs)
-
-
-class HTTPRedirectHandler(UrllibHTTPRedirectHandler):
-
-    def redirect_request(self, req, fp, code, msg, headers, newurl):
-        newurl = newurl.replace(' ', '%20')
-        if code in (301, 307):
-            return Request(newurl,
-                                   data=req.get_data(),
-                                   headers=req.headers,
-                                   origin_req_host=req.get_origin_req_host(),
-                                   unverifiable=True)
-        elif code in (302, 303):
-            newheaders = dict((k, v) for k, v in req.headers.items()
-                              if k.lower() not in ("content-length", "content-type"))
-            return Request(newurl,
-                                   headers=newheaders,
-                                   origin_req_host=req.get_origin_req_host(),
-                                   unverifiable=True)
-        else:
-            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
-
-
-if __name__ == "__main__":
-    main()
--- a/shiyan.txt
+++ b/shiyan.txt
@ -1 +0,0 @@
-下信息下下下