Browse Source

查策网爬虫

master
Wang0018 5 years ago
parent
commit
dc88f9082d
  1. 6
      .idea/inspectionProfiles/profiles_settings.xml
  2. 7
      .idea/misc.xml
  3. 8
      .idea/modules.xml
  4. 6
      .idea/vcs.xml
  5. 11
      .idea/wiki_enterprise_python.iml
  6. 43
      .idea/workspace.xml
  7. 2
      spiders/chacewangSpider.py

6
.idea/inspectionProfiles/profiles_settings.xml

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

7
.idea/misc.xml

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/wiki_enterprise_python.iml" filepath="$PROJECT_DIR$/.idea/wiki_enterprise_python.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

11
.idea/wiki_enterprise_python.iml

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Twisted Trial" />
</component>
</module>

43
.idea/workspace.xml

@ -0,0 +1,43 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="12c573dd-d4cf-4126-8539-b1118032a999" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/spiders/chacewangSpider.py" beforeDir="false" afterPath="$PROJECT_DIR$/spiders/chacewangSpider.py" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="ProjectId" id="1ezVrogDFCxxffK6pR1Z8s0pDLw" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showExcludedFiles" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
<property name="WebServerToolWindowFactoryState" value="false" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="12c573dd-d4cf-4126-8539-b1118032a999" name="Default Changelist" comment="" />
<created>1595324609961</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1595324609961</updated>
<workItem from="1595324611167" duration="65000" />
</task>
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" />
</component>
</project>

2
spiders/chacewangSpider.py

@ -100,6 +100,8 @@ class kjtSpider(scrapy.Spider):
cursor=self.db.cursor()
cursor.execute('select count(title_url) as nums FROM t_policy where title_url ="https://www.chacewang.com/news/NewsDetail/22014"')
res=cursor.fetchall()[0].get('nums')
# 这里应该加一个值,判断次数而且是首页
if res!=0:
yield scrapy.FormRequest(url=item['lianjie'],
meta={'item': copy.deepcopy(item)},

Loading…
Cancel
Save