1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- '''
- @Auther :liuyuqi.gov@msn.cn
- @Time :2018/4/11 14:57
- @File :db-manager.py
- '''
- import pymysql
- import traceback
- import sys
- from conf import readDBConf
- #创建数据库
- def createDB():
- db=readDBConf()
- conn=pymysql.connect(host=db.db_host,port=db.db_port,user=db.db_user,passwd=db.db_passwd)
- cur=conn.cursor()
- cur.execute('CREATE DATABASE IF NOT EXISTS mywpspider DEFAULT CHARSET utf8 COLLATE utf8_general_ci')
- cur.execute('use mywpspider')
- createtablesql="""CREATE TABLE spider(
- id int(11) NOT NULL AUTO_INCREMENT PRIMARY KEY,
- name varchar(50) DEFAULT NULL,
- url varchar(256) NOT NULL,
- urlstag varchar(255) NOT NULL,
- restr varchar(255) NOT NULL,
- newstag varchar(255) NOT NULL,
- state int(11) NOT NULL DEFAULT 0)ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8"""
- cur.execute(createtablesql)
- cur.close()
- conn.close()
- #增加爬取站点配置
- def addConf():
- db=readDBConf()
- conn=pymysql.connect(host=db.db_host,port=db.db_port,user=db.db_user,passwd=db.db_passwd,database='mywpspider')
- cur=conn.cursor()
- name=input('请输入抓取的站点名称:')
- url=input('请输入站点链接:')
- urlstag=input('请输入包含文章链接列表的div class属性的值:')
- restr=input('请输入匹配文章链接的正则表达式:')
- newstag=input('请输入站点包含新闻的 div class 属性值:')
- insertsql="insert into spider (name,url,urlstag,restr,newstag) values ('%s','%s','%s','%s','%s')"%(name,url,urlstag,restr,newstag)
- print(insertsql)
- try:
- cur.execute(insertsql)
- conn.commit()
- except Exception as e:
- m=traceback.format_exc()
- print(m)
- conn.rollback()
- cur.close()
- conn.close()
- #执行命令行参数
- def executeCommand():
- methodname=sys.argv[1]
- print(methodname)
- if methodname=='createDB':
- createDB()
- elif methodname=='addConf':
- addConf()
- else:
- print('输入错误')
- if __name__ == '__main__':
- executeCommand()
|