conf.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. @Author :liuyuqi.gov@msn.cn
  5. @Time :2018/4/11 14:58
  6. @File :conf.py
  7. '''
  8. import pymysql
  9. import traceback
  10. import configparser
  11. # 站点配置类
  12. class Conf(object):
  13. def __init__(self, id, name, url, urltag, restr, newstag):
  14. self.id = id
  15. self.name = name # 抓取的网站名
  16. self.url = url # 抓取的链接
  17. self.urltag = urltag # url列表div对应的class 属性名
  18. self.restr = restr # 匹配文章链接的正则表达式
  19. self.newstag = newstag # 抓取文章内容时包含文章内容的div对应的class属性名
  20. # 用户配置类
  21. class User(object):
  22. def __init__(self, website, username, password):
  23. self.website = website # 用户站点
  24. self.username = username # 站点用户名
  25. self.password = password # 站点密码
  26. class Db(object):
  27. def __init__(self, db_host, db_port, db_user, db_passwd):
  28. self.db_host = db_host
  29. self.db_port = db_port
  30. self.db_user = db_user
  31. self.db_passwd = db_passwd
  32. class Email(object):
  33. def __init__(self, mail_user, mail_postfix, sender, receiver, smtpserver, message, subject, username, password):
  34. self.mail_user = mail_user
  35. self.mail_postfix = mail_postfix
  36. self.sender = sender
  37. self.receiver = receiver
  38. self.smtpserver = smtpserver
  39. self.message = message
  40. self.subject = subject
  41. self.username = username
  42. self.password = password
  43. # 读取数据库配置文件
  44. def readDBConf():
  45. cf = configparser.ConfigParser()
  46. cf.read('wp.conf')
  47. db_host = cf.get("db", "db_host")
  48. db_port = cf.getint("db", "db_port")
  49. db_user = cf.get("db", "db_user")
  50. db_passwd = cf.get("db", "db_passwd")
  51. db = Db(db_host, db_port, db_user, db_passwd)
  52. return db
  53. # 读取WordPress站点配置
  54. def readUserConf():
  55. cf = configparser.ConfigParser()
  56. cf.read('wp.conf')
  57. website = cf.get('web', 'website')
  58. username = cf.get('web', 'username')
  59. password = cf.get('web', 'password')
  60. user = User(website, username, password)
  61. return user
  62. # 读取email配置
  63. def readEmailConf():
  64. cf = configparser.ConfigParser()
  65. cf.read('wp.conf')
  66. mail_user = cf.get('email', 'mail_user')
  67. mail_postfix = cf.get('email', 'mail_postfix')
  68. sender = cf.get('email', 'sender')
  69. receiver = cf.get('email', 'receiver')
  70. smtpserver = cf.get('email', 'smtpserver')
  71. message = cf.get('email', 'message')
  72. subject = cf.get('email', 'subject')
  73. username = cf.get('email', 'username')
  74. password = cf.get('email', 'password')
  75. email = Email(mail_user, mail_postfix, sender, receiver, smtpserver, message, subject, username, password)
  76. return email
  77. # 从数据库加载配置
  78. # user:数据库用户名
  79. # passwd:数据库密码
  80. def getConf():
  81. db = readDBConf()
  82. conn = pymysql.connect(host=db.db_host, port=db.db_port, user=db.db_user, passwd=db.db_passwd,
  83. database='mywpspider')
  84. cur = conn.cursor()
  85. cur.execute('select * from spider where state=0')
  86. l = cur.fetchall()
  87. confList = []
  88. for s in l:
  89. id = s[0]
  90. name = s[1]
  91. url = s[2]
  92. urltag = s[3]
  93. restr = s[4]
  94. newstag = s[5]
  95. spider = Conf(id, name, url, urltag, restr, newstag)
  96. confList.append(spider)
  97. cur.close()
  98. conn.close()
  99. return confList