utils.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. @Author :liuyuqi.gov@msn.cn
  5. @Time :2018/4/11 14:59
  6. @File :utils.py
  7. '''
  8. from urllib.request import urlretrieve
  9. from wordpress_xmlrpc.compat import xmlrpc_client
  10. from wordpress_xmlrpc.methods import media
  11. from email.mime.text import MIMEText
  12. from email.header import Header
  13. import smtplib
  14. import traceback
  15. import os,re
  16. from conf import readEmailConf
  17. # 根据url 获取主机名
  18. def getHost(url):
  19. reg = r'^https?:\/\/([a-z0-9\-\.]+)[\/\?]?'
  20. m = re.match(reg, url)
  21. uri = m.groups()[0] if m else ''
  22. host=uri[uri.rfind('.', 0, uri.rfind('.')) + 1:]
  23. return host
  24. #加载user_agents配置文件
  25. def load_user_agent():
  26. user_agents=[]
  27. fp = open('user_agents', 'r')
  28. line = fp.readline().strip('\n')
  29. while(line):
  30. user_agents.append(line)
  31. line = fp.readline().strip('\n')
  32. fp.close()
  33. return user_agents
  34. #下载图片
  35. '''
  36. 将图片保存到本地
  37. '''
  38. def get_image(image_url,image_name):
  39. os.makedirs('images',exist_ok=True)
  40. #print('下载了--->'+image_name)
  41. urlretrieve(image_url,'images/'+image_name)
  42. #上传图片
  43. '''
  44. 根据图片路径将图片上传到wordpress
  45. 返回attachment_id
  46. '''
  47. def upload_image(image_name,client):
  48. data={
  49. 'name':image_name,
  50. 'type':'image/jpeg'
  51. }
  52. with open('images/'+image_name, 'rb') as img:
  53. data['bits'] = xmlrpc_client.Binary(img.read())
  54. response = client.call(media.UploadFile(data))
  55. #print('上传了--->'+image_name)
  56. attachment_id = response['id']
  57. return attachment_id
  58. #将文章标题写入文件
  59. def write_file(str_title):
  60. with open('title.txt','a') as f:
  61. f.write(str_title)
  62. #发送电子邮件
  63. '''
  64. mail_user :发送者名称
  65. mail_postfix:邮箱后缀
  66. sender :发送者
  67. receiver :接收者(可以设置为139邮箱)
  68. smtpserver :smtp服务器地址
  69. message :消息
  70. subject :主题
  71. username :用户名
  72. password :密码
  73. example: 以新浪邮箱为例
  74. send_email('user','sina.com','user@sina.com','xxxx@qq.com','smtp.sina.com','您的爬虫出现异常\n'+m,'wpspider','user@sina.com','abc123')
  75. '''
  76. def send_email(m):
  77. email=readEmailConf()
  78. try:
  79. msg=MIMEText(email.message+m,'plain','utf-8')
  80. me="Wpspider"+"<"+email.mail_user+"@"+email.mail_postfix+">"
  81. msg['From']=Header(me)
  82. msg['Subject']=Header(email.subject,'utf-8')
  83. smtp = smtplib.SMTP()
  84. smtp.connect(email.smtpserver)
  85. smtp.login(email.username,email.password)
  86. smtp.sendmail(email.sender, email.receiver, msg.as_string())
  87. smtp.quit()
  88. print ("邮件发送成功")
  89. except smtplib.SMTPException as e:
  90. print ("Error: 无法发送邮件")