main.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
# coding=utf-8
'''
Created on 2017-07-15
@version: python3.6
@author: liuyuqi
'''
  7. import csv
  8. import os
  9. from time import sleep
  10. import random
  11. from urllib import request
  12. project_dir = "C:/Users/dell/Desktop/xiaohua-crawl"
  13. img_dir = project_dir+"/images"
  14. data_dir = project_dir+"/data"
  15. def downloadImg(imgUrl, fileName):
  16. try:
  17. headers = {
  18. 'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
  19. 'Referer': 'http://www.xiaohuar.com'
  20. }
  21. req = request.Request(url=imgUrl)
  22. for i in headers:
  23. req.add_header(i, headers[i])
  24. res = request.urlopen(req)
  25. with open(img_dir+"/"+fileName+imgUrl[-4:], "wb") as code:
  26. code.write(res.read())
  27. # sleep(random.randint(1,5))
  28. except Exception as err:
  29. print(err)
  30. finally:
  31. print("pic:" + fileName+".jpg")
  32. def __init__():
  33. if(os.path.exists(img_dir) != True):
  34. os.mkdir(img_dir)
  35. if(os.path.exists(data_dir) != True):
  36. os.mkdir(data_dir)
  37. def main():
  38. file = data_dir+"/result.csv"
  39. with open(file, 'r') as f:
  40. # data=csv.reader(f, csv.excel_tab)
  41. data = csv.reader(f)
  42. for row in data:
  43. imgUrl = ""
  44. fileName = ""
  45. for i in range(len(row)):
  46. fileName = row[4]+"-"+row[3]
  47. imgUrl = "http://www.xiaohuar.com"+row[2]
  48. downloadImg(imgUrl, fileName)
  49. __init__()
  50. main()