123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- # coding=utf-8
- '''
- Created on 2017年7月15日
- @version:python3.6
- @author: liuyuqi
- '''
- import csv
- import os
- from time import sleep
- import random
- from urllib import request
# Workspace root of the crawler (hard-coded absolute path).
project_dir = "C:/Users/dell/Desktop/xiaohua-crawl"
# Folder that receives the downloaded images.
img_dir = f"{project_dir}/images"
# Folder that holds the crawl result CSV.
data_dir = f"{project_dir}/data"
def downloadImg(imgUrl, fileName):
    """Download one image and store it inside ``img_dir``.

    The request is sent with a browser-like ``User-Agent`` and a ``Referer``
    header. The file is saved as ``fileName`` followed by the last four
    characters of ``imgUrl`` (for example ``.jpg``).

    Any error is printed instead of raised, so a single bad URL does not
    abort the caller's loop.

    Args:
        imgUrl: Absolute URL of the image to fetch.
        fileName: Name of the target file, without extension.
    """
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
        'Referer': 'http://www.xiaohuar.com'
    }
    try:
        # The last four characters of the URL are used as the file suffix.
        savedName = fileName + imgUrl[-4:]
        req = request.Request(url=imgUrl, headers=headers)
        # Context manager closes the response even if reading fails.
        with request.urlopen(req) as res:
            payload = res.read()
        # Open the output file only after the body was read completely, so a
        # failed download does not leave an empty file behind.
        with open(img_dir + "/" + savedName, "wb") as out:
            out.write(payload)
        # sleep(random.randint(1,5))
    except Exception as err:
        # Best-effort download: report the problem and let the caller go on.
        print(err)
    else:
        # Report success only, and with the name that was actually written.
        print("pic:" + savedName)
def __init__():
    """Create the image and data output folders when they are missing.

    Raises:
        FileExistsError: If one of the paths exists but is not a directory.
    """
    for folder in (img_dir, data_dir):
        # makedirs also creates missing parent folders (os.mkdir raised when
        # the project folder itself did not exist yet), and exist_ok=True
        # removes the check-then-create race of the exists()/mkdir() pair.
        os.makedirs(folder, exist_ok=True)
def main():
    """Download the image referenced by every row of ``result.csv``.

    The CSV file is read from ``data_dir``. For each row, column 2 is
    appended to the site root to build the image URL, and the target file
    name is built as ``<column 4>-<column 3>``.

    Rows with fewer than five columns (including blank lines) are skipped
    instead of raising ``IndexError`` and aborting the whole run.
    """
    csvPath = data_dir + "/result.csv"
    # newline='' is what the csv module expects from a file object, so that
    # line breaks inside quoted fields are parsed correctly.
    with open(csvPath, 'r', newline='') as f:
        # data=csv.reader(f, csv.excel_tab)
        for row in csv.reader(f):
            if len(row) < 5:
                # Not enough columns to build the URL and the file name.
                continue
            fileName = row[4] + "-" + row[3]
            imgUrl = "http://www.xiaohuar.com" + row[2]
            downloadImg(imgUrl, fileName)
# Script entry: make sure the output folders exist first ...
__init__()
# ... then walk the result CSV and download every image.
main()
|