#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File : get_face.py
@Time : 2019/05/16 05:44:10
@Author : Liuyuqi
@Version : 1.0
@Contact : liuyuqi.gov@msn.cn
@License : (C)Copyright 2019
@Desc : Fetch user avatars: export the avatar field of the User table, then download each URL.
'''
import os
import re
import sys
import urllib
import urllib.request
# Project root the script works from; relative paths below resolve against it.
src = "C:/Users/liuyuqi/Desktop/crawl_bilibili"


def download_faces(list_path="data/bilibili_user_face.txt",
                   out_dir=r"../data/face/", limit=999):
    """Download the avatar images listed in *list_path*, one URL per line.

    Each image is saved as ``<line number>.jpg`` inside *out_dir*, where the
    line number is 1-based and counts every line of the list, including the
    ones that are skipped.

    Args:
        list_path: Text file holding one avatar URL per line.
        out_dir: Directory the images are written to; created if missing.
            NOTE(review): the default points one level above the working
            directory while the list is read from inside it -- confirm that
            this is intended.
        limit: Maximum number of lines to process (the original script
            stopped after 999 lines).

    Returns:
        The number of images that were downloaded successfully.
    """
    os.makedirs(out_dir, exist_ok=True)
    downloaded = 0
    # The context manager closes the list file even when a download raises.
    with open(list_path) as url_file:
        # Iterating the file stops at end-of-file instead of feeding empty
        # strings to urlretrieve when the list is shorter than *limit*.
        for index, raw_line in enumerate(url_file, start=1):
            if index > limit:
                break
            # Drop the trailing newline; it is not part of the URL.
            url = raw_line.strip()
            if not url:
                continue
            print(url)
            # URLs under http://static are reported as "noface" and skipped
            # (presumably the site's default placeholder avatar -- confirm).
            if url.startswith("http://static"):
                print('noface:' + str(index))
                continue
            path = os.path.join(out_dir, str(index) + ".jpg")
            try:
                urllib.request.urlretrieve(url, path)  # save to the target path
            except (OSError, ValueError) as err:
                # URLError/HTTPError are OSError subclasses; ValueError is
                # raised for a malformed URL. One bad entry must not abort
                # the remaining downloads.
                print('failed:' + str(index) + ' ' + str(err))
                continue
            downloaded += 1
            print('succeed:' + str(index))
    return downloaded


def main():
    """Switch to the project directory and download every listed avatar."""
    os.chdir(src)
    sys.path.append(src)
    download_faces()


if __name__ == "__main__":
    main()