#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
taobao_sale.py

Created on Thu Dec 13 15:35:26 2018
@author: yura

Look up the sales count of every Taobao item listed in an Excel sheet.

The script reads item ids from the input workbook, requests the mobile
detail endpoint once per id, pulls the text found between the
``sellCount`` and ``vagueSellCount`` markers out of the response, and
writes the input rows plus a new sales column to the output workbook.
"""
import json
import re
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Edit the input / output file names here.
INPUT_FILE = '口红ID.xlsx'
OUTPUT_FILE = '口红allinfo.xlsx'

# Rows before this position in the input sheet are skipped.
# NOTE(review): presumably they were handled by an earlier run -- confirm.
START_ROW = 600

ID_COLUMN = '商品编号'    # input column holding the item ids
SALE_COLUMN = '销量'      # output column receiving the scraped value
NOT_FOUND = '未找到'      # recorded when no sales figure could be obtained

REQUEST_TIMEOUT = 20      # seconds, per request
REQUEST_DELAY = 1         # seconds slept before every request

headers = {
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    'Referer': 'https://www.taobao.com/',
    'Connection': 'keep-alive',
}

# Placeholder; entries with an empty value are not sent.
cookies = {'cookie': ''}

# URL template: {a} and {b} are both filled with the item id.
# NOTE(review): `t` and `sign` look like values captured from a single
# browser session and may need refreshing -- confirm before a long run.
url = 'https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?jsv=2.5.0&appKey=12574478&t=1545890324697&sign=b3019d1cfa9fca53e96c2dce375af631&api=mtop.taobao.detail.getdetail&v=6.0&ttid=2018%40taobao_iphone_9.9.9&utdid=123123123123123&isSec=0&ecode=0&AntiFlood=true&AntiCreep=true&H5Request=true&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22itemNumId%22%3A%22{a}%22%2C%22exParams%22%3A%22%7B%5C%22id%5C%22%3A%5C%22{b}%5C%22%2C%5C%22abtest%5C%22%3A%5C%2214%5C%22%2C%5C%22rn%5C%22%3A%5C%2288c67406326ce50a3b7c45a84fd373f8%5C%22%2C%5C%22sid%5C%22%3A%5C%221358529dab8ea682d11893feec138136%5C%22%7D%22%2C%22detail_v%22%3A%223.1.1%22%2C%22ttid%22%3A%222018%40taobao_iphone_9.9.9%22%2C%22utdid%22%3A%22123123123123123%22%7D'  # noqa: E501

# Compiled once instead of on every loop iteration; the pattern itself is
# the one the script has always used.
SALE_PATTERN = re.compile(r".*sellCount(.*)vagueSellCount.*")


def extract_sale(html):
    """Return the sales figure embedded in *html*, or ``NOT_FOUND``.

    The captured text between ``sellCount`` and ``vagueSellCount`` has
    five characters trimmed from each end.
    NOTE(review): presumably those are the escaped-JSON delimiters around
    the value -- verify against a live response.
    """
    match = SALE_PATTERN.search(html)
    if match is None:
        return NOT_FOUND
    return match.group(1)[5:-5]


def fetch_sale(item_id):
    """Request the detail endpoint for *item_id* and return its sales figure.

    A network error or timeout is reported on stdout and yields
    ``NOT_FOUND`` instead of raising, so one bad request cannot discard
    the results already collected for the other items.
    """
    full_url = url.format(a=item_id, b=item_id)
    # Skip placeholder entries so an unfilled cookie is not sent.
    filled_cookies = {name: value for name, value in cookies.items() if value}
    try:
        res = requests.get(
            full_url,
            headers=headers,
            cookies=filled_cookies,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as err:
        print('请求失败:{} ({})'.format(item_id, err))
        return NOT_FOUND
    res.encoding = 'utf-8'
    return extract_sale(res.text)


def main():
    """Scrape the sales figure for every listed item and save the sheet."""
    # Copy the slice so that adding a column does not write into a view of
    # the frame returned by read_excel.
    df = pd.read_excel(INPUT_FILE)[START_ROW:].copy()

    sale = []
    for k, item_id in enumerate(df[ID_COLUMN], start=1):
        print('正在爬取第{}个产品:{}'.format(k, item_id))
        time.sleep(REQUEST_DELAY)
        sale.append(fetch_sale(item_id))

    # Exactly one entry was appended per row, so the lengths match.
    df[SALE_COLUMN] = sale
    df.to_excel(OUTPUT_FILE)


if __name__ == '__main__':
    main()