1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- #!/usr/bin/env python
- # -*- encoding: utf-8 -*-
- '''
- @Author : liuyuqi
- @Contact : liuyuqi.gov@msn.cn
- @Time : 2020/04/06 21:18:40
- @Version : 1.0
- @License : Copyright © 2017-2020 liuyuqi. All Rights Reserved.
- @Desc : 天猫爬取商品评论
- '''
- # 导入所需库
- import pandas as pd
- import requests
- import re
- import time
- import json
- from jsonpath import jsonpath
- from pprint import pprint
# Endpoint template for one page of Tmall reviews; ``{}`` receives the
# 1-based page number.  The query must read ``&currentPage=``: the previous
# text had ``¤tPage`` (``&curren`` decoded as an HTML entity), so the page
# number was never sent under the right parameter name.
RATE_URL_TEMPLATE = "https://rate.tmall.com/list_detail_rate.htm?itemId=598614273525&spuId=0&sellerId=3965833216&order=3&currentPage={}&append=0&content=1&tagId=&posi=&picture=&groupId=&ua=098%23E1hvFvvpvoUvUvCkvvvvvjiPn25y0jtRRsFh6jEUPmPwzj3URFdWtjibRLFOtjrPdphvmpmvqRkJvvvWUghCvCWpvREwsDsNzYGUTnAYAZrqv6ruRphvCvvvphvPvpvhvv2MMQhCvvOv9hCvvvvEvpCWvhDi4Bz6VXu4hAx%2F0jZ7%2Bu0Owmz6%2Ff8r58t%2Bm7zydigXe5xLD76fd34AVAllY2%2FAdXQaWXxr58TJ%2B3%2BuQjZL%2Bu6fjLVxfBKK5FGDNdyCvm9vvhCvvvvvvvvvBJZvvUVavvCHtpvv9ZUvvhcDvvmCp9vvBJZvvUHmuphvmvvvpLvEk6nskphvC9hvpyP9Q8wCvvpvvhHh3QhvCvmvphmrvpvEvvEN7GOvvvExRphvCvvvphmrvpvEvvjKMngvvEbp9phvHnMS01gH7rMNz15bMH1btqjN%2FnsvRphvCvvvphv%3D&needFold=0&_ksTS=1585965084590_619&callback=jsonp620"

# Request headers are identical for every page, so build them once.
# NOTE(review): the cookie is a hard-coded login session; it should be loaded
# from configuration or the environment rather than kept in source control.
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    'referer': 'https://detail.tmall.com/item.htm?spm=a230r.1.14.6.62982a9d1ZzQky&id=598614273525&cm_id=140105335569ed55e27b&abbucket=10&skuId=4416255191724',
    'cookie': 'cna=6aIDF0UBwyMCAd9IWWP0ve2T; hng=CN%7Czh-CN%7CCNY%7C156; _m_h5_tk=352faf1e2c7459bb9be6dcfb1a686859_1585650890558; _m_h5_tk_enc=f98da3ebe54c27135860fad3eb0dd30f; enc=7ukbCamgQiUCkbWFKHSyWs3%2FuaYf2BwLnE%2FrqYLwkifjMrzkW7Z9o9ZnyLldLtq72TZm4k67jLO3g3pY8WCUTg%3D%3D; t=fc84a8e7475ba65cba01390893141481; tracknick=%5Cu738B%5Cu771F%5Cu8FBE; lgc=%5Cu738B%5Cu771F%5Cu8FBE; _tb_token_=ee45eb07d7e8a; cookie2=108528103da2fb86166fe94f4ec6d16c; x5sec=7b22726174656d616e616765723b32223a223664363835376639313134633235396364656133353961346431343332383436434f4c4c6e2f514645497659677553626973487066513d3d227d; dnk=%5Cu738B%5Cu771F%5Cu8FBE; uc1=cookie15=VFC%2FuZ9ayeYq2g%3D%3D&existShop=false&tag=8&pas=0&cookie16=UtASsssmPlP%2Ff1IHDsDaPRu%2BPw%3D%3D&lng=zh_CN&cookie14=UoTUP2uetwoTUg%3D%3D&cookie21=V32FPkk%2FgPzW; uc3=id2=W8g1q36CK3mT&nk2=rpB%2B19XZ&lg2=URm48syIIVrSKA%3D%3D&vt3=F8dBxdAXsSeyaM9RvMI%3D; _l_g_=Ug%3D%3D; uc4=nk4=0%40rMpGHW%2BbjZbmUWy3pSf%2FTjQ%3D&id4=0%40WeuSdm3pqpKqyJu1mxC59Ozb5YU%3D; unb=816423751; cookie1=VvaOTBfnXLrXE%2FHlUE7SD0YJwBoh4uUMhGIHJ9cs6KA%3D; login=true; cookie17=W8g1q36CK3mT; _nk_=%5Cu738B%5Cu771F%5Cu8FBE; sgcookie=E61hfOKws3lgRAqU1v%2Fvg; sg=%E8%BE%BE19; csg=0a7d2065; l=dBM9vnp4Qcg5Mq6wBOfgCkjmkJ_t6IRf1sPzt2XL0ICP_H5JLsJNWZfuxtTvCnGVn6y6R35mgkfgBjLtxy4EhZXRFJXn9MpOLd8h.; isg=BNPTDT2niX35UEVSwcvpZWTrYlf9iGdK353nFoXw2_JOBPCmDVoIm3gSPnRqpL9C',
}

# Matches the JSONP wrapper named by the ``callback=jsonp620`` query parameter.
# DOTALL lets the payload span several lines.
JSONP_PATTERN = re.compile(r'jsonp620\((.*)\)', re.DOTALL)

# Output column name -> JSONPath expression evaluated against the payload.
FIELD_PATHS = {
    'UserNick': '$..rateList..displayUserNick',
    'comment_time': '$..rateList..rateDate',
    'content': '$..rateList..rateContent',
    'auctionSku': '$..rateList..auctionSku',
}

OUTPUT_PATH = 'data/李子柒螺蛳粉评论.xlsx'


def build_url(page):
    """Return the review-list URL for the given 1-based page number."""
    return RATE_URL_TEMPLATE.format(page)


def fetch_page(page, timeout=10):
    """Download one page of reviews and return the raw response text.

    Raises ``requests.RequestException`` on network failure, timeout or a
    non-2xx status, instead of hanging or handing an error page to the parser.
    """
    response = requests.get(build_url(page), headers=HEADERS, timeout=timeout)
    response.raise_for_status()
    return response.text


def parse_jsonp(text):
    """Strip the ``jsonp620(...)`` wrapper from *text* and decode the JSON.

    Raises ``ValueError`` when the wrapper is missing (for example when the
    server answered with a login or verification page) or when the payload is
    not valid JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    match = JSONP_PATTERN.search(text)
    if match is None:
        raise ValueError('response does not contain a jsonp620(...) payload')
    return json.loads(match.group(1))


def build_frame(js_data):
    """Build a DataFrame of the review fields found in one decoded payload.

    ``jsonpath`` returns ``False`` rather than an empty list when nothing
    matches, so that result is normalised to ``[]``; a page without reviews
    then yields an empty frame.  If the fields match different numbers of
    values, pandas raises ``ValueError``.
    """
    columns = {}
    for name, path in FIELD_PATHS.items():
        columns[name] = jsonpath(js_data, path) or []
    return pd.DataFrame(columns)


def scrape_comments(first_page=1, last_page=100, delay=5):
    """Fetch pages ``first_page..last_page`` and return all reviews as one frame.

    A page that cannot be fetched or parsed is reported and skipped, so one bad
    response no longer discards the pages already collected.  The function
    sleeps *delay* seconds after every request.
    """
    frames = []
    for page in range(first_page, last_page + 1):
        frame = None
        try:
            frame = build_frame(parse_jsonp(fetch_page(page)))
        except (requests.RequestException, ValueError) as exc:
            print('Skipping page {}: {}'.format(page, exc))
        if frame is not None and not frame.empty:
            frames.append(frame)
        # Pause between requests.
        time.sleep(delay)
        # Report progress.
        if frame is not None:
            print('我正在获取第{}页的信息'.format(page))
    if not frames:
        return pd.DataFrame(columns=list(FIELD_PATHS))
    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0
    # and copied the accumulated frame on every call.
    return pd.concat(frames, ignore_index=True)


def save_comments(frame, path=OUTPUT_PATH):
    """Write *frame* to an Excel file at *path*, creating its directory if needed."""
    import os

    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    frame.to_excel(path, index=False)


if __name__ == '__main__':
    df_all = scrape_comments()
    # Write the collected reviews to disk.
    save_comments(df_all)
|