|
#!/usr/bin/python
import datetime
import json
import random
import re
import socket
import time
from time import ctime
from tkinter import *
from urllib import error
from urllib.parse import *

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import requests
import xlrd
from lxml import etree
from matplotlib import pyplot
from requests.packages import urllib3
# Select the SimHei font for sans-serif text; the charts below use Chinese
# titles and axis labels.
mpl.rcParams['font.sans-serif'] = ['SimHei']
# Draw negative tick labels with an ASCII hyphen instead of the Unicode minus.
plt.rcParams['axes.unicode_minus'] = False
# Timestamps captured once, when the module is first executed; they are not
# refreshed afterwards.
now_date = time.strftime("%m-%d", time.localtime(time.time()))
now_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
# Pool of numeric account names. HistoryPriceSearch picks one at random
# (random.choice) as the "username" field of its discount-history request.
# Some entries appear twice; the list is kept exactly as found.
phone_list = [
    18303517744,
    13613416611,
    15219466201,
    15036222256,
    18438888133,
    18876941131,
    18876622089,
    18889262767,
    13715150077,
    13717033838,
    18351078990,
    13467719111,
    15997693333,
    13730600607,
    13699051071,
    13849038741,
    18352936688,
    13880888292,
    18822441999,
    15777770130,
    15777772845,
    13727693111,
    13632577333,
    15976856868,
    18222167181,
    13512960022,
    13530102266,
    18300666187,
    15824817777,
    18349333171,
    13838555227,
    15890005577,
    15890008887,
    13838200888,
    13924853168,
    18822488887,
    13972911999,
    13428334566,
    13566102222,
    13732097555,
    15233333323,
    13682987828,
    13923918859,
    18859981392,
    15818692899,
    15012563066,
    18222522000,
    13828716737,
    13692298935,
    13706053195,
    13887441413,
    18322040999,
    13911336673,
    13801391870,
    13433196988,
    13702485588,
    13924578588,
    13924852345,
    18823143456,
    13637666699,
    13755630022,
    13920593529,
    18702888838,
    15198120000,
    13908057178,
    18844227188,
    18750468844,
    13505952075,
    15768179999,
    18356194521,
    13696754521,
    13788829706,
    15208275054,
    18777770214,
    13551275898,
    18280151115,
    13677777254,
    18769721000,
    18897777726,
    15814226133,
    15918128980,
    15918129083,
    15918129282,
    15918129090,
    18300077779,
    15022277000,
    15875766666,
    18428088892,
    15703382298,
    15131712232,
    15732922520,
    13874677777,
    18255555551,
    18393897777,
    15180222225,
    13505740467,
    13780390000,
    18859567892,
    15277775445,
    13662688881,
    18213777222,
    13761746746,
    15000505062,
    14761188884,
    13809070207,
    13818357698,
    13873179698,
    18817871288,
    15112998888,
    15703361816,
    15290911121,
    15107555885,
    18396217171,
    13825876548,
    13619870320,
    13778891234,
    13548291222,
    18282200022,
    18402898980,
    18328025788,
    15228886138,
    17878781118,
    15123888444,
    15837182792,
    15838125087,
    18703896718,
    18736011629,
    18839781750,
    18837170569,
    15777776964,
    18761755000,
    18751373210,
    15962711155,
    15962792088,
    18761755088,
    13656291113,
    18862779378,
    15190971978,
    13777888585,
    15068936333,
    15204025988,
    13654059991,
    15775677700,
    13684218789,
    15281898765,
    13616202666,
    18751126999,
    13812920788,
    13809055222,
    13962350777,
    18353240966,
    18853296464,
    17839929705,
    18838967382,
    18749418806,
    15093239328,
    15188349522,
    18236956924,
    18348405579,
    15093334268,
    13505647555,
    15220525678,
    15020050513,
    15020030417,
    15267701717,
    15088931331,
    15906878938,
    13646514938,
    13706636314,
    18867793298,
    13739742666,
    15731102345,
    13859652222,
    18232102678,
    13601261337,
    15231099666,
    18337728521,
    15203802168,
    18331758666,
    18736599499,
    13930109099,
    15738888289,
    15738888538,
    15738888576,
    15738888697,
    15738888963,
    13797904444,
    15243191111,
    18405311888,
    18405311888,
    13791080000,
    13791080000,
    13908376207,
    13908335110,
    13908374332,
    18702397333,
    18702379555,
    15922584000,
    13783666664,
    18335392777,
    15217430000,
    15992225679,
    13585510688,
    15818991889,
    17806722226,
    13536565653,
    18738651999,
    18388555511,
    15825022222,
    15882234084,
    13776268888,
    15018310888,
    15113133313,
    13701097729,
    15726835666,
    15058299222,
    15118444415,
    18820300009,
    18825700007,
    13829111788,
    13825766788,
    13480423333,
    13711888886,
    13532923333,
    13825737888,
    13537328888,
    13686678888,
    13538345678,
    15016967488,
    15917735557,
    15217104555,
    15917669777,
    15017888444,
    15931390000,
    15267180777,
    15068793333,
    18335156789,
    13835175177,
    18202468383,
    13926787833,
    15815100303,
    15892056631,
    13599305858,
    13616979898,
    13511100900,
    13786766667,
    13686868538,
    13632878899,
    13883038222,
    18838200011,
    13911672661,
    13521935222,
    13802289678,
    13728888822,
    13801507158,
    15093939323,
    15160299539,
    18831119031,
    13974259999,
    15807539093,
    15023669066,
    13785811099,
    18716433334,
    18834845999,
    13507170130,
    13507115301,
    13995588392,
    13657247111,
    17839999122,
    17839993883,
    13807196657,
    13807197319,
    13807198517,
    13807153256,
    13807190231,
    13908631578,
    13908863082,
    18822858108,
    13510308789,
    13510102070,
    18419521214,
    13877853333,
    18351203222,
    18261197555,
    15815285757,
    15261115522,
    13903173981,
    15132755552,
    15019677099,
    18862192899,
    13678863811,
    13983652278,
    13856977511,
    13589966223,
    18337623210,
    13979673333,
    15007927777,
    18837744446,
    13950654999,
    13861186488,
    18870000005,
    15158172221,
    15824107733,
    13790746666,
    15802648889,
    13808322226,
    15823513000,
    18883190766,
    18883298278,
    18375801115,
    18375702233,
    15023871222,
    13779033333,
    18872855555,
    18270003333,
    18886889988,
    15777777783,
    18881111115,
    18882888802,
    14799448888,
    13688819128,
    13688819693,
]
# Mobile (Android / WeChat in-app browser) User-Agent strings; one is chosen
# at random for the discount-history request headers.
user_agent_m = [
    'Mozilla/5.0 (Linux; Android 8.1; PAR-AL00 Build/HUAWEIPAR-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044304 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/tools',
    'Mozilla/5.0 (Linux; Android 8.1; EML-AL00 Build/HUAWEIEML-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.143 Crosswalk/24.53.595.0 XWEB/358 MMWEBSDK/23 Mobile Safari/537.36 MicroMessenger/6.7.2.1340(0x2607023A) NetType/4G Language/zh_CN',
    'Mozilla/5.0 (Linux; Android 8.0; MHA-AL00 Build/HUAWEIMHA-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044304 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/4G Language/zh_CN Process/tools',
    'Mozilla/5.0 (Linux; Android 5.1.1; vivo X6S A Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044207 Mobile Safari/537.36 MicroMessenger/6.7.3.1340(0x26070332) NetType/4G Language/zh_CN Process/tools',
]
# Carrier names (China Unicom, China Mobile, China Telecom) sent as the
# "c_operator" request field.
ip_origin = ["中国联通", "中国移动", "中国电信"]
# Handset model names sent as the "c_devmodel" request field.
c_devmodel_list = ['Mate10', 'P8青春', '荣耀7i', '畅玩7A', '荣耀8XMax',
                   'Mate10Pro', '荣耀10', 'M3青春', '荣耀8青春']
class CrawlCompareWeb:
    """Collect product listings from the manmanbuy search endpoint.

    Results are accumulated in per-field lists by ``turn_page_get_info`` and
    written to one worksheet by ``download_file``.

    Original author's notes (translated): the price-comparison site has
    strict anti-scraping measures; rotating IPs might get around them, but
    this is unverified. Another endpoint for querying history prices is
    http://tool.manmanbuy.com/history.aspx?DA=1&action=gethistory&url=http%3a%2f%2fitem.tmall.com%2fitem.htm%3fid%3d532034800285&bjid=&spbh=&cxid=&zkid=&w=350&token=yva7088d209cdcbbbf30e6af9cf24005ce2dx
    and only its ``token`` would need to be cracked.
    """

    # Excel rejects worksheet names longer than 31 characters.
    _SHEET_NAME_LIMIT = 31

    def __init__(self, search_words, writer):
        """Store the search keyword and prepare the request template.

        Args:
            search_words: Keyword to search for. Kept verbatim in ``words``
                and percent-encoded in ``search_words``.
            writer: Excel writer object handed to ``DataFrame.to_excel``.
        """
        self.start_url = "https://apapia-search.manmanbuy.com/index_json.ashx"
        self.decode_type = "utf-8"
        self.total_page = None
        self.writer = writer
        self.words = search_words
        self.search_words = quote(
            search_words, encoding=self.decode_type, errors="replace"
        )
        self.headers = {
            "Host": "apapia-search.manmanbuy.com",
            "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            "Proxy-Connection": "close",
            "Cookie": "ASP.NET_SessionId=5nm1vf35xt2eisuhe2k0rm33; jjkcpnew111=cp98576765_1063811521_2018/9/26",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_4 like Mac OS X) AppleWebKit/605.1.15 "
            "(KHTML, like Gecko) Mobile/15F79 mmbWebBrowse",
            "Content-Length": "523",
            "Accept-Encoding": "gzip",
            "Connection": "close",
        }
        # Form body template; the four "{}" slots are, in order:
        # key, page, sign, t.
        self.data = (
            "jsoncallback=%3F&c_devmodel=iPhone%207&f1=&c_win=w_375_h_667&c_devid=C5707B0E-7A25-4BDF-BDF4-C64F8"
            "1711CAB&c_devtype=phone&f2=&key={}&iszy=&f3=&c_dp=2&f4=&c_devtoken=&c_channel=AppStore&f5=&"
            "smallclass=&f6=&methodName=getsearchkeylist&username=&c_operator=%E4%B8%AD%E5%9B%BD%E8%81%94%E"
            "9%80%9A&price2=&c_ostype=ios&c_engver=1.2.81&c_ctrl=w_search_form_f_search_product_content&page={}"
            "&sign={}&ppid=&price1=&c_contype=wifi&t={}&orderby=&c_osver=11.4&siteid=&c_appver=3.0.2"
        )
        self.title_list = []
        self.mall_list = []
        self.iszy_list = []
        self.price_list = []
        self.sales_list = []
        self.prourl_list = []
        self.skuid_list = []
        self.itemid_list = []
        self.crawl_time_list = []
        self.comment_list = []

    @staticmethod
    def _parse_items(result_data):
        """Split the raw response text into one dict per product.

        The text is cut at every "}" and each piece is parsed on its own, so
        this only works for a flat list of flat objects.
        NOTE(review): values containing ', [, ] or } would be corrupted by
        this approach - confirm against a real response.
        """
        flattened = result_data.replace("'", "").replace("[", "").replace("]", "")
        items = []
        for chunk in flattened.split("}"):
            candidate = chunk.strip(", \r\n\t") + "}"
            # Only pieces mentioning "img" are treated as product records.
            if "img" in candidate:
                items.append(json.loads(candidate))
        return items

    def _record_item(self, item):
        """Append the fields of one parsed product to the result lists."""
        self.title_list.append(item["title"])
        self.mall_list.append(item["mall"])
        self.iszy_list.append(item["iszy"])
        self.price_list.append(item["price"])
        self.sales_list.append(item["sales"])
        self.prourl_list.append(item["prourl"])
        self.skuid_list.append(item["skuid"])
        self.itemid_list.append(item["itemid"])
        self.comment_list.append(item["comment"])
        self.crawl_time_list.append(
            time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
        )

    def turn_page_get_info(self, max_pages=9, timeout=10):
        """Request result pages 1..max_pages and record every product found.

        Stops early at the first page whose response body is empty, and
        sleeps 2-3 seconds after each non-empty page.

        Args:
            max_pages: Last page number to request (default 9).
            timeout: Seconds to wait for each HTTP response.

        Raises:
            requests.exceptions.RequestException: On network failure/timeout.
            ValueError: If a record is not valid JSON.
            KeyError: If a record lacks one of the stored fields.
        """
        for page in range(1, max_pages + 1):
            t_1 = int(round(time.time() * 1000))
            t_2 = t_1 + random.randint(1, 5)
            # NOTE(review): the "sign" slot receives t_1 and the "t" slot
            # receives t_2 - confirm this is what the server expects.
            data = self.data.format(self.search_words, page, t_1, t_2)
            response = requests.post(
                self.start_url, data=data, headers=self.headers, timeout=timeout
            )
            result_data = response.content.decode("utf-8")
            if not result_data:
                break
            for item in self._parse_items(result_data):
                self._record_item(item)
            time.sleep(random.uniform(2, 3))

    def _sheet_name(self):
        """Build "<words>_全网价格数据_<mmdd>", shortening words to fit Excel."""
        suffix = (
            "_"
            + "全网价格数据"
            + "_"
            + time.strftime("%m%d", time.localtime(time.time()))
        )
        return self.words[: self._SHEET_NAME_LIMIT - len(suffix)] + suffix

    def download_file(self):
        """Write the collected listings to a worksheet of ``self.writer``."""
        columns = ["商品标题", "平台", "店铺", "价格", "销量", "评论量", "地址", "sku"]
        dataframe = pd.DataFrame(
            {
                "商品标题": self.title_list,
                "平台": self.mall_list,
                "店铺": self.iszy_list,
                "价格": self.price_list,
                "销量": self.sales_list,
                "评论量": self.comment_list,
                "地址": self.prourl_list,
                "sku": self.skuid_list,
            },
            columns=columns,
        )
        # "encoding" is no longer accepted by DataFrame.to_excel in pandas 2.
        dataframe.to_excel(self.writer, index=False, sheet_name=self._sheet_name())
        print("数据写入完成,进程结束")
class HistoryPriceSearch:
    """Fetch discount history and price history for one product URL.

    ``parser_history_preferential_info`` and ``parser_history_price_info``
    query two remote endpoints and store the results on the instance; the two
    ``download_*`` methods export them to ``self.writer`` and to image files.
    """

    # Seconds to wait for each HTTP response.
    _REQUEST_TIMEOUT = 5

    def __init__(self, search_url, writer):
        """Prepare endpoints, headers and form payloads for ``search_url``.

        Args:
            search_url: Product page URL to look up.
            writer: Excel writer; ``download_price_info_data`` additionally
                calls ``writer.book.add_worksheet`` on it.
        """
        self.search_preferential_url = "https://apapia-history.manmanbuy.com/ChromeWidgetServices/WidgetServices.ashx"
        self.search_price_url = "https://ext.henzanapp.com/api.html"
        self.t = int(time.time() * 1000)
        # NOTE(review): "charset:utf-8" and the Cookie value use ":" where
        # "=" would be expected; kept as found - confirm before changing.
        self.preferential_headers = {
            "Host": "apapia-history.manmanbuy.com",
            "Content-Type": "application/x-www-form-urlencoded; charset:utf-8",
            "Proxy-Connection": "close",
            "Cookie": "jjkcpnew111:cp44979114_1063811528_2018/10/18",
            "User-Agent": random.choice(user_agent_m),
            "Content-Length": "548",
            "Accept-Encoding": "gzip",
            "Connection": "close",
        }
        self.price_headers = {
            "Host": "ext.henzanapp.com",
            "Proxy-Connection": "close",
            "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
            "Content-Length": "4550",
            "Accept-Encoding": "gzip",
            "Connection": "close",
            "Cookie": "mmzdd=482ef902b98b228c76a0f748e7deaa79",
        }
        self.preferential_data = {
            "c_devid": "C5707B0E-7A25-4BDF-BDF4-C64F81711CAB",
            "username": random.choice(phone_list),
            "ipage": "",
            "c_dp": "2",
            "c_engver": "1.2.83",
            "c_devtoken": "",
            "c_devmodel": random.choice(c_devmodel_list),
            "c_contype": "wifi",
            "c_win": "w_375_h_667",
            "t": self.t,
            "c_firstchannel": "AppStore_update",
            "p_url": search_url,
            "sign": "07E0CB3EF0B16E74",
            "c_ostype": "Android",
            "jsoncallback": "%3F",
            "c_ctrl": "w_search_trend0_f_content",
            "methodName": "getZhekou",
            "c_channel": "Google Play",
            "c_devtype": "Android",
            "c_operator": random.choice(ip_origin),
            "c_appver": "3.0.5",
            "c_firstquerendate": "1540799598929",
            "ipagesize": "6",
            "c_osver": "11.4",
        }
        self.price_data = {
            "tPrice": "",
            "toolbar_state": "open",
            "path1": "qihoo-mall-goodsinfo",
            "mid": "",
            "tSale": "",
            "fromTp": "0",
            # Opaque captured payload. The three pieces below are one
            # continuous value; they are split only to keep lines manageable.
            "checkinfo": (
                "c9f8d7a8a8d7e899d7c9a9d709d9d71999d71909d7f8d9d7c999d7c8a9d709d9d7d899d7d809d7d8d9d78899d79909d7d8d9d7c909d71909d7d8d9d78819d7e909d7e8d9d7f8a9d7e999d709d9d7b909d7b9a9d7e9d9d7e819d7c909d7d8d9d78809d7b9a9d7d8d9d7d899d7f819d7e8d9d7e8980909d7b919d7e8d9d7f89809d7b819d7d8d9d7e809d7d819d7d8d9d7c899d7c999d7e8d9d7a8a8d799b8d7a8a8d7db5c1ccc7bdbfbcb9baba8a8d7b9a8d7a8a8d7888868e89898a8a8d799b8d7a8a8d7dbbb1cac8c7bdc2ca8a8d7b9a8d7a8a8d7f8a9d7a9a9d7c8d9d79819d79919d7e8d9d7a809d7a909d7f8d9d7a8a8d799b8d7a8a8d7db5c9b6c7bdbbb1cac8c7bdc2ca8a8d7b9a8d7a8a8d7f819d7c909d7d8d9d7d909d7d8a9d7e8d9d7b919d709a9d7c8d9d798a9d798a9d7d8d9d7a8a8d799b8d7a8a8d7accbcb9b7b1dacdbec1c4cdbcba8a8d7b9a8d7a8a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a897977b9b0b0bbb0dbb0b0bfbe9881d2c6adca91b19ababec985c8aa8a9cae9a8d7f8d819f89809b8e8a8e9a8d7c81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a897977b9b0bea2d0c9b0b0bdb2d981d2c6adba91b2a6cab68ca3a1ba8a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a8589b0bead82d9b0b0bdb8cbcba2c6adca9fa19f85c0dac0aeba8a9cae9a8d7f8d819f89809b8e8a8e9a8d7b81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a8589b0b8cb899bb0b0b1daa5cba3c6adca91bdc1adb8cfadc4ca8a9cae9a8d7f8d819f89809b8e8a8e9a8d7b81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68bb1c8c7b5cdbcc1c588897977b9b0bea7cfb9b0b0b193a2bba2c6a3aa9ac1a1a3cabc968c898a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7a8a8d799b8d7a8a8d7fb5c1c1ccc4cdc5ca8a8d7b9a8d7a8a8d78809d7c819d719d9d7c8a9d7a919d7f8d9d7b909d71909d7f8d9d79809d7b819d7d8d9d7"
                "a8a8d799b8d7a8a8d7fb9bcc8c7c0cbca8a8d7b9a8d7a8a8d719e898a8a8d799b8d7a8a8d75cdc6ccc5cbba8a8d7b9a8d7a8a8d78899d7b809d7e8d9d70919d7b9a9d7c8d9d7b9a9d78809d7d8d9d7d809d7e8a9d709d9d7a8a8d799b8d7a8a8d7dbbb1cac8c4c9bdbaca8a8d7b9a8d7a8a8d7889898a8a8d799b8d7a8a8d75cdc6cdb4c9bbca8a8d7b9a8d7a8a8d7f819d799a9d7d8d9d788a9d70909d709d9d7f819d7f819d7e8d9d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7a8a8d799b8d7a8a8d7db5c9b6ccc6c9b0cbbacdb5ca8a8d7b9a8d7a8a8d768f809c8f8dac988a8d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7e9a8d7d94a1bcaba0aa8a8d799b8d7a8a8d7db5c9b6c7bccbbdccb7cac8ca8a8d7b9a8d7a8a8d768f809c8f8dac9a8a8d799b8d7a8a8d7acdbab5cdc6c7b5cdbcc1ca8a8d7b9a8d7a8a8d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7e9a8d7d94a1bcaba0aa8a8d799b8d7a8a8d7cb6c9bacaba8a8d7b9a8d7a8a8d7f8d819f89809b8e8a8a8a8d799b8d7a8a8d7cb1cacdb4c4cdbbc7c9bab7c9bcca8a8d7b9a8d7a8a8d7c88888e898a8b8a898a8a8d799b8d7a8a8d7cb1ccc9bbba8a8d7b9a8d7a8a8d7e898b9f8d7b9f8d7b9f8d7e898a8a8d799b8d7a8a8d7cb1cbba8a8d7b9a8d7a8a8d7a8a8d799b8d7a8a8d7ac9bec7bdc3cbca8a8d7b9a8d78899b8d7a8a8d7db4c9bba7c6ca8a8d7b9a8d7a8a8d7886898a8a8d799b8d7a8a8d76c7c1cbcacdbeca8a8d7b9a8d7a8a8d7a8a8d799b8d7a8a8d7ccbb1cacccbc1ccba8a8d7b9a8d79899b8d7a8a8d7db4c9bbabc1ca8a8d7b9a8d7a8a8d7fb8c2c6888199c88b8c80d88b8c87bfb8c2c68bb1c8c7b5cdbcc1c588897977b9b0bea7cfb9b0b0b193a2bba2c6a3aa9ac1a1a3cabc968c898a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d7bc8ccccc0ca8a8d799b8d7a8a8d7bb1c8ca8a8d7b9a8d7a8a8d798a9d70999d7e8d9d7f809c8f8dac9d809d7b899d709d9d7f819d7d899d7d8d9d79919d788a9d7d8d9d7e8a9d7f819d7e8d9d7f819d7d899d7d8d9d7e8a9d7a9a9d7c8d9d7c899d709a9d7c8d9d7e819d7a899d709d9d7c999d7e919d7f8d9d719a9d799a9d7f8d9d79899d7c919d7e8d9d7d9a9d7b999d7e8d9d788a9d7e819d7e8d9d7d809d7b899d709d9d7e909d7c899d7d8d9d7d809d7b899d709d9d7b8a9d7d899d7d8d9d70909d71909d7f8d9d71999d7e919d719d9d7099888a8c909d70909d709d9d7b899d79899d709d9d7c9a9d7b8"
                "09d719d9d71999d7e919d719d9d7a8a8d799b8d7a8a8d7db5c9b6ca8a8d7b9a8d7c8b89899b8d7a8a8d7dbbb1cac8ca8a8d7b9a8d79809c8c8c899b8d7a8a8d7cb1a8c7c0cbca8a8d7b9a8d7a8a8d7a999d7b909d7f8d9d71999d7c899d7d8d9d7a8a8d799b8d7a8a8d7db5c9b6a8c7c0cbca8a8d7a9f8"
            ),
            "prevpop": "",
            "bfrom": "normal",
            "url": search_url,
            "path2": "goodspricecmp",
            "tplmd5": "7330361958732444829",
            "hisOpn": "0",
            "isGulike": "0",
            "cv": "4.2.1.0",
            "ref": search_url,
            "v": "v5",
            "pop": "1",
        }
        self.writer = writer
        self.spname_list = []
        self.spprice_list = []
        self.dt_list = []
        self.infoid_list = []
        self.infotype_list = []
        self.sppic_list = []
        self.history_price_dict = {}
        self.search_price_start_date = None
        self.search_price_end_date = None

    @staticmethod
    def _format_timestamp(raw, date_format):
        """Format the first run of digits in ``raw`` (milliseconds) as local time.

        Returns None when ``raw`` is empty or contains no digits.
        """
        if not raw:
            return None
        match = re.search(r"\d+", raw)
        if match is None:
            return None
        return time.strftime(date_format, time.localtime(int(match.group()) / 1000))

    def _collect_preferential_items(self, items, date_format):
        """Append every entry of a "zklist" response array to the result lists."""
        for item in items or []:
            spname = item.get("spname")
            # The price text arrives wrapped in <p>...</p>; strip the tags.
            spprice = (item.get("spprice") or "").replace("<p>", "").replace("</p>", "")
            dt = self._format_timestamp(item.get("dt"), date_format)
            infoid = item.get("infoid")
            infotype = item.get("infotype")
            sppic = item.get("sppic")
            self.spname_list.append(spname)
            self.spprice_list.append(spprice)
            self.dt_list.append(dt)
            self.infoid_list.append(infoid)
            self.infotype_list.append(infotype)
            self.sppic_list.append(sppic)
            print(spname, spprice, dt, infoid, infotype, sppic)

    def _post_preferential_page(self, page):
        """POST the discount-history query for one page and return the response."""
        self.preferential_data["ipage"] = page
        # NOTE(review): verify=False disables TLS certificate checking.
        return requests.post(
            url=self.search_preferential_url,
            headers=self.preferential_headers,
            data=self.preferential_data,
            verify=False,
            timeout=self._REQUEST_TIMEOUT,
        )

    def parser_history_preferential_info(self, pages=None):
        """Download discount-history entries into the ``*_list`` attributes.

        Args:
            pages: An int requests exactly pages 1..pages (dates formatted
                "%Y-%m-%d %H:%M:%S"). None keeps requesting successive pages
                until the server stops answering ok == 1 or returns an empty
                "zklist" (dates formatted "%m-%d"). Any other value only
                prints a failure message.

        Raises:
            requests.exceptions.RequestException: On network failure/timeout.
            ValueError: If a response body is not valid JSON.
        """
        if isinstance(pages, int):
            for page in range(1, pages + 1):
                response = self._post_preferential_page(page)
                body = response.content.decode("utf-8")
                print(body)
                # The body is only parsed when the status is 200.
                json_data = json.loads(body) if response.status_code == 200 else {}
                if json_data.get("ok") == 1:
                    self._collect_preferential_items(
                        json_data.get("zklist"), "%Y-%m-%d %H:%M:%S"
                    )
                else:
                    error_status_code = response.status_code
                    print("凉了,被ban了~ 状态码:%s,自己看的办吧" % error_status_code)
        elif pages is None:
            page = 1
            response = self._post_preferential_page(page)
            json_data = json.loads(response.content.decode("utf-8"))
            print(self.search_preferential_url)
            print(self.preferential_headers)
            print(self.preferential_data)
            print(json_data)
            if response.status_code == 200 and json_data.get("ok") == 1:
                while json_data.get("ok") == 1:
                    self._collect_preferential_items(json_data.get("zklist"), "%m-%d")
                    page += 1
                    response = self._post_preferential_page(page)
                    print(response.status_code)
                    print(self.preferential_data["ipage"])
                    json_data = json.loads(response.content.decode("utf-8"))
                    if not json_data.get("zklist"):
                        break
                    print(json_data)
                    # Short random pause between pages.
                    time.sleep(random.uniform(0.5, 1.0))
            else:
                print("该商品无历史优惠信息或User-Agent错误或触发反爬,请重试")
        else:
            print("数据抓取失败,洗洗睡吧")

    def parser_history_price_info(self):
        """Download the price history into ``history_price_dict`` (dt -> pr).

        Also stores the "bd"/"ed" values of the response as the search start
        and end dates.

        Raises:
            requests.exceptions.RequestException: On network failure/timeout.
            ValueError: If a 200 response body is not valid JSON.
        """
        response = requests.post(
            url=self.search_price_url,
            data=self.price_data,
            headers=self.price_headers,
            verify=False,
            timeout=self._REQUEST_TIMEOUT,
        )
        body = response.content.decode("utf-8")
        print(body)
        json_data = json.loads(body) if response.status_code == 200 else {}
        pcinfo = json_data.get("pcinfo")
        if pcinfo:
            print(json_data)
            self.search_price_start_date = pcinfo["bd"]
            self.search_price_end_date = pcinfo["ed"]
            for point in pcinfo["info"]:
                self.history_price_dict[point["dt"]] = point["pr"]
        else:
            print("数据为空,或者被ban~~")

    def download_preferential_info_data(self):
        """Write the discount history to a worksheet and chart it.

        The worksheet is always written. The chart is saved as
        "./<timestamp>.jpg" and shown, but only when at least one entry was
        collected (the chart title is the first entry's name).
        """
        columns = ["名称", "优惠信息", "日期", "infoid", "infotype", "商品主图"]
        dataframe = pd.DataFrame(
            {
                "名称": self.spname_list,
                "优惠信息": self.spprice_list,
                "日期": self.dt_list,
                "infoid": self.infoid_list,
                "infotype": self.infotype_list,
                "商品主图": self.sppic_list,
            },
            columns=columns,
        )
        to_c_sheet = (
            "商品历史查询数据"
            + "_"
            + time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
        )
        # "encoding" is no longer accepted by DataFrame.to_excel in pandas 2.
        dataframe.to_excel(self.writer, index=False, sheet_name=to_c_sheet)
        print("数据写入完成,进程结束")

        if not self.spname_list:
            # Nothing to plot, and no first name to use as the chart title.
            return

        # Start from a fresh figure so nothing is drawn over an earlier chart.
        pyplot.figure()
        pyplot.plot(self.dt_list, self.spprice_list)
        pyplot.xlabel('日期')
        pyplot.ylabel('价格')
        pyplot.title(self.spname_list[0])
        # Fill options: x values, y values, baseline y value, fill colour.
        pyplot.fill_between(self.dt_list, self.spprice_list, 10, color='white')
        # Reverse the x axis.
        pyplot.gca().invert_xaxis()
        now_time1 = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
        pyplot.savefig("./" + now_time1 + ".jpg")
        print("已经保存")
        # Display the chart.
        pyplot.show()

    def download_price_info_data(self):
        """Plot the price history and embed the image in a new worksheet.

        Prints a notice and returns when no price history was collected.
        Otherwise the chart is saved to "foo.png" and inserted at the
        top-left cell of a worksheet named "<mmdd>历史价格查询".
        NOTE(review): the sheet name only contains the date, so a second
        product exported on the same day into the same workbook would reuse
        it - confirm how the workbook engine handles that.
        """
        print(self.history_price_dict)
        if not self.history_price_dict:
            print('数据为空,该商品未被收录')
            return

        mpl.rcParams["font.sans-serif"] = ["SimHei"]
        mpl.rcParams["axes.unicode_minus"] = False
        # Set the size before creating the figure so it applies to it.
        plt.rcParams["figure.figsize"] = (8.0, 4.0)
        fig = plt.figure()

        min_date = min(self.history_price_dict, key=self.history_price_dict.get)
        min_price = self.history_price_dict[min_date]
        plt.plot(
            list(self.history_price_dict.keys()),
            list(self.history_price_dict.values()),
        )
        # Label the lowest point: its date to the left, its price to the right.
        plt.text(min_date, min_price, min_date, ha="right", va="bottom", fontsize=10)
        plt.text(min_date, min_price, min_price, ha="left", va="bottom", fontsize=10)
        plt.title("历史价格分布")
        plt.xlabel("日期")
        plt.ylabel("金额")
        fig.savefig("foo.png")
        # Release the figure so later pyplot calls do not draw onto it.
        plt.close(fig)
        fig_title = (
            time.strftime("%m%d", time.localtime(time.time())) + "历史价格查询"
        )
        sheet = self.writer.book.add_worksheet(fig_title)
        sheet.insert_image(0, 0, "foo.png")
class Application:
    """Tkinter window that runs a history lookup for each entered key."""

    def __init__(self):
        """Build the window: input box, lookup button, log box and labels."""
        self.window = Tk()
        self.text = Text(self.window)
        # Window title, size and position.
        self.window.title("阳光价格")
        self.window.geometry("290x430+500+280")
        self.window.minsize(290, 380)
        # Input text box.
        self.entry = Text(self.window)
        self.entry.place(x=10, y=10, width=200, height=165)
        # No callback is passed here, so this call does not attach a handler.
        self.entry.bind("<Key-Return>")

        # "History trace" button.
        self.submit_btn5 = Button(self.window, text=u"历史溯源", command=self.submit_5)
        self.submit_btn5.place(x=220, y=150, width=60, height=25)
        # Heading above the log box.
        self.title_label = Label(self.window, text=u"运行日志:")
        self.title_label.place(x=10, y=180)
        # Log box.
        self.result_text = Text(self.window, background="#ccc")
        self.result_text.place(x=10, y=205, width=270, height=205)
        # Footer label (reuses the title_label attribute).
        self.title_label = Label(self.window, text=u"2019_a7_price ")
        self.title_label.place(x=60, y=410)
        self.file_path = None
        self.writer = None

    @staticmethod
    def _split_keys(raw_text):
        """Split the input box text into non-empty, trimmed keys.

        Newlines become spaces and the full-width comma (U+FF0C) is treated
        like an ASCII comma before splitting on ",".
        """
        normalized = raw_text.strip().replace("\n", " ").replace("\uff0c", ",")
        return [part.strip() for part in normalized.split(",") if part.strip()]

    def _replace_log(self, message):
        """Clear the log box and show ``message``."""
        self.result_text.delete("1.0", END)
        self.result_text.insert(END, message)

    def submit_5(self):
        """Run the history lookup for every key in the input box.

        Creates "./<mm-dd>-历史溯源.xlsx", processes each key with
        HistoryPriceSearch, reports errors in the log box and always closes
        the workbook at the end.
        """
        # Take the timestamps now rather than at import time, so log lines
        # and the file name reflect this click.
        now_date = time.strftime("%m-%d", time.localtime(time.time()))
        now_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))

        self.result_text.delete("1.0", END)
        self.file_path = "./" + now_date + "-" + "历史溯源" + ".xlsx"
        # NOTE(review): HistoryPriceSearch calls writer.book.add_worksheet,
        # so this presumably relies on the xlsxwriter engine - confirm.
        self.writer = pd.ExcelWriter(self.file_path)
        try:
            for key in self._split_keys(self.entry.get("1.0", END)):
                self.result_text.delete("1.0", END)
                search_history = HistoryPriceSearch(key, self.writer)
                # Fetch the history data.
                search_history.parser_history_preferential_info()
                search_history.parser_history_price_info()
                # Export the data.
                search_history.download_preferential_info_data()
                search_history.download_price_info_data()
                log_1 = (
                    "历史价格搜索中"
                    + "\n"
                    + "开始下载数据中…………"
                    + "\n"
                    + "下载数据请在跟程序处于相同位置查找,文件名为【当前时间+历史趋势】"
                )
                self.result_text.insert(END, log_1)
        except ValueError as e:
            self._replace_log(
                "log: " + now_time + " " + "查询异常 " + str(e) + "\n" + "请检查键入格式: 国产红富士"
            )
        except KeyError as e:
            self._replace_log(
                "log: " + now_time + " " + "查询异常 " + str(e) + "\n" + "请检查键入格式: 烟台红富士"
            )
        except error.HTTPError as e:
            self._replace_log(
                "log: " + now_time + " " + "URL异常 " + str(e) + "更换关键词重试"
            )
        except (error.URLError, requests.exceptions.RequestException) as e:
            # The lookups use requests, so its exceptions are handled here
            # alongside urllib's.
            self._replace_log(
                "log: "
                + now_time
                + " "
                + "请求异常 "
                + str(e)
                + "\n"
                + "查询太频繁啦~请稍后重新或换IP重试"
            )
        finally:
            # close() writes the file; ExcelWriter.save() no longer exists
            # in pandas 2.
            self.writer.close()
            log = "\n" + "log: " + now_time + " " + "数据下载结束,请在程序所处位置查收Excel"
            self.result_text.insert(END, log)

    def run(self):
        """Enter the Tk event loop."""
        self.window.mainloop()
# Build the window and enter the Tk event loop as soon as this module runs.
app = Application()
app.run()
|