#!/usr/bin/env python3
"""Tkinter front-end for looking up a product's promotion and price history.

The script posts to the manmanbuy / henzanapp endpoints configured below,
stores the returned records in an Excel workbook through ``pandas`` and
draws the trends with ``matplotlib``.
"""
import datetime
import io
import json
import random
import re
import socket
import time
from time import ctime
from tkinter import *
from urllib import error
from urllib.parse import *

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import requests
import xlrd
from lxml import etree
from matplotlib import pyplot
from requests.packages import urllib3

# Use a CJK-capable font and keep the minus sign renderable with that font.
mpl.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Import-time timestamps, kept for backward compatibility with other modules.
now_date = time.strftime("%m-%d", time.localtime(time.time()))
now_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))

# Seconds to wait for any single HTTP request (the original paging loop used 5).
_REQUEST_TIMEOUT = 5

# Excel rejects these characters in sheet names and limits names to 31 chars.
_INVALID_SHEET_CHARS = re.compile(r"[\[\]:*?/\\]")
_MAX_SHEET_NAME = 31

# NOTE(review): the literals removed from "spprice" were lost when this file
# was collapsed (they survive only as raw line breaks). CR/LF and HTML <br>
# tags are stripped here - confirm against the real API payload.
_OFFER_BREAKS = re.compile(r"<br\s*/?>|[\r\n]", re.IGNORECASE)

# Pool of values sent as the "username" form field, picked at random.
phone_list = [
    18303517744, 13613416611, 15219466201, 15036222256, 18438888133, 18876941131,
    18876622089, 18889262767, 13715150077, 13717033838, 18351078990, 13467719111,
    15997693333, 13730600607, 13699051071, 13849038741, 18352936688, 13880888292,
    18822441999, 15777770130, 15777772845, 13727693111, 13632577333, 15976856868,
    18222167181, 13512960022, 13530102266, 18300666187, 15824817777, 18349333171,
    13838555227, 15890005577, 15890008887, 13838200888, 13924853168, 18822488887,
    13972911999, 13428334566, 13566102222, 13732097555, 15233333323, 13682987828,
    13923918859, 18859981392, 15818692899, 15012563066, 18222522000, 13828716737,
    13692298935, 13706053195, 13887441413, 18322040999, 13911336673, 13801391870,
    13433196988, 13702485588, 13924578588, 13924852345, 18823143456, 13637666699,
    13755630022, 13920593529, 18702888838, 15198120000, 13908057178, 18844227188,
    18750468844, 13505952075, 15768179999, 18356194521, 13696754521, 13788829706,
    15208275054, 18777770214, 13551275898, 18280151115, 13677777254, 18769721000,
    18897777726, 15814226133, 15918128980, 15918129083, 15918129282, 15918129090,
    18300077779, 15022277000, 15875766666, 18428088892, 15703382298, 15131712232,
    15732922520, 13874677777, 18255555551, 18393897777, 15180222225, 13505740467,
    13780390000, 18859567892, 15277775445, 13662688881, 18213777222, 13761746746,
    15000505062, 14761188884, 13809070207, 13818357698,
    13873179698, 18817871288, 15112998888, 15703361816, 15290911121, 15107555885,
    18396217171, 13825876548, 13619870320, 13778891234, 13548291222, 18282200022,
    18402898980, 18328025788, 15228886138, 17878781118, 15123888444, 15837182792,
    15838125087, 18703896718, 18736011629, 18839781750, 18837170569, 15777776964,
    18761755000, 18751373210, 15962711155, 15962792088, 18761755088, 13656291113,
    18862779378, 15190971978, 13777888585, 15068936333, 15204025988, 13654059991,
    15775677700, 13684218789, 15281898765, 13616202666, 18751126999, 13812920788,
    13809055222, 13962350777, 18353240966, 18853296464, 17839929705, 18838967382,
    18749418806, 15093239328, 15188349522, 18236956924, 18348405579, 15093334268,
    13505647555, 15220525678, 15020050513, 15020030417, 15267701717, 15088931331,
    15906878938, 13646514938, 13706636314, 18867793298, 13739742666, 15731102345,
    13859652222, 18232102678, 13601261337, 15231099666, 18337728521, 15203802168,
    18331758666, 18736599499, 13930109099, 15738888289, 15738888538, 15738888576,
    15738888697, 15738888963, 13797904444, 15243191111, 18405311888, 18405311888,
    13791080000, 13791080000, 13908376207, 13908335110, 13908374332, 18702397333,
    18702379555, 15922584000, 13783666664, 18335392777, 15217430000, 15992225679,
    13585510688, 15818991889, 17806722226, 13536565653, 18738651999, 18388555511,
    15825022222, 15882234084, 13776268888, 15018310888, 15113133313, 13701097729,
    15726835666, 15058299222, 15118444415, 18820300009, 18825700007, 13829111788,
    13825766788, 13480423333, 13711888886, 13532923333, 13825737888, 13537328888,
    13686678888, 13538345678, 15016967488, 15917735557, 15217104555, 15917669777,
    15017888444, 15931390000, 15267180777, 15068793333, 18335156789, 13835175177,
    18202468383, 13926787833, 15815100303, 15892056631, 13599305858, 13616979898,
    13511100900, 13786766667, 13686868538, 13632878899, 13883038222, 18838200011,
    13911672661, 13521935222, 13802289678, 13728888822, 13801507158, 15093939323,
    15160299539, 18831119031, 13974259999,
    15807539093, 15023669066, 13785811099, 18716433334, 18834845999, 13507170130,
    13507115301, 13995588392, 13657247111, 17839999122, 17839993883, 13807196657,
    13807197319, 13807198517, 13807153256, 13807190231, 13908631578, 13908863082,
    18822858108, 13510308789, 13510102070, 18419521214, 13877853333, 18351203222,
    18261197555, 15815285757, 15261115522, 13903173981, 15132755552, 15019677099,
    18862192899, 13678863811, 13983652278, 13856977511, 13589966223, 18337623210,
    13979673333, 15007927777, 18837744446, 13950654999, 13861186488, 18870000005,
    15158172221, 15824107733, 13790746666, 15802648889, 13808322226, 15823513000,
    18883190766, 18883298278, 18375801115, 18375702233, 15023871222, 13779033333,
    18872855555, 18270003333, 18886889988, 15777777783, 18881111115, 18882888802,
    14799448888, 13688819128, 13688819693,
]

# Mobile User-Agent strings; one is picked at random per HistoryPriceSearch.
user_agent_m = [
    'Mozilla/5.0 (Linux; Android 8.1; PAR-AL00 Build/HUAWEIPAR-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044304 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/tools',
    'Mozilla/5.0 (Linux; Android 8.1; EML-AL00 Build/HUAWEIEML-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.143 Crosswalk/24.53.595.0 XWEB/358 MMWEBSDK/23 Mobile Safari/537.36 MicroMessenger/6.7.2.1340(0x2607023A) NetType/4G Language/zh_CN',
    'Mozilla/5.0 (Linux; Android 8.0; MHA-AL00 Build/HUAWEIMHA-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044304 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/4G Language/zh_CN Process/tools',
    'Mozilla/5.0 (Linux; Android 5.1.1; vivo X6S A Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044207 Mobile Safari/537.36 MicroMessenger/6.7.3.1340(0x26070332) NetType/4G Language/zh_CN Process/tools',
]

# Values sent as "c_operator" and "c_devmodel", picked at random.
ip_origin = ["中国联通", "中国移动", "中国电信"]
c_devmodel_list = [
    'Mate10', 'P8青春', '荣耀7i', '畅玩7A', '荣耀8XMax', 'Mate10Pro',
    '荣耀10', 'M3青春', '荣耀8青春',
]


def _excel_sheet_name(name):
    """Return *name* with Excel-invalid characters replaced, cut to 31 chars."""
    return _INVALID_SHEET_CHARS.sub("_", name)[:_MAX_SHEET_NAME]


class CrawlCompareWeb:
    """Search the price-comparison API by keyword and export the hits.

    The price-comparison site has strict anti-crawling measures; rotating
    the IP address may get around them (still to be verified).

    Another endpoint for querying price history:
    http://tool.manmanbuy.com/history.aspx?DA=1&action=gethistory&url=http%3a%2
    f%2fitem.tmall.com%2fitem.htm%3fid%3d532034800285&bjid=&spbh=&cxid=&zkid=&w=350&token=yva7088d209cdc
    bbbf30e6af9cf24005ce2dx
    Cracking the token is all that is needed to use it.
    """

    def __init__(self, search_words, writer):
        """Store the keyword and the Excel writer and build the request template.

        Args:
            search_words: keyword to search for; it is percent-encoded for
                the request body and used verbatim in the sheet name.
            writer: object passed to ``DataFrame.to_excel`` as the target.
        """
        self.start_url = "https://apapia-search.manmanbuy.com/index_json.ashx"
        self.decode_type = "utf-8"
        self.total_page = None
        self.writer = writer
        self.words = search_words
        self.search_words = quote(
            search_words, encoding=self.decode_type, errors="replace"
        )
        self.headers = {
            "Host": "apapia-search.manmanbuy.com",
            "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            "Proxy-Connection": "close",
            "Cookie": "ASP.NET_SessionId=5nm1vf35xt2eisuhe2k0rm33; jjkcpnew111=cp98576765_1063811521_2018/9/26",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_4 like Mac OS X) AppleWebKit/605.1.15 "
            "(KHTML, like Gecko) Mobile/15F79 mmbWebBrowse",
            "Content-Length": "523",
            "Accept-Encoding": "gzip",
            "Connection": "close",
        }
        # Form body template; the four "{}" slots are key, page, sign and t.
        self.data = (
            "jsoncallback=%3F&c_devmodel=iPhone%207&f1=&c_win=w_375_h_667&c_devid=C5707B0E-7A25-4BDF-BDF4-C64F8"
            "1711CAB&c_devtype=phone&f2=&key={}&iszy=&f3=&c_dp=2&f4=&c_devtoken=&c_channel=AppStore&f5=&"
            "smallclass=&f6=&methodName=getsearchkeylist&username=&c_operator=%E4%B8%AD%E5%9B%BD%E8%81%94%E"
            "9%80%9A&price2=&c_ostype=ios&c_engver=1.2.81&c_ctrl=w_search_form_f_search_product_content&page={}"
            "&sign={}&ppid=&price1=&c_contype=wifi&t={}&orderby=&c_osver=11.4&siteid=&c_appver=3.0.2"
        )
        # Parallel result columns, one entry per product found.
        self.title_list = []
        self.mall_list = []
        self.iszy_list = []
        self.price_list = []
        self.sales_list = []
        self.prourl_list = []
        self.skuid_list = []
        self.itemid_list = []
        self.crawl_time_list = []
        self.comment_list = []

    def turn_page_get_info(self):
        """Fetch result pages 1-9 and append every product to the column lists.

        Stops at the first page whose response body is empty and sleeps 2-3
        seconds between pages.

        Raises:
            KeyError: if a product record lacks one of the stored fields.
            requests.RequestException: on network failure or timeout.
        """
        for page in range(1, 10):
            sign = int(round(time.time() * 1000))
            stamp = sign + random.randint(1, 5)
            payload = self.data.format(self.search_words, page, sign, stamp)
            response = requests.post(
                self.start_url,
                data=payload,
                headers=self.headers,
                timeout=_REQUEST_TIMEOUT,
            )
            body = response.content.decode("utf-8")
            if not body:
                break
            # The body is cut into one "{...}" fragment per product by
            # dropping quotes/brackets and splitting on the closing brace.
            flattened = body.replace("'", "").replace("[", "").replace("]", "")
            for fragment in flattened.split("}"):
                fragment = fragment.strip(",").strip("\n") + "}"
                if "img" not in fragment:
                    continue
                try:
                    record = json.loads(fragment)
                except ValueError:
                    # Fragment is not a complete JSON object - skip it.
                    continue
                # Read every field first so a missing key cannot leave the
                # parallel lists with different lengths.
                row = (
                    record["title"],
                    record["mall"],
                    record["iszy"],
                    record["price"],
                    record["sales"],
                    record["prourl"],
                    record["skuid"],
                    record["itemid"],
                    record["comment"],
                    time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())),
                )
                columns = (
                    self.title_list,
                    self.mall_list,
                    self.iszy_list,
                    self.price_list,
                    self.sales_list,
                    self.prourl_list,
                    self.skuid_list,
                    self.itemid_list,
                    self.comment_list,
                    self.crawl_time_list,
                )
                for column, value in zip(columns, row):
                    column.append(value)
            time.sleep(random.uniform(2, 3))

    def download_file(self):
        """Write the collected products to a sheet of ``self.writer``."""
        dataframe = pd.DataFrame(
            {
                "商品标题": self.title_list,
                "平台": self.mall_list,
                "店铺": self.iszy_list,
                "价格": self.price_list,
                "销量": self.sales_list,
                "评论量": self.comment_list,
                "地址": self.prourl_list,
                "sku": self.skuid_list,
            },
            columns=["商品标题", "平台", "店铺", "价格", "销量", "评论量", "地址", "sku"],
        )
        to_c_sheet = _excel_sheet_name(
            self.words
            + "_"
            + "全网价格数据"
            + "_"
            + time.strftime("%m%d", time.localtime(time.time()))
        )
        # "encoding" was never used by to_excel and is gone in pandas 2.x.
        dataframe.to_excel(self.writer, index=False, sheet_name=to_c_sheet)
        print("数据写入完成,进程结束")


class HistoryPriceSearch:
    """Fetch the promotion history and price history of one product URL."""

    def __init__(self, search_url, writer):
        """Build the request headers/payloads for *search_url*.

        Args:
            search_url: product URL, sent as ``p_url`` / ``url`` / ``ref``.
            writer: Excel writer; ``download_price_info_data`` additionally
                calls ``writer.book.add_worksheet`` on it.
        """
        self.search_preferential_url = "https://apapia-history.manmanbuy.com/ChromeWidgetServices/WidgetServices.ashx"
        self.search_price_url = "https://ext.henzanapp.com/api.html"
        self.t = int(time.time() * 1000)
        # NOTE(review): "charset:utf-8" and the ":" in the Cookie value look
        # like typos for "=", but are left untouched because the server may
        # depend on the exact bytes - confirm before changing.
        self.preferential_headers = {
            "Host": "apapia-history.manmanbuy.com",
            "Content-Type": "application/x-www-form-urlencoded; charset:utf-8",
            "Proxy-Connection": "close",
            "Cookie": "jjkcpnew111:cp44979114_1063811528_2018/10/18",
            "User-Agent": random.choice(user_agent_m),
            "Content-Length": "548",
            "Accept-Encoding": "gzip",
            "Connection": "close",
        }
        self.price_headers = {
            "Host": "ext.henzanapp.com",
            "Proxy-Connection": "close",
            "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
            "Content-Length": "4550",
            "Accept-Encoding": "gzip",
            "Connection": "close",
            "Cookie": "mmzdd=482ef902b98b228c76a0f748e7deaa79",
        }
        self.preferential_data = {
            "c_devid": "C5707B0E-7A25-4BDF-BDF4-C64F81711CAB",
            "username": random.choice(phone_list),
            "ipage": "",
            "c_dp": "2",
            "c_engver": "1.2.83",
            "c_devtoken": "",
            "c_devmodel": random.choice(c_devmodel_list),
            "c_contype": "wifi",
            "c_win": "w_375_h_667",
            "t": self.t,
            "c_firstchannel": "AppStore_update",
            "p_url": search_url,
            "sign": "07E0CB3EF0B16E74",
            "c_ostype": "Android",
            "jsoncallback": "%3F",
            "c_ctrl": "w_search_trend0_f_content",
            "methodName": "getZhekou",
            "c_channel": "Google Play",
            "c_devtype": "Android",
            "c_operator": random.choice(ip_origin),
            "c_appver": "3.0.5",
            "c_firstquerendate": "1540799598929",
            "ipagesize": "6",
            "c_osver": "11.4",
        }
        self.price_data = {
            "tPrice": "",
            "toolbar_state": "open",
            "path1": "qihoo-mall-goodsinfo",
            "mid": "",
            "tSale": "",
            "fromTp": "0",
            # Opaque token copied verbatim; it is split into adjacent
            # literals only to keep the source lines manageable.
            "checkinfo": (
                "c9f8d7a8a8d7e899d7c9a9d709d9d71999d71909d7f8d9d7c999d7c8a9d709d9d7d899d7d809d7d8d9d78899d79909d7d8d9d7c909d71909d7d8d9d78819d7e909d7e8d9d7f8a9d7e999d709d9d7b909d7b9a9d7e9d9d7e819d7c909d7d8d9d78809d7b9a9d7d8d9d7d899d7f819d7e8d9d7e8980909d7b919d7e8d9d7f89809d7b819d7d8d9d7e809d7d819d7d8d9d7c899d7c999d7e8d9d7a8a8d799b8d7a8a8d7db5c1ccc7bdbfbcb9baba8a8d7b9a8d7a8a8d7888868e89898a8a8d799b8d7a8a8d7dbbb1cac8c7bdc2ca8a8d7b9a8d7a8a8d7f8a9d7a9a9d7c8d9d79819d79919d7e8d9d7a809d7a909d7f8d9d7a8a8d799b8d7a8a8d7db5c9b6c7bdbbb1cac8c7bdc2ca8a8d7b9a8d7a8a8d7f819d7c909d7d8d9d7d909d7d8a9d7e8d9d7b919d709a9d7c8d9d798a9d798a9d7d8d9d7a8a8d799b8d7a8a8d7accbcb9b7b1dacdbec1c4cdbcba8a8d7b9a8d7a8a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a897977b9b0b0bbb0dbb0b0bfbe9881d2c6adca91b19ababec985c8aa8a9cae9a8d7f8d819f89809b8e8a8e9a8d7c81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a897977b9b0bea2d0c9b0b0bdb2d981d2c6adba91b2a6cab68ca3a1ba8a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a8589b0bead82d9b0b0bdb8cbcba2c6adca9fa19f85c0dac0aeba8a9cae9a8d7f8d819f89809b8e8a8e9a8d7b81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a8589b0b8cb899bb0b0b1daa5cba3c6adca91bdc1adb8cfadc4ca8a9cae9a8d7f8d819f89809b8e8a8e9a8d7b81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68bb1c8c7b5cdbcc1c588897977b9b0bea7cfb9b0b0b193a2bba2c6a3aa9ac1a1a3cabc968c898a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7a8a8d799b8d7a8a8d7fb5c1c1ccc4cdc5ca8a8d7b9a8d7a8a8d78809d7c819d719d9d7c8a9d7a919d7f8d9d7b909d71909d7f8d9d79809d7b819d7d8d9d7a8a8d799b8d7a8a"
                "8d7fb9bcc8c7c0cbca8a8d7b9a8d7a8a8d719e898a8a8d799b8d7a8a8d75cdc6ccc5cbba8a8d7b9a8d7a8a8d78899d7b809d7e8d9d70919d7b9a9d7c8d9d7b9a9d78809d7d8d9d7d809d7e8a9d709d9d7a8a8d799b8d7a8a8d7dbbb1cac8c4c9bdbaca8a8d7b9a8d7a8a8d7889898a8a8d799b8d7a8a8d75cdc6cdb4c9bbca8a8d7b9a8d7a8a8d7f819d799a9d7d8d9d788a9d70909d709d9d7f819d7f819d7e8d9d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7a8a8d799b8d7a8a8d7db5c9b6ccc6c9b0cbbacdb5ca8a8d7b9a8d7a8a8d768f809c8f8dac988a8d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7e9a8d7d94a1bcaba0aa8a8d799b8d7a8a8d7db5c9b6c7bccbbdccb7cac8ca8a8d7b9a8d7a8a8d768f809c8f8dac9a8a8d799b8d7a8a8d7acdbab5cdc6c7b5cdbcc1ca8a8d7b9a8d7a8a8d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7e9a8d7d94a1bcaba0aa8a8d799b8d7a8a8d7cb6c9bacaba8a8d7b9a8d7a8a8d7f8d819f89809b8e8a8a8a8d799b8d7a8a8d7cb1cacdb4c4cdbbc7c9bab7c9bcca8a8d7b9a8d7a8a8d7c88888e898a8b8a898a8a8d799b8d7a8a8d7cb1ccc9bbba8a8d7b9a8d7a8a8d7e898b9f8d7b9f8d7b9f8d7e898a8a8d799b8d7a8a8d7cb1cbba8a8d7b9a8d7a8a8d7a8a8d799b8d7a8a8d7ac9bec7bdc3cbca8a8d7b9a8d78899b8d7a8a8d7db4c9bba7c6ca8a8d7b9a8d7a8a8d7886898a8a8d799b8d7a8a8d76c7c1cbcacdbeca8a8d7b9a8d7a8a8d7a8a8d799b8d7a8a8d7ccbb1cacccbc1ccba8a8d7b9a8d79899b8d7a8a8d7db4c9bbabc1ca8a8d7b9a8d7a8a8d7fb8c2c6888199c88b8c80d88b8c87bfb8c2c68bb1c8c7b5cdbcc1c588897977b9b0bea7cfb9b0b0b193a2bba2c6a3aa9ac1a1a3cabc968c898a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d7bc8ccccc0ca8a8d799b8d7a8a8d7bb1c8ca8a8d7b9a8d7a8a8d798a9d70999d7e8d9d7f809c8f8dac9d809d7b899d709d9d7f819d7d899d7d8d9d79919d788a9d7d8d9d7e8a9d7f819d7e8d9d7f819d7d899d7d8d9d7e8a9d7a9a9d7c8d9d7c899d709a9d7c8d9d7e819d7a899d709d9d7c999d7e919d7f8d9d719a9d799a9d7f8d9d79899d7c919d7e8d9d7d9a9d7b999d7e8d9d788a9d7e819d7e8d9d7d809d7b899d709d9d7e909d7c899d7d8d9d7d809d7b899d709d9d7b8a9d7d899d7d8d9d70909d71909d7f8d9d71999d7e919d719d9d7099888a8c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d"
                "7e919d719d9d7a8a8d799b8d7a8a8d7db5c9b6ca8a8d7b9a8d7c8b89899b8d7a8a8d7dbbb1cac8ca8a8d7b9a8d79809c8c8c899b8d7a8a8d7cb1a8c7c0cbca8a8d7b9a8d7a8a8d7a999d7b909d7f8d9d71999d7c899d7d8d9d7a8a8d799b8d7a8a8d7db5c9b6a8c7c0cbca8a8d7a9f8"
            ),
            "prevpop": "",
            "bfrom": "normal",
            "url": search_url,
            "path2": "goodspricecmp",
            "tplmd5": "7330361958732444829",
            "hisOpn": "0",
            "isGulike": "0",
            "cv": "4.2.1.0",
            "ref": search_url,
            "v": "v5",
            "pop": "1",
        }
        self.writer = writer
        # Parallel columns of the promotion history, one entry per record.
        self.spname_list = []
        self.spprice_list = []
        self.dt_list = []
        self.infoid_list = []
        self.infotype_list = []
        self.sppic_list = []
        # Maps each "dt" value of the price history to its "pr" value.
        self.history_price_dict = {}
        self.search_price_start_date = None
        self.search_price_end_date = None

    def _post_preferential_page(self, page):
        """POST the promotion-history request for *page* and return the response."""
        self.preferential_data["ipage"] = page
        return requests.post(
            url=self.search_preferential_url,
            headers=self.preferential_headers,
            data=self.preferential_data,
            verify=False,
            timeout=_REQUEST_TIMEOUT,
        )

    @staticmethod
    def _format_record_date(raw, date_format):
        """Format the first run of digits in *raw* (epoch milliseconds).

        Returns ``None`` when *raw* is empty or holds no digits.
        """
        if not raw:
            return None
        digits = re.search(r"\d+", str(raw))
        if digits is None:
            return None
        return time.strftime(
            date_format, time.localtime(int(digits.group()) / 1000)
        )

    def _store_preferential_records(self, records, date_format):
        """Append every record of one "zklist" page to the column lists."""
        for record in records or []:
            spname = record.get("spname")
            # A missing "spprice" is stored as an empty string.
            spprice = _OFFER_BREAKS.sub("", record.get("spprice") or "")
            dt = self._format_record_date(record.get("dt"), date_format)
            infoid = record.get("infoid")
            infotype = record.get("infotype")
            sppic = record.get("sppic")
            self.spname_list.append(spname)
            self.spprice_list.append(spprice)
            self.dt_list.append(dt)
            self.infoid_list.append(infoid)
            self.infotype_list.append(infotype)
            self.sppic_list.append(sppic)
            print(spname, spprice, dt, infoid, infotype, sppic)

    def _fetch_fixed_preferential_pages(self, pages):
        """Fetch pages 1..*pages*; dates are stored as full timestamps."""
        for page in range(1, pages + 1):
            response = self._post_preferential_page(page)
            text = response.content.decode("utf-8")
            print(text)
            # The body is only parsed for HTTP 200, as before.
            payload = json.loads(text) if response.status_code == 200 else None
            if payload is not None and payload.get("ok") == 1:
                self._store_preferential_records(
                    payload.get("zklist"), "%Y-%m-%d %H:%M:%S"
                )
            else:
                print("凉了,被ban了~ 状态码:%s,自己看的办吧" % response.status_code)

    def _fetch_all_preferential_pages(self):
        """Fetch pages until the API returns no records; dates stored as month-day."""
        page = 1
        response = self._post_preferential_page(page)
        payload = json.loads(response.content.decode("utf-8"))
        print(self.search_preferential_url)
        print(self.preferential_headers)
        print(self.preferential_data)
        print(payload)
        if response.status_code != 200 or payload.get("ok") != 1:
            print("该商品无历史优惠信息或User-Agent错误或触发反爬,请重试")
            return
        while payload.get("ok") == 1:
            self._store_preferential_records(payload.get("zklist"), "%m-%d")
            page += 1
            response = self._post_preferential_page(page)
            print(response.status_code)
            print(self.preferential_data["ipage"])
            payload = json.loads(response.content.decode("utf-8"))
            if not payload.get("zklist"):
                break
            print(payload)
            time.sleep(random.uniform(0.5, 1.0))

    def parser_history_preferential_info(self, pages=None):
        """Collect the promotion history into the ``*_list`` attributes.

        Args:
            pages: number of pages to fetch; ``None`` keeps paging until the
                API stops returning records. Any non-int value only prints a
                failure message.

        Raises:
            ValueError: if a response body is not valid JSON.
            requests.RequestException: on network failure or timeout.
        """
        if pages is None:
            self._fetch_all_preferential_pages()
        elif isinstance(pages, int):
            self._fetch_fixed_preferential_pages(pages)
        else:
            print("数据抓取失败,洗洗睡吧")

    def parser_history_price_info(self):
        """Fetch the price history and fill ``history_price_dict``.

        Raises:
            ValueError: if a 200 response body is not valid JSON.
            KeyError: if "pcinfo" lacks "bd", "ed" or "info".
            requests.RequestException: on network failure or timeout.
        """
        response = requests.post(
            url=self.search_price_url,
            data=self.price_data,
            headers=self.price_headers,
            verify=False,
            timeout=_REQUEST_TIMEOUT,
        )
        text = response.content.decode("utf-8")
        print(text)
        payload = json.loads(text) if response.status_code == 200 else None
        if not payload or not payload.get("pcinfo"):
            print("数据为空,或者被ban~~")
            return
        print(payload)
        price_info = payload["pcinfo"]
        self.search_price_start_date = price_info["bd"]
        self.search_price_end_date = price_info["ed"]
        for point in price_info["info"]:
            self.history_price_dict[point["dt"]] = point["pr"]

    def _plot_preferential_history(self):
        """Save and show a chart of the promotion text against its date."""
        # Records without a date cannot be placed on the x axis.
        points = [
            (day, offer)
            for day, offer in zip(self.dt_list, self.spprice_list)
            if day is not None
        ]
        if not points:
            print('数据为空,该商品未被收录')
            return
        dates = [day for day, _ in points]
        offers = [offer for _, offer in points]
        # NOTE(review): "spprice" values are stored as text, so the y axis
        # is presumably categorical - confirm this is the intended chart.
        figure = pyplot.figure()
        pyplot.plot(dates, offers)
        pyplot.xlabel('日期')
        pyplot.ylabel('价格')
        pyplot.title(self.spname_list[0])
        # Fill arguments: x values, y values, fill baseline, fill colour.
        pyplot.fill_between(dates, offers, 10, color='white')
        # Show the x axis in reverse order.
        pyplot.gca().invert_xaxis()
        now_time1 = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
        pyplot.savefig("./" + now_time1 + ".jpg")
        print("已经保存")
        pyplot.show()
        pyplot.close(figure)

    def download_preferential_info_data(self):
        """Write the promotion history to the workbook, then plot it."""
        dataframe = pd.DataFrame(
            {
                "名称": self.spname_list,
                "优惠信息": self.spprice_list,
                "日期": self.dt_list,
                "infoid": self.infoid_list,
                "infotype": self.infotype_list,
                "商品主图": self.sppic_list,
            },
            columns=["名称", "优惠信息", "日期", "infoid", "infotype", "商品主图"],
        )
        to_c_sheet = (
            "商品历史查询数据"
            + "_"
            + time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
        )
        # "encoding" was never used by to_excel and is gone in pandas 2.x.
        dataframe.to_excel(self.writer, index=False, sheet_name=to_c_sheet)
        print("数据写入完成,进程结束")
        self._plot_preferential_history()

    def _unique_sheet_name(self, base):
        """Return *base*, suffixed with a counter if the workbook has it already."""
        workbook = self.writer.book
        name = _excel_sheet_name(base)
        counter = 1
        while workbook.get_worksheet_by_name(name) is not None:
            counter += 1
            suffix = "_%d" % counter
            name = _excel_sheet_name(base)[: _MAX_SHEET_NAME - len(suffix)] + suffix
        return name

    def download_price_info_data(self):
        """Plot the price history and embed the chart in a new worksheet.

        The lowest price is annotated with its date and its value. Uses
        ``add_worksheet`` / ``insert_image`` on ``self.writer.book``.
        """
        mpl.rcParams["font.sans-serif"] = ["SimHei"]
        mpl.rcParams["axes.unicode_minus"] = False
        # Set before the figure is created so it applies to this figure.
        plt.rcParams["figure.figsize"] = (8.0, 4.0)
        print(self.history_price_dict)
        if not self.history_price_dict:
            print('数据为空,该商品未被收录')
            return

        min_date = min(self.history_price_dict, key=self.history_price_dict.get)
        min_price = self.history_price_dict[min_date]

        image = io.BytesIO()
        fig = plt.figure()
        try:
            plt.plot(
                list(self.history_price_dict.keys()),
                list(self.history_price_dict.values()),
            )
            plt.text(
                min_date, min_price, min_date,
                ha="right", va="bottom", fontsize=10,
            )
            plt.text(
                min_date, min_price, min_price,
                ha="left", va="bottom", fontsize=10,
            )
            plt.title("历史价格分布")
            plt.xlabel("日期")
            plt.ylabel("金额")
            fig.savefig(image, format="png")
        finally:
            plt.close(fig)
        image.seek(0)

        fig_title = (
            time.strftime("%m%d", time.localtime(time.time())) + "历史价格查询"
        )
        sheet = self.writer.book.add_worksheet(self._unique_sheet_name(fig_title))
        # The picture is handed over in memory: a shared "foo.png" would be
        # overwritten by the next product before the workbook is written.
        sheet.insert_image(0, 0, "foo.png", {"image_data": image})


class Application:
    """Main window: a URL input box, a search button and a log area."""

    def __init__(self):
        """Create the window and lay out its widgets."""
        self.window = Tk()
        self.text = Text(self.window)
        # Window title, size and position.
        self.window.title("阳光价格")
        self.window.geometry("290x430+500+280")
        self.window.minsize(290, 380)
        # Input box for the product URLs.
        # NOTE(review): the original also called self.entry.bind(""), which
        # binds nothing; it was dropped - confirm no key binding was meant.
        self.entry = Text(self.window)
        self.entry.place(x=10, y=10, width=200, height=165)
        # Button that starts the history lookup.
        self.submit_btn5 = Button(self.window, text=u"历史溯源", command=self.submit_5)
        self.submit_btn5.place(x=220, y=150, width=60, height=25)
        # Caption above the log area.
        self.title_label = Label(self.window, text=u"运行日志:")
        self.title_label.place(x=10, y=180)
        # Log area.
        self.result_text = Text(self.window, background="#ccc")
        self.result_text.place(x=10, y=205, width=270, height=205)
        # Footer label (rebinds self.title_label, as before).
        self.title_label = Label(self.window, text=u"2019_a7_price ")
        self.title_label.place(x=60, y=410)
        self.file_path = None
        self.writer = None

    def _show_error(self, message):
        """Replace the log area content with *message*."""
        self.result_text.delete("1.0", END)
        self.result_text.insert(END, message)

    def submit_5(self):
        """Run the history lookup for every comma-separated entry.

        Results go to ``./<MM-DD>-历史溯源.xlsx``; progress and errors are
        written to the log area.
        """
        self.result_text.delete("1.0", END)
        # Timestamps are taken per run so the log shows the actual time.
        run_date = time.strftime("%m-%d", time.localtime(time.time()))
        run_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
        log_prefix = "log: " + run_time + " "
        self.file_path = "./" + run_date + "-" + "历史溯源" + ".xlsx"
        self.writer = pd.ExcelWriter(self.file_path)
        try:
            raw_input = self.entry.get("1.0", END).strip().replace("\n", " ")
            # Accept the fullwidth comma as a separator as well.
            key_title = [
                part.strip()
                for part in raw_input.replace("\uff0c", ",").split(",")
            ]
            for key in key_title:
                if not key:
                    continue
                self.result_text.delete("1.0", END)
                search_history = HistoryPriceSearch(key, self.writer)
                # Fetch both histories, then export them.
                search_history.parser_history_preferential_info()
                search_history.parser_history_price_info()
                search_history.download_preferential_info_data()
                search_history.download_price_info_data()
                log_1 = (
                    "历史价格搜索中"
                    + "\n"
                    + "开始下载数据中…………"
                    + "\n"
                    + "下载数据请在跟程序处于相同位置查找,文件名为【当前时间+历史趋势】"
                )
                self.result_text.insert(END, log_1)
        except ValueError as e:
            self._show_error(
                log_prefix + "查询异常 " + str(e) + "\n" + "请检查键入格式: 国产红富士"
            )
        except KeyError as e:
            self._show_error(
                log_prefix + "查询异常 " + str(e) + "\n" + "请检查键入格式: 烟台红富士"
            )
        except error.HTTPError as e:
            self._show_error(log_prefix + "URL异常 " + str(e) + "更换关键词重试")
        except (error.URLError, requests.RequestException) as e:
            # The lookups use requests, whose failures are not urllib errors.
            self._show_error(
                log_prefix
                + "请求异常 "
                + str(e)
                + "\n"
                + "查询太频繁啦~请稍后重新或换IP重试"
            )
        finally:
            # close() writes the file; ExcelWriter.save() is gone in pandas 2.x.
            self.writer.close()
            log = "\n" + log_prefix + "数据下载结束,请在程序所处位置查收Excel"
            self.result_text.insert(END, log)

    def run(self):
        """Enter the Tk main loop."""
        self.window.mainloop()


if __name__ == "__main__":
    app = Application()
    app.run()