#!/usr/bin/python
# manmanmai.py
import datetime
import json
import random
import re
import socket
import time
from time import ctime
from tkinter import *
from urllib import error
from urllib.parse import *

import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import requests
import xlrd
from lxml import etree
from matplotlib import pyplot
from requests.packages import urllib3
  19. mpl.rcParams['font.sans-serif'] = ['SimHei']
  20. plt.rcParams['axes.unicode_minus']=False
  21. now_date = time.strftime("%m-%d", time.localtime(time.time()))
  22. now_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
# Pool of 11-digit numbers; HistoryPriceSearch picks one at random with
# random.choice() and sends it as the "username" form field.
# NOTE(review): 18405311888 and 13791080000 each appear twice in a row, which
# doubles their selection probability -- confirm whether that is intentional.
# NOTE(review): these look like real subscriber numbers; confirm that shipping
# them in source is acceptable.
phone_list = [
    18303517744,
    13613416611,
    15219466201,
    15036222256,
    18438888133,
    18876941131,
    18876622089,
    18889262767,
    13715150077,
    13717033838,
    18351078990,
    13467719111,
    15997693333,
    13730600607,
    13699051071,
    13849038741,
    18352936688,
    13880888292,
    18822441999,
    15777770130,
    15777772845,
    13727693111,
    13632577333,
    15976856868,
    18222167181,
    13512960022,
    13530102266,
    18300666187,
    15824817777,
    18349333171,
    13838555227,
    15890005577,
    15890008887,
    13838200888,
    13924853168,
    18822488887,
    13972911999,
    13428334566,
    13566102222,
    13732097555,
    15233333323,
    13682987828,
    13923918859,
    18859981392,
    15818692899,
    15012563066,
    18222522000,
    13828716737,
    13692298935,
    13706053195,
    13887441413,
    18322040999,
    13911336673,
    13801391870,
    13433196988,
    13702485588,
    13924578588,
    13924852345,
    18823143456,
    13637666699,
    13755630022,
    13920593529,
    18702888838,
    15198120000,
    13908057178,
    18844227188,
    18750468844,
    13505952075,
    15768179999,
    18356194521,
    13696754521,
    13788829706,
    15208275054,
    18777770214,
    13551275898,
    18280151115,
    13677777254,
    18769721000,
    18897777726,
    15814226133,
    15918128980,
    15918129083,
    15918129282,
    15918129090,
    18300077779,
    15022277000,
    15875766666,
    18428088892,
    15703382298,
    15131712232,
    15732922520,
    13874677777,
    18255555551,
    18393897777,
    15180222225,
    13505740467,
    13780390000,
    18859567892,
    15277775445,
    13662688881,
    18213777222,
    13761746746,
    15000505062,
    14761188884,
    13809070207,
    13818357698,
    13873179698,
    18817871288,
    15112998888,
    15703361816,
    15290911121,
    15107555885,
    18396217171,
    13825876548,
    13619870320,
    13778891234,
    13548291222,
    18282200022,
    18402898980,
    18328025788,
    15228886138,
    17878781118,
    15123888444,
    15837182792,
    15838125087,
    18703896718,
    18736011629,
    18839781750,
    18837170569,
    15777776964,
    18761755000,
    18751373210,
    15962711155,
    15962792088,
    18761755088,
    13656291113,
    18862779378,
    15190971978,
    13777888585,
    15068936333,
    15204025988,
    13654059991,
    15775677700,
    13684218789,
    15281898765,
    13616202666,
    18751126999,
    13812920788,
    13809055222,
    13962350777,
    18353240966,
    18853296464,
    17839929705,
    18838967382,
    18749418806,
    15093239328,
    15188349522,
    18236956924,
    18348405579,
    15093334268,
    13505647555,
    15220525678,
    15020050513,
    15020030417,
    15267701717,
    15088931331,
    15906878938,
    13646514938,
    13706636314,
    18867793298,
    13739742666,
    15731102345,
    13859652222,
    18232102678,
    13601261337,
    15231099666,
    18337728521,
    15203802168,
    18331758666,
    18736599499,
    13930109099,
    15738888289,
    15738888538,
    15738888576,
    15738888697,
    15738888963,
    13797904444,
    15243191111,
    18405311888,
    18405311888,
    13791080000,
    13791080000,
    13908376207,
    13908335110,
    13908374332,
    18702397333,
    18702379555,
    15922584000,
    13783666664,
    18335392777,
    15217430000,
    15992225679,
    13585510688,
    15818991889,
    17806722226,
    13536565653,
    18738651999,
    18388555511,
    15825022222,
    15882234084,
    13776268888,
    15018310888,
    15113133313,
    13701097729,
    15726835666,
    15058299222,
    15118444415,
    18820300009,
    18825700007,
    13829111788,
    13825766788,
    13480423333,
    13711888886,
    13532923333,
    13825737888,
    13537328888,
    13686678888,
    13538345678,
    15016967488,
    15917735557,
    15217104555,
    15917669777,
    15017888444,
    15931390000,
    15267180777,
    15068793333,
    18335156789,
    13835175177,
    18202468383,
    13926787833,
    15815100303,
    15892056631,
    13599305858,
    13616979898,
    13511100900,
    13786766667,
    13686868538,
    13632878899,
    13883038222,
    18838200011,
    13911672661,
    13521935222,
    13802289678,
    13728888822,
    13801507158,
    15093939323,
    15160299539,
    18831119031,
    13974259999,
    15807539093,
    15023669066,
    13785811099,
    18716433334,
    18834845999,
    13507170130,
    13507115301,
    13995588392,
    13657247111,
    17839999122,
    17839993883,
    13807196657,
    13807197319,
    13807198517,
    13807153256,
    13807190231,
    13908631578,
    13908863082,
    18822858108,
    13510308789,
    13510102070,
    18419521214,
    13877853333,
    18351203222,
    18261197555,
    15815285757,
    15261115522,
    13903173981,
    15132755552,
    15019677099,
    18862192899,
    13678863811,
    13983652278,
    13856977511,
    13589966223,
    18337623210,
    13979673333,
    15007927777,
    18837744446,
    13950654999,
    13861186488,
    18870000005,
    15158172221,
    15824107733,
    13790746666,
    15802648889,
    13808322226,
    15823513000,
    18883190766,
    18883298278,
    18375801115,
    18375702233,
    15023871222,
    13779033333,
    18872855555,
    18270003333,
    18886889988,
    15777777783,
    18881111115,
    18882888802,
    14799448888,
    13688819128,
    13688819693,
]
  347. user_agent_m = [
  348. 'Mozilla/5.0 (Linux; Android 8.1; PAR-AL00 Build/HUAWEIPAR-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044304 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/WIFI Language/zh_CN Process/tools',
  349. 'Mozilla/5.0 (Linux; Android 8.1; EML-AL00 Build/HUAWEIEML-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.143 Crosswalk/24.53.595.0 XWEB/358 MMWEBSDK/23 Mobile Safari/537.36 MicroMessenger/6.7.2.1340(0x2607023A) NetType/4G Language/zh_CN',
  350. 'Mozilla/5.0 (Linux; Android 8.0; MHA-AL00 Build/HUAWEIMHA-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044304 Mobile Safari/537.36 MicroMessenger/6.7.3.1360(0x26070333) NetType/4G Language/zh_CN Process/tools',
  351. 'Mozilla/5.0 (Linux; Android 5.1.1; vivo X6S A Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/6.2 TBS/044207 Mobile Safari/537.36 MicroMessenger/6.7.3.1340(0x26070332) NetType/4G Language/zh_CN Process/tools'
  352. ]
  353. ip_origin = ["中国联通", "中国移动", "中国电信"]
  354. c_devmodel_list = ['Mate10', 'P8青春', '荣耀7i', '畅玩7A', '荣耀8XMax',
  355. 'Mate10Pro', '荣耀10', 'M3青春', '荣耀8青春']
  356. class CrawlCompareWeb:
  357. """
  358. 比价网反爬严格,考虑换ip突破,此条有待考证
  359. 另一个查询历史价格接口:http://tool.manmanbuy.com/history.aspx?DA=1&action=gethistory&url=http%3a%2
  360. f%2fitem.tmall.com%2fitem.htm%3fid%3d532034800285&bjid=&spbh=&cxid=&zkid=&w=350&token=yva7088d209cdc
  361. bbbf30e6af9cf24005ce2dx
  362. 破解token就可以
  363. """
  364. def __init__(self, search_words, writer):
  365. self.start_url = "https://apapia-search.manmanbuy.com/index_json.ashx"
  366. self.decode_type = "utf-8"
  367. self.total_page = None
  368. self.writer = writer
  369. self.words = search_words
  370. self.search_words = quote(
  371. search_words, encoding=self.decode_type, errors="replace"
  372. )
  373. self.headers = {
  374. "Host": "apapia-search.manmanbuy.com",
  375. "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
  376. "Proxy-Connection": "close",
  377. "Cookie": "ASP.NET_SessionId=5nm1vf35xt2eisuhe2k0rm33; jjkcpnew111=cp98576765_1063811521_2018/9/26",
  378. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_4 like Mac OS X) AppleWebKit/605.1.15 "
  379. "(KHTML, like Gecko) Mobile/15F79 mmbWebBrowse",
  380. "Content-Length": "523",
  381. "Accept-Encoding": "gzip",
  382. "Connection": "close",
  383. }
  384. self.data = "jsoncallback=%3F&c_devmodel=iPhone%207&f1=&c_win=w_375_h_667&c_devid=C5707B0E-7A25-4BDF-BDF4-C64F8" "1711CAB&c_devtype=phone&f2=&key={}&iszy=&f3=&c_dp=2&f4=&c_devtoken=&c_channel=AppStore&f5=&" "smallclass=&f6=&methodName=getsearchkeylist&username=&c_operator=%E4%B8%AD%E5%9B%BD%E8%81%94%E" "9%80%9A&price2=&c_ostype=ios&c_engver=1.2.81&c_ctrl=w_search_form_f_search_product_content&page={}" "&sign={}&ppid=&price1=&c_contype=wifi&t={}&orderby=&c_osver=11.4&siteid=&c_appver=3.0.2"
  385. self.title_list = []
  386. self.mall_list = []
  387. self.iszy_list = []
  388. self.price_list = []
  389. self.sales_list = []
  390. self.prourl_list = []
  391. self.skuid_list = []
  392. self.itemid_list = []
  393. self.crawl_time_list = []
  394. self.comment_list = []
  395. def turn_page_get_info(self):
  396. for i in range(1, 10):
  397. t_1 = int(round(time.time() * 1000))
  398. t_2 = t_1 + random.randint(1, 5)
  399. data = self.data.format(self.search_words, i, t_1, t_2)
  400. response = requests.post(self.start_url, data=data, headers=self.headers)
  401. result_data = response.content.decode("utf-8")
  402. if result_data:
  403. datas = result_data.replace("'", "").replace("[", "").replace("]", "")
  404. for j in datas.split("}"):
  405. j = j.strip(",").strip("\n") + "}"
  406. if "img" in j and j:
  407. data_json = json.loads(j)
  408. id = data_json["id"]
  409. iszy = data_json["iszy"]
  410. siteid = data_json["siteid"]
  411. img = data_json["img"]
  412. image = data_json["image"]
  413. title = data_json["title"]
  414. price = data_json["price"]
  415. mall = data_json["mall"]
  416. sales = data_json["sales"]
  417. gourl = data_json["gourl"]
  418. prourl = data_json["prourl"]
  419. skuid = data_json["skuid"]
  420. itemid = data_json["itemid"]
  421. comment = data_json["comment"]
  422. crawl_time = time.strftime(
  423. "%Y%m%d%H%M%S", time.localtime(time.time())
  424. )
  425. self.title_list.append(title)
  426. self.mall_list.append(mall)
  427. self.iszy_list.append(iszy)
  428. self.price_list.append(price)
  429. self.sales_list.append(sales)
  430. self.prourl_list.append(prourl)
  431. self.skuid_list.append(skuid)
  432. self.itemid_list.append(itemid)
  433. self.comment_list.append(comment)
  434. self.crawl_time_list.append(crawl_time)
  435. time.sleep(random.uniform(2, 3))
  436. else:
  437. break
  438. def download_file(self):
  439. dataframe = pd.DataFrame(
  440. columns=["商品标题", "平台", "店铺", "价格", "销量", "评论量", "地址", "sku"]
  441. )
  442. dataframe["商品标题"] = self.title_list
  443. dataframe["平台"] = self.mall_list
  444. dataframe["店铺"] = self.iszy_list
  445. dataframe["价格"] = self.price_list
  446. dataframe["销量"] = self.sales_list
  447. dataframe["地址"] = self.prourl_list
  448. dataframe["sku"] = self.skuid_list
  449. dataframe["评论量"] = self.comment_list
  450. to_c_sheet = (
  451. self.words
  452. + "_"
  453. + "全网价格数据"
  454. + "_"
  455. + time.strftime("%m%d", time.localtime(time.time()))
  456. )
  457. dataframe.to_excel(
  458. self.writer, index=False, encoding="utf-8", sheet_name=to_c_sheet
  459. )
  460. print("数据写入完成,进程结束")
  461. class HistoryPriceSearch:
  462. def __init__(self, search_url, writer):
  463. self.search_preferential_url = "https://apapia-history.manmanbuy.com/ChromeWidgetServices/WidgetServices.ashx"
  464. self.search_price_url = "https://ext.henzanapp.com/api.html"
  465. self.t = int(time.time() * 1000)
  466. self.preferential_headers = {
  467. "Host": "apapia-history.manmanbuy.com",
  468. "Content-Type": "application/x-www-form-urlencoded; charset:utf-8",
  469. "Proxy-Connection": "close",
  470. "Cookie": "jjkcpnew111:cp44979114_1063811528_2018/10/18",
  471. "User-Agent": random.choice(user_agent_m),
  472. "Content-Length": "548",
  473. "Accept-Encoding": "gzip",
  474. "Connection": "close",
  475. }
  476. self.price_headers = {
  477. "Host": "ext.henzanapp.com",
  478. "Proxy-Connection": "close",
  479. "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
  480. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36",
  481. "Content-Length": "4550",
  482. "Accept-Encoding": "gzip",
  483. "Connection": "close",
  484. "Cookie": "mmzdd=482ef902b98b228c76a0f748e7deaa79",
  485. }
  486. self.preferential_data = {
  487. "c_devid": "C5707B0E-7A25-4BDF-BDF4-C64F81711CAB",
  488. "username": random.choice(phone_list),
  489. "ipage": "",
  490. "c_dp": "2",
  491. "c_engver": "1.2.83",
  492. "c_devtoken": "",
  493. "c_devmodel": random.choice(c_devmodel_list),
  494. "c_contype": "wifi",
  495. "c_win": "w_375_h_667",
  496. "t": self.t,
  497. "c_firstchannel": "AppStore_update",
  498. "p_url": search_url,
  499. "sign": "07E0CB3EF0B16E74",
  500. "c_ostype": "Android",
  501. "jsoncallback": "%3F",
  502. "c_ctrl": "w_search_trend0_f_content",
  503. "methodName": "getZhekou",
  504. "c_channel": "Google Play",
  505. "c_devtype": "Android",
  506. "c_operator": random.choice(ip_origin),
  507. "c_appver": "3.0.5",
  508. "c_firstquerendate": "1540799598929",
  509. "ipagesize": "6",
  510. "c_osver": "11.4",
  511. }
  512. self.price_data = {
  513. "tPrice": "",
  514. "toolbar_state": "open",
  515. "path1": "qihoo-mall-goodsinfo",
  516. "mid": "",
  517. "tSale": "",
  518. "fromTp": "0",
  519. "checkinfo": "c9f8d7a8a8d7e899d7c9a9d709d9d71999d71909d7f8d9d7c999d7c8a9d709d9d7d899d7d809d7d8d9d78899d79909d7d8d9d7c909d71909d7d8d9d78819d7e909d7e8d9d7f8a9d7e999d709d9d7b909d7b9a9d7e9d9d7e819d7c909d7d8d9d78809d7b9a9d7d8d9d7d899d7f819d7e8d9d7e8980909d7b919d7e8d9d7f89809d7b819d7d8d9d7e809d7d819d7d8d9d7c899d7c999d7e8d9d7a8a8d799b8d7a8a8d7db5c1ccc7bdbfbcb9baba8a8d7b9a8d7a8a8d7888868e89898a8a8d799b8d7a8a8d7dbbb1cac8c7bdc2ca8a8d7b9a8d7a8a8d7f8a9d7a9a9d7c8d9d79819d79919d7e8d9d7a809d7a909d7f8d9d7a8a8d799b8d7a8a8d7db5c9b6c7bdbbb1cac8c7bdc2ca8a8d7b9a8d7a8a8d7f819d7c909d7d8d9d7d909d7d8a9d7e8d9d7b919d709a9d7c8d9d798a9d798a9d7d8d9d7a8a8d799b8d7a8a8d7accbcb9b7b1dacdbec1c4cdbcba8a8d7b9a8d7a8a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a897977b9b0b0bbb0dbb0b0bfbe9881d2c6adca91b19ababec985c8aa8a9cae9a8d7f8d819f89809b8e8a8e9a8d7c81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a897977b9b0bea2d0c9b0b0bdb2d981d2c6adba91b2a6cab68ca3a1ba8a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a8589b0bead82d9b0b0bdb8cbcba2c6adca9fa19f85c0dac0aeba8a9cae9a8d7f8d819f89809b8e8a8e9a8d7b81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68f8d819f89809b8e8a8589b0b8cb899bb0b0b1daa5cba3c6adca91bdc1adb8cfadc4ca8a9cae9a8d7f8d819f89809b8e8a8e9a8d7b81ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7fb8c2c6888199c8888090d8888097bfb8c2c68bb1c8c7b5cdbcc1c588897977b9b0bea7cfb9b0b0b193a2bba2c6a3aa9ac1a1a3cabc968c898a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d78ccccc0c88a8d7a8a8d799b8d7a8a8d7fb5c1c1ccc4cdc5ca8a8d7b9a8d7a8a8d78809d7c819d719d9d7c8a9d7a919d7f8d9d7b909d71909d7f8d9d79809d7b819d7d
8d9d7a8a8d799b8d7a8a8d7fb9bcc8c7c0cbca8a8d7b9a8d7a8a8d719e898a8a8d799b8d7a8a8d75cdc6ccc5cbba8a8d7b9a8d7a8a8d78899d7b809d7e8d9d70919d7b9a9d7c8d9d7b9a9d78809d7d8d9d7d809d7e8a9d709d9d7a8a8d799b8d7a8a8d7dbbb1cac8c4c9bdbaca8a8d7b9a8d7a8a8d7889898a8a8d799b8d7a8a8d75cdc6cdb4c9bbca8a8d7b9a8d7a8a8d7f819d799a9d7d8d9d788a9d70909d709d9d7f819d7f819d7e8d9d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7a8a8d799b8d7a8a8d7db5c9b6ccc6c9b0cbbacdb5ca8a8d7b9a8d7a8a8d768f809c8f8dac988a8d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7e9a8d7d94a1bcaba0aa8a8d799b8d7a8a8d7db5c9b6c7bccbbdccb7cac8ca8a8d7b9a8d7a8a8d768f809c8f8dac9a8a8d799b8d7a8a8d7acdbab5cdc6c7b5cdbcc1ca8a8d7b9a8d7a8a8d7c909d70909d709d9d7b899d79899d709d9d7c9a9d7b809d719d9d71999d7e919d719d9d7e9a8d7d94a1bcaba0aa8a8d799b8d7a8a8d7cb6c9bacaba8a8d7b9a8d7a8a8d7f8d819f89809b8e8a8a8a8d799b8d7a8a8d7cb1cacdb4c4cdbbc7c9bab7c9bcca8a8d7b9a8d7a8a8d7c88888e898a8b8a898a8a8d799b8d7a8a8d7cb1ccc9bbba8a8d7b9a8d7a8a8d7e898b9f8d7b9f8d7b9f8d7e898a8a8d799b8d7a8a8d7cb1cbba8a8d7b9a8d7a8a8d7a8a8d799b8d7a8a8d7ac9bec7bdc3cbca8a8d7b9a8d78899b8d7a8a8d7db4c9bba7c6ca8a8d7b9a8d7a8a8d7886898a8a8d799b8d7a8a8d76c7c1cbcacdbeca8a8d7b9a8d7a8a8d7a8a8d799b8d7a8a8d7ccbb1cacccbc1ccba8a8d7b9a8d79899b8d7a8a8d7db4c9bbabc1ca8a8d7b9a8d7a8a8d7fb8c2c6888199c88b8c80d88b8c87bfb8c2c68bb1c8c7b5cdbcc1c588897977b9b0bea7cfb9b0b0b193a2bba2c6a3aa9ac1a1a3cabc968c898a9cae9a8d7f8d819f89809b8e8a8e9a8d7981ce9a8d79baccc0ddbfb5c1ce9a8d75c7cbb686ccbbb1c4c9b68fb5c1ce9a8d7e9a8d799b8d7bc8ccccc0ca8a8d799b8d7a8a8d7bb1c8ca8a8d7b9a8d7a8a8d798a9d70999d7e8d9d7f809c8f8dac9d809d7b899d709d9d7f819d7d899d7d8d9d79919d788a9d7d8d9d7e8a9d7f819d7e8d9d7f819d7d899d7d8d9d7e8a9d7a9a9d7c8d9d7c899d709a9d7c8d9d7e819d7a899d709d9d7c999d7e919d7f8d9d719a9d799a9d7f8d9d79899d7c919d7e8d9d7d9a9d7b999d7e8d9d788a9d7e819d7e8d9d7d809d7b899d709d9d7e909d7c899d7d8d9d7d809d7b899d709d9d7b8a9d7d899d7d8d9d70909d71909d7f8d9d71999d7e919d719d9d7099888a8c909d70909d709d9d7b899d79899d709d9d7c9a
9d7b809d719d9d71999d7e919d719d9d7a8a8d799b8d7a8a8d7db5c9b6ca8a8d7b9a8d7c8b89899b8d7a8a8d7dbbb1cac8ca8a8d7b9a8d79809c8c8c899b8d7a8a8d7cb1a8c7c0cbca8a8d7b9a8d7a8a8d7a999d7b909d7f8d9d71999d7c899d7d8d9d7a8a8d799b8d7a8a8d7db5c9b6a8c7c0cbca8a8d7a9f8",
  520. "prevpop": "",
  521. "bfrom": "normal",
  522. "url": search_url,
  523. "path2": "goodspricecmp",
  524. "tplmd5": "7330361958732444829",
  525. "hisOpn": "0",
  526. "isGulike": "0",
  527. "cv": "4.2.1.0",
  528. "ref": search_url,
  529. "v": "v5",
  530. "pop": "1",
  531. }
  532. self.writer = writer
  533. self.spname_list = []
  534. self.spprice_list = []
  535. self.dt_list = []
  536. self.infoid_list = []
  537. self.infotype_list = []
  538. self.sppic_list = []
  539. self.history_price_dict = {}
  540. self.search_price_start_date = None
  541. self.search_price_end_date = None
  542. def parser_history_preferential_info(self, pages=None):
  543. if pages is not None and isinstance(pages, int):
  544. for page in range(1, pages + 1):
  545. self.preferential_data["ipage"] = page
  546. response = requests.post(
  547. url=self.search_preferential_url,
  548. headers=self.preferential_headers,
  549. data=self.preferential_data,
  550. verify=False,
  551. )
  552. print(response.content.decode("utf-8"))
  553. if (
  554. response.status_code == 200
  555. and json.loads(response.content.decode("utf-8")).get("ok") == 1
  556. ):
  557. json_data = json.loads(response.content.decode("utf-8"))
  558. for i in json_data.get("zklist"):
  559. spname = i.get("spname")
  560. spprice = (
  561. i.get("spprice").replace("<p>", "").replace("</p>", "")
  562. )
  563. if i.get("dt"):
  564. timeArray = time.localtime(
  565. int(re.findall("\d+", i.get("dt"))[0]) / 1000
  566. )
  567. dt = time.strftime("%Y-%m-%d %H:%M:%S", timeArray)
  568. else:
  569. dt = None
  570. infoid = i.get("infoid")
  571. infotype = i.get("infotype")
  572. sppic = i.get("sppic")
  573. self.spname_list.append(spname)
  574. self.spprice_list.append(spprice)
  575. self.dt_list.append(dt)
  576. self.infoid_list.append(infoid)
  577. self.infotype_list.append(infotype)
  578. self.sppic_list.append(sppic)
  579. print(spname, spprice, dt, infoid, infotype, sppic)
  580. else:
  581. error_status_code = response.status_code
  582. print("凉了,被ban了~ 状态码:%s,自己看的办吧" % error_status_code)
  583. elif pages is None:
  584. page = 1
  585. self.preferential_data["ipage"] = page
  586. response = requests.post(
  587. url=self.search_preferential_url,
  588. headers=self.preferential_headers,
  589. data=self.preferential_data,
  590. verify=False,
  591. )
  592. json_data = json.loads(response.content.decode("utf-8"))
  593. print(self.search_preferential_url)
  594. print(self.preferential_headers)
  595. print(self.preferential_data)
  596. print(json_data)
  597. if response.status_code == 200 and json_data.get("ok") == 1:
  598. while json_data.get("ok") == 1:
  599. for i in json_data.get("zklist"):
  600. spname = i.get("spname")
  601. spprice = (
  602. i.get("spprice").replace("<p>", "").replace("</p>", "")
  603. )
  604. if i.get("dt"):
  605. timeArray = time.localtime(
  606. int(re.findall("\d+", i.get("dt"))[0]) / 1000
  607. )
  608. dt = time.strftime("%m-%d", timeArray)
  609. else:
  610. dt = None
  611. infoid = i.get("infoid")
  612. infotype = i.get("infotype")
  613. sppic = i.get("sppic")
  614. self.spname_list.append(spname)
  615. self.spprice_list.append(spprice)
  616. self.dt_list.append(dt)
  617. self.infoid_list.append(infoid)
  618. self.infotype_list.append(infotype)
  619. self.sppic_list.append(sppic)
  620. print(spname, spprice, dt, infoid, infotype, sppic)
  621. page += 1
  622. self.preferential_data["ipage"] = page
  623. response = requests.post(
  624. url=self.search_preferential_url,
  625. headers=self.preferential_headers,
  626. data=self.preferential_data,
  627. verify=False,
  628. timeout=5,
  629. )
  630. print(response.status_code)
  631. print(self.preferential_data["ipage"])
  632. json_data = json.loads(response.content.decode("utf-8"))
  633. if not json_data["zklist"]:
  634. break
  635. print(json_data)
  636. time.sleep(random.uniform(0.5, 1.0))
  637. else:
  638. print("该商品无历史优惠信息或User-Agent错误或触发反爬,请重试")
  639. else:
  640. print("数据抓取失败,洗洗睡吧")
  641. def parser_history_price_info(self):
  642. response = requests.post(
  643. url=self.search_price_url,
  644. data=self.price_data,
  645. headers=self.price_headers,
  646. verify=False,
  647. )
  648. print(response.content.decode("utf-8"))
  649. if (
  650. response.status_code == 200
  651. and json.loads(response.content.decode("utf-8")).get("pcinfo")
  652. ):
  653. json_data = json.loads(response.content.decode("utf-8"))
  654. print(json_data)
  655. self.search_price_start_date = json_data["pcinfo"]["bd"]
  656. self.search_price_end_date = json_data["pcinfo"]["ed"]
  657. for k in json_data["pcinfo"]["info"]:
  658. self.history_price_dict[k["dt"]] = k["pr"]
  659. else:
  660. print("数据为空,或者被ban~~")
  661. def download_preferential_info_data(self):
  662. dataframe = pd.DataFrame(
  663. columns=["名称", "优惠信息", "日期", "infoid", "infotype", "商品主图"]
  664. )
  665. dataframe["名称"] = self.spname_list
  666. dataframe["优惠信息"] = self.spprice_list
  667. dataframe["日期"] = self.dt_list
  668. dataframe["infoid"] = self.infoid_list
  669. dataframe["infotype"] = self.infotype_list
  670. dataframe["商品主图"] = self.sppic_list
  671. to_c_sheet = (
  672. "商品历史查询数据"
  673. + "_"
  674. + time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
  675. )
  676. dataframe.to_excel(
  677. self.writer, index=False, encoding="utf-8", sheet_name=to_c_sheet
  678. )
  679. print("数据写入完成,进程结束")
  680. pyplot.plot(self.dt_list, self.spprice_list)
  681. pyplot.xlabel('日期')
  682. pyplot.ylabel('价格')
  683. pyplot.title(self.spname_list[0])
  684. #将纵坐标等刻度划分
  685. #设置填充选项:参数分别对应横坐标,纵坐标,纵坐标填充起始值,填充颜色(可以有更多选项)
  686. pyplot.fill_between(self.dt_list, self.spprice_list, 10, color = 'white')
  687. #使横坐标逆序输出
  688. pyplot.gca().invert_xaxis()
  689. #显示图表
  690. now_time1 = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
  691. pyplot.savefig("./" + now_time1 +".jpg")
  692. print("已经保存")
  693. pyplot.show()
  694. def download_price_info_data(self):
  695. fig = plt.figure()
  696. mpl.rcParams["font.sans-serif"] = ["SimHei"]
  697. mpl.rcParams["axes.unicode_minus"] = False
  698. plt.rcParams["figure.figsize"] = (8.0, 4.0)
  699. print(self.history_price_dict)
  700. if self.history_price_dict:
  701. min_date = min(self.history_price_dict, key=self.history_price_dict.get)
  702. min_price = self.history_price_dict[
  703. min(self.history_price_dict, key=self.history_price_dict.get)
  704. ]
  705. max_date = max(self.history_price_dict, key=self.history_price_dict.get)
  706. max_price = self.history_price_dict[
  707. max(self.history_price_dict, key=self.history_price_dict.get)
  708. ]
  709. plt.plot(self.history_price_dict.keys(), self.history_price_dict.values())
  710. plt.text(
  711. min(self.history_price_dict, key=self.history_price_dict.get),
  712. self.history_price_dict[
  713. min(self.history_price_dict, key=self.history_price_dict.get)
  714. ],
  715. min(self.history_price_dict, key=self.history_price_dict.get),
  716. ha="right",
  717. va="bottom",
  718. fontsize=10,
  719. )
  720. plt.text(
  721. min(self.history_price_dict, key=self.history_price_dict.get),
  722. self.history_price_dict[
  723. min(self.history_price_dict, key=self.history_price_dict.get)
  724. ],
  725. self.history_price_dict[
  726. min(self.history_price_dict, key=self.history_price_dict.get)
  727. ],
  728. ha="left",
  729. va="bottom",
  730. fontsize=10,
  731. )
  732. plt.title("历史价格分布")
  733. plt.xlabel("日期")
  734. plt.ylabel("金额")
  735. fig.savefig("foo.png")
  736. fig_title = (
  737. time.strftime("%m%d", time.localtime(time.time())) + "历史价格查询"
  738. )
  739. sheet = self.writer.book.add_worksheet(fig_title)
  740. sheet.insert_image(0, 0, "foo.png")
  741. else:
  742. print('数据为空,该商品未被收录')
  743. class Application:
  744. def __init__(self):
  745. self.window = Tk()
  746. self.text = Text(self.window)
  747. # 设置窗口大小和位置
  748. self.window.title("阳光价格")
  749. self.window.geometry("290x430+500+280")
  750. self.window.minsize(290, 380)
  751. # 创建一个文本框
  752. self.entry = Text(self.window)
  753. self.entry.place(x=10, y=10, width=200, height=165)
  754. self.entry.bind("<Key-Return>")
  755. # 创建历史溯源按钮
  756. self.submit_btn5 = Button(self.window, text=u"历史溯源", command=self.submit_5)
  757. self.submit_btn5.place(x=220, y=150, width=60, height=25)
  758. # 翻译结果标题
  759. self.title_label = Label(self.window, text=u"运行日志:")
  760. self.title_label.place(x=10, y=180)
  761. # 翻译结果
  762. self.result_text = Text(self.window, background="#ccc")
  763. self.result_text.place(x=10, y=205, width=270, height=205)
  764. # 所属标签
  765. self.title_label = Label(self.window, text=u"2019_a7_price ")
  766. self.title_label.place(x=60, y=410)
  767. self.file_path = None
  768. self.writer = None
  769. def submit_5(self):
  770. # 从输入框获取用户输入的值
  771. self.result_text.delete(0.0, END)
  772. self.file_path = "./" + now_date + "-" + "历史溯源" + ".xlsx"
  773. self.writer = pd.ExcelWriter(self.file_path)
  774. try:
  775. key_title = (
  776. self.entry.get(0.0, END)
  777. .strip()
  778. .replace("\n", " ")
  779. .replace(",", ",")
  780. .split(",")
  781. )
  782. for key in key_title:
  783. if key:
  784. # 将值传入对象
  785. self.result_text.delete(0.0, END)
  786. search_history = HistoryPriceSearch(key, self.writer)
  787. # 抓取历史趋势
  788. search_history.parser_history_preferential_info()
  789. search_history.parser_history_price_info()
  790. search_history.download_preferential_info_data()
  791. search_history.download_price_info_data()
  792. # 下载趋势数据
  793. log_1 = (
  794. "历史价格搜索中"
  795. + "\n"
  796. + "开始下载数据中…………"
  797. + "\n"
  798. + "下载数据请在跟程序处于相同位置查找,文件名为【当前时间+历史趋势】"
  799. )
  800. self.result_text.insert(END, log_1)
  801. except ValueError as e:
  802. self.result_text.delete(0.0, END)
  803. log = (
  804. "log: " + now_time + " " + "查询异常 " + str(e) + "\n" + "请检查键入格式: 国产红富士"
  805. )
  806. self.result_text.insert(END, log)
  807. except KeyError as e:
  808. self.result_text.delete(0.0, END)
  809. log = (
  810. "log: " + now_time + " " + "查询异常 " + str(e) + "\n" + "请检查键入格式: 烟台红富士"
  811. )
  812. self.result_text.insert(END, log)
  813. except error.HTTPError as e:
  814. self.result_text.delete(0.0, END)
  815. log = "log: " + now_time + " " + "URL异常 " + str(e) + "更换关键词重试"
  816. self.result_text.insert(END, log)
  817. except error.URLError as e:
  818. self.result_text.delete(0.0, END)
  819. log = (
  820. "log: "
  821. + now_time
  822. + " "
  823. + "请求异常 "
  824. + str(e)
  825. + "\n"
  826. + "查询太频繁啦~请稍后重新或换IP重试"
  827. )
  828. self.result_text.insert(END, log)
  829. finally:
  830. self.writer.save()
  831. log = "\n" + "log: " + now_time + " " + "数据下载结束,请在程序所处位置查收Excel"
  832. self.result_text.insert(END, log)
  833. def run(self):
  834. self.window.mainloop()
  835. app = Application()
  836. app.run()