#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""Scrape Tmall/Taobao listing data for luosifen (river-snail rice noodles).

@Author  :   liuyuqi
@Contact :   liuyuqi.gov@msn.cn
@Time    :   2020/04/06 22:13:01
@Version :   1.0
@License :   Copyright © 2017-2020 liuyuqi. All Rights Reserved.
"""

# Required packages.
import re
import time

import pandas as pd
import parsel
from selenium import webdriver
from selenium.webdriver.common.by import By


def login_taobao_acount():
    """Start Chrome, open the Taobao login page and pick the Alipay login.

    The created driver is stored in the module-level global ``browser`` so
    that the other functions in this module can reuse the same session.
    """
    global browser
    # Launch the browser.
    browser = webdriver.Chrome()
    # Open the login page.
    login_url = 'https://login.taobao.com/member/login.jhtml'
    browser.get(login_url)
    # Switch to the Alipay login option.
    # find_element(By..., ...) is used instead of the find_element_by_*
    # helpers, which no longer exist in current Selenium releases.
    browser.find_element(By.CLASS_NAME, 'alipay-login').click()


def get_assigned_page(key_words):
    """Open the Taobao home page and submit a search for ``key_words``.

    Relies on the global ``browser`` created by ``login_taobao_acount``;
    calling this first fails because ``browser`` is not defined yet.

    Args:
        key_words: Search term typed into the search box (converted to text).
    """
    # Open the Taobao home page.
    tb_url = 'https://www.taobao.com/'
    browser.get(tb_url)
    # Locate the search box and type the query.
    s_bar = browser.find_element(By.XPATH, '//*[@id="q"]')
    s_bar.send_keys(str(key_words))
    # Click the search button.
    browser.find_element(
        By.XPATH, '//*[@id="J_TSearchForm"]/div[1]/button'
    ).click()


# NOTE(review): the definition of get_one_page is cut off in the visible
# source (it ends in the middle of a regex literal). Its text is preserved
# verbatim on the next line, still unformatted; restore it as real code once
# the complete definition is available.
# def get_one_page(): # 先获取第一页的信息 html = parsel.Selector(browser.page_source) # 获取数据 goods_name = html.xpath('//div[@class="grid g-clearfix"]//img/@alt').extract() shop_name = html.xpath('//div[@class="grid g-clearfix"]//div[@class="shop"]/a/span[2]/text()').extract() price = html.xpath('//div[@class="grid g-clearfix"]//div[contains(@class,"price")]/strong/text()').extract() purchase_num = [re.findall(r'