# coser2.py (809 B)
  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. from scrapy.contrib.loader import ItemLoader, Identity
  4. from fun.items import CoserItem
  class CoserSpider(scrapy.Spider):
      """Spider that scrapes a single bcy.net coser post into a ``CoserItem``.

      The crawl starts from the one detail page listed in ``start_urls`` and
      emits one item holding the post title, the info spans, the image URLs
      and the page URL.

      NOTE(review): this file imports ``ItemLoader`` from
      ``scrapy.contrib.loader``; Scrapy 1.0 relocated that module to
      ``scrapy.loader`` -- confirm which Scrapy version this project targets.
      """

      name = "coser2"
      allowed_domains = ["bcy.net"]
      start_urls = (
          'http://bcy.net/coser/detail/9495/130440',
      )

      def parse(self, response):
          """Build a ``CoserItem`` from a post detail page.

          Args:
              response: The downloaded detail page passed in by Scrapy.

          Returns:
              The item produced by ``ItemLoader.load_item()`` with the
              ``name``, ``info``, ``image_urls`` and ``url`` fields collected
              below.
          """
          loader = ItemLoader(item=CoserItem(), response=response)

          # Text fields are taken straight from the page markup.
          loader.add_xpath('name', "//h1[@class='js-post-title']/text()")
          loader.add_xpath(
              'info',
              "//div[@class='post__info']"
              "/div[@class='post__type post__info-group']/span/text()",
          )

          # get_xpath() only extracts the values; nothing is stored on the
          # item yet, which lets the URLs be rewritten before they are added.
          image_sources = loader.get_xpath(
              '//img[@class="detail_std detail_clickable"]/@src'
          )
          # NOTE(review): '/w650' is presumably a resize marker selecting a
          # 650px-wide rendition; dropping it should yield the original image
          # -- confirm against the site's URL scheme.
          image_sources = [src.replace('/w650', '') for src in image_sources]

          # 'image_urls' is presumably consumed by an images pipeline --
          # verify against the project settings.
          loader.add_value('image_urls', image_sources)
          loader.add_value('url', response.url)
          return loader.load_item()