# Source code for torvend.spiders.torrentz2

# Copyright (c) 2017 Stephen Bunn (stephen@bunn.io)
# MIT License <https://opensource.org/licenses/MIT>

from .. import (items,)
from ._common import (BaseSpider,)

import furl


class Torrentz2Spider(BaseSpider):
    """ The spider for torrentz2.eu. """

    name = 'torrentz2'
    allowed_domains = [
        'torrentz2.eu',
    ]
    # Maps torrentz2 category keywords (lowercased) to internal category
    # enums; anything unrecognized falls back to TorrentCategory.Unknown.
    _category_map = {
        'audio': items.TorrentCategory.Audio,
        'video': items.TorrentCategory.Video,
        'application': items.TorrentCategory.Application,
        'ebook': items.TorrentCategory.Book,
        'adult': items.TorrentCategory.Adult,
        'images': items.TorrentCategory.Image,
        'game': items.TorrentCategory.Game,
    }

    @property
    def paging_index(self):
        """ Required property for paging indexing.

        :returns: The starting index of pages
        :rtype: int
        """

        return 0

    @property
    def paging_results(self):
        """ Required property for paging results.

        :returns: The number of results per queried page
        :rtype: int
        """

        return 50

    @property
    def query_scheme(self):
        """ Required property for query scheme.

        :returns: The scheme the query needs
        :rtype: str
        """

        return 'https'

    @property
    def query_path(self):
        """ Required property for the query path.

        :returns: The path the query needs
        :rtype: str
        """

        return '/search/?f={query}&p={page}'

    def parse(self, response):
        """ Required first level page parser.

        :param response: The response instance from ``start_requests``
        :type response: scrapy.Request
        :returns: Yields torrent items
        :rtype: list[items.Torrent]
        """

        soup = self.get_soup(response.text)
        try:
            results = soup\
                .find('div', {'class': 'results'})\
                .find_all('dl')
        except AttributeError:
            # No results container on the page; nothing to yield.
            return

        for result in results:
            torrent = items.Torrent(spider=self.name)
            result_links = result.find('a')
            torrent['name'] = result_links.contents[0].strip()
            # The info hash is the final path segment of the result link.
            info_hash = furl.furl(
                result_links.attrs['href']
            ).path.segments[-1]
            torrent['hash'] = info_hash.lower()
            # NOTE(review): the trailing '&dn' carries no value (a valid
            # magnet would be '&dn=<name>'); kept byte-for-byte for
            # compatibility with existing consumers of this field.
            torrent['magnet'] = (
                'magnet:?xt=urn:btih:{info_hash}&dn'
            ).format(info_hash=info_hash)
            torrent['source'] = furl.furl(response.url).set(
                path=info_hash, args={}
            ).url

            # FIX: pre-initialize so the fallback check below cannot raise
            # KeyError when the '»' marker branch is never entered.
            torrent['categories'] = []
            result_desc = result.find('dt')
            # Category keywords follow a '»' marker in the description text.
            # (The scraped copy had this character mojibake'd as 'ยป'.)
            if len(result_desc.contents[-1]) > 1 and \
                    result_desc.contents[-1].lstrip().startswith('»'):
                torrent['categories'] = [
                    self._category_map.get(
                        keyword.lower(),
                        items.TorrentCategory.Unknown
                    )
                    for keyword in result_desc.contents[-1].split(' ')[2:]
                ]
            if len(torrent['categories']) <= 0:
                torrent['categories'] = [items.TorrentCategory.Unknown]

            # Stats columns: (comment-count, uploaded, size, seeders,
            # leechers) — the first column is unused.
            (_, uploaded, size, seeders, leechers,) = tuple([
                column.contents[0]
                for column in result.find('dd').find_all('span')
            ])
            torrent['uploaded'] = self.parse_datetime(
                '{uploaded} ago'.format(uploaded=uploaded)
            )
            torrent['size'] = self.parse_size(size)
            # Seeder/leecher counts may include thousands separators.
            (torrent['seeders'], torrent['leechers'],) = (
                int(seeders.replace(',', '')),
                int(leechers.replace(',', '')),
            )
            yield torrent