# Source code for torvend.spiders.torrentz2
# Copyright (c) 2017 Stephen Bunn (stephen@bunn.io)
# MIT License <https://opensource.org/licenses/MIT>
from .. import (items,)
from ._common import (BaseSpider,)
import furl
class Torrentz2Spider(BaseSpider):
    """ The spider for torrentz2.eu.

    Declares the query settings for the site and parses its search result
    pages into :class:`items.Torrent` instances.
    """

    name = 'torrentz2'
    allowed_domains = [
        'torrentz2.eu',
    ]

    # Lowercased category keywords found in a result's description, mapped to
    # the matching torrent category. Unlisted keywords become ``Unknown``.
    _category_map = {
        'audio': items.TorrentCategory.Audio,
        'video': items.TorrentCategory.Video,
        'application': items.TorrentCategory.Application,
        'ebook': items.TorrentCategory.Book,
        'adult': items.TorrentCategory.Adult,
        'images': items.TorrentCategory.Image,
        'game': items.TorrentCategory.Game,
    }

    # Right-pointing double angle quotation mark that introduces the category
    # keywords. Written as an escape so the literal survives re-encoding.
    _category_marker = '\u00bb'

    @property
    def paging_index(self):
        """ Required property for paging indexing.

        :returns: The starting index of pages
        :rtype: int
        """

        return 0

    @property
    def paging_results(self):
        """ Required property for paging results.

        :returns: The number of results per queried page
        :rtype: int
        """

        return 50

    @property
    def query_scheme(self):
        """ Required property for query scheme.

        :returns: The scheme the query needs
        :rtype: str
        """

        return 'https'

    @property
    def query_path(self):
        """ Required property for the query path.

        :returns: The path the query needs, with ``{query}`` and ``{page}``
            placeholders
        :rtype: str
        """

        return '/search/?f={query}&p={page}'

    def parse(self, response):
        """ Required first level page parser.

        Every ``<dl>`` inside the ``<div class="results">`` container is
        treated as one search result. A page without that container yields
        nothing.

        :param response: The response instance from ``start_requests``
        :type response: scrapy.http.Response
        :returns: Yields torrent items
        :rtype: collections.abc.Iterator[items.Torrent]
        """

        soup = self.get_soup(response.text)
        try:
            results = soup\
                .find('div', {'class': 'results'})\
                .find_all('dl')
        except AttributeError:
            # ``find`` returned None: there is no results container
            return

        for result in results:
            torrent = items.Torrent(spider=self.name)

            result_link = result.find('a')
            torrent['name'] = result_link.contents[0].strip()

            # the last path segment of the result link is the info hash
            info_hash = furl.furl(
                result_link.attrs['href']
            ).path.segments[-1]
            torrent['hash'] = info_hash.lower()
            torrent['magnet'] = (
                'magnet:?xt=urn:btih:{info_hash}&dn'
            ).format(info_hash=info_hash)
            torrent['source'] = furl.furl(response.url).set(
                path=info_hash, args={}
            ).url

            # The trailing child of <dt> carries the category keywords after
            # the marker. Only text nodes (str subclasses) can be inspected;
            # a trailing tag means no category text is present.
            trailer = result.find('dt').contents[-1]
            categories = []
            if isinstance(trailer, str) and len(trailer) > 1 and \
                    trailer.lstrip().startswith(self._category_marker):
                categories = [
                    self._category_map.get(
                        keyword.lower(),
                        items.TorrentCategory.Unknown
                    )
                    for keyword in trailer.split(' ')[2:]
                ]
            # always populate the field, even when nothing was recognised
            torrent['categories'] = (
                categories or [items.TorrentCategory.Unknown]
            )

            # <dd> holds five spans; the first one is ignored
            (_, uploaded, size, seeders, leechers,) = [
                column.contents[0]
                for column in result.find('dd').find_all('span')
            ]
            torrent['uploaded'] = self.parse_datetime(
                '{uploaded} ago'.format(uploaded=uploaded)
            )
            torrent['size'] = self.parse_size(size)
            # counts may contain thousands separators
            torrent['seeders'] = int(seeders.replace(',', ''))
            torrent['leechers'] = int(leechers.replace(',', ''))

            yield torrent