# Source code for torvend.spiders.skytorrents
# Copyright (c) 2017 Stephen Bunn (stephen@bunn.io)
# MIT License <https://opensource.org/licenses/MIT>
import re
from .. import (items,)
from ._common import (BaseSpider,)
import furl
import scrapy
class SkyTorrentsSpider(BaseSpider):
    """ The spider for torrent search results served by ``skytorrents.in``.

    Supplies the paging and query properties and parses each search result
    page into :class:`items.Torrent` instances.
    """

    name = 'skytorrents'
    allowed_domains = [
        'skytorrents.in',
    ]

    @property
    def paging_index(self):
        """ Required property for paging indexing.

        :returns: The starting index of pages
        :rtype: int
        """

        return 1

    @property
    def paging_results(self):
        """ Required property for paging results.

        :returns: The number of results per queried page
        :rtype: int
        """

        return 40

    @property
    def query_scheme(self):
        """ Required property for query scheme.

        :returns: The scheme the query needs
        :rtype: str
        """

        return 'https'

    @property
    def query_path(self):
        """ Required property for the query path.

        The returned template carries ``{page}`` and ``{query}`` placeholders
        (presumably formatted by the base spider; confirm in ``BaseSpider``).

        :returns: The path the query needs
        :rtype: str
        """

        return '/search/all/ed/{page}/{query}/?l=en-us'

    def parse(self, response):
        """ Required first level page parser.

        Locates the results table on the page and yields one torrent item per
        well-formed result row. Pages without the expected layout yield
        nothing, and rows that do not have the expected cells are skipped.

        :param response: The response instance from ``start_requests``
        :type response: scrapy.http.Response
        :returns: Yields the torrents found on the page
        :rtype: Iterator[items.Torrent]
        """

        soup = self.get_soup(response.text)
        try:
            # the results table lives in the second "column" div; the first
            # table row is sliced off (presumably the header row)
            results = soup\
                .find('div', {'class': 'columns'})\
                .find_all('div', {'class': 'column'})[1]\
                .find('table')\
                .find_all('tr')[1:]
        except (AttributeError, IndexError):
            # AttributeError: an expected element is missing (``find`` gave
            # ``None``); IndexError: fewer than two "column" divs exist
            return

        for result in results:
            # a result row is expected to hold exactly six cells:
            # name/links, size, (unused), uploaded, seeders, leechers
            cells = result.find_all('td')
            if len(cells) != 6:
                continue

            links = cells[0].find_all('a')[:2]
            if len(links) != 2:
                continue
            (name_link, magnet_link,) = links

            torrent = items.Torrent(spider=self.name)
            torrent['name'] = name_link.text.strip()
            # rebuild an absolute url from the response url and the
            # link path, dropping any query arguments
            torrent['source'] = furl.furl(response.url).set(
                path=name_link.attrs['href'], args={}
            ).url
            torrent['magnet'] = magnet_link.attrs['href']
            torrent['hash'] = self.parse_infohash(torrent['magnet'])

            (size_div, _, uploaded_div, seeders_div, leechers_div,) = \
                cells[1:]
            torrent['size'] = self.parse_size(size_div.text.strip())
            torrent['uploaded'] = self.parse_datetime(
                uploaded_div.text.strip(),
                formats=[
                    # day, abbreviated month name, year (e.g. "25 Nov 2017");
                    # ``%d`` is the day of month, ``%m`` would be a second
                    # (numeric) month field and reject days above 12
                    '%d %b %Y',
                ]
            )
            torrent['seeders'] = int(seeders_div.text.strip())
            torrent['leechers'] = int(leechers_div.text.strip())

            # NOTE: skytorrents.in does not categorize torrents
            torrent['categories'] = [items.TorrentCategory.Unknown]
            torrent['uploader'] = None

            yield torrent