Source code for torvend.spiders.limetorrents
# Copyright (c) 2017 Stephen Bunn (stephen@bunn.io)
# MIT License <https://opensource.org/licenses/MIT>
import re

from .. import (items,)
from ._common import (BaseSpider,)

import furl
import scrapy
class LimeTorrentsSpider(BaseSpider):
    """ The torrent spider for ``limetorrents.cc``.
    """

    name = 'limetorrents'
    allowed_domains = [
        'www.limetorrents.cc',
    ]

    # category labels used by limetorrents mapped onto torvend's
    # ``TorrentCategory`` enum; unrecognized labels fall back to ``Unknown``
    _category_map = {
        'movies': items.TorrentCategory.Video,
        'tv shows': items.TorrentCategory.Video,
        'music': items.TorrentCategory.Audio,
        'applications': items.TorrentCategory.Application,
        'games': items.TorrentCategory.Game,
        'anime': items.TorrentCategory.Video,
        'other': items.TorrentCategory.Unknown,
    }
    @property
    def paging_index(self):
        """ Required property for paging indexing.

        :returns: The starting index of pages
        :rtype: int
        """

        return 1

    @property
    def paging_results(self):
        """ Required property for paging results.

        :returns: The number of results per queried page
        :rtype: int
        """

        return 36

    @property
    def query_scheme(self):
        """ Required property for the query scheme.

        :returns: The scheme the query needs
        :rtype: str
        """

        return 'http'

    @property
    def query_path(self):
        """ Required property for the query path.

        :returns: The path the query needs
        :rtype: str
        """

        return '/search/all/{query}/seeds/{page}/'
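
    # NOTE: BaseSpider (see ``_common``) is assumed to combine ``query_scheme``,
    # ``allowed_domains``, and ``query_path`` into search URLs along the lines of
    # ``http://www.limetorrents.cc/search/all/<query>/seeds/<page>/``; the exact
    # assembly is not shown in this module.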
    def _parse_torrent(self, response):
        """ Handle parsing torrent info.

        :param response: The response from the torrent detail page requested
            in ``parse``
        :type response: scrapy.http.Response
        :returns: Yields torrent items
        :rtype: list[items.Torrent]
        """

        torrent = response.meta['torrent']
        soup = self.get_soup(response.text)
        result = soup.find('div', {'class': 'torrentinfo'})
        result_table = result.find('table')
        # the info hash sits in the last cell of the first detail table row
        torrent['hash'] = result_table\
            .find('tr')\
            .find_all('td')[-1].contents[0].lower()
        torrent['magnet'] = result\
            .find('a', {'href': re.compile(r'^magnet:.*')}).attrs['href']
        # the category label sits in the second row; map it onto TorrentCategory
        torrent['categories'] = [
            self._category_map.get(
                result_table.find_all('tr')[1].find_all(
                    'td'
                )[-1].find('a').contents[0].strip().lower(),
                items.TorrentCategory.Unknown
            )
        ]
        # handle torrent fields the detail page does not expose
        (torrent['uploaded'], torrent['uploader'],) = (None, None,)
        yield torrent
    def parse(self, response):
        """ Required first level page parser.

        :param response: The response instance from ``start_requests``
        :type response: scrapy.http.Response
        :returns: Yields additional scrapy requests
        :rtype: list[scrapy.Request]
        """

        soup = self.get_soup(response.text)
        try:
            # search results are the rows of the ``table2`` table,
            # skipping the header row
            results = soup\
                .find('table', {'class': 'table2'})\
                .find_all('tr')[1:]
        except AttributeError:
            # no results table means the query returned nothing
            return

        for result in results:
            torrent = items.Torrent(spider=self.name)
            name_link = result.find(
                'div', {'class': 'tt-name'}
            ).find_all('a')[-1]
            torrent['name'] = name_link.contents[0].strip()
            torrent['source'] = furl.furl(response.url).set(
                path=name_link.attrs['href'], args={}
            ).url
            torrent['size'] = self.parse_size(result.find_all(
                'td', {'class': 'tdnormal'}
            )[-1].contents[0].strip())
            torrent['seeders'] = int(result.find(
                'td', {'class': 'tdseed'}
            ).contents[0].strip().replace(',', ''))
            torrent['leechers'] = int(result.find(
                'td', {'class': 'tdleech'}
            ).contents[0].strip().replace(',', ''))
            # request the torrent's detail page to fill in hash, magnet,
            # and categories
            torrent_request = scrapy.Request(
                torrent['source'],
                callback=self._parse_torrent
            )
            torrent_request.meta['torrent'] = torrent
            yield torrent_request
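

# A minimal usage sketch, not part of the original module: running the spider
# on its own with Scrapy's CrawlerProcess. torvend normally drives its spiders
# through its own client, and the ``query`` keyword passed to ``crawl`` below is
# only an assumption about how BaseSpider receives the search term.
if __name__ == '__main__':
    from scrapy.crawler import CrawlerProcess

    process = CrawlerProcess({'LOG_LEVEL': 'WARNING'})
    # keyword arguments to ``crawl`` become spider attributes; ``query`` here
    # is a hypothetical parameter name
    process.crawl(LimeTorrentsSpider, query='ubuntu')
    process.start()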