class get_torrent(ItemElement):
    """Build a Torrent from a cpasbien torrent detail page."""

    klass = Torrent

    obj_name = CleanText('//h2[has-class("h2fiche")]', default=NotAvailable)
    obj_description = CleanHTML('//div[@id="textefiche"]',
                                default=NotAvailable)
    obj_seeders = CleanText(
        '//div[@id="infosficher"]//span[has-class("seed_ok")]') & Type(
            type=int)
    obj_leechers = CleanText('(//div[@id="infosficher"]/span)[3]') & Type(
        type=int)
    obj_magnet = NotAvailable
    # The dot before "html" is escaped so that only a literal ".html"
    # suffix terminates the identifier.
    obj_id = Regexp(CleanText('//h2[has-class("h2fiche")]/a/@href'),
                    r'.*dl-torrent/(.*)\.html')
    obj_url = Format('http://www.cpasbien.cm%s',
                     CleanText('//a[@id="telecharger"]/@href'))

    def obj_size(self):
        """Return the size in bytes parsed from the first info span.

        The text is split into "<number> ... <unit>"; a decimal comma is
        accepted and the letter "O" in the unit is mapped to "B" before
        delegating to ``get_bytes_size``.
        """
        text = CleanText('(//div[@id="infosficher"]/span)[1]')(self)
        tokens = text.replace(',', '.').strip().split()
        amount = float(tokens[0])
        unit = tokens[-1].upper().replace('O', 'B')
        return get_bytes_size(amount, unit)

    obj_files = NotAvailable
    obj_filename = CleanText(Regexp(
        CleanText('//a[@id="telecharger"]/@href'),
        '.*telechargement/(.*)'), default=NotAvailable)
class get_torrent(ItemElement):
    """Build a Torrent from a detail page that only exposes a magnet link.

    The identifier is the last path component of the page URL and ``url``
    is reported as not available.
    """

    klass = Torrent

    def obj_id(self):
        # Last "/"-separated component of the page URL.
        return self.page.url.rsplit('/', 1)[-1]

    def obj_url(self):
        return NotAvailable

    obj_name = CleanText('//div[@id="title"]')
    obj_magnet = CleanText(
        '//div[@class="download"]/a[starts-with(@href, "magnet:")]/@href')
    obj_date = Date(
        CleanText(
            '//div[@id="details"]//dt[.="Uploaded:"]/following-sibling::dd[1]'
        ))
    # The size is read from the "(<digits> Bytes)" part of the Size cell.
    obj_size = Type(Regexp(
        CleanText(
            '//div[@id="details"]//dt[.="Size:"]/following-sibling::dd[1]'
        ), r'\((\d+) Bytes\)', '\\1'), type=float)
    obj_seeders = CleanText(
        '//div[@id="details"]//dt[.="Seeders:"]/following-sibling::dd[1]'
    ) & Type(type=int)
    obj_leechers = CleanText(
        '//div[@id="details"]//dt[.="Leechers:"]/following-sibling::dd[1]'
    ) & Type(type=int)
    obj_description = RawText('//div[@class="nfo"]/pre', children=True)
class item(ItemElement):
    """Build a Torrent from one row of a cpasbien result list."""

    klass = Torrent

    # Raw strings keep the regex escapes intact; the dot before "html" is
    # escaped so only a literal ".html" suffix matches.
    obj_id = Regexp(CleanText('.//a[has-class("titre")]/@href'),
                    r'.*dl-torrent/(.*)\.html')
    obj_name = CleanText('.//a[has-class("titre")]', default=NotAvailable)
    obj_magnet = NotAvailable
    obj_seeders = CleanText('.//div[has-class("up")]',
                            default=NotAvailable) & Type(type=int)
    obj_leechers = CleanText('.//div[has-class("down")]',
                             default=NotAvailable) & Type(type=int)
    obj_description = NotLoaded
    obj_files = NotLoaded

    def obj_url(self):
        """Return the .torrent download URL derived from the title link."""
        href = CleanText('.//a[has-class("titre")]/@href')(self)
        subid = href.split('/')[-1].replace('.html', '.torrent')
        return 'http://www.cpasbien.cm/telechargement/%s' % subid

    def obj_size(self):
        """Return the size in bytes parsed from the "poid" cell.

        A decimal comma is accepted and the letter "O" in the unit is
        mapped to "B" before delegating to ``get_bytes_size``.
        """
        text = CleanText('./div[has-class("poid")]')(self)
        tokens = text.replace(',', '.').strip().split()
        amount = float(tokens[0])
        unit = tokens[-1].upper().replace('O', 'B')
        return get_bytes_size(amount, unit)

    obj_filename = Format(
        '%s.torrent',
        Regexp(CleanText('.//a[has-class("titre")]/@href'),
               r'/([^/]*)\.html'))
class get_location(ItemElement):
    """Build an IpLocation from an "IP Address Information" page.

    Each value is the direct text of the table cell whose <strong> label
    matches the field name.
    """

    klass = IpLocation

    obj_id = Regexp(
        CleanText('//h1/strong[starts-with(.,"IP Address Information")]'),
        r'- ([.\d]+)')
    obj_city = CleanText('//td[.//strong[text()="City"]]', children=False)
    obj_country = CleanText('//td[.//strong[text()="Country"]]',
                            children=False)
    obj_region = CleanText('//td[.//strong[text()="Region"]]',
                           children=False)
    obj_zipcode = CleanText('//td[.//strong[text()="Postcode"]]',
                            children=False)
    obj_host = CleanText('//td[.//strong[text()="Domain Name"]]',
                         children=False, default=NotAvailable)
    obj_isp = CleanText('//td[.//strong[text()="ISP"]]', children=False)
    # The coordinates cell is matched as "(<first>, <second>)": the first
    # number feeds ``lt`` and the second feeds ``lg``.
    obj_lt = Type(
        Regexp(
            CleanText('//td[.//strong[text()="Coordinates of City"]]',
                      children=False),
            r'\(([\d.-]+), [\d.-]+\)'),
        type=float)
    obj_lg = Type(
        Regexp(
            CleanText('//td[.//strong[text()="Coordinates of City"]]',
                      children=False),
            r'\([\d.-]+, ([\d.-]+)\)'),
        type=float)
class get_recipe(ItemElement):
    """Build a Recipe from a recipe detail page."""

    klass = Recipe

    obj_id = Env('_id')
    obj_title = CleanText('//h1')
    # Regex literals are raw strings so that "\d" is not treated as an
    # (invalid) string escape.
    obj_preparation_time = Type(
        Regexp(CleanText('//li[@class="time"]/span'), r".* (\d*) min"),
        type=int)
    obj_cooking_time = Type(
        Regexp(CleanText('//li[@class="time-cooking"]/span'),
               r".* (\d*) min"),
        type=int)

    def obj_nb_person(self):
        """Return ``[count]`` parsed from "pour N personnes".

        The regexp falls back to 0 when the text does not match, in which
        case NotAvailable is returned.
        """
        nb_pers = Regexp(
            CleanText('//div[@class="row ingredients"]/div/p'),
            r'.*pour (\d+) personnes', default=0)(self)
        return [nb_pers] if nb_pers else NotAvailable

    def obj_ingredients(self):
        """Return the cleaned text of every ingredient list entry."""
        entries = XPath('//ul[@class="ingredientsList"]/li',
                        default=[])(self)
        clean = CleanText('.')
        return [clean(entry) for entry in entries]

    obj_instructions = Join(u'\n- ', '//div[@class="recipe-prepa"]/ol/li',
                            newline=True, addBefore='- ')
    obj_thumbnail_url = CleanText(
        '//div[has-class("toprecipeImage")]/img/@src', default=NotAvailable)
    obj_picture_url = CleanText(
        '//div[has-class("toprecipeImage")]/img/@src', default=NotAvailable)
class get_recipe(ItemElement):
    """Build a Recipe from a recipe detail page."""

    klass = Recipe

    obj_id = Env('id')
    obj_title = CleanText('//h1[@class="m_title"]')
    obj_preparation_time = Type(CleanText('//span[@class="preptime"]'),
                                type=int)
    obj_cooking_time = Type(CleanText('//span[@class="cooktime"]'),
                            type=int)

    def obj_nb_person(self):
        """Return ``[count]`` parsed from "(pour N personnes)".

        The regexp falls back to 0 when the text does not match, in which
        case NotAvailable is returned.
        """
        # Raw string: "\(" and "\d" are regex escapes, not string escapes.
        nb_pers = Regexp(
            CleanText('//p[@class="m_content_recette_ingredients"]/span[1]'),
            r'.*\(pour (\d+) personnes\)', default=0)(self)
        return [nb_pers] if nb_pers else NotAvailable

    def obj_ingredients(self):
        """Return the "-"-separated ingredient chunks after the first one.

        Returns None when the text contains no "-" separator.
        """
        chunks = CleanText(
            '//p[@class="m_content_recette_ingredients"]',
            default='')(self).split('-')
        if len(chunks) > 1:
            # The chunk before the first "-" is dropped.
            return chunks[1:]
        return None

    obj_instructions = CleanHTML('//div[@class="m_content_recette_todo"]')
    obj_thumbnail_url = CleanText(
        '//a[@class="m_content_recette_illu"]/img/@src',
        default=NotAvailable)
    obj_picture_url = CleanText(
        '//a[@class="m_content_recette_illu"]/img/@src',
        default=NotAvailable)
class item(ItemElement):
    """Build a Torrent from one row of a search result table."""

    klass = Torrent

    obj_id = Regexp(CleanText('./td[2]/div/a[@class="detLink"]/@href'),
                    r'^/torrent/(\d+)/', '\\1')
    obj_name = Regexp(CleanText('./td[2]/div/a[@class="detLink"]/@title'),
                      r'Details for (.*)$', '\\1')
    # "@title" selects the attribute; a bare "title" would test for a child
    # element named <title> and never match the magnet anchor.
    obj_magnet = CleanText(
        './td[2]/a[@title="Download this torrent using magnet"]/@href')
    obj_date = Date(
        Regexp(CleanText('./td[2]/font'), r'Uploaded ([^,]+),', '\\1'),
        fuzzy=True)
    obj_seeders = Type(CleanText('./td[3]'), type=int)
    obj_leechers = Type(CleanText('./td[4]'), type=int)

    def obj_size(self):
        """Return the size in bytes parsed from "Size <value> <unit>,"."""
        size_text = Regexp(CleanText('./td[2]/font'),
                           r'Size ([\d\.]+ [^,]+),', '\\1')(self)
        value, unit = size_text.split(' ')
        return get_bytes_size(float(value), unit)
class get_torrent(ItemElement):
    """Build a Torrent from a t411 torrent detail page."""

    klass = Torrent

    def obj_description(self):
        """Return the article text without "[<digits>]" lines.

        Lines containing a "[<digits>]" marker are dropped, the remaining
        lines are stripped, and runs of two or more whitespace characters
        are replaced by a blank line.
        """
        desctxt = CleanHTML('//div[has-class("description")]/article')(self)
        kept = [line.strip() for line in desctxt.split('\n')
                if re.search(r'\[[0-9]+\]', line) is None]
        return re.sub(r'\s\s+', '\n\n', '\n'.join(kept))

    obj_name = CleanText(
        '//div[has-class("torrentDetails")]/h2/span/text()')
    obj_id = CleanText('//input[@id="torrent-id"][1]/@value')

    def obj_url(self):
        """Return the download URL built from the hidden torrent id."""
        fullid = CleanText('//input[@id="torrent-id"][1]/@value')(self)
        return 'https://www.t411.in/torrents/download/?id=%s' % fullid

    obj_filename = CleanText(
        '//div[@class="accordion"]//tr[th="Torrent"]/td')

    def obj_size(self):
        """Return the size in bytes parsed from the "Taille totale" row."""
        tokens = CleanText(
            '//div[@class="accordion"]//tr[th="Taille totale"]/td'
        )(self).split()
        amount = float(tokens[0])
        unit = tokens[-1].upper()
        return get_bytes_size(amount, unit)

    def obj_files(self):
        """Return the cleaned text of each file-list row but the first."""
        # The XPath is built from adjacent literals instead of a backslash
        # continuation inside the string, which would embed stray
        # characters in the expression.
        rows = Type(
            '//div[@class="accordion"]/h3[text()="Liste des Fichiers"]'
            '/following-sibling::div[1]//tr',
            type=list)(self)
        # The first row is skipped (presumably the table header).
        return [CleanText(row)(self) for row in rows[1:]]

    obj_seeders = CleanText(
        '//div[@class="details"]//td[@class="up"]') & Type(type=int)
    obj_leechers = CleanText(
        '//div[@class="details"]//td[@class="down"]') & Type(type=int)
    obj_magnet = NotAvailable
def obj_price(self):
    """Return the offer price as a float, or None without JSON data.

    The price is read from the "offers/price" path of ``env['_json']``;
    a missing entry falls back to "0" and an unparsable one to 0.
    """
    payload = self.env['_json']
    if not payload:
        return None
    price_filter = Type(CleanText(Dict('offers/price', default="0")),
                        type=float, default=0)
    return price_filter(payload)
class item(ItemElement):
    """Build a Torrent from one <dl> entry of a result list."""

    klass = Torrent

    obj_id = Regexp(CleanText('./dt/a/@href'), r'/([0-9a-f]+)', '\\1')
    obj_name = CleanText('./dt/a')
    obj_date = Date(CleanText('./dd/span[2]/@title'),
                    default=None, parse_func=parse_timestamp)
    # Thousands separators are removed before the integer conversion.
    obj_seeders = Type(CleanText('./dd/span[4]', replace=[(',', '')]),
                       type=int)
    obj_leechers = Type(CleanText('./dd/span[5]', replace=[(',', '')]),
                        type=int)

    def obj_size(self):
        """Return the size in bytes, or NaN when the size cell is empty."""
        text = CleanText('./dd/span[3]')(self)
        if not text:
            return float("NaN")
        value, unit = text.split()
        return get_bytes_size(float(value), unit)
def obj_files(self):
    """Return the cleaned text of each file-list row but the first."""
    # The XPath is built from adjacent literals instead of a backslash
    # continuation inside the string, which would embed stray characters
    # in the expression.
    rows = Type(
        '//div[@class="accordion"]/h3[text()="Liste des Fichiers"]'
        '/following-sibling::div[1]//tr',
        type=list)(self)
    # The first row is skipped (presumably the table header).
    return [CleanText(row)(self) for row in rows[1:]]
class get_torrent(ItemElement):
    """Build a Torrent from a torrent detail page."""

    klass = Torrent

    obj_description = CleanText('//div[@id="desc"]', default=NotAvailable)
    obj_seeders = CleanText(
        '(//div[has-class("seedBlock")]/strong)[1]') & Type(type=int)
    obj_leechers = CleanText(
        '(//div[has-class("leechBlock")]/strong)[1]') & Type(type=int)
    obj_name = CleanText('//h1[has-class("novertmarg")]//span',
                         default=NotAvailable)
    obj_magnet = CleanText(
        '//div[has-class("downloadButtonGroup")]'
        '//a[starts-with(@href,"magnet")]/@href',
        default=NotAvailable)
    # Raw string: "\." is a regex escape, not a string escape.
    obj_id = Regexp(CleanText('//h1[has-class("novertmarg")]/a/@href'),
                    r'.*-t([0-9]*)\.html')

    def obj_url(self):
        """Return the protocol-relative download link prefixed by https:."""
        href = CleanText(
            '//div[has-class("downloadButtonGroup")]'
            '//a[starts-with(@href,"//")]/@href'
        )(self)
        return u'https:%s' % href

    def obj_size(self):
        """Return the size in bytes parsed from the folder summary span.

        The value is taken from what follows the last ": " and precedes
        the next ")"; a decimal comma is accepted.
        """
        text = CleanText(
            '//span[has-class("folder") or has-class("folderopen")]')(self)
        text = text.split(': ')[-1].split(')')[0].strip()
        tokens = text.replace(',', '.').split()
        amount = float(tokens[0])
        unit = tokens[-1].upper()
        return get_bytes_size(amount, unit)

    def obj_files(self):
        """Return the cleaned text of every file-name cell."""
        cells = Type('//td[has-class("torFileName")]', type=list)(self)
        return [CleanText(cell)(self) for cell in cells]

    obj_filename = CleanText(Regexp(
        CleanText(
            '//div[has-class("downloadButtonGroup")]'
            '//a[starts-with(@href,"//")]/@href'
        ), '.*title=(.*)'), default=NotAvailable)
class item(ItemElement):
    """Build a Subtitle from one row of a subtitles table."""

    klass = Subtitle

    obj_name = CleanText('.//td/a[@alt="Subtitles\' page"]')
    obj_nb_cd = CleanText(TableCell('cd')) & Type(type=int)
    obj_language = CleanText(TableCell('language'))
    obj_url = AbsoluteLink(
        './/td/div[has-class("pull-left")]/a[@alt="Download subtitles."]'
    )
    # The identifier is the path segment right before "/download" in the
    # already-computed ``url`` field.
    obj_id = Regexp(Field('url'), r'/(-*\w*)/download$', r'\1')
class item(ItemElement):
    """Build an Account from one row of an accounts table."""

    klass = Account

    # First run of digits found in the row link's href, as an integer.
    obj_id = Type(Regexp(Attr('.//a', 'href'), r'(\d+)'), type=int)
    obj_label = CleanText('./td[1]')
    obj_balance = CleanDecimal('./td[2]', replace_dots=True)

    def obj_url(self):
        """Return the browser base URL joined with the row's first link."""
        base = self.page.browser.BASEURL
        relative = Link(u'.//a[1]')(self)
        return u'%s%s' % (base, relative)
class get_torrent(ItemElement):
    """Build a Torrent from a page listing trackers, files and mirrors."""

    klass = Torrent

    obj_id = Regexp(CleanText('//div[@class="trackers"]/h2'),
                    r'hash ([0-9a-f]+)', '\\1')
    obj_name = CleanText('//div[@class="downlinks"]/h2/span')
    obj_date = CleanText(
        '//div[@class="downlinks"]/div/span/@title') & Date(default=None)
    obj_size = CleanText('//div[@class="files"]/div/@title',
                         replace=[(',', ''), ('b', '')]) & \
        Type(type=float)

    def obj_seeders(self):
        """Return the highest seeder count over all trackers.

        NotAvailable is returned when no count is listed or one cannot be
        parsed as an integer.
        """
        spans = self.xpath(
            '//div[@class="trackers"]/dl/dd/span[@class="u"]')
        try:
            return max([int(span.text.replace(',', '')) for span in spans])
        except ValueError:
            return NotAvailable

    def obj_leechers(self):
        """Return the highest leecher count over all trackers.

        NotAvailable is returned when no count is listed or one cannot be
        parsed as an integer.
        """
        spans = self.xpath(
            '//div[@class="trackers"]/dl/dd/span[@class="d"]')
        try:
            return max([int(span.text.replace(',', '')) for span in spans])
        except ValueError:
            return NotAvailable

    def obj_url(self):
        """Return the browser base URL followed by the torrent hash."""
        return self.page.browser.BASEURL + \
            Regexp(CleanText('//div[@class="trackers"]/h2'),
                   r'hash ([0-9a-f]+)', '\\1')(self)

    def obj_files(self):
        """Return the file tree as lines indented with "| " per level."""
        def traverse_nested_lists(ul, result, depth=0):
            indent = "| " * depth
            for li in ul.xpath('./li'):
                sub_uls = li.xpath('./ul')
                if sub_uls:
                    # Entry with nested lists: emit its label, then recurse.
                    result.append(indent + ("%s" % li.text))
                    for sub_ul in sub_uls:
                        traverse_nested_lists(sub_ul, result, depth + 1)
                    continue
                try:
                    size = li.xpath('span')[0].text
                except IndexError:
                    # No <span> child: the entry carries no size.
                    size = ""
                result.append(indent + ("%s [%s]" % (li.text, size)))

        result = []
        traverse_nested_lists(
            self.xpath('//div[@class="files"]/ul')[0], result)
        return result

    def obj_magnet(self):
        """Return a magnet URI built from the hash, name and trackers."""
        hsh = Regexp(CleanText('//div[@class="trackers"]/h2'),
                     r'hash ([0-9a-f]+)', '\\1')(self)
        name = "dn=%s" % quote_plus(
            CleanText('//div[@class="downlinks"]/h2/span')(self))
        trackers = ["tr=%s" % dt.text
                    for dt in self.xpath('//div[@class="trackers"]/dl/dt')]
        return "&".join(["magnet:?xt=urn:btih:%s" % hsh, name] + trackers)

    def obj_description(self):
        """Return the list of links where the torrent file is available."""
        return u"Torrent files available at:\n" + \
            u"\n\n".join(
                self.xpath('//div[@class="downlinks"]/dl/dt/a/@href'))
class item(ItemElement):
    """Build a Torrent from one row of a torrent listing table."""

    klass = Torrent

    # Raw strings: "\w" is a regex escape, not a string escape.
    obj_id = Regexp(CleanText('./td[2]/a/@href'),
                    r'/torrents/([0-9]+)/(\w+)', '\\1')
    obj_name = Regexp(CleanText('./td[2]/a/@href'),
                      r'/torrents/([0-9]+)/([-\w]+)', '\\2')
    obj_seeders = CleanText('./td[6]/span[text()]') & Type(type=int)
    obj_leechers = CleanText('./td[7]/span[text()]') & Type(type=int)
    obj_description = NotLoaded
    obj_files = NotLoaded
    # Reuses the name filter defined above as the file name stem.
    obj_filename = Format('%s.torrent', obj_name)
    obj_magnet = NotAvailable
    obj_url = CleanText('./td[2]/a/@href')

    def obj_size(self):
        """Return the size in bytes parsed from the fifth cell."""
        tokens = CleanText('./td[5]')(self).split()
        amount = float(tokens[0])
        unit = tokens[-1].upper()
        return get_bytes_size(amount, unit)
class item(ItemElement):
    """Build a Torrent from one row of a search result table."""

    klass = Torrent

    # The dot before "html" is escaped so only a literal ".html" suffix
    # terminates the numeric identifier.
    obj_id = Regexp(
        CleanText(
            './/div[@class="torrentname"]//a[@class="cellMainLink"]/@href'
        ), r'.*-t([0-9]*)\.html')
    obj_name = CleanText('.//a[@class="cellMainLink"]',
                         default=NotAvailable)
    obj_magnet = CleanText(
        './/div[has-class("iaconbox")]//a[starts-with(@href,"magnet")]/@href',
        default=NotAvailable)
    obj_seeders = CleanText(
        './/td[has-class("green") and has-class("center")]',
        default=NotAvailable) & Type(type=int)
    obj_leechers = CleanText(
        './/td[has-class("red") and has-class("center")]',
        default=NotAvailable) & Type(type=int)
    obj_description = NotLoaded
    obj_files = NotLoaded

    def obj_url(self):
        """Return the protocol-relative download link prefixed by https:."""
        href = CleanText(
            './/div[has-class("iaconbox")]//a[starts-with(@href,"//")]/@href'
        )(self)
        return 'https:%s' % href

    def obj_size(self):
        """Return the size in bytes parsed from the second cell.

        A decimal comma is accepted.
        """
        tokens = CleanText('./td[2]')(self).replace(',', '.').split()
        amount = float(tokens[0])
        unit = tokens[-1].upper()
        return get_bytes_size(amount, unit)

    obj_filename = CleanText(Regexp(
        CleanText(
            './/div[has-class("iaconbox")]//a[starts-with(@href,"//")]/@href'
        ), '.*title=(.*)'), default=NotAvailable)
class get_location(ItemElement):
    """Build an IpLocation from a list of "<Label> : <value>" items."""

    klass = IpLocation

    obj_id = CleanText('//ul/li[starts-with(.,"IP address :")]/strong')
    obj_city = CleanText(
        Regexp(CleanText('//ul/li[starts-with(.,"City :")]/text()'),
               'City : (.*)'),
        default=NotAvailable)
    obj_country = CleanText(
        Regexp(CleanText('//ul/li[starts-with(.,"Country :")]/text()'),
               'Country : (.*)'),
        default=NotAvailable)
    obj_region = CleanText(
        Regexp(
            CleanText('//ul/li[starts-with(.,"State/Province :")]/text()'),
            'State/Province : (.*)'),
        default=NotAvailable)
    # Latitude and longitude are converted to floats after cleaning.
    obj_lt = Type(
        CleanText(
            Regexp(CleanText('//ul/li[starts-with(.,"Latitude :")]/text()'),
                   'Latitude : (.*)'),
            default=NotAvailable),
        type=float)
    obj_lg = Type(
        CleanText(
            Regexp(CleanText('//ul/li[starts-with(.,"Longitude :")]/text()'),
                   'Longitude : (.*)'),
            default=NotAvailable),
        type=float)
    obj_zipcode = CleanText(
        Regexp(
            CleanText(
                '//ul/li[starts-with(.,"Zip or postal code :")]/text()'),
            'Zip or postal code : (.*)'),
        default=NotAvailable)
    obj_host = CleanText(
        Regexp(CleanText('//ul/li[starts-with(.,"Hostname :")]/text()'),
               'Hostname : (.*)'),
        default=NotAvailable)
class item(ItemElement):
    """Build a Torrent from one row of a torrent listing table."""

    klass = Torrent

    # Raw string: "\?" is a regex escape, not a string escape.
    obj_id = Regexp(CleanText('./td[3]/a/@href'),
                    r'/torrents/nfo/\?id=(.*)')
    obj_name = CleanText('./td[2]/a/@title')
    obj_seeders = CleanText('./td[8]') & Type(type=int)
    obj_leechers = CleanText('./td[9]') & Type(type=int)
    obj_description = NotLoaded
    obj_files = NotLoaded
    obj_filename = Format('%s.torrent', CleanText('./td[2]/a/@title'))
    obj_magnet = NotAvailable

    def obj_url(self):
        """Return the download URL built by the browser for this id."""
        fullid = Regexp(CleanText('./td[3]/a/@href'),
                        r'/torrents/nfo/\?id=(.*)')(self)
        return self.page.browser.download.build(id=fullid)

    def obj_size(self):
        """Return the size in bytes parsed from the sixth cell."""
        tokens = CleanText('./td[6]')(self).split()
        amount = float(tokens[0])
        unit = tokens[-1].upper()
        return get_bytes_size(amount, unit)
class get_video(ItemElement):
    """Build a YoupornVideo from a video page."""

    klass = YoupornVideo

    obj_author = CleanText('//div[has-class("submitByLink")]')
    #obj_date = Date('//div[@id="stats-date"]')
    obj_duration = NotAvailable
    obj_ext = 'mp4'
    obj_id = Env('id')
    # Capture every digit before "%": a fixed two-character group would
    # read "100%" as "00".
    obj_rating = CleanText('//div[@class="videoRatingPercentage"]'
                           ) & Regexp(pattern=r'(\d+)%') & Type(type=int)
    obj_rating_max = 100
    obj_thumbnail = NotAvailable
    obj_title = CleanText('//h1')
    obj_url = Link('//div[@id="downloadModal"]//a[1]')
class get_video(ItemElement):
    """Build a YoupornVideo from a video page."""

    klass = YoupornVideo

    # The author is what follows "By: " in the first author line.
    obj_author = CleanText('//div[@class="author-block--line"][1]'
                           ) & Regexp(pattern=r'By: (.*)')
    #obj_date = Date('//div[@id="stats-date"]')
    obj_duration = NotAvailable
    obj_ext = 'mp4'
    obj_id = Env('id')
    # Capture every digit before "%": a fixed two-character group would
    # read "100%" as "00".
    obj_rating = CleanText('//div[@class="rating-percentage"]') & Regexp(
        pattern=r'(\d+)%') & Type(type=int)
    obj_rating_max = 100
    obj_thumbnail = NotAvailable
    obj_title = CleanText('//h1')
    obj_url = Link('//ul[@class="downloadList"]/li[2]/a')
class get_event(ItemElement):
    """Build a BaseCalendarEvent from an event detail page."""

    klass = BaseCalendarEvent

    obj_summary = CleanText('//div[@id="sectionHead"]/h1')
    obj_description = CleanHTML('//div[@id="event-item"]/div[3]/p[2]')
    obj_price = CleanDecimal(Regexp(
        CleanText('//aside[@id="detail"]/ul/li[3]'),
        r'Cost /[^\d]*([\d ,.]+).', default=''), default=None)
    obj_location = Regexp(CleanText('//aside[@id="detail"]/ul/li[2]'),
                          r'Venue / (.+)')
    obj_booked_entries = Type(
        CleanText('//h1[@id="MembersFavouriteCount"]'), type=int)
    obj_status = STATUS.CONFIRMED
    obj_category = CATEGORIES.CONCERT

    # Day of the event, shared by the start and end date computations.
    _date = Date(CleanText('//aside[@id="detail"]/ul/li[1]/a[1]'))

    def obj_start_date(self):
        """Return the event day combined with the "HH:MM -" start time."""
        start_time = Time(
            Regexp(CleanText('//aside[@id="detail"]/ul/li[1]'),
                   r'(\d{2}:\d{2}) -'))(self)
        return CombineDate(self._date, start_time)(self)

    def obj_end_date(self):
        """Return the end datetime, rolled to the next day when needed.

        The end time is first placed on the event day; when that falls
        before the start, the event runs past midnight and one day is
        added.
        """
        end_time = Time(
            Regexp(CleanText('//aside[@id="detail"]/ul/li[1]'),
                   r'- (\d{2}:\d{2})'))(self)
        end_date = CombineDate(self._date, end_time)(self)
        if end_date < self.obj_start_date():
            end_date += timedelta(days=1)
        return end_date

    def obj_ticket(self):
        """Return the ticket status from the first ticket entry's class."""
        li_class = Attr('//li[@id="tickets"]//li[1]', 'class',
                        default=None)(self)
        if not li_class:
            return TICKET.NOTAVAILABLE
        if li_class == 'closed':
            return TICKET.CLOSED
        return TICKET.AVAILABLE
class item(ItemElement):
    """Build a YoupornVideo from one entry of a video listing."""

    klass = YoupornVideo

    def obj_thumbnail(self):
        """Return a Thumbnail whose url is the image ``src``."""
        thumbnail_url = Attr('./img', 'src')(self)
        thumbnail = Thumbnail(thumbnail_url)
        # The thumbnail was constructed from the URL, so its id is reused
        # as the url.
        thumbnail.url = thumbnail.id
        return thumbnail

    obj_author = NotAvailable
    obj_duration = CSS('span.duration') & CleanText() & Duration()
    obj_id = Attr('../..', 'data-video-id')
    # Capture every digit before "%": a fixed two-character group would
    # read "100%" as "00".
    obj_rating = CleanText('./span/i') & Regexp(
        pattern=r'(\d+)%') & Type(type=int)
    obj_rating_max = 100
    obj_title = CleanText('./p')
    obj_url = NotAvailable
class item(ItemElement):
    """Build a YoupornVideo from one video box of a listing."""

    klass = YoupornVideo

    def obj_thumbnail(self):
        """Return a Thumbnail whose url is the image ``data-original``."""
        thumbnail = Thumbnail(Attr('.//img', 'data-original')(self))
        # The thumbnail was constructed from the URL, so its id is reused
        # as the url.
        thumbnail.url = thumbnail.id
        return thumbnail

    obj_author = NotAvailable
    obj_duration = Duration(
        CleanText('.//div[has-class("video-duration")]'))
    obj_id = Attr('.', 'data-video-id')
    obj_rating = Type(
        Regexp(CleanText('.//span[has-class("video-box-percentage")]'),
               pattern=r'(\d+)%'),
        type=int)
    obj_rating_max = 100
    obj_title = CleanText('.//div[has-class("video-box-title")]')
def obj_nb_person(self):
    """Return the integer "yield" span value as a one-element list."""
    servings = Type(CleanText('//span[@class="yield"]'), type=int)(self)
    return [servings]
class get_video(ItemElement):
    """Build a YoupornVideo from a video page."""

    klass = YoupornVideo

    obj_author = CleanText('//div[has-class("submitByLink")]')
    #obj_date = Date('//div[@id="stats-date"]')
    obj_duration = NotAvailable
    obj_ext = 'mp4'
    obj_id = Env('id')
    obj_rating = CleanText('//div[@class="videoRatingPercentage"]') & \
        Regexp(pattern=r'(\d+)%') & Type(type=int)
    obj_rating_max = 100
    obj_thumbnail = NotAvailable
    obj_title = CleanText('//h1')

    def obj_url(self):
        """Return the video URL embedded in the page text.

        The quoted value following ``videoUrl":`` is decoded with
        ``loads``. NotAvailable is returned when the page contains no such
        entry, instead of failing on a missing match.
        """
        match = re.search('videoUrl":(".*?")', self.page.text)
        if match is None:
            return NotAvailable
        return loads(match.group(1))
def get_date(self, _time):
    """Combine the calendar cell's day with ``_time``.

    The year and month come from the ``start_date=YYYY-MM-DD`` query
    parameter of the page URL and the day from the enclosing cell's
    "day_number" element.

    Returns None when the page URL carries no such parameter.
    """
    # Raw string: "\?" and "\d" are regex escapes, not string escapes.
    m = re.match(r'.*/events\?start_date=(\d{4})-(\d{2})-\d{2}',
                 self.page.url)
    if m is None:
        return None
    day = Type(CleanText('./ancestor::td/div[@class="day_number"]'),
               type=int)(self)
    start_date = date(year=int(m.group(1)), month=int(m.group(2)), day=day)
    return datetime.combine(start_date, _time)
def obj_files(self):
    """Return the cleaned text of every "torFileName" cell."""
    cells = Type('//td[has-class("torFileName")]', type=list)(self)
    return [CleanText(cell)(self) for cell in cells]