Пример #1
0
 def iter_torrents(self):
     """Yield one Torrent per result row found in the parsed document.

     A result row is a ``tr`` element whose class is exactly ``hlRow`` and
     which has an ``onmouseout`` attribute; every other ``tr`` is skipped.
     """
     for row in self.document.getiterator('tr'):
         if row.attrib.get('class', '') != 'hlRow':
             continue
         # the first tr sometimes has the hlRow class too; the onmouseout
         # attribute is what distinguishes the real result rows
         if 'onmouseout' not in row.attrib:
             continue

         cells = row.getchildren()
         links = cells[2].getchildren()

         # title = text of the second link plus text/tail of its children
         anchor = links[1]
         pieces = [anchor.text or '']
         for child in anchor.getchildren():
             if child.text:
                 pieces.append(child.text)
             if child.tail:
                 pieces.append(child.tail)
         title = ''.join(pieces)

         # the id is the third '/'-separated part of the first link's href
         idt = links[0].attrib.get('href', '').split('/')[2]

         # last two characters are the unit; the character right before
         # them is dropped and the rest is the number
         size_text = cells[3].text
         unit = size_text[-2:]
         amount = float(size_text[:-3])

         seeders_text = cells[4].text
         leechers_text = cells[5].text

         torrent = Torrent(idt, title)
         torrent.url = 'https://isohunt.com/download/%s/mon_joli_torrent.torrent' % idt
         torrent.size = get_bytes_size(amount, unit)
         torrent.seeders = int(seeders_text)
         torrent.leechers = int(leechers_text)
         yield torrent
Пример #2
0
 def obj_size(self):
     """Convert the text of the first ``infosficher`` span with get_bytes_size.

     The text is expected to be "<number> ... <unit>"; a decimal comma is
     accepted, and the unit is upper-cased with every 'O' replaced by 'B'.
     """
     text = CleanText('(//div[@id="infosficher"]/span)[1]')(self)
     # decimal comma -> decimal point so that float() accepts the number
     tokens = text.replace(',', '.').strip().split()
     number = float(tokens[0])
     unit = tokens[-1].upper().replace('O', 'B')
     return get_bytes_size(number, unit)
Пример #3
0
 def iter_torrents(self):
     """Generate a Torrent for each result row of the parsed document.

     Only ``tr`` elements with class ``hlRow`` that also carry an
     ``onmouseout`` attribute are treated as results.
     """
     for row in self.document.getiterator('tr'):
         is_highlighted = row.attrib.get('class', '') == 'hlRow'
         # the first tr may be highlighted as well; only genuine result rows
         # have an onmouseout attribute
         if not (is_highlighted and 'onmouseout' in row.attrib):
             continue

         columns = row.getchildren()
         name_links = columns[2].getchildren()
         name_link = name_links[1]

         # start from the link's own text, then add each child's text and tail
         title = name_link.text or ''
         for part in name_link.getchildren():
             title += part.text or ''
             title += part.tail or ''

         href = name_links[0].attrib.get('href', '')
         idt = href.split('/')[2]

         raw_size = columns[3].text
         # two trailing characters hold the unit; one character before them
         # is skipped and the remainder is parsed as the number
         number, unit = float(raw_size[:-3]), raw_size[-2:]

         raw_seeders = columns[4].text
         raw_leechers = columns[5].text

         torrent = Torrent(idt, title)
         torrent.url = 'https://isohunt.com/download/%s/mon_joli_torrent.torrent' % idt
         torrent.size = get_bytes_size(number, unit)
         torrent.seeders = int(raw_seeders)
         torrent.leechers = int(raw_leechers)
         yield torrent
Пример #4
0
 def obj_size(self):
     """Convert the text of the second ``td`` with get_bytes_size.

     The first whitespace-separated token is the number (decimal comma
     allowed) and the last token, upper-cased, is the unit.
     """
     tokens = CleanText('./td[2]')(self).replace(',', '.').split()
     number = float(tokens[0])
     unit = tokens[-1].upper()
     return get_bytes_size(number, unit)
Пример #5
0
 def obj_size(self):
     """Convert the size found in the files heading with get_bytes_size.

     Only the part after the last comma of the ``h5`` text is used, with
     closing parentheses removed: its letters form the unit and everything
     that is not a letter is parsed as the number.
     """
     heading = CleanText('//div[has-class("files")]/../h5')(self)
     fragment = heading.rsplit(',', 1)[-1].replace(')', '')
     number = float(re.sub(r'[A-Za-z]', '', fragment))
     # what is left after dropping digits, dots and spaces is the unit
     unit = re.sub(r'[.0-9 ]', '', fragment).upper()
     return get_bytes_size(number, unit)
Пример #6
0
 def iter_torrents(self):
     """Yield a Torrent for every ``div.list_tor`` block of the document.

     Seeders and leechers are read from the ``b.green`` / ``b.red``
     elements of the right-hand block and fall back to 0 when the text is
     missing or not an integer. The size is parsed from the first span of
     the first paragraph of that block ("...: <number> <unit>"). The
     torrent id and filename are the link href stripped of slashes and cut
     at ``.html``. url, magnet, description and files are left NotLoaded.
     """
     select = self.parser.select
     for div in select(self.document.getroot(), 'div.list_tor'):
         right_div = select(div, 'div.list_tor_right', 1)

         # .text is None for an empty element and int(None) raises
         # TypeError, so both error types mean "no usable counter"
         try:
             seeders = int(select(right_div, 'b.green', 1).text)
         except (TypeError, ValueError):
             seeders = 0
         try:
             leechers = int(select(right_div, 'b.red', 1).text)
         except (TypeError, ValueError):
             leechers = 0

         sizespan = select(select(right_div, 'p')[0], 'span')[0]
         size_text = sizespan.text_content()
         # number = first token after the colon, unit = last token
         nsize = float(size_text.split(':')[1].split()[0])
         usize = size_text.split()[-1].upper()

         a = select(div, 'a.list_tor_title', 1)
         href = a.attrib.get('href', '')
         torrent_id = unicode(href.strip('/').split('.html')[0])

         torrent = Torrent(torrent_id, unicode(a.text_content()))
         torrent.url = NotLoaded
         torrent.filename = torrent_id
         torrent.magnet = NotLoaded
         torrent.size = get_bytes_size(nsize, usize)
         torrent.seeders = seeders
         torrent.leechers = leechers
         torrent.description = NotLoaded
         torrent.files = NotLoaded
         yield torrent
Пример #7
0
 def obj_size(self):
     """Convert the "<number> <unit>" text of the second ``td``.

     A decimal comma is turned into a point before parsing; the unit is
     the last token, upper-cased, and both are passed to get_bytes_size.
     """
     cell_text = CleanText('./td[2]')(self)
     parts = cell_text.replace(',', '.').split()
     return get_bytes_size(float(parts[0]), parts[-1].upper())
Пример #8
0
 def obj_size(self):
     """Convert the size at the end of the files heading with get_bytes_size.

     The text after the last comma of the ``h5`` element (closing
     parentheses removed) is split into a number (all non-letters) and a
     unit (what remains without digits, dots and spaces, upper-cased).
     """
     title_text = CleanText('//div[has-class("files")]/../h5')(self)
     size_part = title_text.split(',')[-1]
     size_part = size_part.replace(')', '')
     digits_only = re.sub(r'[A-Za-z]', '', size_part)
     letters_only = re.sub(r'[.0-9 ]', '', size_part)
     return get_bytes_size(float(digits_only), letters_only.upper())
Пример #9
0
 def obj_size(self):
     """Convert the text of the ``poid`` div with get_bytes_size.

     First token: the number (decimal comma accepted). Last token: the
     unit, upper-cased and with every 'O' replaced by 'B'.
     """
     text = CleanText('./div[has-class("poid")]')(self)
     # decimal comma -> decimal point so that float() accepts the number
     tokens = text.replace(',', '.').strip().split()
     number = float(tokens[0])
     unit = tokens[-1].upper().replace('O', 'B')
     return get_bytes_size(number, unit)
Пример #10
0
 def obj_size(self):
     """Convert the "<value> <unit>" text of the third ``dd`` span.

     Returns NaN when the span text is empty. The text must consist of
     exactly two whitespace-separated tokens, otherwise the unpacking
     raises ValueError.
     """
     text = CleanText('./dd/span[3]')(self)
     if not text:
         return float("NaN")
     amount, unit_name = text.split()
     return get_bytes_size(float(amount), unit_name)
Пример #11
0
    def get_torrent(self, id):
        """Build the Torrent with the given id from the parsed detail page.

        Description, seeders and leechers come from the ``desc``,
        ``seedBlock`` and ``leechBlock`` divs; the title from
        ``h1.torrentName span``; the url from the last link whose title
        contains both 'Download' and 'torrent file'; the size from the last
        ``folder``/``folderopen`` span with more than two children; the
        files from the ``torFileName`` cells.
        """
        seed = leech = 0
        description = NotAvailable
        url = NotAvailable

        for div in self.document.getiterator('div'):
            css_class = div.attrib.get('class', '')
            if div.attrib.get('id', '') == 'desc':
                try:
                    description = div.text_content().strip()
                except UnicodeDecodeError:
                    description = 'Description with invalid UTF-8.'
            elif css_class == 'seedBlock':
                # the count lives in the second child; no text means zero
                count = div.getchildren()[1].text
                seed = 0 if count is None else int(count)
            elif css_class == 'leechBlock':
                count = div.getchildren()[1].text
                leech = 0 if count is None else int(count)

        title = self.parser.select(self.document.getroot(),
                                   'h1.torrentName span', 1).text

        for a in self.document.getiterator('a'):
            link_title = a.attrib.get('title', '')
            if 'Download' in link_title and 'torrent file' in link_title:
                url = a.attrib.get('href', '')

        size = 0
        u = ''
        for span in self.document.getiterator('span'):
            if span.attrib.get('class', '') not in ('folder', 'folderopen'):
                continue
            # other spans can share these classes; the number of children is
            # what identifies the one holding the size
            children = span.getchildren()
            if len(children) <= 2:
                continue
            raw_size = children[1].tail
            u = children[2].text
            size = float(raw_size.split(': ')[1].replace(',', '.'))

        files = [td.text for td in self.document.getiterator('td')
                 if td.attrib.get('class', '') == 'torFileName']

        torrent = Torrent(id, title)
        torrent.url = url
        if torrent.url:
            # the filename is the 'title' query parameter of the url, if any
            query = urlsplit(url).query
            torrent.filename = parse_qs(query).get('title', [None])[0]
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        torrent.description = description
        torrent.files = files
        return torrent
Пример #12
0
 def obj_size(self):
     """Convert the "Taille totale" table cell with get_bytes_size.

     The first whitespace-separated token is the number and the last one,
     upper-cased, is the unit.
     """
     cell = CleanText(
         '//div[@class="accordion"]//tr[th="Taille totale"]/td')(self)
     tokens = cell.split()
     number = float(tokens[0])
     return get_bytes_size(number, tokens[-1].upper())
Пример #13
0
    def get_max_sizes(self):
        """Return ``(max_size, async_size)`` as read from the page.

        ``max_size`` comes from the first ``.config`` element whose text
        contains "File size is limited to <number> <unit>", converted with
        get_bytes_size and cast to int; it is None when nothing matches.
        ``async_size`` is the trailing integer argument of the first
        ``upload (..., <number>)`` call found in a script element, and
        defaults to 16 * 1024 * 1024.
        """
        for node in self.doc.getroot().cssselect('.config'):
            limit = node.text and re.search(
                r'File size is limited to (\d+) ([A-Za-z]+)', node.text)
            if limit:
                number, unit = limit.groups()
                max_size = int(get_bytes_size(int(number), unit))
                break
        else:
            # no element announced a limit
            max_size = None

        for script in self.doc.xpath('//script'):
            call = script.text and re.search(r'upload \(.*, (\d+)\)',
                                             script.text)
            if call:
                async_size = int(call.group(1))
                break
        else:
            async_size = 16 * 1024 * 1024

        self.logger.debug('max size = %s, max part size = %s', max_size,
                          async_size)

        return max_size, async_size
Пример #14
0
    def iter_torrents(self):
        """Yield a Torrent for every result row of the parsed document.

        A result row is a ``tr`` whose class is 'odd' or ' even' and which
        has an ``id`` attribute. The url is the last link of the first cell
        whose href contains '.torrent'; it is an empty string when the row
        has no such link. The filename is the 'title' query parameter of
        that url, or None when absent.
        """
        for tr in self.document.getiterator('tr'):
            # NOTE: ' even' (with its leading space) is the literal value
            # the original code compares against
            if tr.attrib.get('class', '') not in ('odd', ' even'):
                continue
            if 'id' not in tr.attrib:
                continue

            cells = tr.getchildren()
            link = cells[0].getchildren()[1].getchildren()[1]

            # title = link text followed by the text of each child element
            title = link.text or ''
            for red in link.getchildren():
                title += red.text_content()
            idt = link.attrib.get('href', '').replace('/', '') \
                .replace('.html', '')

            # Reset for every row: otherwise a row without a .torrent link
            # would reuse the previous row's url, or raise NameError when it
            # is the first row.
            url = ''
            for a in cells[0].getiterator('a'):
                if '.torrent' in a.attrib.get('href', ''):
                    url = a.attrib['href']

            # number in the cell text (decimal comma allowed), unit in the
            # cell's first child
            size = float(cells[1].text.replace(',', '.'))
            u = cells[1].getchildren()[0].text
            seed = cells[4].text
            leech = cells[5].text

            torrent = Torrent(idt, title)
            torrent.url = url
            torrent.filename = parse_qs(urlsplit(url).query).get('title', [None])[0]
            torrent.size = get_bytes_size(size, u)
            torrent.seeders = int(seed)
            torrent.leechers = int(leech)
            yield torrent
Пример #15
0
 def obj_size(self):
     """Convert the fourth-from-last ``td`` text with get_bytes_size.

     The number is the text without ASCII letters. The unit is the text
     without digits and dots, stripped, with lowercase 'o' replaced by 'B'
     and then upper-cased.
     """
     cell = CleanText('./td[last()-3]')(self)
     number = float(re.sub(r'[A-Za-z]', '', cell))
     unit = re.sub(r'[.0-9]', '', cell).strip()
     # the 'o' -> 'B' replacement happens before upper-casing, so only a
     # lowercase 'o' is affected
     unit = unit.replace('o', 'B').upper()
     return get_bytes_size(number, unit)
Пример #16
0
    def get_torrent(self, id):
        """Build a Torrent from the rows of ``table.torrent_info_tbl``.

        Row 2 holds the download link, row 3 the name, row 5 the size
        ("<value> <unit>"), row 6 the age ("<value> <unit> <word>", where
        the unit must be a valid ``timedelta`` keyword), and rows 15 onwards
        the files. When the download link is a magnet URI the torrent id is
        the last ':'-separated part of its ``xt`` parameter; otherwise the
        magnet is NotAvailable and the ``id`` argument is used as the id.
        """
        trs = self.document.getroot().cssselect('table.torrent_info_tbl tr')

        # Defaults for a non-magnet download link; without them the code
        # below failed with UnboundLocalError.
        magnet = NotAvailable
        ih = id
        download = trs[2].cssselect('td a')[0]
        if download.attrib['href'].startswith('magnet:'):
            magnet = unicode(download.attrib['href'])

            query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
            btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
            ih = btih.split(':')[-1]

        name = unicode(trs[3].cssselect('td')[1].text)

        value, unit = trs[5].cssselect('td')[1].text.split()

        # the age is relative ("N <unit> ago"-like); turn it into a date
        valueago, valueunit, _ = trs[6].cssselect('td')[1].text.split()
        delta = timedelta(**{valueunit: float(valueago)})
        date = datetime.now() - delta

        files = [unicode(tr.cssselect('td')[1].text) for tr in trs[15:]]

        torrent = Torrent(ih, name)
        torrent.url = unicode(self.url)
        torrent.size = get_bytes_size(float(value), unit)
        torrent.magnet = magnet
        torrent.seeders = NotAvailable
        torrent.leechers = NotAvailable
        torrent.description = NotAvailable
        torrent.files = files
        torrent.filename = NotAvailable
        torrent.date = date

        return torrent
Пример #17
0
 def obj_size(self):
     """Convert the first ``infosficher`` span text with get_bytes_size.

     The number is the first token (decimal comma accepted); the unit is
     the last token, upper-cased, with every 'O' replaced by 'B'.
     """
     span_text = CleanText('(//div[@id="infosficher"]/span)[1]')(self)
     parts = span_text.replace(',', '.').strip().split()
     value = float(parts[0])
     unit_name = parts[-1].upper().replace('O', 'B')
     return get_bytes_size(value, unit_name)
Пример #18
0
 def obj_size(self):
     """Convert the ``poid`` div text ("<number> ... <unit>").

     A decimal comma is accepted in the number; the unit is upper-cased
     and every 'O' in it becomes 'B' before calling get_bytes_size.
     """
     div_text = CleanText('./div[has-class("poid")]')(self)
     parts = div_text.replace(',', '.').strip().split()
     value = float(parts[0])
     unit_name = parts[-1].upper().replace('O', 'B')
     return get_bytes_size(value, unit_name)
Пример #19
0
    def get_torrent(self, id):
        """Assemble the Torrent with the given id from the detail page.

        Reads the description (``desc`` div), the seeders/leechers
        (``seedBlock`` / ``leechBlock`` divs), the title
        (``h1.torrentName span``), the download url (last link whose title
        mentions both 'Download' and 'torrent file'), the size (last
        ``folder``/``folderopen`` span with more than two children) and the
        file names (``torFileName`` cells).
        """
        seed, leech = 0, 0
        description = NotAvailable
        url = NotAvailable

        for block in self.document.getiterator('div'):
            block_class = block.attrib.get('class', '')
            if block.attrib.get('id', '') == 'desc':
                try:
                    description = block.text_content().strip()
                except UnicodeDecodeError:
                    description = 'Description with invalid UTF-8.'
            elif block_class == 'seedBlock':
                value = block.getchildren()[1].text
                # an empty counter element counts as zero
                seed = int(value) if value is not None else 0
            elif block_class == 'leechBlock':
                value = block.getchildren()[1].text
                leech = int(value) if value is not None else 0

        title_node = self.parser.select(self.document.getroot(),
                                        'h1.torrentName span', 1)
        title = title_node.text

        for anchor in self.document.getiterator('a'):
            hint = anchor.attrib.get('title', '')
            if 'Download' in hint and 'torrent file' in hint:
                url = anchor.attrib.get('href', '')

        size = 0
        u = ''
        for span in self.document.getiterator('span'):
            kids = span.getchildren()
            # several spans may use these classes; only the one with more
            # than two children carries the size
            if span.attrib.get('class', '') in ('folder', 'folderopen') \
                    and len(kids) > 2:
                size_label = kids[1].tail
                u = kids[2].text
                size = float(size_label.split(': ')[1].replace(',', '.'))

        files = []
        for cell in self.document.getiterator('td'):
            if cell.attrib.get('class', '') == 'torFileName':
                files.append(cell.text)

        torrent = Torrent(id, title)
        torrent.url = url
        if torrent.url:
            # filename = 'title' query parameter of the url, None if absent
            params = parse_qs(urlsplit(url).query)
            torrent.filename = params.get('title', [None])[0]
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        torrent.description = description
        torrent.files = files
        return torrent
Пример #20
0
 def obj_size(self):
     """Convert the size shown in the folder span with get_bytes_size.

     The size is the text between the last ': ' and the following ')'.
     Its first token is the number (decimal comma accepted) and its last
     token, upper-cased, is the unit.
     """
     label = CleanText('//span[has-class("folder") or has-class("folderopen")]')(self)
     inner = label.rsplit(': ', 1)[-1].split(')', 1)[0].strip()
     tokens = inner.replace(',', '.').split()
     number = float(tokens[0])
     unit = tokens[-1].upper()
     return get_bytes_size(number, unit)
Пример #21
0
    def get_torrent(self, id):
        """Build the Torrent with the given id from the parsed detail page.

        The title is the tail of the last link whose title contains
        'Search more torrents of'. Seeders come from the green span with a
        ``ShowTip`` handler, leechers from the matching ``dirs ydsf`` tip
        div (both default to -1). The size is read from the
        ``torrent_details`` div; when that div is missing the size is left
        unset instead of failing. Description and files come from the
        styled ``p`` elements and the ``fileRows`` cells.
        """
        title = ''
        url = 'https://isohunt.com/download/%s/%s.torrent' % (id, id)
        for a in self.document.getiterator('a'):
            if 'Search more torrents of' in a.attrib.get('title', ''):
                title = a.tail

        seed = -1
        leech = -1
        tip_id = "none"
        for span in self.document.getiterator('span'):
            onmouseover = span.attrib.get('onmouseover', '')
            if span.attrib.get('style', '') == 'color:green;' and 'ShowTip' in onmouseover:
                seed = span.tail.split(' ')[1]
                tip_id = onmouseover.split("'")[1]

        # None marks "size not found"; previously size/u were unbound in that
        # case and building the torrent raised UnboundLocalError
        size = None
        u = ''
        for div in self.document.getiterator('div'):
            # find the tip div that belongs to the span found above
            if div.attrib.get('class', '') == 'dirs ydsf' and tip_id in div.attrib.get('id', ''):
                leech = div.getchildren()[0].getchildren()[1].tail.split(' ')[2]
            # the <b> holding the size has nothing to identify it, so it is
            # reached by position from the torrent_details div
            elif div.attrib.get('id', '') == 'torrent_details':
                size_text = div.getchildren()[6].getchildren()[0].getchildren()[0].text
                u = size_text[-2:]
                size = float(size_text[:-3])

        # files and description (uploader's comment)
        description = 'No description'
        files = []
        count_p_found = 0
        for p in self.document.getiterator('p'):
            if p.attrib.get('style', '') != 'line-height:1.2em;margin-top:1.8em':
                continue
            count_p_found += 1
            children = p.getchildren()
            if count_p_found == 1:
                if children[1].tail is not None:
                    description = children[1].tail
            if count_p_found == 2:
                if children[0].text == 'Directory:':
                    files.append(children[0].tail.strip() + '/')
                else:
                    files.append(children[0].tail.strip())

        for td in self.document.getiterator('td'):
            if td.attrib.get('class', '') == 'fileRows':
                # text/tail are None for empty nodes; treat them as ''
                filename = td.text or ''
                for slash in td.getchildren():
                    filename += '/'
                    filename += slash.tail or ''
                files.append(filename)

        # TODO marker kept from the original code (nothing follows it there)

        torrent = Torrent(id, title)
        torrent.url = url
        if size is not None:
            torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        torrent.description = description
        torrent.files = files
        return torrent
Пример #22
0
 def obj_size(self):
     """Convert the "Taille totale" cell of the informations table.

     The number is the cell text without ASCII letters. The unit is the
     text without digits and dots, stripped, with lowercase 'o' replaced
     by 'B' and then upper-cased. Both are passed to get_bytes_size.
     """
     cell = CleanText(
         '//table[has-class("informations")]//td[text()="Taille totale"]/following-sibling::td'
     )(self)
     number = float(re.sub(r'[A-Za-z]', '', cell))
     unit = re.sub(r'[.0-9]', '', cell).strip()
     # replacement is done before upper-casing: only lowercase 'o' changes
     unit = unit.replace('o', 'B').upper()
     return get_bytes_size(number, unit)
Пример #23
0
 def obj_size(self):
     """Convert the size displayed in the folder span with get_bytes_size.

     Takes what follows the last ': ' up to the first ')', accepts a
     decimal comma, and splits it into a number (first token) and an
     upper-cased unit (last token).
     """
     span_text = CleanText(
         '//span[has-class("folder") or has-class("folderopen")]')(self)
     after_colon = span_text.split(': ')[-1]
     size_text = after_colon.partition(')')[0].strip().replace(',', '.')
     parts = size_text.split()
     value = float(parts[0])
     return get_bytes_size(value, parts[-1].upper())
Пример #24
0
 def obj_size(self):
     """Convert the "Size: ..." text of the right-hand block.

     The part captured after 'Size: ' is split into a number (everything
     that is not an ASCII letter) and a unit (what remains once digits,
     dots and spaces are removed, upper-cased), then given to
     get_bytes_size.
     """
     span_text = CleanText(
         './/div[has-class("list_tor_right")]/p[1]/span[1]')
     captured = Regexp(span_text, 'Size: (.*)$', '\\1')(self)
     number = float(re.sub(r'[A-Za-z]', '', captured))
     unit = re.sub(r'[.0-9 ]', '', captured).upper()
     return get_bytes_size(number, unit)
Пример #25
0
    def iter_torrents(self):
        """Yield a Torrent for every result row of the parsed document.

        A result row is a ``tr`` whose class is 'odd' or ' even' and which
        has an ``id`` attribute. The magnet and url come from the links of
        ``div.iaconbox`` and stay NotAvailable when no matching link exists.
        The filename is the 'title' query parameter of the url, or
        NotAvailable when there is no url or no such parameter.
        Description and files are left NotLoaded.
        """
        for tr in self.document.getiterator('tr'):
            # NOTE: ' even' (with its leading space) is the literal value
            # the original code compares against
            if tr.attrib.get('class', '') not in ('odd', ' even'):
                continue
            if 'id' not in tr.attrib:
                continue

            cells = tr.getchildren()
            link = cells[0].getchildren()[1].getchildren()[1]

            # title = link text followed by the text of each child element
            title = unicode(link.text) if link.text else u''
            for red in link.getchildren():
                title += red.text_content()
            idt = link.attrib.get('href', '').replace('/', '') \
                .replace('.html', '')

            # look for url
            magnet = NotAvailable
            url = NotAvailable
            for a in self.parser.select(tr, 'div.iaconbox a'):
                href = a.attrib.get('href', '')
                if href.startswith('magnet'):
                    magnet = unicode(href)
                elif href.startswith('http'):
                    url = unicode(href)
                elif href.startswith('//'):
                    # protocol-relative link
                    url = u'https:%s' % href

            # Only parse a real url, and only convert a real title:
            # unicode(None) would store the literal string u'None'.
            filename = NotAvailable
            if url is not NotAvailable:
                titles = parse_qs(urlsplit(url).query).get('title')
                if titles:
                    filename = unicode(titles[0])

            # number in the cell text (decimal comma allowed), unit in the
            # cell's first child
            size = float(cells[1].text.replace(',', '.'))
            u = cells[1].getchildren()[0].text
            seed = cells[4].text
            leech = cells[5].text

            torrent = Torrent(idt, title)
            torrent.url = url
            torrent.magnet = magnet
            torrent.description = NotLoaded
            torrent.files = NotLoaded
            torrent.filename = filename
            torrent.size = get_bytes_size(size, u)
            torrent.seeders = int(seed)
            torrent.leechers = int(leech)
            yield torrent
Пример #26
0
    def get_torrent(self):
        """Build a Torrent from the parsed detail page.

        The id (also used as filename) is the last path component of the
        current browser url, cut at '.html'. Title, seeders/leechers,
        download url and magnet come from ``div#middle_content``; the size
        is the text between parentheses in the ``h5`` heading of the tabs
        block, after its comma; the description comes from
        ``div#descriptionContent`` when present.
        """
        select = self.parser.select
        root = self.document.getroot()

        torrent_id = unicode(self.browser.geturl().split('.html')[0].split('/')[-1])

        content = select(root, 'div#middle_content', 1)
        title = u'%s' % select(root, 'div#middle_content > h1', 1).text

        # seeders and leechers are the first two <b> of the sl_block
        seed = 0
        leech = 0
        counters = select(content, 'div.sl_block b')
        if len(counters) >= 2:
            seed = counters[0].text
            leech = counters[1].text

        download_href = select(content, 'a.down', 1).attrib.get('href', '')
        url = u'http://%s%s' % (self.browser.DOMAIN, download_href)
        magnet = unicode(select(content, 'a.magnet', 1).attrib.get('href', ''))

        tabs = select(content, 'div#tabs', 1)
        file_blocks = select(tabs, 'div.body > div.doubleblock > div.leftblock')
        files = []
        if len(file_blocks) > 0:
            size_text = select(file_blocks, 'h5', 1).text
            for b in select(file_blocks, 'b'):
                # each file entry is the full text of the <b>'s parent
                files.append(b.getparent().text_content())
        else:
            size_text = select(tabs, 'h5', 1).text_content()

        # keep what is between the parentheses, then the part after the comma
        size_text = size_text.split('(')[1].split(')')[0].strip()
        after_comma = size_text.split(',')[1]
        size = float(after_comma.strip(string.letters))
        # the unit is what remains once digits, dots and spaces are removed
        u = after_comma.strip().translate(None, string.digits).strip('.').strip().upper()

        description = NotAvailable
        description_nodes = select(tabs, 'div#descriptionContent')
        if len(description_nodes) > 0:
            description = unicode(description_nodes[0].text_content())

        torrent = Torrent(torrent_id, title)
        torrent.url = url
        torrent.filename = torrent_id
        torrent.magnet = magnet
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        torrent.description = description
        torrent.files = files
        return torrent
Пример #27
0
    def iter_torrents(self):
        """Yield a Torrent for every result row of the parsed document.

        Rows are ``tr`` elements with class 'odd' or ' even' that have an
        ``id`` attribute. Magnet and url are taken from the links inside
        ``div.iaconbox`` (NotAvailable when absent). The filename is the
        url's 'title' query parameter, or NotAvailable when the url or the
        parameter is missing. Description and files are left NotLoaded.
        """
        for tr in self.document.getiterator('tr'):
            row_class = tr.attrib.get('class', '')
            # NOTE: ' even' (with its leading space) is the literal value
            # the original code compares against
            if row_class != 'odd' and row_class != ' even':
                continue
            if 'id' not in tr.attrib:
                continue

            cells = tr.getchildren()
            link = cells[0].getchildren()[1].getchildren()[1]

            # title = link text followed by the text of each child element
            title = unicode(link.text) if link.text else u''
            for red in link.getchildren():
                title += red.text_content()
            idt = link.attrib.get('href', '').replace('/', '') \
                .replace('.html', '')

            # look for url
            magnet = NotAvailable
            url = NotAvailable
            for a in self.parser.select(tr, 'div.iaconbox a'):
                href = a.attrib.get('href', '')
                if href.startswith('magnet'):
                    magnet = unicode(href)
                elif href.startswith('http'):
                    url = unicode(href)
                elif href.startswith('//'):
                    # protocol-relative link
                    url = u'https:%s' % href

            # Do not call urlsplit on NotAvailable, and do not store the
            # literal u'None' that unicode(None) produces when the url has
            # no 'title' parameter.
            filename = NotAvailable
            if url is not NotAvailable:
                titles = parse_qs(urlsplit(url).query).get('title')
                if titles:
                    filename = unicode(titles[0])

            # number in the cell text (decimal comma allowed), unit in the
            # cell's first child
            size = float(cells[1].text.replace(',', '.'))
            u = cells[1].getchildren()[0].text
            seed = cells[4].text
            leech = cells[5].text

            torrent = Torrent(idt, title)
            torrent.url = url
            torrent.magnet = magnet
            torrent.description = NotLoaded
            torrent.files = NotLoaded
            torrent.filename = filename
            torrent.size = get_bytes_size(size, u)
            torrent.seeders = int(seed)
            torrent.leechers = int(leech)
            yield torrent
Пример #28
0
    def iter_torrents(self):
        """Yield a Torrent for each search result of the parsed document.

        Results are consecutive pairs of ``table.torrent_name_tbl``
        elements: the first holds the title link, the second the attribute
        cells (magnet link, size, age). The torrent id is the last
        ':'-separated part of the magnet's ``xt`` parameter and the url is
        the btdigg search url for that id. The age text is
        "<value> <unit> <word>", where the unit must be a valid
        ``timedelta`` keyword. Nothing is yielded when selecting the tables
        raises BrokenPageError.
        """
        try:
            tables = self.document.getroot().cssselect('table.torrent_name_tbl')
        except BrokenPageError:
            return
        for i in range(0, len(tables), 2):
            # Title (the link's href is not read: the url is always rebuilt
            # from the info hash below)
            name = unicode(tables[i].cssselect('td.torrent_name a')[0].text)

            # Other elems
            elems = tables[i + 1].cssselect('td')

            magnet = unicode(elems[0].cssselect('a')[0].attrib['href'])

            query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
            btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
            ih = btih.split(':')[-1]

            value, unit = elems[2].cssselect('span.attr_val')[0].text.split()

            # relative age -> absolute date
            valueago, valueunit, _ = elems[5].cssselect(
                'span.attr_val')[0].text.split()
            date = datetime.now() - timedelta(**{valueunit: float(valueago)})

            torrent = Torrent(ih, name)
            torrent.url = unicode('https://btdigg.org/search?info_hash=%s' % ih)
            torrent.size = get_bytes_size(float(value), unit)
            torrent.magnet = magnet
            torrent.seeders = NotAvailable
            torrent.leechers = NotAvailable
            torrent.description = NotAvailable
            torrent.files = NotAvailable
            torrent.date = date
            yield torrent
Пример #29
0
 def iter_torrents(self):
     """Yield a Torrent for every result row of the parsed document.

     A result row is a ``tr`` whose class is exactly ``hlRow`` and which
     has an ``onmouseout`` attribute. Seeders and leechers stay
     NotAvailable when their cell has no text; description and files are
     left NotLoaded.
     """
     for tr in self.document.getiterator('tr'):
         if tr.attrib.get('class', '') != 'hlRow':
             continue
         # sometimes the first tr also has the hlRow class; only real
         # result rows carry an onmouseout attribute
         if 'onmouseout' not in tr.attrib:
             continue

         cells = tr.getchildren()
         links = cells[2].getchildren()
         atitle = links[1]

         # Test the raw text before converting: unicode(None) is u'None',
         # which made the empty-title fallback unreachable and prefixed
         # such titles with "None".
         title = unicode(atitle.text) if atitle.text else u''
         for bold in atitle.getchildren():
             if bold.text:
                 title += bold.text
             if bold.tail:
                 title += bold.tail

         # the id is the third '/'-separated part of the first link's href
         idt = links[0].attrib.get('href', '').split('/')[2]

         # last two characters are the unit; the character right before
         # them is dropped and the rest is the number
         size_text = cells[3].text
         u = size_text[-2:]
         size = float(size_text[:-3])

         seed = NotAvailable
         leech = NotAvailable
         sseed = cells[4].text
         sleech = cells[5].text
         if sseed:
             seed = int(sseed)
         if sleech:
             leech = int(sleech)

         torrent = Torrent(idt, title)
         torrent.url = u'https://isohunt.com/download/%s/mon_joli_torrent.torrent' % idt
         torrent.size = get_bytes_size(size, u)
         torrent.seeders = seed
         torrent.leechers = leech
         torrent.description = NotLoaded
         torrent.files = NotLoaded
         yield torrent
Пример #30
0
    def get_max_sizes(self):
        """Return the ``(max_size, async_size)`` pair read from the page.

        ``max_size`` is taken from the first ``p`` element with class
        ``config`` whose text contains
        "File size is limited to <number> <unit>"; the pair is converted
        with get_bytes_size and cast to int. It is None when no element
        matches. ``async_size`` is the trailing integer argument of the
        first ``upload (..., <number>)`` call found in a script element,
        with 16 * 1024 * 1024 as default.
        """
        limit_re = r'File size is limited to (\d+) ([A-Za-z]+)'
        upload_re = r'upload \(.*, (\d+)\)'

        for paragraph in self.doc.getroot().xpath('//p[has-class("config")]'):
            found = paragraph.text and re.search(limit_re, paragraph.text)
            if found:
                amount, unit = found.groups()
                max_size = int(get_bytes_size(int(amount), unit))
                break
        else:
            # no paragraph announced a limit
            max_size = None

        for script in self.doc.xpath('//script'):
            found = script.text and re.search(upload_re, script.text)
            if found:
                async_size = int(found.group(1))
                break
        else:
            async_size = 16 * 1024 * 1024

        self.logger.debug('max size = %s, max part size = %s', max_size, async_size)

        return max_size, async_size
Пример #31
0
    def iter_torrents(self):
        """Yield a Torrent for each search result of the parsed document.

        The ``table.torrent_name_tbl`` elements are read two by two: the
        first of each pair holds the title link, the second the attribute
        cells (magnet link, size, age). The torrent id is the last
        ':'-separated part of the magnet's ``xt`` parameter and the url is
        the btdigg search url built from it. The age text is
        "<value> <unit> <word>", where the unit must be a valid
        ``timedelta`` keyword. Nothing is yielded when selecting the tables
        raises BrokenPageError.
        """
        try:
            table = self.document.getroot().cssselect('table.torrent_name_tbl')
        except BrokenPageError:
            return
        for i in range(0, len(table), 2):
            # Title (only the text is needed: the url is always rebuilt
            # from the info hash, so the link's href is not read)
            title = table[i].cssselect('td.torrent_name a')[0]
            name = unicode(title.text)

            # Other elems
            elems = table[i + 1].cssselect('td')

            magnet = unicode(elems[0].cssselect('a')[0].attrib['href'])

            query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
            btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
            ih = btih.split(':')[-1]

            value, unit = elems[2].cssselect('span.attr_val')[0].text.split()

            # relative age -> absolute date
            valueago, valueunit, _ = elems[5].cssselect('span.attr_val')[0].text.split()
            delta = timedelta(**{valueunit: float(valueago)})
            date = datetime.now() - delta

            url = unicode('https://btdigg.org/search?info_hash=%s' % ih)

            torrent = Torrent(ih, name)
            torrent.url = url
            torrent.size = get_bytes_size(float(value), unit)
            torrent.magnet = magnet
            torrent.seeders = NotAvailable
            torrent.leechers = NotAvailable
            torrent.description = NotAvailable
            torrent.files = NotAvailable
            torrent.date = date
            yield torrent
Пример #32
0
    def get_torrent(self, id):
        """Build a Torrent from the rows of ``table.torrent_info_tbl``.

        Row 2 holds the download link, row 3 the name, row 5 the size
        ("<value> <unit>"), row 6 the age ("<value> <unit> <word>", where
        the unit must be a valid ``timedelta`` keyword) and rows 15 onwards
        the files. For a magnet download link the torrent id is the last
        ':'-separated part of its ``xt`` parameter; for any other link the
        magnet is NotAvailable and the ``id`` argument is used instead.
        """
        trs = self.document.getroot().cssselect('table.torrent_info_tbl tr')

        # magnet
        # Fallbacks for a non-magnet link: these names used to be unbound
        # in that case, so building the torrent raised UnboundLocalError.
        magnet = NotAvailable
        ih = id
        download = trs[2].cssselect('td a')[0]
        if download.attrib['href'].startswith('magnet:'):
            magnet = unicode(download.attrib['href'])

            query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
            btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
            ih = btih.split(':')[-1]

        name = unicode(trs[3].cssselect('td')[1].text)

        value, unit = trs[5].cssselect('td')[1].text.split()

        # relative age -> absolute date
        valueago, valueunit, _ = trs[6].cssselect('td')[1].text.split()
        delta = timedelta(**{valueunit: float(valueago)})
        date = datetime.now() - delta

        # every remaining row lists one file in its second cell
        files = [unicode(tr.cssselect('td')[1].text) for tr in trs[15:]]

        torrent = Torrent(ih, name)
        torrent.url = unicode(self.url)
        torrent.size = get_bytes_size(float(value), unit)
        torrent.magnet = magnet
        torrent.seeders = NotAvailable
        torrent.leechers = NotAvailable
        torrent.description = NotAvailable
        torrent.files = files
        torrent.filename = NotAvailable
        torrent.date = date

        return torrent
Пример #33
0
    def iter_torrents(self):
        """Yield a ``Torrent`` for every result row of the document.

        Only ``tr`` elements whose class is ``odd`` or `` even`` and which
        carry an ``id`` attribute are considered.  When a row has no link
        containing ``.torrent``, the torrent is yielded without ``url`` and
        ``filename`` being set.
        """
        for tr in self.document.getiterator('tr'):
            if tr.attrib.get('class', '') not in ('odd', ' even'):
                continue
            if 'id' not in tr.attrib:
                continue

            cells = tr.getchildren()
            link = cells[0].getchildren()[1].getchildren()[1]

            # The title is the link text plus the text of its child elements.
            title = link.text
            if not title:
                title = ''
            for red in link.getchildren():
                title += red.text_content()
            idt = link.attrib.get('href', '').replace('/', '').replace('.html', '')

            # look for url; it is reset for every row so that a row without a
            # download link never inherits the link of a previous row
            url = None
            for a in cells[0].getiterator('a'):
                if '.torrent' in a.attrib.get('href', ''):
                    url = a.attrib['href']

            # The number is the cell's own text (with a decimal comma), the
            # unit is the text of the cell's first child.
            size = float(cells[1].text.replace(',', '.'))
            u = cells[1].getchildren()[0].text
            seed = cells[4].text
            leech = cells[5].text

            torrent = Torrent(idt, title)
            if url is not None:
                torrent.url = url
                torrent.filename = parse_qs(urlsplit(url).query).get(
                    'title', [None])[0]
            torrent.size = get_bytes_size(size, u)
            torrent.seeders = int(seed)
            torrent.leechers = int(leech)
            yield torrent
Пример #34
0
 def obj_size(self):
     """Convert the ``Size <value> <unit>,`` text of ``./td[2]/font`` with ``get_bytes_size``."""
     size_text = Regexp(CleanText('./td[2]/font'), r'Size ([\d\.]+ [^,]+),', '\\1')(self)
     value, unit = size_text.split(' ')
     number = float(value)
     return get_bytes_size(number, unit)
Пример #35
0
    def get_torrent(self, id):
        """Assemble the ``Torrent`` described by the current document.

        The description, seeders and leechers come from ``div`` elements, the
        title from ``h1.novertmarg span``, the download and magnet links from
        ``div.downloadButtonGroup a``, the size from a ``folder``/``folderopen``
        ``span`` and the file names from ``td`` elements of class
        ``torFileName``.
        """
        description = NotAvailable
        url = NotAvailable
        magnet = NotAvailable
        seed = 0
        leech = 0

        for div in self.document.getiterator('div'):
            div_class = div.attrib.get('class', '')
            if div.attrib.get('id', '') == 'desc':
                try:
                    description = unicode(div.text_content().strip())
                except UnicodeDecodeError:
                    description = 'Description with invalid UTF-8.'
            elif div_class == 'seedBlock':
                # a missing count is reported as zero
                count = div.getchildren()[1].text
                seed = 0 if count is None else int(count)
            elif div_class == 'leechBlock':
                count = div.getchildren()[1].text
                leech = 0 if count is None else int(count)

        title_span = self.parser.select(self.document.getroot(),
                                        'h1.novertmarg span', 1)
        title = unicode(title_span.text)

        buttons = self.parser.select(self.document.getroot(),
                                     'div.downloadButtonGroup a')
        for a in buttons:
            href = a.attrib.get('href', '')
            if href.startswith('magnet'):
                magnet = unicode(href)
            elif href.startswith('//'):
                # protocol-relative link
                url = u'https:%s' % href
            elif href.startswith('http'):
                url = unicode(href)

        size = 0
        u = ''
        for span in self.document.getiterator('span'):
            children = span.getchildren()
            # other spans may share the class; the number of children tells
            # whether this is the one holding the size
            if span.attrib.get('class', '') not in ['folder', 'folderopen']:
                continue
            if len(children) <= 2:
                continue
            raw_size = children[1].tail
            u = children[2].text
            size = float(raw_size.split(': ')[1].replace(',', '.'))

        files = []
        for td in self.document.getiterator('td'):
            if td.attrib.get('class', '') == 'torFileName':
                files.append(td.text)

        torrent = Torrent(id, title)
        torrent.url = url
        if torrent.url:
            query = urlsplit(url).query
            torrent.filename = parse_qs(query).get('title', [None])[0]
        torrent.magnet = magnet
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        # an empty description is reported as not available
        torrent.description = NotAvailable if description == '' else description
        torrent.files = files
        return torrent
Пример #36
0
 def obj_size(self):
     """Convert the text following ``Size: `` with ``get_bytes_size``.

     The number is the text without ASCII letters; the unit is the text
     without dots, digits and spaces, upper-cased.
     """
     text = Regexp(CleanText('.//div[has-class("list_tor_right")]/p[1]/span[1]'), 'Size: (.*)$', '\\1')(self)
     number = float(re.sub(r'[A-Za-z]', '', text))
     unit = re.sub(r'[.0-9 ]', '', text).upper()
     return get_bytes_size(number, unit)
Пример #37
0
    def get_torrent(self, id):
        """Build the ``Torrent`` matching ``id`` from the current document.

        When ``id`` contains a dot, only the part after the first dot is used
        to find the matching entry.  Entries are searched in
        ``table.torrent_table`` if present, otherwise in the ``div`` children
        of ``div.main_column``.  Returns ``None`` after a warning when no
        entry sets the torrent URL.
        """
        root = self.document.getroot()
        header = self.browser.parser.select(root, "div.thin", 1)

        headings = header.xpath(".//h2")
        if len(headings) > 0:
            title = u"".join([txt.strip() for txt in headings[0].itertext()])
        else:
            title = self.browser.parser.select(header, "div.title_text", 1).text

        torrent = Torrent(id, title)
        torrentid = id.split(".", 1)[1] if "." in id else id

        tables = self.browser.parser.select(root, "table.torrent_table")
        is_table = len(tables) != 0
        if is_table:
            listing = tables[0]
        else:
            listing = self.browser.parser.select(root, "div.main_column", 1)

        for row in listing.findall("tr" if is_table else "div"):
            row_class = row.attrib.get("class", "")
            if is_table and "group_torrent" in row_class:
                cells = row.findall("td")
                if len(cells) != 5:
                    continue

                url = cells[0].find("span").find("a").attrib["href"]
                m = self.TORRENTID_REGEXP.match(url)
                if not m:
                    warning("ID not found")
                    continue
                if m.group(1) != torrentid:
                    continue

                torrent.url = self.format_url(url)
                size, unit = cells[1].text.split()
                torrent.size = get_bytes_size(float(size.replace(",", "")), unit)
                torrent.seeders = int(cells[3].text)
                torrent.leechers = int(cells[4].text)
                break
            elif not is_table and row_class.startswith("torrent_widget") and row_class.endswith("pad"):
                url = row.cssselect("a[title=Download]")[0].attrib["href"]
                m = self.TORRENTID_REGEXP.match(url)
                if not m:
                    warning("ID not found")
                    continue
                if m.group(1) != torrentid:
                    continue

                torrent.url = self.format_url(url)
                # the size is the last <strong> of the details title, in parentheses
                size_text = row.cssselect("div.details_title strong")[-1].text
                size, unit = size_text.strip("()").split()
                torrent.size = get_bytes_size(float(size.replace(",", "")), unit)
                torrent.seeders = int(row.cssselect("img[title=Seeders]")[0].tail)
                torrent.leechers = int(row.cssselect("img[title=Leechers]")[0].tail)
                break

        if not torrent.url:
            warning("Torrent %s not found in list" % torrentid)
            return None

        # The description is the concatenation of every box that has both a
        # heading and a body.
        main_column = self.parser.select(root, "div.main_column", 1)
        for box in main_column.cssselect("div.box"):
            heading = None
            body = None

            heads = box.cssselect("div.head")
            if len(heads) > 0:
                head = heads[0]
                strong = head.find("strong")
                if strong is not None:
                    head = strong
                if head.text is not None:
                    heading = head.text.strip()

            bodies = box.cssselect("div.body,div.desc")
            if bodies:
                body = html2text(self.parser.tostring(bodies[-1])).strip()

            if heading and body:
                if torrent.description is NotLoaded:
                    torrent.description = u""
                torrent.description += u"%s\n\n%s\n" % (heading, body)

        selector = "div#files_%s,div#filelist_%s,tr#torrent_%s td" % (torrentid, torrentid, torrentid)
        containers = root.cssselect(selector)
        if containers:
            torrent.files = []
            for container in containers:
                file_table = container.find("table")
                if file_table is None:
                    continue
                for line in file_table:
                    # rows of class colhead_dark are skipped
                    if line.attrib.get("class", None) != "colhead_dark":
                        torrent.files.append(line.find("td").text)

        return torrent
Пример #38
0
 def obj_size(self):
     """Convert the ``Taille totale`` cell with ``get_bytes_size``.

     The first whitespace-separated word is the number, the last one
     (upper-cased) is the unit.
     """
     words = CleanText('//div[@class="accordion"]//tr[th="Taille totale"]/td')(self).split()
     return get_bytes_size(float(words[0]), words[-1].upper())
Пример #39
0
    def get_torrent(self, id):
        """Return the ``Torrent`` for ``id`` scraped from the current document.

        Only the part of ``id`` after its first dot (the whole ``id`` when
        there is none) is compared with the ids found in the download links.
        The links are looked up in ``table.torrent_table`` when it exists and
        in the ``div`` children of ``div.main_column`` otherwise.  ``None`` is
        returned, after a warning, when no entry sets the torrent URL.
        """
        select = self.browser.parser.select
        thin = select(self.document.getroot(), 'div.thin', 1)

        h2_list = thin.xpath('.//h2')
        if len(h2_list) > 0:
            pieces = [txt.strip() for txt in h2_list[0].itertext()]
            title = u''.join(pieces)
        else:
            title = select(thin, 'div.title_text', 1).text

        torrent = Torrent(id, title)
        if '.' in id:
            torrentid = id.split('.', 1)[1]
        else:
            torrentid = id

        found = select(self.document.getroot(), 'table.torrent_table')
        if len(found) == 0:
            is_table = False
            container = select(self.document.getroot(), 'div.main_column', 1)
            child_tag = 'div'
        else:
            is_table = True
            container = found[0]
            child_tag = 'tr'

        for entry in container.findall(child_tag):
            css_class = entry.attrib.get('class', '')
            if is_table and 'group_torrent' in css_class:
                tds = entry.findall('td')
                if len(tds) != 5:
                    continue

                url = tds[0].find('span').find('a').attrib['href']
                m = self.TORRENTID_REGEXP.match(url)
                if not m:
                    warning('ID not found')
                    continue
                if m.group(1) != torrentid:
                    continue

                torrent.url = self.format_url(url)
                size, unit = tds[1].text.split()
                number = float(size.replace(',', ''))
                torrent.size = get_bytes_size(number, unit)
                torrent.seeders = int(tds[3].text)
                torrent.leechers = int(tds[4].text)
                break
            elif (not is_table and css_class.startswith('torrent_widget')
                    and css_class.endswith('pad')):
                url = entry.cssselect('a[title=Download]')[0].attrib['href']
                m = self.TORRENTID_REGEXP.match(url)
                if not m:
                    warning('ID not found')
                    continue
                if m.group(1) != torrentid:
                    continue

                torrent.url = self.format_url(url)
                strong = entry.cssselect('div.details_title strong')[-1]
                size, unit = strong.text.strip('()').split()
                number = float(size.replace(',', ''))
                torrent.size = get_bytes_size(number, unit)
                seeders_img = entry.cssselect('img[title=Seeders]')[0]
                torrent.seeders = int(seeders_img.tail)
                leechers_img = entry.cssselect('img[title=Leechers]')[0]
                torrent.leechers = int(leechers_img.tail)
                break

        if not torrent.url:
            warning('Torrent %s not found in list' % torrentid)
            return None

        # Each box with both a head text and a body adds one paragraph to the
        # description.
        column = self.parser.select(self.document.getroot(), 'div.main_column', 1)
        for box in column.cssselect('div.box'):
            box_title = None
            box_body = None

            head_list = box.cssselect('div.head')
            if len(head_list) > 0:
                head = head_list[0]
                if head.find('strong') is not None:
                    head = head.find('strong')
                if head.text is not None:
                    box_title = head.text.strip()

            body_list = box.cssselect('div.body,div.desc')
            if body_list:
                markup = self.parser.tostring(body_list[-1])
                box_body = html2text(markup).strip()

            if box_title and box_body:
                if torrent.description is NotLoaded:
                    torrent.description = u''
                torrent.description += u'%s\n\n%s\n' % (box_title, box_body)

        file_holders = self.document.getroot().cssselect(
            'div#files_%s,div#filelist_%s,tr#torrent_%s td' %
            (torrentid, torrentid, torrentid))
        if file_holders:
            torrent.files = []
            for holder in file_holders:
                inner = holder.find('table')
                if inner is None:
                    continue
                for file_row in inner:
                    if file_row.attrib.get('class', None) == 'colhead_dark':
                        # rows of class colhead_dark are skipped
                        continue
                    torrent.files.append(file_row.find('td').text)

        return torrent
Пример #40
0
 def obj_size(self):
     """Convert the ``<value> <unit>`` text of the ``tr[6]/td[2]`` cell under ``self.ROOT`` with ``get_bytes_size``."""
     cell_text = CleanText(self.ROOT + '/tr[6]/td[2]')(self)
     value, unit = cell_text.split()
     return get_bytes_size(float(value), unit)
Пример #41
0
 def obj_size(self):
     """Convert the text of ``./td[last()-3]`` with ``get_bytes_size``.

     The number is the text without ASCII letters.  The unit is the text
     without dots and digits, stripped, with ``o`` replaced by ``B`` and
     then upper-cased.
     """
     text = CleanText('./td[last()-3]')(self)
     number = float(re.sub(r'[A-Za-z]', '', text))
     unit = re.sub(r'[.0-9]', '', text).strip()
     unit = unit.replace('o', 'B').upper()
     return get_bytes_size(number, unit)
Пример #42
0
    def iter_torrents(self):
        """Yield a ``Torrent`` for every torrent row of the listing table.

        The table is ``table.torrent_table`` or, failing that,
        ``table#browse_torrent_table``; nothing is yielded when neither
        exists.  Rows of class ``group`` only set the title prefix used for
        the torrent rows that follow; rows of class ``colhead`` are ignored.
        """
        table = self.document.getroot().cssselect('table.torrent_table')
        if not table:
            table = self.document.getroot().cssselect('table#browse_torrent_table')
        if not table:
            return

        table = table[0]
        current_group = None
        for tr in table.findall('tr'):
            tr_class = tr.attrib.get('class', '')
            if tr_class == 'colhead':
                # ignore
                continue
            if tr_class == 'group':
                tds = tr.findall('td')
                current_group = u''
                div = tds[-6]
                if div.getchildren()[0].tag == 'div':
                    div = div.getchildren()[0]
                # The group title is the text of the links joined by ' - '.
                for a in div.findall('a'):
                    if not a.text:
                        continue
                    if current_group:
                        current_group += ' - '
                    current_group += a.text
            elif tr_class.startswith(('group_torrent', 'torrent')):
                tds = tr.findall('td')

                title = current_group
                if len(tds) == 7:
                    # Under a group
                    i = 0
                elif len(tds) in (8, 9):
                    # An alone torrent: take the last cell holding a link
                    i = len(tds) - 1
                    while i >= 0 and tds[i].find('a') is None:
                        i -= 1
                    if i < 0:
                        # No cell holds a link; a negative index would
                        # silently address cells from the end of the row.
                        continue
                else:
                    # Useless title
                    continue

                link = tds[i].find('a')
                if title:
                    title += u' (%s)' % link.text
                else:
                    title = ' - '.join([a.text for a in tds[i].findall('a')])

                # Keep the href as a string: it is what format_url receives
                # below, the parsed form is only needed for the query.
                url = link.attrib['href']
                params = parse_qs(urlparse.urlparse(url).query)
                if 'torrentid' in params:
                    torrent_id = '%s.%s' % (params['id'][0], params['torrentid'][0])
                else:
                    url = tds[i].find('span').find('a').attrib['href']
                    m = self.TORRENTID_REGEXP.match(url)
                    if not m:
                        continue
                    torrent_id = '%s.%s' % (params['id'][0], m.group(1))

                # The size cell is three cells after the link cell, or two
                # when that one does not split into exactly two words.
                try:
                    size, unit = tds[i + 3].text.split()
                except ValueError:
                    size, unit = tds[i + 2].text.split()
                size = get_bytes_size(float(size.replace(',', '')), unit)
                seeders = int(tds[-2].text)
                leechers = int(tds[-1].text)

                torrent = Torrent(torrent_id, title)
                torrent.url = self.format_url(url)
                torrent.size = size
                torrent.seeders = seeders
                torrent.leechers = leechers
                yield torrent
            else:
                debug('unknown attrib: %s' % tr.attrib)
Пример #43
0
    def get_torrent(self, id):
        """Scrape the current document into the ``Torrent`` designated by ``id``.

        The entry is located by comparing the id captured by
        ``TORRENTID_REGEXP`` in each download link with the part of ``id``
        after its first dot (or the whole ``id`` without a dot).  Entries are
        rows of ``table.torrent_table`` when that table exists, otherwise
        ``div`` children of ``div.main_column``.  Returns ``None`` after a
        warning when no entry sets the torrent URL.
        """
        document_root = self.document.getroot()
        thin_div = self.browser.parser.select(document_root, 'div.thin', 1)

        h2_nodes = thin_div.xpath('.//h2')
        if len(h2_nodes) > 0:
            title = u''.join([txt.strip() for txt in h2_nodes[0].itertext()])
        else:
            title = self.browser.parser.select(thin_div, 'div.title_text', 1).text

        torrent = Torrent(id, title)
        torrentid = id.split('.', 1)[1] if '.' in id else id

        matches = self.browser.parser.select(document_root, 'table.torrent_table')
        is_table = len(matches) != 0
        if is_table:
            parent = matches[0]
        else:
            parent = self.browser.parser.select(document_root, 'div.main_column', 1)

        for node in parent.findall('tr' if is_table else 'div'):
            node_class = node.attrib.get('class', '')
            is_widget = node_class.startswith('torrent_widget') and node_class.endswith('pad')
            if is_table and 'group_torrent' in node_class:
                columns = node.findall('td')
                if len(columns) != 5:
                    continue

                url = columns[0].find('span').find('a').attrib['href']
                m = self.TORRENTID_REGEXP.match(url)
                if not m:
                    warning('ID not found')
                    continue
                if m.group(1) != torrentid:
                    continue

                torrent.url = self.format_url(url)
                size, unit = columns[1].text.split()
                torrent.size = get_bytes_size(float(size.replace(',', '')), unit)
                torrent.seeders = int(columns[3].text)
                torrent.leechers = int(columns[4].text)
                break
            elif not is_table and is_widget:
                url = node.cssselect('a[title=Download]')[0].attrib['href']
                m = self.TORRENTID_REGEXP.match(url)
                if not m:
                    warning('ID not found')
                    continue
                if m.group(1) != torrentid:
                    continue

                torrent.url = self.format_url(url)
                details = node.cssselect('div.details_title strong')[-1].text
                size, unit = details.strip('()').split()
                torrent.size = get_bytes_size(float(size.replace(',', '')), unit)
                torrent.seeders = int(node.cssselect('img[title=Seeders]')[0].tail)
                torrent.leechers = int(node.cssselect('img[title=Leechers]')[0].tail)
                break

        if not torrent.url:
            warning('Torrent %s not found in list' % torrentid)
            return None

        # Boxes that have both a head text and a body are appended to the
        # description, one after the other.
        main_div = self.parser.select(document_root, 'div.main_column', 1)
        for box in main_div.cssselect('div.box'):
            caption = None
            text = None

            head_nodes = box.cssselect('div.head')
            if len(head_nodes) > 0:
                head_node = head_nodes[0]
                strong_node = head_node.find('strong')
                if strong_node is not None:
                    head_node = strong_node
                if head_node.text is not None:
                    caption = head_node.text.strip()

            body_nodes = box.cssselect('div.body,div.desc')
            if body_nodes:
                text = html2text(self.parser.tostring(body_nodes[-1])).strip()

            if caption and text:
                if torrent.description is NotLoaded:
                    torrent.description = u''
                torrent.description += u'%s\n\n%s\n' % (caption, text)

        ids = (torrentid, torrentid, torrentid)
        file_nodes = document_root.cssselect('div#files_%s,div#filelist_%s,tr#torrent_%s td' % ids)
        if file_nodes:
            torrent.files = []
            for file_node in file_nodes:
                listing = file_node.find('table')
                if listing is None:
                    continue
                for item in listing:
                    # rows of class colhead_dark are skipped
                    if item.attrib.get('class', None) != 'colhead_dark':
                        torrent.files.append(item.find('td').text)

        return torrent
Пример #44
0
    def iter_torrents(self):
        """Yield a ``Torrent`` for every torrent row of the listing table.

        The table is ``table.torrent_table`` or, failing that,
        ``table#browse_torrent_table``; nothing is yielded when neither
        exists.  Rows of class ``group`` only set the title prefix used for
        the torrent rows that follow; rows of class ``colhead`` are ignored.
        """
        table = self.document.getroot().cssselect("table.torrent_table")
        if not table:
            table = self.document.getroot().cssselect("table#browse_torrent_table")
        if not table:
            return

        table = table[0]
        current_group = None
        for tr in table.findall("tr"):
            tr_class = tr.attrib.get("class", "")
            if tr_class == "colhead":
                # ignore
                continue
            if tr_class == "group":
                tds = tr.findall("td")
                current_group = u""
                div = tds[-6]
                if div.getchildren()[0].tag == "div":
                    div = div.getchildren()[0]
                # The group title is the text of the links joined by " - ".
                for a in div.findall("a"):
                    if not a.text:
                        continue
                    if current_group:
                        current_group += " - "
                    current_group += a.text
            elif tr_class.startswith(("group_torrent", "torrent")):
                tds = tr.findall("td")

                title = current_group
                if len(tds) == 7:
                    # Under a group
                    i = 0
                elif len(tds) in (8, 9):
                    # An alone torrent: take the last cell holding a link
                    i = len(tds) - 1
                    while i >= 0 and tds[i].find("a") is None:
                        i -= 1
                    if i < 0:
                        # No cell holds a link; a negative index would
                        # silently address cells from the end of the row.
                        continue
                else:
                    # Useless title
                    continue

                link = tds[i].find("a")
                if title:
                    title += u" (%s)" % link.text
                else:
                    title = " - ".join([a.text for a in tds[i].findall("a")])

                # Keep the href as a string: it is what format_url receives
                # below, the parsed form is only needed for the query.
                url = link.attrib["href"]
                params = parse_qs(urlparse.urlparse(url).query)
                if "torrentid" in params:
                    torrent_id = "%s.%s" % (params["id"][0], params["torrentid"][0])
                else:
                    url = tds[i].find("span").find("a").attrib["href"]
                    m = self.TORRENTID_REGEXP.match(url)
                    if not m:
                        continue
                    torrent_id = "%s.%s" % (params["id"][0], m.group(1))

                # The size cell is three cells after the link cell, or two
                # when that one does not split into exactly two words.
                try:
                    size, unit = tds[i + 3].text.split()
                except ValueError:
                    size, unit = tds[i + 2].text.split()
                size = get_bytes_size(float(size.replace(",", "")), unit)
                seeders = int(tds[-2].text)
                leechers = int(tds[-1].text)

                torrent = Torrent(torrent_id, title)
                torrent.url = self.format_url(url)
                torrent.size = size
                torrent.seeders = seeders
                torrent.leechers = leechers
                yield torrent
            else:
                debug("unknown attrib: %s" % tr.attrib)
Пример #45
0
 def obj_size(self):
     """Convert the text of the second ``tdnormal`` cell with ``get_bytes_size``.

     The number is the text without ASCII letters; the unit is the text
     without dots, digits and spaces, upper-cased.
     """
     cell_text = CleanText('(.//td[has-class("tdnormal")])[2]')(self)
     digits = re.sub(r'[A-Za-z]', '', cell_text)
     number = float(digits)
     unit = re.sub(r'[.0-9 ]', '', cell_text).upper()
     return get_bytes_size(number, unit)
Пример #46
0
 def obj_size(self):
     """Convert the cell next to the bold ``Size`` label with ``get_bytes_size``.

     The number is the text without ASCII letters; the unit is the text
     without dots, digits and spaces, upper-cased.
     """
     raw = CleanText('//td/b[text()="Size"]/../../td[2]')(self)
     number = float(re.sub(r'[A-Za-z]', '', raw))
     unit = re.sub(r'[.0-9 ]', '', raw).upper()
     return get_bytes_size(number, unit)
Пример #47
0
 def obj_size(self):
     """Convert the ``<value> <unit>`` text of the ``attr_val`` span with ``get_bytes_size``."""
     span_text = CleanText('./td/table/tr/td[2]/span[@class="attr_val"]')(self)
     value, unit = span_text.split()
     number = float(value)
     return get_bytes_size(number, unit)
Пример #48
0
    def iter_torrents(self):
        """Yield a ``Torrent`` for every torrent row of the listing table.

        The table is ``table.torrent_table`` or, failing that,
        ``table#browse_torrent_table``; nothing is yielded when neither
        exists.  Rows of class ``group`` only set the title prefix used for
        the torrent rows that follow; rows of class ``colhead`` are ignored.
        """
        table = self.document.getroot().cssselect('table.torrent_table')
        if not table:
            table = self.document.getroot().cssselect(
                'table#browse_torrent_table')
        if not table:
            return

        table = table[0]
        current_group = None
        for tr in table.findall('tr'):
            tr_class = tr.attrib.get('class', '')
            if tr_class == 'colhead':
                # ignore
                continue
            if tr_class == 'group':
                tds = tr.findall('td')
                current_group = u''
                div = tds[-6]
                if div.getchildren()[0].tag == 'div':
                    div = div.getchildren()[0]
                # The group title is the text of the links joined by ' - '.
                for a in div.findall('a'):
                    if not a.text:
                        continue
                    if current_group:
                        current_group += ' - '
                    current_group += a.text
            elif tr_class.startswith(('group_torrent', 'torrent')):
                tds = tr.findall('td')

                title = current_group
                if len(tds) == 7:
                    # Under a group
                    i = 0
                elif len(tds) in (8, 9):
                    # An alone torrent: take the last cell holding a link
                    i = len(tds) - 1
                    while i >= 0 and tds[i].find('a') is None:
                        i -= 1
                    if i < 0:
                        # No cell holds a link; a negative index would
                        # silently address cells from the end of the row.
                        continue
                else:
                    # Useless title
                    continue

                link = tds[i].find('a')
                if title:
                    title += u' (%s)' % link.text
                else:
                    title = ' - '.join(
                        [a.text for a in tds[i].findall('a')])

                # Keep the href as a string: it is what format_url receives
                # below, the parsed form is only needed for the query.
                url = link.attrib['href']
                params = parse_qs(urlparse.urlparse(url).query)
                if 'torrentid' in params:
                    torrent_id = '%s.%s' % (params['id'][0],
                                            params['torrentid'][0])
                else:
                    url = tds[i].find('span').find('a').attrib['href']
                    m = self.TORRENTID_REGEXP.match(url)
                    if not m:
                        continue
                    torrent_id = '%s.%s' % (params['id'][0], m.group(1))

                # The size cell is three cells after the link cell, or two
                # when that one does not split into exactly two words.
                try:
                    size, unit = tds[i + 3].text.split()
                except ValueError:
                    size, unit = tds[i + 2].text.split()
                size = get_bytes_size(float(size.replace(',', '')), unit)
                seeders = int(tds[-2].text)
                leechers = int(tds[-1].text)

                torrent = Torrent(torrent_id, title)
                torrent.url = self.format_url(url)
                torrent.size = size
                torrent.seeders = seeders
                torrent.leechers = leechers
                yield torrent
            else:
                debug('unknown attrib: %s' % tr.attrib)
Пример #49
0
 def obj_size(self):
     """Convert the ``Size <value> <unit>,`` text of ``./td[2]/font`` with ``get_bytes_size``."""
     extractor = Regexp(CleanText('./td[2]/font'), r'Size ([\d\.]+ [^,]+),', '\\1')
     pieces = extractor(self).split(' ')
     value, unit = pieces
     return get_bytes_size(float(value), unit)
Пример #50
0
    def get_torrent(self, id):
        """Build the ``Torrent`` for ``id`` from the current document.

        The download URL is derived from ``id`` alone.  The title comes from
        the ``title`` meta tag, the seeders from the green ``ShowTip`` span,
        the leechers from the matching tip ``div``, the size from the
        ``torrent_details`` ``div``, and the description and files from the
        styled ``p`` elements and the ``fileRows`` cells.
        """
        url = 'https://isohunt.com/download/%s/%s.torrent' % (id, id)
        size = NotAvailable
        seed = NotAvailable
        leech = NotAvailable

        meta = self.parser.select(self.document.getroot(),
                                  'head > meta[name=title]', 1)
        title = unicode(meta.attrib.get('content', ''))

        tip_id = "none"
        for span in self.document.getiterator('span'):
            if span.attrib.get('style', '') != 'color:green;':
                continue
            if 'ShowTip' not in span.attrib.get('onmouseover', ''):
                continue
            seed = int(span.tail.split(' ')[1])
            # the tip id is the first quoted argument of the handler
            tip_id = span.attrib.get('onmouseover', '').split("'")[1]

        for div in self.document.getiterator('div'):
            # find the tip that corresponds to the seeders span
            if (div.attrib.get('class', '') == 'dirs ydsf'
                    and tip_id in div.attrib.get('id', '')):
                tip_text = div.getchildren()[0].getchildren()[1].tail
                leech = int(tip_text.split(' ')[2])
            # the <b> holding the size has nothing to tell it apart, so it
            # is reached by position from the details div
            elif div.attrib.get('id', '') == 'torrent_details':
                size_text = div.getchildren()[6].getchildren()[0].getchildren()[0].text
                u = size_text[-2:]
                size = get_bytes_size(float(size_text[:-3]), u)

        # files and description (uploader's comment)
        description = NotAvailable
        files = []
        styled_p_seen = 0
        for p in self.document.getiterator('p'):
            if p.attrib.get('style', '') != 'line-height:1.2em;margin-top:1.8em':
                continue
            styled_p_seen += 1
            if styled_p_seen == 1:
                comment = p.getchildren()[1].tail
                if comment is not None:
                    description = unicode(comment)
            elif styled_p_seen == 2:
                label = p.getchildren()[0]
                if label.text == 'Directory:':
                    files.append(label.tail.strip() + '/')
                else:
                    files.append(label.tail.strip())

        for td in self.document.getiterator('td'):
            if td.attrib.get('class', '') != 'fileRows':
                continue
            filename = td.text
            # every child element stands for a path separator
            for slash in td.getchildren():
                filename += '/'
                filename += slash.tail
            files.append(filename)

        torrent = Torrent(id, title)
        torrent.url = url
        torrent.size = size
        torrent.seeders = seed
        torrent.leechers = leech
        torrent.description = description
        torrent.files = files
        return torrent
Пример #51
0
 def obj_size(self):
     """Convert the ``<value> <unit>`` text of the ``tr[6]/td[2]`` cell under ``self.ROOT`` with ``get_bytes_size``."""
     xpath = self.ROOT + '/tr[6]/td[2]'
     parts = CleanText(xpath)(self).split()
     value, unit = parts
     return get_bytes_size(float(value), unit)
Пример #52
0
 def obj_size(self):
     """Convert the cell following ``Taille totale`` with ``get_bytes_size``.

     The number is the text without ASCII letters.  The unit is the text
     without dots and digits, stripped, with ``o`` replaced by ``B`` and
     then upper-cased.
     """
     cell_text = CleanText('//table[has-class("informations")]//td[text()="Taille totale"]/following-sibling::td')(self)
     number = float(re.sub(r'[A-Za-z]', '', cell_text))
     letters = re.sub(r'[.0-9]', '', cell_text).strip()
     unit = letters.replace('o', 'B').upper()
     return get_bytes_size(number, unit)
Пример #53
0
 def obj_size(self):
     """Convert the ``<value> <unit>`` text of the ``attr_val`` span with ``get_bytes_size``."""
     text = CleanText('./td/table/tr/td[2]/span[@class="attr_val"]')(self)
     fields = text.split()
     value, unit = fields
     return get_bytes_size(float(value), unit)