Example #1
def __get_dom_content(html, name, match):
    try:
        if match.endswith('/>'): return ''
        # override tag name with tag from match if possible
        tag = re.match(r'<([^\s/>]+)', match)
        if tag: name = tag.group(1)
        start_str = '<%s' % name
        end_str = "</%s" % name
        # start/end tags without matching case cause issues
        start = html.find(match)
        end = html.find(end_str, start)
        pos = html.find(start_str, start + 1)

        while pos < end and pos != -1:  # Ignore too early </endstr> return
            tend = html.find(end_str, end + len(end_str))
            if tend != -1: end = tend
            pos = html.find(start_str, pos + 1)

        if start == -1 and end == -1: result = ''
        elif start > -1 and end > -1: result = html[start + len(match):end]
        elif end > -1: result = html[:end]
        elif start > -1: result = html[start + len(match):]
        else: result = ''
        return result
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return ''
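
A hypothetical call, illustrating how the while-loop above pushes the end position past a nested tag of the same name:

html = '<div class="a">inner <div>nested</div> text</div>'
__get_dom_content(html, 'div', '<div class="a">')  # -> 'inner <div>nested</div> text'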
Example #2
def sources(specified_folders=None):
	try:
		sourceDict = []
		sourceFolder = getScraperFolder()
		sourceFolderLocation = os.path.join(os.path.dirname(__file__), sourceFolder)
		sourceSubFolders = [x[1] for x in os.walk(sourceFolderLocation)][0]
		sourceSubFolders = [x for x in sourceSubFolders if '__pycache__' not in x]
		if specified_folders:
			sourceSubFolders = specified_folders
		for i in sourceSubFolders:
			for loader, module_name, is_pkg in walk_packages([os.path.join(sourceFolderLocation, i)]):
				if is_pkg: continue
				if enabledCheck(module_name):
					try:
						module = loader.find_module(module_name).load_module(module_name)
						sourceDict.append((module_name, module.source()))
					except Exception as e:
						if debug:
							from fenomscrapers.modules import log_utils
							log_utils.log('Error: Loading module: "%s": %s' % (module_name, e), level=log_utils.LOGWARNING)
		return sourceDict
	except:
		from fenomscrapers.modules import log_utils
		log_utils.error()
		return []
Example #3
def remove_lang(release_info):
    if not release_info: return False
    try:
        if any(value in release_info for value in DUBBED): return True
        if any(value in release_info for value in SUBS): return True
        if home_getProperty('fenom.filter.undesirables') == 'true':
            undesirables = get_undesirables()
            if any(value in release_info for value in undesirables):
                return True
        if home_getProperty('fenom.filter.foreign.single.audio') == 'true':
            if any(value in release_info for value in LANG) and not any(
                    value in release_info
                    for value in ['.eng.', '.en.', 'english']):
                return True
            if any(value in release_info for value in ABV_LANG) and not any(
                    value in release_info
                    for value in ['.eng.', '.en.', 'english']):
                return True
        if release_info.endswith('.srt.') and not any(
                value in release_info
                for value in ['with.srt', '.avi', '.mkv', '.mp4']):
            return True
        return False
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return False
Example #4
def get(function, duration, *args):
    """
	:param function: Function to be executed
	:param duration: Duration of validity of cache in hours
	:param args: Optional arguments for the provided function
	"""
    try:
        key = _hash_function(function, args)
        cache_result = cache_get(key)
        if cache_result:
            result = literal_eval(cache_result['value'])
            if _is_cache_valid(cache_result['date'], duration):
                return result

        fresh_result = repr(function(*args))
        invalid = False
        try:  # Sometimes None is returned as a string instead of None type for "fresh_result"
            if not fresh_result: invalid = True
            elif fresh_result in ('None', '', '[]', '{}'): invalid = True
        except:
            pass

        if invalid:  # If the cache is old, but we didn't get "fresh_result", return the old cache
            if cache_result: return result
            else: return None
        else:
            cache_insert(key, fresh_result)
            return literal_eval(fresh_result)
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return None
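
A minimal usage sketch for the cache wrapper above. fetch_sources and its argument are hypothetical; note the contract implied by the body: the wrapped function must return something that survives a repr()/literal_eval() round trip (plain lists, dicts, strings, numbers).

def fetch_sources(query):  # hypothetical scraper call
    return [{'title': query, 'quality': '1080p'}]

results = get(fetch_sources, 6, 'ad.astra.2019')  # cached for 6 hours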
Example #5
def enabledCheck(module_name):
	try:
		if getSetting('provider.' + module_name) == 'true': return True
		else: return False
	except:
		from fenomscrapers.modules import log_utils
		log_utils.error()
		return True
Example #6
def getScraperFolder():
	try:
		sourceSubFolders = [x[1] for x in os.walk(os.path.dirname(__file__))][0]
		return [i for i in sourceSubFolders if 'fenomscrapers' in i.lower()][0]
	except:
		from fenomscrapers.modules import log_utils
		log_utils.error()
		return 'sources_fenomscrapers'
Example #7
def strip_non_ascii_and_unprintable(text):
    try:
        result = ''.join(char for char in text if char in printable)
        return result.encode('ascii', errors='ignore').decode('ascii', errors='ignore')
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return text
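
An illustrative call, assuming the bare printable name is string.printable imported at module level:

from string import printable
strip_non_ascii_and_unprintable('Café\x00 Movie')  # -> 'Caf Movie' (accented/unprintable chars are dropped, not transliterated)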
Example #8
def six_decode(txt, char='utf-8'):
    try:
        if isPY3 and isinstance(txt, binary_type):
            txt = txt.decode(char)
        return txt
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return txt
Example #9
def ensure_text(s, encoding='utf-8', errors='strict'):
    try:
        if isinstance(s, binary_type):
            return s.decode(encoding, errors)
        elif isinstance(s, text_type):
            return s
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return s
Example #10
def normalize(title):
    try:
        title = py_tools.ensure_text(py_tools.ensure_str(title))
        title = ''.join(c for c in unicodedata.normalize('NFKD', title) if unicodedata.category(c) != 'Mn')
        return str(title)
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return title
Example #11
def parseJSString(self, s):
    try:
        # translate the JS token tricks ('!+[]', '!![]', '[]') into digits, wrap calls in str(), then eval
        offset = 1 if s[0] == '+' else 0
        s = s.replace('!+[]', '1').replace('!![]', '1').replace('[]', '0').replace('(', 'str(')
        val = int(eval(s[offset:]))
        return val
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
Example #12
def release_title_format(release_title):
    try:
        release_title = release_title.lower().replace("'", "").strip('.')
        fmt = re.sub(r'[^a-z0-9-~]+', '.', release_title)
        fmt = '.%s.' % fmt.replace('.-.', '-').replace('-.', '-').replace('.-', '-').replace('--', '-')
        return fmt
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return release_title
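
An illustrative call with a hypothetical title, showing the dot-delimited form the matching helpers below expect:

release_title_format('Blade Runner 2049')  # -> '.blade.runner.2049.'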
Example #13
def check_title(title, aliases, release_title, hdlr, year, years=None):
    try:
        aliases = aliases_to_array(jsloads(aliases))
    except:
        aliases = None
    title_list = []
    title_list_append = title_list.append
    if aliases:
        for item in aliases:
            try:
                alias = item.replace('!', '').replace('(', '').replace(')', '').replace('&', 'and').replace(year, '')
                # alias = re.sub(r'[^A-Za-z0-9\s\.-]+', '', alias)
                if years:  # for movies only, scraper to pass None for episodes
                    for i in years:
                        alias = alias.replace(i, '')
                if alias in title_list: continue
                title_list_append(alias)
            except:
                from fenomscrapers.modules import log_utils
                log_utils.error()
    try:
        match = True
        title = title.replace('!', '').replace('(', '').replace(')', '').replace('&', 'and')
        # title = re.sub(r'[^A-Za-z0-9\s\.-]+', '', title)
        title_list_append(title)

        release_title = release_title_format(release_title)  # converts to .lower()
        h = hdlr.lower()
        t = release_title.split(h)[0].replace(year, '').replace('(', '').replace(')', '').replace('&', 'and')
        if years:
            for i in years:
                t = t.split(i)[0]
        t = t.split('2160p')[0].split('4k')[0].split('1080p')[0].split('720p')[0]
        if all(cleantitle.get(i) != cleantitle.get(t) for i in title_list):
            match = False
        if years:  # for movies only, scraper to pass None for episodes
            if not any(value in release_title for value in years):
                match = False
        else:
            if h not in release_title: match = False
        return match
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return match
Example #14
def parseDOM(html, name='', attrs=None, ret=False):
    try:
        if attrs:
            attrs = dict((key, re.compile(value + ('$' if value else '')))
                         for key, value in py_tools.iteritems(attrs))
        results = dom_parser.parse_dom(html, name, attrs, ret)
        if ret: results = [result.attrs[ret.lower()] for result in results]
        else: results = [result.content for result in results]
        return results
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
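
A hypothetical call, assuming dom_parser is the companion module whose internals appear in Examples #15 and #24:

html = '<a class="res" href="/watch">Watch</a>'
parseDOM(html, 'a', attrs={'class': 'res'}, ret='href')  # should yield ['/watch']
parseDOM(html, 'a', attrs={'class': 'res'})  # should yield ['Watch']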
Example #15
def __get_dom_elements(item, name, attrs):
    try:
        if not attrs:
            pattern = r'(<%s(?:\s[^>]*>|/?>))' % name
            this_list = re.findall(pattern, item, re.M | re.S | re.I)
        else:
            last_list = None

            for key, value in py_tools.iteritems(attrs):
                value_is_regex = isinstance(value, re_type)
                value_is_str = isinstance(value, py_tools.string_types)
                pattern = r'''(<{tag}[^>]*\s{key}=(?P<delim>['"])(.*?)(?P=delim)[^>]*>)'''.format(
                    tag=name, key=key)
                re_list = re.findall(pattern, item, re.M | re.S | re.I)
                if value_is_regex:
                    this_list = [
                        r[0] for r in re_list if re.match(value, r[2])
                    ]
                else:
                    temp_value = [value] if value_is_str else value
                    this_list = [
                        r[0] for r in re_list
                        if set(temp_value) <= set(r[2].split(' '))
                    ]

                if not this_list:
                    has_space = (value_is_regex and ' ' in value.pattern) or (
                        value_is_str and ' ' in value)
                    if not has_space:
                        pattern = r'''(<{tag}[^>]*\s{key}=((?:[^\s>]|/>)*)[^>]*>)'''.format(
                            tag=name, key=key)
                        re_list = re.findall(pattern, item, re.M | re.S | re.I)
                        if value_is_regex:
                            this_list = [
                                r[0] for r in re_list if re.match(value, r[1])
                            ]
                        else:
                            this_list = [
                                r[0] for r in re_list if value == r[1]
                            ]

                if last_list is None: last_list = this_list
                else:
                    # distinct loop variable: under Py2 a comprehension over "item" would leak and clobber the parameter
                    last_list = [elem for elem in this_list if elem in last_list]
            this_list = last_list
        return this_list
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return this_list
Example #16
def aliases_to_array(aliases, filter=None):
    try:
        if all(isinstance(x, str) for x in aliases): return aliases
        if not filter: filter = []
        if isinstance(filter, str): filter = [filter]
        return [
            x.get('title') for x in aliases
            if not filter or x.get('country') in filter
        ]
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return []
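
Illustrative calls with hypothetical aliases, showing both paths: lists of strings pass through untouched, while alias dicts are reduced to titles, optionally filtered by country:

aliases = [{'title': 'La Haine', 'country': 'fr'}, {'title': 'Hate', 'country': 'us'}]
aliases_to_array(aliases)  # -> ['La Haine', 'Hate']
aliases_to_array(aliases, filter='us')  # -> ['Hate']
aliases_to_array(['Hate'])  # -> ['Hate'] (already strings)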
Example #17
def _basic_request(url, headers=None, post=None, timeout='30', limit=None):
    try:
        if not isinstance(headers, dict): headers = {}  # tolerate headers=None
        req = urllib2.Request(url, data=post)
        _add_request_header(req, headers)
        response = urllib2.urlopen(req, timeout=int(timeout))
        return _get_result(response, limit)
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
Example #18
def ensure_str(s, encoding='utf-8', errors='strict'):
    from fenomscrapers.modules import log_utils
    try:
        if not isinstance(s, (text_type, binary_type)):
            return log_utils.log("not expecting type '%s'" % type(s), __name__,
                                 log_utils.LOGDEBUG)
        if isPY2 and isinstance(s, text_type):
            s = s.encode(encoding, errors)
        elif isPY3 and isinstance(s, binary_type):
            s = s.decode(encoding, errors)
        return s
    except:
        log_utils.error()
        return s
Example #19
def clean_name(release_title):
    try:
        unwanted = [
            '[.www.tamilrockers.com.]', 'tamilrockers.com',
            'www.tamilrockers.com', 'www.tamilrockers.ws',
            'www.tamilrockers.pl', '[.www.torrenting.com.]',
            'www.torrenting.com', 'www.torrenting.org', 'www-torrenting-com',
            'www-torrenting-org', '[katmoviehd.eu]', '[katmoviehd.to]',
            '[katmoviehd.tv]', '+katmoviehd.pw+', 'katmoviehd-pw',
            '[.www.torrent9.uno.]', '[www.torrent9.ph.]', 'www.torrent9.nz',
            '[.torrent9.tv.]', '[.torrent9.cz.]', '[ torrent9.cz ]',
            'torrent9-cz-.-', '[agusiq.torrents.pl]', '[agusiq-torrents.pl]',
            'agusiq-torrents-pl', '[.oxtorrent.com.]', '[oxtorrent-com]',
            'oxtorrent-com', '[movcr.com]', 'www.movcr.tv', 'movcr-com',
            'www.movcr.to', '[ex-torrenty.org]', '[xtorrenty.org]',
            'xtorrenty.org', '[acesse.]', '[acesse-hd-elite-me]',
            '[acesse.hd-elite.me].', '[torrentcouch.net]',
            '[torrentcouch-net]', '[.www.cpasbien.cm.]', '[.www.cpasbien.pw.]',
            '[auratorrent.pl].nastoletni.wilkoak', '[auratorrent.pl]',
            '[.www.nextorrent.site.]', '[nextorrent.net]',
            '[www.scenetime.com]', 'www.scenetime.com', '[kst.vn]', 'kst-vn',
            '[itfriend]', '[itf]', '(imax)', '.imax.', 'www.2movierulz.ac',
            'www.2movierulz.ms', 'www.3movierulz.com', 'www.3movierulz.tv',
            '[zooqle.com]', '[horriblesubs]', '[gktorrent.com]',
            '[.www.omgtorrent.com.]', '[3d.hentai]', '[dark.media]',
            '[devil-torrents.pl]', 'mkvcinemas.live', '[filetracker.pl]',
            'www.bludv.tv', 'ramin.djawadi', '[prof]', '[reup]',
            '[.www.speed.cd.]', '[-bde4.com]', 'extramovies.casa', '[ah]',
            '[ul]', '+13.+', 'taht.oyunlar', 'crazy4tv.com', '[tv]',
            '[noobsubs]', '[.freecourseweb.com.]', 'karibu', '989pa.com',
            '[aletorrenty.pl]', 'best-torrents-net', '[.www.torrentday.com.]',
            '1-3-3-8.com', 'ssrmovies.club', 'www.tamilmv.bid',
            'www.1tamilmv.org', '[h3h2.com]'
        ]
        if release_title.lower().startswith('rifftrax'):
            return release_title  # removed by "undesirables" anyway so exit
        release_title = strip_non_ascii_and_unprintable(release_title).lstrip('/ ').replace(' ', '.')
        for i in unwanted:
            if release_title.lower().startswith(i):
                # escape a leading "[" or "+" so re.sub treats it literally
                pattern = r'\%s' % i if i.startswith(('[', '+')) else r'%s' % i
                release_title = re.sub(r'^%s' % pattern, '', release_title, 1, re.I)
        release_title = release_title.lstrip(' 4.-[](){}')
        return release_title
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return release_title
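
A hypothetical call, assuming strip_non_ascii_and_unprintable from Example #7 is in scope:

clean_name('www.torrenting.com Some.Movie.2020.1080p')  # -> 'Some.Movie.2020.1080p' (the known site prefix is stripped)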
Example #20
def _get_result(response, limit=None):
    try:
        if limit == '0': result = response.read(224 * 1024)
        elif limit: result = response.read(int(limit) * 1024)
        else: result = response.read(5242880)
        try:
            encoding = response.headers["Content-Encoding"]
        except:
            encoding = None
        if encoding == 'gzip':
            result = gzip.GzipFile(fileobj=StringIO(result)).read()
        return result
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
Example #21
def info_from_name(release_title,
                   title,
                   year,
                   hdlr=None,
                   episode_title=None,
                   season=None,
                   pack=None):
    try:
        release_title = release_title.lower().replace('&', 'and').replace("'", "")
        release_title = re.sub(r'[^a-z0-9]+', '.', release_title)
        title = title.lower().replace('&', 'and').replace("'", "")
        title = re.sub(r'[^a-z0-9]+', '.', title)
        name_info = release_title.replace(title, '').replace(year, '')
        if hdlr: name_info = name_info.replace(hdlr.lower(), '')
        if episode_title:
            episode_title = episode_title.lower().replace('&', 'and').replace("'", "")
            episode_title = re.sub(r'[^a-z0-9]+', '.', episode_title)
            name_info = name_info.replace(episode_title, '')
        if pack:
            if pack == 'season':
                season_fill = season.zfill(2)
                str1_replace = [
                    '.s%s' % season,
                    '.s%s' % season_fill,
                    '.season.%s' % season,
                    '.season%s' % season,
                    '.season.%s' % season_fill,
                    '.season%s' % season_fill, 'complete'
                ]
                for i in str1_replace:
                    name_info = name_info.replace(i, '')
            elif pack == 'show':
                str2_replace = [
                    '.all.seasons', 'seasons', 'season', 'the.complete',
                    'complete', 'all.torrent', 'total.series', 'tv.series',
                    'series', 'edited', 's1', 's01'
                ]
                for i in str2_replace:
                    name_info = name_info.replace(i, '')
        name_info = name_info.lstrip('.').rstrip('.')
        name_info = '.%s.' % name_info
        return name_info
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return release_title
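
A hypothetical call: once the title, year, and handler are stripped out, the remaining dotted segment is what quality/audio matching runs against:

info_from_name('Show.S01.1080p.WEB.x264', 'Show', '2020', hdlr='S01')  # -> '.1080p.web.x264.'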
Example #22
def _size(siz):
    try:
        if siz in ['0', 0, '', None]: return 0, ''
        div = 1 if siz.lower().endswith(('gb', 'gib')) else 1024
        # if ',' in siz and siz.lower().endswith(('mb', 'mib')): siz = size.replace(',', '')
        # elif ',' in siz and siz.lower().endswith(('gb', 'gib')): siz = size.replace(',', '.')
        # float_size = float(re.sub(r'[^0-9|/.|/,]', '', siz.replace(',', '.'))) / div
        # comma issue: sizes appear as "2,750 MB" or "2,75 GB" (decimal comma vs thousands separator)
        float_size = float(re.sub(r'[^0-9|/.|/,]', '', siz.replace(',', ''))) / div
        str_size = '%.2f GB' % float_size
        return float_size, str_size
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return 0, ''
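
Illustrative calls with hypothetical sizes; anything not suffixed GB/GiB is treated as megabytes and divided by 1024:

_size('1.40 GB')  # -> (1.4, '1.40 GB')
_size('700 MB')   # -> (0.68359375, '0.68 GB')
_size(0)          # -> (0, '')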
Example #23
def get_release_quality(release_info, release_link=None):
    try:
        quality = None
        info = []
        if release_info: quality = get_qual(release_info)
        if not quality:
            if release_link:
                release_link = release_link.lower()
                quality = get_qual(release_link)
                if not quality: quality = 'SD'
            else: quality = 'SD'
        return quality, info
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return 'SD', []
Example #24
def __get_attribs(element):
    try:
        attribs = {}
        for match in re.finditer(
                r'''\s+(?P<key>[^=]+)=\s*(?:(?P<delim>["'])(?P<value1>.*?)(?P=delim)|(?P<value2>[^"'][^>\s]*))''',
                element):
            match = match.groupdict()
            value1 = match.get('value1')
            value2 = match.get('value2')
            value = value1 if value1 is not None else value2
            if value is None: continue
            attribs[match['key'].lower().strip()] = value
        return attribs
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return attribs
Example #25
def get(self, netloc, ua, timeout):
    try:
        headers = {'User-Agent': ua, 'Referer': netloc}
        result = _basic_request(netloc, headers=headers, timeout=timeout)
        match = re.findall(r'xhr\.open\("GET","([^,]+),', result, re.I)
        if not match: return False
        url_Parts = match[0].split('"')
        url_Parts[1] = '1680'
        url = urljoin(netloc, ''.join(url_Parts))
        match = re.findall(r'rid\s*?=\s*?([0-9a-zA-Z]+)', url_Parts[0])
        if not match: return False
        headers['Cookie'] = 'rcksid=%s' % match[0]
        result = _basic_request(url, headers=headers, timeout=timeout)
        return self.getCookieString(result, headers['Cookie'])
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
Example #26
def __top_domain(url):
    try:  #Py2
        from urlparse import urlparse
    except ImportError:  #Py3
        from urllib.parse import urlparse
    try:
        elements = urlparse(url)
        domain = elements.netloc or elements.path
        domain = domain.split('@')[-1].split(':')[0]
        regex = r"(?:www\.)?([\w\-]*\.[\w\-]{2,3}(?:\.[\w\-]{2,3})?)$"
        res = re.search(regex, domain)
        if res: domain = res.group(1)
        domain = domain.lower()
        return domain
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
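
Hypothetical inputs; the regex keeps the registrable domain plus at most two short trailing labels, after discarding credentials and port:

__top_domain('https://www.example.co.uk/path')  # -> 'example.co.uk'
__top_domain('http://user@sub.domain.com:8080')  # -> 'domain.com'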
Example #27
def get(title):
    try:
        if not title: return
        try:
            title = py_tools.ensure_str(title)
        except:
            pass
        title = re.sub(r'&#(\d+);', '', title).lower()
        title = re.sub(r'(&#[0-9]+)([^;^0-9]+)', '\\1;\\2', title)
        title = title.replace('&quot;', '\"').replace('&amp;', '&')
        title = re.sub(r'\n|([\[({].+?[})\]])|([:;–\-"\',!_.?~$@])|\s', '', title)  # stop trying to remove alpha characters "vs" or "v", they're part of a title
        return title
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return title
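
Illustrative normalizations with hypothetical titles; bracketed runs, punctuation, and whitespace all collapse away so titles compare loosely:

get('The Matrix (1999)')  # -> 'thematrix'
get('Spider-Man: No Way Home')  # -> 'spidermannowayhome'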
Example #28
def cache_get(key):
    try:
        dbcon = get_connection()
        dbcur = get_connection_cursor(dbcon)
        ck_table = dbcur.execute(
            '''SELECT * FROM sqlite_master WHERE type='table' AND name='cache';'''
        ).fetchone()
        if not ck_table: return None
        results = dbcur.execute('''SELECT * FROM cache WHERE key=?''',
                                (key, )).fetchone()
        return results
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return None
    finally:
        try:
            dbcur.close()
            dbcon.close()
        except: pass  # guard: the connection may never have been opened
Example #29
def _add_request_header(_request, headers):
    try:
        if not headers: headers = {}
        if py_tools.isPY3:
            scheme = _request.type
            host = _request.host
        else:
            scheme = _request.get_type()
            host = _request.get_host()
        referer = headers.get('Referer', '%s://%s/' % (scheme, host))
        _request.add_unredirected_header('Host', host)
        _request.add_unredirected_header('Referer', referer)
        for key in headers:
            _request.add_header(key, headers[key])
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
Example #30
def _replaceHTMLCodes(txt):
    try:
        if not txt: return ''
        txt = re.sub(r"(&#[0-9]+)([^;^0-9]+)", "\\1;\\2", txt)
        txt = unescape(txt)
        txt = txt.replace("&quot;", "\"")
        txt = txt.replace("&amp;", "&")
        txt = txt.replace("&lt;", "<")
        txt = txt.replace("&gt;", ">")
        txt = txt.replace("&#38;", "&")
        txt = txt.replace("&nbsp;", "")
        txt = txt.replace('&#8230;', '...')
        txt = txt.replace('&#8217;', '\'')
        txt = txt.replace('&#8211;', '-')
        txt = txt.strip()
        return txt
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return txt
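
A hypothetical call, assuming unescape is the standard library HTML unescape (html.unescape on Py3); entities that survive unescaping are handled by the literal replacements:

_replaceHTMLCodes(' Tom &amp; Jerry &gt; mice ')  # -> 'Tom & Jerry > mice'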