def get_movie_urls(self, title, year=None):
    """Search fdb.pl for *title* and return the film links found on the page.

    The title is passed through ``deogonkify``, URL-quoted and appended to
    the fdb.pl search URL.  Every line of the response that consists solely
    of a film link of the form
    ``<a href="http://fdb.pl/film/ID-slug">Title (YYYY)</a>`` yields one
    result.

    Args:
        title: Film title to search for.
        year: Optional release year.  Only the first four characters of
            ``str(year)`` are compared with the first four characters of
            the year printed on the page.  ``None`` disables the filter.

    Returns:
        A (possibly empty) list of dicts with the keys ``'url'``,
        ``'title'`` and ``'year'``, or ``None`` if any exception was
        raised.  The exception is logged and never propagated.
    """
    try:
        title = deogonkify(title)
        logger.info("Fetching: " + title)
        query_url = 'http://fdb.pl/szukaj?query=' + urllib.quote_plus(title)

        response = urllib.urlopen(query_url)
        try:
            contents = response.read()
        finally:
            # Release the connection now instead of leaving it to the
            # garbage collector.
            response.close()

        # Compiled once rather than once per line.  The year group also
        # accepts a suffix such as "/I" or "/II".
        result_pattern = re.compile(
            r'^\s*<a href="http://fdb.pl/film/(\d+-[^"]+)">'
            r'(.*) \((\d{4}(?:/I+)?)\)</a>\s*$')

        results = []
        for line in contents.split('\n'):
            line_match = result_pattern.match(line)
            if not line_match:
                continue
            result_year = line_match.group(3).strip()
            if year is None or str(year)[0:4] == result_year[0:4]:
                results.append({
                    'url': 'http://fdb.pl/film/' + line_match.group(1).strip(),
                    'title': line_match.group(2).strip(),
                    'year': result_year,
                })
        return results
    except Exception as e:
        # Best-effort lookup: log the failure and signal it with None, as
        # the original implicit return did.
        logger.exception(e)
        return None
def get_movie_urls(self, title, year=None):
    """Search filmweb.pl for *title* and return the film links found.

    The title is passed through ``deogonkify``, URL-quoted and appended to
    the filmweb.pl search URL.  A result is a line holding an
    ``<a class="searchResultTitle" href="...">Title</a>`` link whose very
    next line starts with a four-digit year in parentheses.  A title link
    not directly followed by such a line is dropped.

    Args:
        title: Film title to search for.
        year: Optional release year.  Only the first four characters of
            ``str(year)`` are compared with the year on the page.
            ``None`` disables the filter.

    Returns:
        A (possibly empty) list of dicts with the keys ``'url'``,
        ``'title'`` and ``'year'``, or ``None`` if any exception was
        raised.  The exception is logged and never propagated.
    """
    try:
        title = deogonkify(title)
        query_url = ('http://www.filmweb.pl/szukaj?c=film&q=' +
                     urllib.quote_plus(title))

        response = urllib.urlopen(query_url)
        try:
            contents = response.read()
        finally:
            # Release the connection now instead of leaving it to the
            # garbage collector.
            response.close()

        # Compiled once rather than once per line.
        title_pattern = re.compile(
            r'^\s*<a class="searchResultTitle" href="([^"]+)">(.*)</a>\s*$')
        year_pattern = re.compile(r'^\s*\((\d{4})\).*$')

        results = []
        result_url = None
        result_title = None
        expect_year = False  # True only for the line right after a title link
        for line in contents.split('\n'):
            if expect_year:
                expect_year = False
                year_match = year_pattern.match(line)
                if not year_match:
                    # The pending title is abandoned and this line is not
                    # re-examined as a title link.
                    continue
                result_year = year_match.group(1).strip()
                if year is None or str(year)[0:4] == result_year[0:4]:
                    results.append({
                        'url': result_url,
                        'title': result_title,
                        'year': result_year,
                    })
                # A line starting with "(" cannot also match the title
                # pattern, so skipping that check changes nothing.
                continue

            title_match = title_pattern.match(line)
            if not title_match:
                continue
            result_url = title_match.group(1).strip()
            result_title = title_match.group(2).strip()
            expect_year = True
        return results
    except Exception as e:
        # Best-effort lookup: log the failure and signal it with None, as
        # the original implicit return did.
        logger.exception(e)
        return None
def get_movie_urls(self, title, year=None):
    """Search filmweb.pl for *title* and return the film links found.

    The title is passed through ``deogonkify``, URL-quoted and appended to
    the filmweb.pl search URL.  A result is a line holding an
    ``<a class="searchResultTitle" href="...">Title</a>`` link whose very
    next line starts with a four-digit year in parentheses.  A title link
    not directly followed by such a line is dropped.

    Args:
        title: Film title to search for.
        year: Optional release year.  Only the first four characters of
            ``str(year)`` are compared with the year on the page.
            ``None`` disables the filter.

    Returns:
        A (possibly empty) list of dicts with the keys ``'url'``,
        ``'title'`` and ``'year'``, or ``None`` if any exception was
        raised.  The exception is logged and never propagated.
    """
    try:
        title = deogonkify(title)
        query_url = ('http://www.filmweb.pl/szukaj?c=film&q=' +
                     urllib.quote_plus(title))

        response = urllib.urlopen(query_url)
        try:
            contents = response.read()
        finally:
            # Release the connection now instead of leaving it to the
            # garbage collector.
            response.close()

        # Compiled once rather than once per line.
        title_pattern = re.compile(
            r'^\s*<a class="searchResultTitle" href="([^"]+)">(.*)</a>\s*$')
        year_pattern = re.compile(r'^\s*\((\d{4})\).*$')

        results = []
        result_url = None
        result_title = None
        expect_year = False  # True only for the line right after a title link
        for line in contents.split('\n'):
            if expect_year:
                expect_year = False
                year_match = year_pattern.match(line)
                if not year_match:
                    # The pending title is abandoned and this line is not
                    # re-examined as a title link.
                    continue
                result_year = year_match.group(1).strip()
                if year is None or str(year)[0:4] == result_year[0:4]:
                    results.append({
                        'url': result_url,
                        'title': result_title,
                        'year': result_year,
                    })
                # A line starting with "(" cannot also match the title
                # pattern, so skipping that check changes nothing.
                continue

            title_match = title_pattern.match(line)
            if not title_match:
                continue
            result_url = title_match.group(1).strip()
            result_title = title_match.group(2).strip()
            expect_year = True
        return results
    except Exception as e:
        # Best-effort lookup: log the failure and signal it with None, as
        # the original implicit return did.
        logger.exception(e)
        return None
def __init__(self, *args, **kw):
    """Sanitise a pre-filled username, then run the parent initialiser.

    If the ``initial`` keyword argument is truthy and has a ``"username"``
    entry, that entry is overwritten in place: the value is converted to
    unicode, passed through ``deogonkify`` and stripped of every character
    outside the regex word class.  All positional and keyword arguments
    are then forwarded unchanged to the superclass ``__init__``.
    """
    from film20.utils.texts import deogonkify

    prefilled = kw.get("initial")
    if prefilled and "username" in prefilled:
        raw_name = unicode(prefilled["username"])
        prefilled["username"] = re.sub("[^\w]", "", deogonkify(raw_name))

    super(SSORegistrationForm, self).__init__(*args, **kw)
def get_movie_urls(self, title, year=None):
    """Search uk.rottentomatoes.com for *title* and return the film links.

    The title is passed through ``deogonkify``, URL-quoted and appended to
    the search URL.  The response is decoded as windows-1252 and scanned
    line by line: a line holding only an ``<a href="/m/...">`` link starts
    a pending hit, and the next line holding a
    ``<td class="lastCol date" ...>`` cell with a four-digit year completes
    it.

    Args:
        title: Film title to search for.
        year: Optional release year.  Only the first four characters of
            ``str(year)`` are compared with the year on the page.
            ``None`` disables the filter.

    Returns:
        A (possibly empty) list of dicts with the keys ``'url'``,
        ``'title'`` and ``'year'``.  ``'title'`` is the normalised query
        title, not the link text from the page.  ``None`` is returned when
        decoding the page raises ``UnicodeDecodeError`` (logged).  Other
        exceptions are not caught here.
    """
    title = deogonkify(title)
    logger.info("Fetching: " + title)
    query_url = ("http://uk.rottentomatoes.com/search/full_search.php?search=" +
                 urllib.quote_plus(title))

    # Compiled once rather than once per line.
    link_pattern = re.compile(r'^\s*<a href="/m/([^"]+)">(.+)</a>\s*$')
    year_pattern = re.compile(
        r'^\s*<td class="lastCol date" width="15%"><p><strong>'
        r'(\d{4})<\/strong><\/p><\/td>.*$')

    try:
        response = urllib.urlopen(query_url)
        try:
            contents = codecs.getreader("windows-1252")(response).read()
        finally:
            # Release the connection now instead of leaving it to the
            # garbage collector.
            response.close()

        results = []
        result_url = None
        awaiting_year = False  # a film link was seen, its date cell not yet
        for line in contents.split('\n'):
            if awaiting_year:
                year_match = year_pattern.match(line)
                if not year_match:
                    # Keep scanning until the date cell for this link shows up.
                    continue
                # The date cell closes the pending hit whether or not it is
                # kept.  Previously the flag was cleared only on a year
                # mismatch, so after the first recorded hit no further links
                # were parsed and later date cells reused the stale URL.
                awaiting_year = False
                result_year = year_match.group(1).strip()
                # Only record the result if the year matches (or is None).
                if year is None or str(year)[0:4] == result_year[0:4]:
                    results.append({
                        'url': result_url,
                        'title': title,
                        'year': result_year,
                    })
            else:
                link_match = link_pattern.match(line)
                if not link_match:
                    continue
                result_url = ("http://rottentomatoes.com/m/" +
                              link_match.group(1).strip())
                awaiting_year = True
        return results
    except UnicodeDecodeError as e:
        logger.exception(e)
        return None
def get_movie_urls(self, title, year=None):
    """Search fdb.pl for *title* and return the film links found on the page.

    The title is passed through ``deogonkify``, URL-quoted and appended to
    the fdb.pl search URL.  Every line of the response that consists solely
    of a film link of the form
    ``<a href="http://fdb.pl/film/ID-slug">Title (YYYY)</a>`` yields one
    result.

    Args:
        title: Film title to search for.
        year: Optional release year.  Only the first four characters of
            ``str(year)`` are compared with the first four characters of
            the year printed on the page.  ``None`` disables the filter.

    Returns:
        A (possibly empty) list of dicts with the keys ``'url'``,
        ``'title'`` and ``'year'``, or ``None`` if any exception was
        raised.  The exception is logged and never propagated.
    """
    try:
        title = deogonkify(title)
        logger.info("Fetching: " + title)
        query_url = 'http://fdb.pl/szukaj?query=' + urllib.quote_plus(title)

        response = urllib.urlopen(query_url)
        try:
            contents = response.read()
        finally:
            # Release the connection now instead of leaving it to the
            # garbage collector.
            response.close()

        # Compiled once rather than once per line.  The year group also
        # accepts a suffix such as "/I" or "/II".
        result_pattern = re.compile(
            r'^\s*<a href="http://fdb.pl/film/(\d+-[^"]+)">'
            r'(.*) \((\d{4}(?:/I+)?)\)</a>\s*$')

        results = []
        for line in contents.split('\n'):
            line_match = result_pattern.match(line)
            if not line_match:
                continue
            result_year = line_match.group(3).strip()
            if year is None or str(year)[0:4] == result_year[0:4]:
                results.append({
                    'url': 'http://fdb.pl/film/' + line_match.group(1).strip(),
                    'title': line_match.group(2).strip(),
                    'year': result_year,
                })
        return results
    except Exception as e:
        # Best-effort lookup: log the failure and signal it with None, as
        # the original implicit return did.
        logger.exception(e)
        return None
def get_movie_urls(self, title, year=None):
    """Search uk.rottentomatoes.com for *title* and return the film links.

    The title is passed through ``deogonkify``, URL-quoted and appended to
    the search URL.  The response is decoded as windows-1252 and scanned
    line by line: a line holding only an ``<a href="/m/...">`` link starts
    a pending hit, and the next line holding a
    ``<td class="lastCol date" ...>`` cell with a four-digit year completes
    it.

    Args:
        title: Film title to search for.
        year: Optional release year.  Only the first four characters of
            ``str(year)`` are compared with the year on the page.
            ``None`` disables the filter.

    Returns:
        A (possibly empty) list of dicts with the keys ``'url'``,
        ``'title'`` and ``'year'``.  ``'title'`` is the normalised query
        title, not the link text from the page.  ``None`` is returned when
        decoding the page raises ``UnicodeDecodeError`` (logged).  Other
        exceptions are not caught here.
    """
    title = deogonkify(title)
    logger.info("Fetching: " + title)
    query_url = ("http://uk.rottentomatoes.com/search/full_search.php?search=" +
                 urllib.quote_plus(title))

    # Compiled once rather than once per line.
    link_pattern = re.compile(r'^\s*<a href="/m/([^"]+)">(.+)</a>\s*$')
    year_pattern = re.compile(
        r'^\s*<td class="lastCol date" width="15%"><p><strong>'
        r'(\d{4})<\/strong><\/p><\/td>.*$')

    try:
        response = urllib.urlopen(query_url)
        try:
            contents = codecs.getreader("windows-1252")(response).read()
        finally:
            # Release the connection now instead of leaving it to the
            # garbage collector.
            response.close()

        results = []
        result_url = None
        awaiting_year = False  # a film link was seen, its date cell not yet
        for line in contents.split('\n'):
            if awaiting_year:
                year_match = year_pattern.match(line)
                if not year_match:
                    # Keep scanning until the date cell for this link shows up.
                    continue
                # The date cell closes the pending hit whether or not it is
                # kept.  Previously the flag was cleared only on a year
                # mismatch, so after the first recorded hit no further links
                # were parsed and later date cells reused the stale URL.
                awaiting_year = False
                result_year = year_match.group(1).strip()
                # Only record the result if the year matches (or is None).
                if year is None or str(year)[0:4] == result_year[0:4]:
                    results.append({
                        'url': result_url,
                        'title': title,
                        'year': result_year,
                    })
            else:
                link_match = link_pattern.match(line)
                if not link_match:
                    continue
                result_url = ("http://rottentomatoes.com/m/" +
                              link_match.group(1).strip())
                awaiting_year = True
        return results
    except UnicodeDecodeError as e:
        logger.exception(e)
        return None
def format(cls, txt):
    """Return the first 160 items of ``deogonkify(txt)`` converted to ``str``."""
    from film20.utils.texts import deogonkify

    normalised = deogonkify(txt)
    truncated = normalised[0:160]
    return str(truncated)
def clean_username(username):
    """Return *username* reduced to regex word characters.

    The value is converted to unicode and passed through ``deogonkify``;
    every character outside the regex word class is then removed.
    """
    from film20.utils.texts import deogonkify

    as_text = unicode(username)
    normalised = deogonkify(as_text)
    return re.sub("[^\w]", "", normalised)