示例#1
0
 def get_movie_urls(self, title, year=None):
     """Search fdb.pl for ``title`` and return the matching film pages.

     The title is first passed through ``deogonkify`` (defined elsewhere;
     presumably it replaces Polish diacritics -- confirm), then the fdb.pl
     search page is downloaded and scanned line by line for result links of
     the form ``<a href="http://fdb.pl/film/ID-slug">Title (YYYY)</a>``.

     Args:
         title: Movie title to search for.
         year: Optional release year.  Only the first four characters of
             ``str(year)`` are compared with the first four characters of
             the year found in the link text.  ``None`` disables the filter.

     Returns:
         A list of dicts with ``'url'``, ``'title'`` and ``'year'`` keys, one
         per accepted search result (possibly empty).  ``None`` when any
         exception is raised; the exception is logged, not propagated.
     """
     try:
         title = deogonkify(title)
         logger.info("Fetching: " + title)
         query_url = 'http://fdb.pl/szukaj?query=' + urllib.quote_plus(title)

         response = urllib.urlopen(query_url)
         try:
             contents = response.read()
         finally:
             # Release the connection even if reading fails.
             response.close()

         # Compiled once here rather than once per line of the page.
         result_re = re.compile(
             r'^\s*<a href="http://fdb.pl/film/(\d+-[^"]+)">(.*) \((\d{4}(?:/I+)?)\)</a>\s*$')
         wanted_year = None if year is None else str(year)[0:4]

         results = []
         for line in contents.split('\n'):
             found = result_re.match(line)
             if not found:
                 continue
             # The captured year may carry a "/I", "/II" suffix; only the
             # four leading digits take part in the comparison.
             result_year = found.group(3).strip()
             if wanted_year is not None and wanted_year != result_year[0:4]:
                 continue
             results.append({
                 'url': 'http://fdb.pl/film/' + found.group(1).strip(),
                 'title': found.group(2).strip(),
                 'year': result_year,
             })
         return results
     except Exception as e:
         logger.exception(e)
         return None
示例#2
0
 def get_movie_urls(self, title, year=None):
     """Search filmweb.pl for ``title`` and return the matching film links.

     The title is first passed through ``deogonkify`` (defined elsewhere;
     presumably it replaces Polish diacritics -- confirm).  The search page
     is then scanned line by line: a ``searchResultTitle`` link supplies the
     URL and title, and the line directly after it is expected to hold the
     year as ``(YYYY)``.

     Args:
         title: Movie title to search for.
         year: Optional release year.  Only the first four characters of
             ``str(year)`` are compared with the year on the page.  ``None``
             disables the filter.

     Returns:
         A list of dicts with ``'url'`` (the link's href exactly as found on
         the page), ``'title'`` and ``'year'`` keys, possibly empty.  ``None``
         when any exception is raised; the exception is logged, not
         propagated.
     """
     try:
         title = deogonkify(title)
         query_url = ('http://www.filmweb.pl/szukaj?c=film&q=' +
                      urllib.quote_plus(title))

         response = urllib.urlopen(query_url)
         try:
             contents = response.read()
         finally:
             # Release the connection even if reading fails.
             response.close()

         # Compiled once here rather than once per line of the page.
         title_re = re.compile(
             r'^\s*<a class="searchResultTitle" href="([^"]+)">(.*)</a>\s*$')
         year_re = re.compile(r'^\s*\((\d{4})\).*$')
         wanted_year = None if year is None else str(year)[0:4]

         results = []
         pending = None  # (url, title) of a link still waiting for its year
         for line in contents.split('\n'):
             if pending is not None:
                 result_url, result_title = pending
                 pending = None
                 year_match = year_re.match(line)
                 if not year_match:
                     # Only the line right after the link is tried as its
                     # year; if it is something else the link is dropped
                     # and this line is not inspected any further.
                     continue
                 result_year = year_match.group(1).strip()
                 if wanted_year is None or wanted_year == result_year[0:4]:
                     results.append({
                         'url': result_url,
                         'title': result_title,
                         'year': result_year,
                     })
             title_match = title_re.match(line)
             if title_match:
                 pending = (title_match.group(1).strip(),
                            title_match.group(2).strip())
         return results
     except Exception as e:
         logger.exception(e)
         return None
示例#3
0
 def get_movie_urls(self, title, year=None):
     """Look ``title`` up on filmweb.pl and collect the result links.

     ``deogonkify`` (defined elsewhere; presumably strips Polish diacritics
     -- confirm) is applied to the title before it is URL-quoted into the
     search query.  The downloaded page is read line by line; each
     ``searchResultTitle`` link must be followed immediately by a line that
     starts with ``(YYYY)`` to be recorded.

     Args:
         title: Movie title to search for.
         year: Optional release year; compared on the first four characters
             of ``str(year)``.  ``None`` accepts every year.

     Returns:
         A list of ``{'url', 'title', 'year'}`` dicts (possibly empty), or
         ``None`` if any exception occurred.  The exception is logged and
         swallowed.
     """
     try:
         title = deogonkify(title)
         query_url = 'http://www.filmweb.pl/szukaj?c=film&q=' + urllib.quote_plus(
             title)

         page = urllib.urlopen(query_url)
         try:
             contents = page.read()
         finally:
             # Close the response whether or not the read succeeded.
             page.close()

         # Patterns are loop-invariant, so build them once up front.
         link_pattern = re.compile(
             r'^\s*<a class="searchResultTitle" href="([^"]+)">(.*)</a>\s*$')
         year_pattern = re.compile(r'^\s*\((\d{4})\).*$')

         results = []
         result_url = result_title = None
         expecting_year = False
         for line in contents.split('\n'):
             if expecting_year:
                 # The year is looked for on exactly one line after a link.
                 expecting_year = False
                 year_match = year_pattern.match(line)
                 if not year_match:
                     continue
                 result_year = year_match.group(1).strip()
                 if year is None or str(year)[0:4] == result_year[0:4]:
                     results.append({
                         'url': result_url,
                         'title': result_title,
                         'year': result_year,
                     })
             link_match = link_pattern.match(line)
             if not link_match:
                 continue
             result_url = link_match.group(1).strip()
             result_title = link_match.group(2).strip()
             expecting_year = True
         return results
     except Exception as e:
         logger.exception(e)
         return None
示例#4
0
    def __init__(self, *args, **kw):
        """Sanitize the initial ``username`` and delegate to the parent form.

        When an ``initial`` keyword argument is given and has a
        ``"username"`` entry, that entry is replaced in place (the caller's
        mapping is modified) with the value converted to ``unicode``, passed
        through ``deogonkify`` (presumably strips Polish diacritics --
        confirm) and stripped of every non-word character.
        """
        from film20.utils.texts import deogonkify

        defaults = kw.get("initial")
        if defaults and "username" in defaults:
            normalized = deogonkify(unicode(defaults["username"]))
            defaults["username"] = re.sub("[^\w]", "", normalized)
        super(SSORegistrationForm, self).__init__(*args, **kw)
示例#5
0
    def get_movie_urls(self, title, year=None):
        """Search uk.rottentomatoes.com for ``title`` and return result URLs.

        The title is first passed through ``deogonkify`` (defined elsewhere;
        presumably it replaces Polish diacritics -- confirm).  The search
        page is decoded as windows-1252 and scanned line by line: a
        ``<a href="/m/...">`` link starts a result, and the next
        ``lastCol date`` table cell supplies its year.

        Args:
            title: Movie title to search for.
            year: Optional release year.  Only the first four characters of
                ``str(year)`` are compared with the year on the page.
                ``None`` disables the filter.

        Returns:
            A list of dicts with ``'url'``, ``'title'`` and ``'year'`` keys
            (possibly empty).  ``'title'`` is the normalized query title,
            not the link text.  ``None`` when the page cannot be decoded
            (``UnicodeDecodeError`` is logged); other exceptions propagate.
        """
        title = deogonkify(title)
        logger.info("Fetching: " + title)
        query_url = ("http://uk.rottentomatoes.com/search/full_search.php?search=" +
                     urllib.quote_plus(title))

        # Compiled once here rather than once per line of the page.
        link_re = re.compile(r'^\s*<a href="/m/([^"]+)">(.+)</a>\s*$')
        year_re = re.compile(
            r'^\s*<td class="lastCol date" width="15%"><p><strong>(\d{4})<\/strong><\/p><\/td>.*$')

        try:
            response = urllib.urlopen(query_url)
            try:
                contents = codecs.getreader("windows-1252")(response).read()
            finally:
                # Release the connection even if decoding fails.
                response.close()

            wanted_year = None if year is None else str(year)[0:4]
            results = []
            pending_url = None  # URL of a link still waiting for its year cell
            for line in contents.split('\n'):
                if pending_url is None:
                    link = link_re.match(line)
                    if link:
                        pending_url = ("http://rottentomatoes.com/m/" +
                                       link.group(1).strip())
                    continue

                # While a link is pending, everything up to its year cell
                # (including further links) is skipped.
                year_cell = year_re.match(line)
                if not year_cell:
                    continue

                result_year = year_cell.group(1).strip()
                # Only record the result if the year matches (or no year
                # was requested).
                if wanted_year is None or wanted_year == result_year[0:4]:
                    results.append({
                        'url': pending_url,
                        'title': title,
                        'year': result_year,
                    })
                # The year cell closes the current result either way.
                # Previously the state was only reset on a year mismatch, so
                # after the first hit every later year cell re-emitted the
                # same URL and no further links were picked up.
                pending_url = None
            return results
        except UnicodeDecodeError as e:
            logger.exception(e)
            return None
示例#6
0
 def get_movie_urls(self, title, year=None):
     """Look ``title`` up on fdb.pl and collect the matching film pages.

     ``deogonkify`` (defined elsewhere; presumably strips Polish diacritics
     -- confirm) is applied to the title before it is URL-quoted into the
     search query.  Every line of the downloaded page that consists of a
     single ``<a href="http://fdb.pl/film/...">Title (YYYY)</a>`` link is
     treated as a search result.

     Args:
         title: Movie title to search for.
         year: Optional release year; compared on the first four characters
             of ``str(year)``.  ``None`` accepts every year.

     Returns:
         A list of ``{'url', 'title', 'year'}`` dicts (possibly empty), or
         ``None`` if any exception occurred.  The exception is logged and
         swallowed.
     """
     try:
         title = deogonkify(title)
         s = "Fetching: " + title
         logger.info(s)
         query_url = 'http://fdb.pl/szukaj?query=' + urllib.quote_plus(title)

         page = urllib.urlopen(query_url)
         try:
             contents = page.read()
         finally:
             # Close the response whether or not the read succeeded.
             page.close()

         # The pattern is loop-invariant, so build it once up front.
         link_pattern = re.compile(
             r'^\s*<a href="http://fdb.pl/film/(\d+-[^"]+)">(.*) \((\d{4}(?:/I+)?)\)</a>\s*$')

         results = []
         for line in contents.split('\n'):
             link_match = link_pattern.match(line)
             if not link_match:
                 continue
             slug, result_title, result_year = [
                 part.strip() for part in link_match.groups()]
             # result_year may end in "/I", "/II", ...; compare digits only.
             if year is None or str(year)[0:4] == result_year[0:4]:
                 results.append({
                     'url': 'http://fdb.pl/film/' + slug,
                     'title': result_title,
                     'year': result_year,
                 })
         return results
     except Exception as e:
         logger.exception(e)
         return None
示例#7
0
   def get_movie_urls(self, title, year=None):
       """Look ``title`` up on uk.rottentomatoes.com and collect result URLs.

       ``deogonkify`` (defined elsewhere; presumably strips Polish
       diacritics -- confirm) is applied to the title before it is
       URL-quoted into the search query.  The response is decoded as
       windows-1252.  A line holding an ``<a href="/m/...">`` link opens a
       result; the following ``lastCol date`` table cell provides its year.

       Args:
           title: Movie title to search for.
           year: Optional release year; compared on the first four
               characters of ``str(year)``.  ``None`` accepts every year.

       Returns:
           A list of ``{'url', 'title', 'year'}`` dicts (possibly empty);
           ``'title'`` is the normalized query title rather than the link
           text.  ``None`` if decoding the page raises
           ``UnicodeDecodeError`` (which is logged).  Any other exception
           propagates to the caller.
       """
       title = deogonkify(title)
       s = "Fetching: " + title
       logger.info(s)
       query_url = "http://uk.rottentomatoes.com/search/full_search.php?search=" + urllib.quote_plus(title)

       # Patterns are loop-invariant, so build them once up front.
       link_pattern = re.compile(r'^\s*<a href="/m/([^"]+)">(.+)</a>\s*$')
       year_pattern = re.compile(
           r'^\s*<td class="lastCol date" width="15%"><p><strong>(\d{4})<\/strong><\/p><\/td>.*$')

       try:
           page = urllib.urlopen(query_url)
           try:
               # windows-1252
               contents = codecs.getreader("windows-1252")(page).read()
           finally:
               # Close the response whether or not decoding succeeded.
               page.close()

           results = []
           result_url = None
           awaiting_year = False
           for line in contents.split('\n'):
               if awaiting_year:
                   # Lines between the link and its year cell are ignored.
                   year_match = year_pattern.match(line)
                   if not year_match:
                       continue

                   result_year = year_match.group(1).strip()
                   # Only record the result if the year matches (or is null).
                   if year is None or str(year)[0:4] == result_year[0:4]:
                       results.append({
                           'url': result_url,
                           'title': title,
                           'year': result_year,
                       })
                   # Always go back to looking for the next link.  The flag
                   # used to stay set after a recorded result, which made
                   # every later year cell repeat the first URL.
                   awaiting_year = False
               else:
                   link_match = link_pattern.match(line)
                   if not link_match:
                       continue
                   result_url = "http://rottentomatoes.com/m/" + link_match.group(1).strip()
                   awaiting_year = True
           return results
       except UnicodeDecodeError as e:
           logger.exception(e)
           return None
示例#8
0
    def format(cls, txt):
        """Return ``txt`` normalized by ``deogonkify`` and cut to 160 characters.

        ``deogonkify`` comes from ``film20.utils.texts`` (presumably it
        replaces Polish diacritics -- confirm).  Only the first 160
        characters of its result are kept, and the slice is converted with
        ``str()`` before being returned.
        """
        from film20.utils.texts import deogonkify

        normalized = deogonkify(txt)
        truncated = normalized[0:160]
        return str(truncated)
示例#9
0
 def format(cls, txt):
     """Shorten ``txt`` to at most 160 characters after ``deogonkify``.

     The text is passed through ``film20.utils.texts.deogonkify``
     (presumably strips Polish diacritics -- confirm), sliced to its first
     160 characters and returned as a ``str``.
     """
     from film20.utils.texts import deogonkify

     max_length = 160
     head = deogonkify(txt)[0:max_length]
     return str(head)
示例#10
0
def clean_username(username):
    """Return ``username`` with everything but word characters removed.

    The value is converted to ``unicode``, passed through
    ``film20.utils.texts.deogonkify`` (presumably replaces Polish
    diacritics -- confirm), and every character not matched by ``\\w`` is
    then deleted.
    """
    from film20.utils.texts import deogonkify

    normalized = deogonkify(unicode(username))
    return re.sub("[^\w]", "", normalized)