def handle(self, *args, **kwargs):
    """Attach resolved link targets to the tweets at positions [low, high).

    Expects two positional arguments, ``low`` and ``high``, each convertible
    with ``int()``. When either is missing or not an integer, ``self.args``
    is printed (presumably the usage string -- confirm against the enclosing
    command class) and the method returns without touching the database.

    Tweets are read from ``Tweet.objects.all()`` in slices of at most 1000
    rows. For every ``link_regex`` match in ``tweet.text`` the short URL is
    looked up in ``ShortUrl``; on a miss it is expanded with
    ``get_long_url`` and stored through ``get_or_create`` on both
    ``CanonicalUrl`` and ``ShortUrl``. Either way the resulting url object
    is added to ``tweet.urls``.

    Returns:
        None.
    """
    try:
        low = int(args[0])
        high = int(args[1])
    except (ValueError, IndexError):
        print(self.args)
        return

    chunk = 1000
    for x in range(low, high, chunk):
        # Progress line: start offset, upper bound, percent of range done.
        # Formatted into one string so the output is the same whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        percent_done = float(x - low) / (high - low) * 100
        print("%s / %s %s" % (x, high, percent_done))

        # Clamp the final slice to ``high``; slicing to ``x + chunk``
        # unconditionally would process rows past the requested range
        # whenever (high - low) is not a multiple of ``chunk``.
        stop = min(x + chunk, high)

        # NOTE(review): the queryset is sliced without an explicit ordering
        # here; confirm Tweet defines a default ordering so that successive
        # slices are stable.
        for tweet in Tweet.objects.all()[x:stop]:
            results = re.finditer(link_regex, tweet.text, re.I | re.DOTALL)
            for match in results:
                # Keep only the part of the captured link before any "&".
                short_url = match.group(2).split("&")[0]
                try:
                    url = ShortUrl.objects.get(short=short_url).url
                except ShortUrl.DoesNotExist:
                    url, _created = CanonicalUrl.objects.get_or_create(
                        url=get_long_url(short_url))
                    ShortUrl.objects.get_or_create(
                        short=short_url,
                        url=url,
                    )
                tweet.urls.add(url)
def parse_tweet(tweet_id):
    """Resolve every link found in one tweet and attach it to ``tweet.urls``.

    Prints ``tweet_id`` as a trace line, loads the ``Tweet`` with that
    primary key, and scans ``tweet.text`` with ``link_regex``. Each matched
    short URL is looked up in ``ShortUrl``; on a miss it is expanded with
    ``get_long_url`` and recorded through ``get_or_create`` on both
    ``CanonicalUrl`` and ``ShortUrl``.

    Args:
        tweet_id: primary key passed to ``Tweet.objects.get``.

    Returns:
        None.

    Raises:
        Whatever ``Tweet.objects.get``, ``get_long_url`` or the ORM calls
        raise; nothing is swallowed here.
    """
    print(tweet_id)
    # A bare ``return`` used to sit here and made everything below
    # unreachable, turning the function into a print-only stub.
    tweet = Tweet.objects.get(pk=tweet_id)

    results = re.finditer(link_regex, tweet.text, re.I | re.DOTALL)
    for match in results:
        # Keep only the part of the captured link before any "&".
        short_url = match.group(2).split("&")[0]

        # TODO: the original carried a "Retry transaction" marker at this
        # point; no retry is actually implemented.
        # Pre-bind so the ``finally`` clause cannot hit an unbound name
        # (and mask the real error) when the lookup/expansion fails.
        url = None
        try:
            url = ShortUrl.objects.get(short=short_url).url
        except ShortUrl.DoesNotExist:
            # This is blocking and long (waiting for network)
            long_url = get_long_url(short_url)
            url, _created = CanonicalUrl.objects.get_or_create(url=long_url)
            # Use get_or_create because another thread might have created it
            # while we were getting the long url.
            ShortUrl.objects.get_or_create(short=short_url, url=url)
        finally:
            # Still link the url if it was resolved before a later step
            # failed; skip when nothing was resolved at all.
            if url is not None:
                tweet.urls.add(url)
def parse_tweet(tweet_id):
    """Resolve every link found in one tweet and attach it to ``tweet.urls``.

    Prints ``tweet_id`` as a trace line, loads the ``Tweet`` with that
    primary key, and scans ``tweet.text`` with ``link_regex``. Each matched
    short URL is looked up in ``ShortUrl``; on a miss it is expanded with
    ``get_long_url`` and recorded through ``get_or_create`` on both
    ``CanonicalUrl`` and ``ShortUrl``.

    Args:
        tweet_id: primary key passed to ``Tweet.objects.get``.

    Returns:
        None.

    Raises:
        Whatever ``Tweet.objects.get``, ``get_long_url`` or the ORM calls
        raise; nothing is swallowed here.
    """
    print(tweet_id)
    # A bare ``return`` used to sit here and made everything below
    # unreachable, turning the function into a print-only stub.
    tweet = Tweet.objects.get(pk=tweet_id)

    results = re.finditer(link_regex, tweet.text, re.I | re.DOTALL)
    for match in results:
        # Keep only the part of the captured link before any "&".
        short_url = match.group(2).split("&")[0]

        # TODO: the original carried a "Retry transaction" marker at this
        # point; no retry is actually implemented.
        # Pre-bind so the ``finally`` clause cannot hit an unbound name
        # (and mask the real error) when the lookup/expansion fails.
        url = None
        try:
            url = ShortUrl.objects.get(short=short_url).url
        except ShortUrl.DoesNotExist:
            # This is blocking and long (waiting for network)
            long_url = get_long_url(short_url)
            url, _created = CanonicalUrl.objects.get_or_create(
                url=long_url
            )
            # Use get_or_create because another thread might have created it
            # while we were getting the long url.
            ShortUrl.objects.get_or_create(
                short=short_url,
                url=url
            )
        finally:
            # Still link the url if it was resolved before a later step
            # failed; skip when nothing was resolved at all.
            if url is not None:
                tweet.urls.add(url)
def handle(self, *args, **kwargs):
    """Attach resolved link targets to the tweets at positions [low, high).

    Expects two positional arguments, ``low`` and ``high``, each convertible
    with ``int()``. When either is missing or not an integer, ``self.args``
    is printed (presumably the usage string -- confirm against the enclosing
    command class) and the method returns without touching the database.

    Tweets are read from ``Tweet.objects.all()`` in slices of at most 1000
    rows. For every ``link_regex`` match in ``tweet.text`` the short URL is
    looked up in ``ShortUrl``; on a miss it is expanded with
    ``get_long_url`` and stored through ``get_or_create`` on both
    ``CanonicalUrl`` and ``ShortUrl``. Either way the resulting url object
    is added to ``tweet.urls``.

    Returns:
        None.
    """
    try:
        low = int(args[0])
        high = int(args[1])
    except (ValueError, IndexError):
        print(self.args)
        return

    chunk = 1000
    for x in range(low, high, chunk):
        # Progress line: start offset, upper bound, percent of range done.
        # Formatted into one string so the output is the same whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        percent_done = float(x - low) / (high - low) * 100
        print("%s / %s %s" % (x, high, percent_done))

        # Clamp the final slice to ``high``; slicing to ``x + chunk``
        # unconditionally would process rows past the requested range
        # whenever (high - low) is not a multiple of ``chunk``.
        stop = min(x + chunk, high)

        # NOTE(review): the queryset is sliced without an explicit ordering
        # here; confirm Tweet defines a default ordering so that successive
        # slices are stable.
        for tweet in Tweet.objects.all()[x:stop]:
            results = re.finditer(link_regex, tweet.text, re.I | re.DOTALL)
            for match in results:
                # Keep only the part of the captured link before any "&".
                short_url = match.group(2).split("&")[0]
                try:
                    url = ShortUrl.objects.get(short=short_url).url
                except ShortUrl.DoesNotExist:
                    url, _created = CanonicalUrl.objects.get_or_create(url=get_long_url(short_url))
                    ShortUrl.objects.get_or_create(short=short_url, url=url)
                tweet.urls.add(url)