def update_websearch(self, data_iterator):
    """Split the incoming search-history feed into queries and results.

    "web query" entries become items in ``self.incoming['websearch']``;
    "web result" entries are buffered on ``self.websearch_results`` so
    ``post_handle_item`` can attach them to their parent search later.
    """
    searches = self.incoming["websearch"] = list()
    for entry in data_iterator:
        kind = entry.tags[0].term
        if kind == "web query":
            # The guid path embeds the search-history prefix; strip it.
            guid = urlparse.urlsplit(entry.guid)[2].replace("/searchhistory/", "")
            searches.append({
                'engine': self.search_engine,
                'guid': smart_unicode(guid),
                'query': smart_unicode(entry.title),
                'timestamp': datetime.datetime(tzinfo=tzinfo.FixedOffset(0),
                                               *entry.updated_parsed[:6]),
            })
        elif kind == "web result":
            self.websearch_results.append({
                'guid': smart_unicode(entry.query_guid),
                'title': smart_unicode(entry.title),
                'url': smart_unicode(entry.link),
            })

def post_handle_item(self, item_instance, model_instance, data, created):
    """Attach any buffered result rows to the search that owns them."""
    matching = (r for r in self.websearch_results if r['guid'] == data['guid'])
    for result_data in matching:
        result, created = WebSearchResult.objects.get_or_create(
            title=result_data['title'],
            url=result_data['url'],
            search=model_instance,
        )

register_provider( GoogleSearchProvider )
def get_default_fields(self, model_cls):
    """Default field set minus the thumbnail fields delicious lacks."""
    fields = super(DeliciousProvider, self).get_default_fields(model_cls)
    return [f for f in fields if f.name not in ('thumbnail', 'thumbnail_url')]

def get_custom_data_interface_instance(self, interface_cls):
    """Build a delicious API client authenticated from settings."""
    return interface_cls(settings.DELICIOUS_USERNAME, settings.DELICIOUS_PASSWORD)

def update_bookmark(self, delicious):
    """Pull bookmarks newer than the last stored update into self.incoming."""
    last_update_date = Item.objects.get_last_update_of_model(Bookmark)
    bookmarks = self.incoming['bookmark'] = list()
    last_post_date = utils.parsedate(delicious.posts.update().get("time"))
    # Nothing posted since our last run — bail out early.
    if last_post_date <= last_update_date:
        log.info("Skipping update: last update date: %s; last post date: %s",
                 last_update_date, last_post_date)
        return
    # Walk posting dates oldest-first, fetching only days newer than
    # the last stored update.
    for datenode in reversed(list(delicious.posts.dates().getiterator('date'))):
        dt = utils.parsedate(datenode.get("date"))
        if dt <= last_update_date:
            continue
        xml = delicious.posts.get(dt=dt.strftime("%Y-%m-%d"))
        for post in xml.getiterator('post'):
            info = dict((k, smart_unicode(post.get(k))) for k in post.keys())
            # Normalize delicious attribute names to our field names.
            info['tags'] = info['tag']
            info['url'] = info['href']
            info['timestamp'] = utils.parsedate(info['time'])
            bookmarks.append(info)

register_provider( DeliciousProvider )
# Git chokes on the 1969-12-31 sentinal returned by # get_last_update_of_model, so fix that up. if last_update_date.date() == datetime.date(1969, 12, 31): last_update_date = datetime.datetime(1970, 1, 1) working_dir, repo = self.create_local_repo(repository) commits = repo.commits_since(since=last_update_date.strftime("%Y-%m-%d")) log.debug("Handling %s commits", len(commits)) for commit in reversed(commits): if commit.author.email == repository.username: log.debug("Handling [%s] from %s", commit.id[:7], repository.url) # stored as UTC timestamp = datetime.datetime.fromtimestamp(time.mktime(commit.committed_date)) if utils.JELLYROLL_ADJUST_DATETIME: timestamp = utils.utc_to_local_timestruct(commit.committed_date) obj = {} obj['revision'] = commit.id obj['repository'] = repository obj['message'] = smart_unicode(commit.message) obj['timestamp'] = timestamp commit_list.append( obj ) log.debug("Removing working dir %s.", working_dir) shutil.rmtree(working_dir) register_provider(GitSCMProvider)
class SubversionProvider(CodeRepositoryProvider):
    """Code-commit provider backed by a Subversion repository (pysvn)."""

    # NOTE(review): Meta subclasses the full provider class here —
    # possibly CodeRepositoryProvider.Meta was intended; preserved as-is.
    class Meta(CodeRepositoryProvider):
        repository_type = "svn"
        modules = ('pysvn',)

    def update_codecommit_svn(self, repository, last_update_date, commit_list):
        """Append commits by the repository's user since last_update_date."""
        # TODO: investigate issues with last_update_date, etc.
        since = pysvn.Revision(pysvn.opt_revision_kind.date,
                               time.mktime(last_update_date.timetuple()))
        client = pysvn.Client()
        # Oldest first; keep only revisions authored by the configured user.
        for entry in reversed(client.log(repository.url, revision_end=since)):
            rev = entry.revision
            if entry.author != repository.username:
                continue
            log.debug("Handling [%s] from %s" % (rev.number, repository.url))
            commit_list.append({
                'revision': str(rev.number),
                'repository': repository,
                'message': smart_unicode(entry.message),
                'timestamp': datetime.datetime.fromtimestamp(entry.date),
            })

register_provider(SubversionProvider)
def source_id(self, model_cls, extra):
    """Stable per-item identifier: MD5 hex digest of the video's URL."""
    return md5.new(smart_str(extra["url"])).hexdigest()

def update_video(self, client):
    """Fetch the user's favorite videos into self.incoming['video'].

    Each feed entry becomes a dict with url/title/tags/timestamp/source.
    Entries whose title cannot be decoded are skipped individually.
    """
    video_list = self.incoming["video"] = list()
    feed = client.GetUserFavoritesFeed()
    for entry in feed.entry:
        obj = {}
        obj["url"] = entry.link[0].href
        try:
            obj["title"] = smart_unicode(entry.title.text)
        except DjangoUnicodeDecodeError:
            # FIX: skip just this entry instead of aborting the whole
            # feed — previously a bare `return` here dropped every
            # remaining video whenever one title failed to decode.
            continue
        # HACK: avoid the last category which appears to
        # simply be a link to the schema for video objects?
        tags = [category.term for category in entry.category[:-1]]
        obj["tags"] = " ".join(tags)
        obj["timestamp"] = dateutil.parser.parse(entry.published.text)
        obj["source"] = self.source
        video_list.append(obj)

register_provider(YoutubeProvider)
# --- NOTE(review): tail of a LastfmProvider method — `urls` is a list ---
# --- built above this chunk; the enclosing `def` is outside this view. ---
    ]
tags = set()
for url in urls:
    tags.update(self.tags_for_url(url))

def tags_for_url(self, url):
    # Fetch the last.fm tag XML at `url` and keep tags whose usage
    # count meets LASTFM_TAG_USAGE_THRESHOLD (default 15).
    tags = set()
    try:
        xml = utils.getxml(url)
    except HttpLib2Error, e:
        if e.code == 408:
            # Request timeout: treat as "no tags".
            # NOTE(review): returns "" rather than set() — callers doing
            # tags.update(...) tolerate it, but it is type-inconsistent
            # with the set() returned on the normal path.
            return ""
        else:
            raise
    except SyntaxError:
        # Unparseable XML from the API: also treated as "no tags".
        return ""
    for t in xml.getiterator("tag"):
        count = utils.safeint(t.find("count").text)
        if count >= getattr(settings, 'LASTFM_TAG_USAGE_THRESHOLD', 15):
            tag = slugify(smart_unicode(t.find("name").text))
            # Tag values are capped at 50 characters.
            tags.add(tag[:50])
    return tags

# Memoize tags to avoid unnecessary API calls.
tag_cache = {}
tags_for_url = memoize(tags_for_url, tag_cache, 1)

register_provider( LastfmProvider )
message_text = message_text.replace('\n','') # remove URLs referenced in message content # TODO: fix ungainly code below links = [ link for link in URL_RE.findall(message_text) ] link_ctr = 1 link_dict = {} for link in URL_RE.finditer(message_text): link_dict[link.group(0)] = link_ctr link_ctr += 1 generate_link_num = lambda obj: "[%d]"%link_dict[obj.group(0)] message_text = URL_RE.sub(generate_link_num,message_text) # remove leading username message_text = USERNAME_RE.sub('',message_text) # check for RT-type retweet syntax message_text = RT_RE.sub(self.transform_retweet,message_text) # replace @user references with links to their timeline message_text = USER_RE.sub(self.transform_user_ref_to_link,message_text) # extract defacto #tag style tweet tags tags = ' '.join( [tag[1:] for tag in TAG_RE.findall(message_text)] ) message_text = TAG_RE.sub('',message_text) return (message_text.strip(),links,tags) if not hasattr(settings,'TWITTER_TRANSFORM_MSG') or \ not settings.TWITTER_TRANSFORM_MSG: log.info("Disabling message transforms") TwitterProvider.parse_message = lambda self, msg: ( msg, list(), "" ) register_provider( TwitterProvider )
# --- NOTE(review): tail of a FlickrProvider post-handle hook — fetch ---
# --- and store EXIF data for a Photo; the enclosing `def` is outside ---
# --- this view. ---
data_interface = self.DATA_INTERFACES['photo']
model_instance.exif = self.convert_exif( data_interface.photos.getExif(
    photo_id=data['photo_id'],
    secret=data['secret']))
model_instance.save()

def post_handle_default(self, model_instance, model_str, model_cls, data, created):
    # For Photoset instances: page through the set's photos (500 per
    # page) and attach every already-imported Photo to the set.
    if model_instance.__class__ == Photoset:
        data_interface = self.DATA_INTERFACES['photoset']
        page = 1
        while True:
            resp = data_interface.photosets.getPhotos(
                user_id=settings.FLICKR_USER_ID,
                photoset_id=model_instance.photoset_id,
                extras="license,date_taken",
                per_page="500",
                page=str(page),
                media="photos")
            photos = resp["photoset"]
            # Past the last page: nothing left to attach.
            if page > photos["pages"]:
                return
            for photodict in photos["photo"]:
                try:
                    photo = Photo.objects.get(photo_id=smart_unicode(photodict["id"]))
                    model_instance.photos.add(photo)
                except Photo.DoesNotExist:
                    # Photo record hasn't been imported locally; skip it.
                    log.debug( "Photo object corresponding to the record %s could not be found for photoset %s" % \
                        (photodict,model_instance) )
            page += 1

register_provider( FlickrProvider )