Example #1
    def update_websearch(self, data_iterator):
        websearch_list = self.incoming["websearch"] = list()
        for entry in data_iterator:
            # "web query" entries become WebSearch items; the GUID ties them
            # to the "web result" entries collected below.
            if entry.tags[0].term == "web query":
                obj = {}
                obj['engine'] = self.search_engine
                obj['guid'] = smart_unicode(urlparse.urlsplit(entry.guid)[2].replace("/searchhistory/", ""))
                obj['query'] = smart_unicode(entry.title)
                obj['timestamp'] = datetime.datetime(tzinfo=tzinfo.FixedOffset(0), *entry.updated_parsed[:6])

                websearch_list.append( obj )

            # "web result" entries are stashed so post_handle_item can attach
            # them to the saved WebSearch items via the shared GUID.
            elif entry.tags[0].term == "web result":
                obj = {}
                obj['guid'] = smart_unicode(entry.query_guid)
                obj['title'] = smart_unicode(entry.title)
                obj['url'] = smart_unicode(entry.link)
                
                self.websearch_results.append( obj )

    def post_handle_item(self, item_instance, model_instance, data, created):
        results = [ result for result in self.websearch_results if result['guid'] == data['guid'] ]
        for result_data in results:
            result,created = WebSearchResult.objects.get_or_create(
                title = result_data['title'],
                url = result_data['url'],
                search = model_instance
                )

register_provider( GoogleSearchProvider )
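A side note on the GUID line above: it simply takes the path component of the
feed entry's guid URL and strips the "/searchhistory/" prefix. A minimal
sketch of that step, using a made-up guid URL:

import urlparse  # Python 2; urllib.parse provides urlsplit on Python 3

guid_url = "http://www.google.com/searchhistory/abc123"  # hypothetical value
path = urlparse.urlsplit(guid_url)[2]                     # "/searchhistory/abc123"
guid = path.replace("/searchhistory/", "")                # "abc123"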
Example #2
    def get_default_fields(self, model_cls):
        fields = super(DeliciousProvider,self).get_default_fields(model_cls)
        return [ field for field in fields if field.name != 'thumbnail' and field.name != 'thumbnail_url' ]

    def get_custom_data_interface_instance(self, interface_cls):
        return interface_cls(settings.DELICIOUS_USERNAME,settings.DELICIOUS_PASSWORD)

    def update_bookmark(self, delicious):
        last_update_date = Item.objects.get_last_update_of_model(Bookmark)
        bookmarks = self.incoming['bookmark'] = list()

        last_post_date = utils.parsedate(delicious.posts.update().get("time"))
        if last_post_date <= last_update_date:
            log.info("Skipping update: last update date: %s; last post date: %s", last_update_date, last_post_date)
            return

        for datenode in reversed(list(delicious.posts.dates().getiterator('date'))):
            dt = utils.parsedate(datenode.get("date"))
            if dt > last_update_date:
                xml = delicious.posts.get(dt=dt.strftime("%Y-%m-%d"))
                for post in xml.getiterator('post'):
                    info = dict((k, smart_unicode(post.get(k))) for k in post.keys())

                    info['tags'] = info['tag']
                    info['url'] = info['href']
                    info['timestamp'] = utils.parsedate(info['time'])

                    bookmarks.append( info )

register_provider( DeliciousProvider )
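The inner loop copies every attribute of each <post> element into a plain
dict before normalising a few keys. A self-contained sketch of that pattern
against a hand-written element (the attribute values are invented):

from xml.etree import ElementTree

post = ElementTree.fromstring(
    '<post href="http://example.com/" description="Example bookmark" '
    'tag="python django" time="2010-01-02T03:04:05Z"/>')
info = dict((k, post.get(k)) for k in post.keys())
# info == {'href': 'http://example.com/', 'description': 'Example bookmark',
#          'tag': 'python django', 'time': '2010-01-02T03:04:05Z'}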
Example #3
        # Git chokes on the 1969-12-31 sentinel returned by
        # get_last_update_of_model, so fix that up.
        if last_update_date.date() == datetime.date(1969, 12, 31):
            last_update_date = datetime.datetime(1970, 1, 1)

        working_dir, repo = self.create_local_repo(repository)
        commits = repo.commits_since(since=last_update_date.strftime("%Y-%m-%d"))
        log.debug("Handling %s commits", len(commits))

        for commit in reversed(commits):
            if commit.author.email == repository.username:
                log.debug("Handling [%s] from %s", commit.id[:7], repository.url)

                # stored as UTC
                timestamp = datetime.datetime.fromtimestamp(time.mktime(commit.committed_date))
                if utils.JELLYROLL_ADJUST_DATETIME:
                    timestamp = utils.utc_to_local_timestruct(commit.committed_date)

                obj = {}
                obj['revision'] = commit.id
                obj['repository'] = repository
                obj['message'] = smart_unicode(commit.message)
                obj['timestamp'] = timestamp
                
                commit_list.append( obj )

        log.debug("Removing working dir %s.", working_dir)
        shutil.rmtree(working_dir)

register_provider(GitSCMProvider)
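The timestamp line converts the struct_time-style value on commit.committed_date
into a naive datetime. A sketch of just that conversion, with a fabricated
commit date:

import datetime
import time

committed_date = time.strptime("2011-05-04 12:00:00", "%Y-%m-%d %H:%M:%S")
timestamp = datetime.datetime.fromtimestamp(time.mktime(committed_date))
# -> datetime.datetime(2011, 5, 4, 12, 0), interpreted in the local timezone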
Example #4
class SubversionProvider(CodeRepositoryProvider):
    """
    

    """
    class Meta(CodeRepositoryProvider.Meta):
        repository_type = "svn"
        modules = ('pysvn',)

    def update_codecommit_svn(self, repository, last_update_date, commit_list):
        # TODO: investigate issues with last_update_date, etc.
        rev = pysvn.Revision(pysvn.opt_revision_kind.date, time.mktime(last_update_date.timetuple()))
        c = pysvn.Client()

        for revision_entry in reversed(c.log(repository.url, revision_end=rev)):
            revision = revision_entry.revision
            if revision_entry.author == repository.username:
                log.debug("Handling [%s] from %s" % (revision.number, repository.url))
                timestamp = datetime.datetime.fromtimestamp(revision_entry.date)
                
                obj = {}
                obj['revision'] = str(revision.number)
                obj['repository'] = repository
                obj['message'] = smart_unicode(revision_entry.message)
                obj['timestamp'] = timestamp
                
                commit_list.append( obj )

register_provider(SubversionProvider)
Example #5
    def source_id(self, model_cls, extra):
        return md5.new(smart_str(extra["url"])).hexdigest()

    def update_video(self, client):
        video_list = self.incoming["video"] = list()
        feed = client.GetUserFavoritesFeed()
        for entry in feed.entry:
            obj = {}

            obj["url"] = entry.link[0].href

            try:
                obj["title"] = smart_unicode(entry.title.text)
            except DjangoUnicodeDecodeError:
                return

            tags = list()
            # HACK: avoid the last category which appears to
            #       simply be a link to the schema for video objects?
            for category in entry.category[:-1]:
                tags.append(category.term)
            obj["tags"] = " ".join(tags)

            obj["timestamp"] = dateutil.parser.parse(entry.published.text)
            obj["source"] = self.source

            video_list.append(obj)


register_provider(YoutubeProvider)
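source_id hashes the video URL so every favourite gets a stable identifier.
The same digest computed with the stdlib hashlib module (the md5 module used
above is its older Python 2 equivalent); the URL here is invented:

import hashlib

url = "http://www.youtube.com/watch?v=example"  # hypothetical
source_id = hashlib.md5(url.encode("utf-8")).hexdigest()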
Example #6
            ]
        tags = set()
        for url in urls:
            tags.update(self.tags_for_url(url))
        
    def tags_for_url(self, url):
        tags = set()
        try:
            xml = utils.getxml(url)
        except HttpLib2Error, e:
            if e.code == 408:
                return set()
            else:
                raise
        except SyntaxError:
            return set()
        for t in xml.getiterator("tag"):
            count = utils.safeint(t.find("count").text)
            if count >= getattr(settings, 'LASTFM_TAG_USAGE_THRESHOLD', 15):
                tag = slugify(smart_unicode(t.find("name").text))
                tags.add(tag[:50])

        return tags

    # Memoize tags to avoid unnecessary API calls.
    tag_cache = {}
    tags_for_url = memoize(tags_for_url, tag_cache, 1)


register_provider( LastfmProvider )
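The memoize call keeps repeated lookups for the same URL from hitting the
Last.fm API twice. Roughly what Django's old django.utils.functional.memoize
helper did; this is a simplified sketch, not the actual implementation:

def memoize(func, cache, num_args):
    # Cache results keyed on the first num_args positional arguments.
    def wrapper(*args):
        key = args[:num_args]
        if key not in cache:
            cache[key] = func(*args)
        return cache[key]
    return wrapper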
Example #7
        message_text = message_text.replace('\n','')
        # remove URLs referenced in message content
        # TODO: fix ungainly code below
        links = URL_RE.findall(message_text)
        link_ctr = 1
        link_dict = {}
        for link in URL_RE.finditer(message_text):
            link_dict[link.group(0)] = link_ctr
            link_ctr += 1
        generate_link_num = lambda obj: "[%d]"%link_dict[obj.group(0)]
        message_text = URL_RE.sub(generate_link_num,message_text)
        # remove leading username
        message_text = USERNAME_RE.sub('',message_text)
        # check for RT-type retweet syntax
        message_text = RT_RE.sub(self.transform_retweet,message_text)
        # replace @user references with links to their timeline
        message_text = USER_RE.sub(self.transform_user_ref_to_link,message_text)
        # extract defacto #tag style tweet tags
        tags = ' '.join( [tag[1:] for tag in TAG_RE.findall(message_text)] )
        message_text = TAG_RE.sub('',message_text)

        return (message_text.strip(),links,tags)

    if not hasattr(settings,'TWITTER_TRANSFORM_MSG') or \
       not settings.TWITTER_TRANSFORM_MSG:

        log.info("Disabling message transforms")
        TwitterProvider.parse_message = lambda self, msg: ( msg, list(), "" )

register_provider( TwitterProvider )
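The first half of parse_message replaces each URL in the tweet text with a
bracketed reference number. A standalone illustration of that substitution,
using a deliberately simplified stand-in for URL_RE (the real pattern is
defined elsewhere in the provider module):

import re

URL_RE = re.compile(r'https?://\S+')  # simplified stand-in
text = "reading http://example.com/a and http://example.com/b"

link_dict = {}
for i, match in enumerate(URL_RE.finditer(text)):
    link_dict[match.group(0)] = i + 1
text = URL_RE.sub(lambda m: "[%d]" % link_dict[m.group(0)], text)
# text is now "reading [1] and [2]"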
Example #8
            data_interface = self.DATA_INTERFACES['photo']
            model_instance.exif = self.convert_exif(
                data_interface.photos.getExif(
                    photo_id=data['photo_id'], secret=data['secret']))
            model_instance.save()

    def post_handle_default(self, model_instance, model_str, model_cls, data, created):
        if model_instance.__class__ == Photoset:
            data_interface = self.DATA_INTERFACES['photoset']
            page = 1
            while True:
                resp = data_interface.photosets.getPhotos(
                    user_id=settings.FLICKR_USER_ID, photoset_id=model_instance.photoset_id,
                    extras="license,date_taken", per_page="500", page=str(page), media="photos")

                photos = resp["photoset"]
                if page > photos["pages"]:
                    return

                for photodict in photos["photo"]:
                    try:
                        photo = Photo.objects.get(photo_id=smart_unicode(photodict["id"]))
                        model_instance.photos.add(photo)
                    except Photo.DoesNotExist:
                        log.debug( "Photo object corresponding to the record %s could not be found for photoset %s" % \
                                       (photodict,model_instance) )

                page += 1

register_provider( FlickrProvider )
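The photoset loop is a standard page-until-exhausted pattern. A generic
sketch of it, with a hypothetical fetch_page callable standing in for the
photosets.getPhotos call:

def collect_all_photos(fetch_page):
    # fetch_page(page) is assumed to return a dict with "pages" (total page
    # count) and "photo" (the items on that page).
    page = 1
    items = []
    while True:
        resp = fetch_page(page)
        if page > resp["pages"]:
            return items
        items.extend(resp["photo"])
        page += 1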