示例#1
0
def update():
    """
    Fetch the configured user's public Flickr photos and pass each new one to
    ``_handle_photo``.

    Pages are requested from ``flickr.people.getPublicPhotos`` starting at
    page 1. Paging ends either when the page number exceeds the page count
    reported in the response, or as soon as a photo whose upload timestamp is
    earlier than the last recorded ``Photo`` update is seen.
    ``_get_flickr_photos()`` is called once paging has finished.
    """
    flickr = FlickrClient(settings.FLICKR_API_KEY)

    # Preload the list of licenses as a mapping of license id -> license URL.
    license_info = flickr.photos.licenses.getInfo()
    licenses = dict(
        (entry["id"], smart_unicode(entry["url"]))
        for entry in license_info["licenses"]["license"]
    )

    # Handle update by pages until we see photos we've already handled
    last_update_date = Item.objects.get_last_update_of_model(Photo)
    page = 1
    caught_up = False
    while not caught_up:
        log.debug("Fetching page %s of photos", page)
        resp = flickr.people.getPublicPhotos(
            user_id=settings.FLICKR_USER_ID,
            extras="license,date_taken,date_upload",
            per_page="500",
            page=str(page),
        )
        photos = resp["photos"]
        if page > photos["pages"]:
            log.debug("Ran out of photos; stopping.")
            break

        for photodict in photos["photo"]:
            timestamp = datetime.datetime.fromtimestamp(utils.safeint(photodict["dateupload"]))
            if timestamp < last_update_date:
                log.debug("Hit an old photo (taken %s; last update was %s); stopping.", timestamp, last_update_date)
                # A bare ``break`` only leaves this inner loop; the flag makes
                # the outer paging loop stop too instead of fetching every
                # remaining page just to break on its first photo.
                caught_up = True
                break

            photo_id = utils.safeint(photodict["id"])
            photo_license = licenses[photodict["license"]]
            secret = smart_unicode(photodict["secret"])
            _handle_photo(flickr, photo_id, secret, photo_license, timestamp)

        page += 1

    _get_flickr_photos()
示例#2
0
def _handle_photo(flickr, photo_id, secret, license, timestamp):
    """
    Create or refresh the ``Photo`` for one Flickr photo and register it as an
    ``Item``.

    ``flickr`` is the API client, ``photo_id`` and ``secret`` identify the
    photo, ``license`` is the value stored in ``cc_license`` and ``timestamp``
    is forwarded to ``Item.objects.create_or_update``.

    Details are read from ``flickr.photos.getInfo``. A newly created photo
    additionally gets its EXIF data from ``flickr.photos.getExif``; an
    existing one has all of its fields overwritten with the fresh values.

    Returns the result of ``Item.objects.create_or_update``.
    """
    info = flickr.photos.getInfo(photo_id=photo_id, secret=secret)["photo"]

    # One mapping feeds both the creation defaults and the update branch, so
    # the two can no longer drift apart (``cc_license`` used to be missing
    # from the defaults, leaving new photos without a license).
    fields = dict(
        server_id=utils.safeint(info["server"]),
        farm_id=utils.safeint(info["farm"]),
        secret=secret,
        o_secret=smart_unicode(info["originalsecret"]),
        taken_by=smart_unicode(info["owner"]["path_alias"]),
        cc_license=license,
        # Untitled photos fall back to their id; converting it keeps the
        # title text in both cases instead of a raw integer.
        title=smart_unicode(info["title"]["_content"] or photo_id),
        description=smart_unicode(info["description"]["_content"]),
        comment_count=utils.safeint(info["comments"]["_content"]),
        date_uploaded=datetime.datetime.fromtimestamp(utils.safeint(info["dates"]["posted"])),
        date_updated=datetime.datetime.fromtimestamp(utils.safeint(info["dates"]["lastupdate"])),
    )

    log.debug("Handling photo: %r (taken %s)", fields["title"], timestamp)
    photo, created = Photo.objects.get_or_create(
        photo_id=str(photo_id),
        defaults=fields,
    )

    if created:
        photo.exif = _convert_exif(flickr.photos.getExif(photo_id=photo_id, secret=secret))
    else:
        for name, value in fields.items():
            setattr(photo, name, value)
    photo.save()

    return Item.objects.create_or_update(
        instance=photo,
        timestamp=timestamp,
        tags=_convert_tags(info["tags"]),
        source=__name__,
    )
示例#3
0
def _handle_photo(flickr, photo_id, secret, license, timestamp):
    """
    Store one Flickr photo as a ``Photo`` and register it as an ``Item``.

    Photo details come from ``flickr.photos.getInfo``. When the ``Photo`` row
    is newly created its EXIF data is fetched via ``flickr.photos.getExif``;
    otherwise every stored field is overwritten with the fresh values.

    Returns the result of ``Item.objects.create_or_update``.
    """
    details = flickr.photos.getInfo(photo_id=photo_id, secret=secret)["photo"]
    dates = details["dates"]

    # Values shared by the creation defaults and the update branch.
    values = dict(
        server_id=utils.safeint(details["server"]),
        farm_id=utils.safeint(details["farm"]),
        secret=secret,
        taken_by=smart_unicode(details["owner"]["username"]),
        cc_license=license,
        title=smart_unicode(details["title"]["_content"]),
        description=smart_unicode(details["description"]["_content"]),
        comment_count=utils.safeint(details["comments"]["_content"]),
        date_uploaded=datetime.datetime.fromtimestamp(
            utils.safeint(dates["posted"])),
        date_updated=datetime.datetime.fromtimestamp(
            utils.safeint(dates["lastupdate"])),
    )

    log.debug("Handling photo: %r (taken %s)" % (values["title"], timestamp))
    photo, is_new = Photo.objects.get_or_create(
        photo_id=str(photo_id), defaults=values)

    if not is_new:
        # Existing row: refresh every field from the latest API data.
        for field_name in values:
            setattr(photo, field_name, values[field_name])
    else:
        exif_response = flickr.photos.getExif(photo_id=photo_id, secret=secret)
        photo.exif = _convert_exif(exif_response)
    photo.save()

    item_tags = _convert_tags(details["tags"])
    return Item.objects.create_or_update(
        instance=photo,
        timestamp=timestamp,
        tags=item_tags,
        source=__name__,
    )
示例#4
0
    def update_photo(self, flickr):
        """
        Collect the configured user's new public photos into
        ``self.incoming["photo"]``.

        ``self.incoming["photo"]`` is reset to an empty list, then pages of
        ``flickr.people.getPublicPhotos`` are walked starting at page 1. For
        each photo a dict of its details (read from ``flickr.photos.getInfo``)
        is appended to that list. The method returns ``None`` once the page
        number exceeds the reported page count, or as soon as a photo whose
        ``datetaken`` is earlier than the last recorded ``Photo`` update is
        seen.
        """
        last_update_date = Item.objects.get_last_update_of_model(Photo)
        log.debug("Last update date: %s", last_update_date)

        # Mapping of license id -> license URL.
        license_info = flickr.photos.licenses.getInfo()
        licenses = dict(
            (entry["id"], smart_unicode(entry["url"]))
            for entry in license_info["licenses"]["license"]
        )

        page = 1
        photo_list = self.incoming["photo"] = list()
        while True:
            log.debug("Fetching page %s of photos", page)
            resp = flickr.people.getPublicPhotos(user_id=settings.FLICKR_USER_ID, extras="license,date_taken",
                                                 per_page="500", page=str(page))
            photos = resp["photos"]
            if page > photos["pages"]:
                log.debug("Ran out of photos; stopping.")
                return

            for photodict in photos["photo"]:
                timestamp = utils.parsedate(str(photodict["datetaken"]))
                if timestamp < last_update_date:
                    log.debug("Hit an old photo (taken %s; last update was %s); stopping.",
                              timestamp, last_update_date)
                    # Return rather than ``break``: breaking only left this
                    # inner loop, so every remaining page was still fetched
                    # despite the "stopping" message.
                    return

                obj = {}
                obj['photo_id'] = smart_unicode(photodict["id"])
                obj['cc_license'] = licenses[photodict["license"]]
                obj['secret'] = smart_unicode(photodict["secret"])

                info = flickr.photos.getInfo(photo_id=obj['photo_id'], secret=obj['secret'])["photo"]

                obj['server_id'] = utils.safeint(info["server"])
                obj['farm_id'] = utils.safeint(info["farm"])
                obj['taken_by'] = smart_unicode(info["owner"]["username"])
                obj['title'] = smart_unicode(info["title"]["_content"])
                obj['description'] = smart_unicode(info["description"]["_content"])
                obj['comment_count'] = utils.safeint(info["comments"]["_content"])
                obj['date_uploaded'] = datetime.datetime.fromtimestamp(utils.safeint(info["dates"]["posted"]))
                obj['date_updated'] = datetime.datetime.fromtimestamp(utils.safeint(info["dates"]["lastupdate"]))

                obj['tags'] = self.convert_tags(info["tags"])
                obj['timestamp'] = timestamp
                obj['photoset'] = None

                photo_list.append(obj)
            page += 1
示例#5
0
文件: lastfm.py 项目: davej/jellyroll
def _tags_for_track(artist_name, track_name):
    """
    Get the top tags for a track. Also fetches tags for the artist. Only
    includes tags that break a certain threshold of usage, defined by
    settings.LASTFM_TAG_USAGE_THRESHOLD (which defaults to 15).

    Returns a set of slugified tag names. If a request fails with an
    ``HttpLib2Error`` whose ``code`` is 408, an empty string is returned
    instead; any other ``HttpLib2Error`` is re-raised.
    """
    urls = [
        ARTIST_TAGS_URL % (urllib.quote(artist_name)),
        TRACK_TAGS_URL % (urllib.quote(artist_name), urllib.quote(track_name)),
    ]
    # The threshold does not change between tags, so look it up only once.
    threshold = getattr(settings, 'LASTFM_TAG_USAGE_THRESHOLD', 15)
    tags = set()
    for url in urls:
        log.debug("Fetching tags from %r", url)
        try:
            xml = utils.getxml(url)
        except HttpLib2Error as e:
            if e.code == 408:
                return ""
            raise
        for t in xml.getiterator("tag"):
            count = utils.safeint(t.find("count").text)
            if count >= threshold:
                tags.add(slugify(smart_unicode(t.find("name").text)))
    # Hand the collected tags back; without this the caller received None.
    return tags
示例#6
0
    def update_photoset(self, flickr):
        """
        Collect the configured user's photosets into
        ``self.incoming["photoset"]``.

        The user's photos URL is read from ``flickr.people.getInfo`` and used
        as the base of each set URL. ``self.incoming["photoset"]`` is replaced
        with a fresh list holding one dict per entry returned by
        ``flickr.photosets.getList``.
        """
        person = flickr.people.getInfo(user_id=settings.FLICKR_USER_ID)["person"]
        base_url = smart_unicode(person["photosurl"]["_content"])
        # NOTE(review): if photosurl already ends with "/", the URLs built
        # below contain a double slash -- confirm against the API response.

        sets = flickr.photosets.getList(user_id=settings.FLICKR_USER_ID)["photosets"]
        collected = []
        self.incoming["photoset"] = collected
        for entry in sets["photoset"]:
            set_id = smart_unicode(entry["id"])
            collected.append({
                'photoset_id': set_id,
                'timestamp': datetime.datetime.now(),
                'url': "%s/sets/%s/" % (base_url, set_id),
                'secret': smart_unicode(entry["secret"]),
                'server_id': utils.safeint(entry["server"]),
                'farm_id': utils.safeint(entry["farm"]),
                'title': smart_unicode(entry["title"]["_content"]),
                'description': smart_unicode(entry["description"]["_content"]),
            })
示例#7
0
def update():
    """
    Fetch the configured user's public Flickr photos and pass each new one to
    ``_handle_photo``.

    Pages are requested from ``flickr.people.getPublicPhotos`` starting at
    page 1. The function returns when the page number exceeds the page count
    reported in the response, or as soon as a photo whose ``datetaken`` is
    earlier than the last recorded ``Photo`` update is seen.
    """
    flickr = FlickrClient(settings.FLICKR_API_KEY)

    # Preload the list of licenses as a mapping of license id -> license URL.
    license_info = flickr.photos.licenses.getInfo()
    licenses = dict((entry["id"], smart_unicode(entry["url"]))
                    for entry in license_info["licenses"]["license"])

    # Handle update by pages until we see photos we've already handled
    last_update_date = Item.objects.get_last_update_of_model(Photo)
    page = 1
    while True:
        log.debug("Fetching page %s of photos", page)
        resp = flickr.people.getPublicPhotos(user_id=settings.FLICKR_USER_ID,
                                             extras="license,date_taken",
                                             per_page="500",
                                             page=str(page))
        photos = resp["photos"]
        if page > photos["pages"]:
            log.debug("Ran out of photos; stopping.")
            return

        for photodict in photos["photo"]:
            timestamp = utils.parsedate(str(photodict["datetaken"]))
            if timestamp < last_update_date:
                log.debug(
                    "Hit an old photo (taken %s; last update was %s); stopping.",
                    timestamp, last_update_date)
                # Return rather than ``break``: breaking only left this inner
                # loop, so every remaining page was still fetched despite the
                # "stopping" message.
                return

            photo_id = utils.safeint(photodict["id"])
            photo_license = licenses[photodict["license"]]
            secret = smart_unicode(photodict["secret"])
            _handle_photo(flickr, photo_id, secret, photo_license, timestamp)

        page += 1
示例#8
0
    for url in urls:
        tags.update(_tags_for_url(url))
        
def _tags_for_url(url):
    """
    Fetch the XML document at ``url`` and return the tags it lists.

    Only ``<tag>`` elements whose ``<count>`` is at least
    settings.LASTFM_TAG_USAGE_THRESHOLD (default 15) are kept. Each name is
    slugified and truncated to 50 characters.

    Returns a set of tag slugs. An empty string is returned instead when the
    fetch raises ``HttpLib2Error`` with ``code`` 408 or raises
    ``SyntaxError``; any other ``HttpLib2Error`` is re-raised.
    """
    try:
        xml = utils.getxml(url)
    except HttpLib2Error as e:
        if e.code == 408:
            return ""
        raise
    except SyntaxError:
        return ""

    # The threshold does not change between tags, so look it up only once.
    threshold = getattr(settings, 'LASTFM_TAG_USAGE_THRESHOLD', 15)
    tags = set()
    for t in xml.getiterator("tag"):
        count_el = t.find("count")
        name_el = t.find("name")
        # find() yields None for a missing child, and .text is None for an
        # empty element; skip such entries instead of crashing or storing a
        # tag built from the text "None".
        if count_el is None or name_el is None or not name_el.text:
            continue
        if utils.safeint(count_el.text) >= threshold:
            tag = slugify(smart_unicode(name_el.text))
            tags.add(tag[:50])

    return tags
            
# Memoize tags to avoid unnecessary API calls.
# _tag_cache is the dict handed to memoize() as its cache argument; the
# trailing 1 is memoize()'s third argument (presumably the number of leading
# call arguments used as the cache key -- confirm against memoize's definition).
# NOTE(review): _tags_for_url returns "" on a 408 or SyntaxError; if memoize
# stores every return value, those failures stay cached as well -- verify.
_tag_cache = {}
_tags_for_url = memoize(_tags_for_url, _tag_cache, 1)

@transaction.commit_on_success
def _handle_track(artist_name, artist_mbid, track_name, track_mbid, url, timestamp, tags):
    t = Track(
        artist_name = artist_name,
        track_name  = track_name,
示例#9
0
        tags.update(_tags_for_url(url))


def _tags_for_url(url):
    """
    Return the set of tag slugs listed in the XML document at ``url``.

    A tag is kept only when its ``<count>`` reaches
    settings.LASTFM_TAG_USAGE_THRESHOLD (default 15); the slugified name is
    cut to 50 characters. When the fetch raises ``HttpLib2Error`` with
    ``code`` 408, or raises ``SyntaxError``, an empty string is returned
    instead of a set. Other ``HttpLib2Error`` instances propagate.
    """
    try:
        document = utils.getxml(url)
    except HttpLib2Error as err:
        if err.code != 408:
            raise
        return ""
    except SyntaxError:
        return ""

    collected = set()
    for node in document.getiterator("tag"):
        usage = utils.safeint(node.find("count").text)
        if usage < getattr(settings, 'LASTFM_TAG_USAGE_THRESHOLD', 15):
            # Not used often enough to be worth keeping.
            continue
        slug = slugify(smart_unicode(node.find("name").text))
        collected.add(slug[:50])
    return collected


# Memoize tags to avoid unnecessary API calls.
# The module-level dict _tag_cache is passed to memoize() as its second
# argument and 1 as its third (presumably the count of call arguments that
# form the cache key -- confirm against memoize's definition). The name
# _tags_for_url is rebound to the wrapper memoize() returns.
_tag_cache = {}
_tags_for_url = memoize(_tags_for_url, _tag_cache, 1)


@transaction.commit_on_success
def _handle_track(artist_name, artist_mbid, track_name, track_mbid, url,
                  timestamp, tags):