def update():
    """
    Import the configured Flickr user's recent public photos.

    Pages through ``flickr.people.getPublicPhotos`` for
    ``settings.FLICKR_USER_ID`` and passes every photo to ``_handle_photo``.
    Paging ends when the requested page number exceeds the page count
    reported by Flickr, or as soon as a photo whose upload timestamp is
    older than the last recorded ``Photo`` update is seen.
    """
    flickr = FlickrClient(settings.FLICKR_API_KEY)

    # Preload the list of licenses, keyed by license id.
    license_info = flickr.photos.licenses.getInfo()
    licenses = dict(
        (entry["id"], smart_unicode(entry["url"]))
        for entry in license_info["licenses"]["license"])

    # Handle update by pages until we see photos we've already handled.
    last_update_date = Item.objects.get_last_update_of_model(Photo)
    page = 1
    caught_up = False
    while not caught_up:
        log.debug("Fetching page %s of photos", page)
        resp = flickr.people.getPublicPhotos(
            user_id=settings.FLICKR_USER_ID,
            extras="license,date_taken,date_upload",
            per_page="500",
            page=str(page))
        photos = resp["photos"]
        if page > photos["pages"]:
            log.debug("Ran out of photos; stopping.")
            break

        for photodict in photos["photo"]:
            timestamp = datetime.datetime.fromtimestamp(
                utils.safeint(photodict["dateupload"]))
            if timestamp < last_update_date:
                log.debug(
                    "Hit an old photo (taken %s; last update was %s); stopping.",
                    timestamp, last_update_date)
                # A bare ``break`` only leaves this ``for`` loop; the flag
                # makes the surrounding ``while`` stop paging as well.
                caught_up = True
                break

            photo_id = utils.safeint(photodict["id"])
            license_url = licenses[photodict["license"]]
            secret = smart_unicode(photodict["secret"])
            _handle_photo(flickr, photo_id, secret, license_url, timestamp)

        page += 1

    # NOTE(review): the collapsed source does not show whether this call
    # belongs inside update() or at module level; it is kept as the final
    # step of update() -- confirm against the original file.
    _get_flickr_photos()
def _handle_photo(flickr, photo_id, secret, license, timestamp):
    """
    Create or refresh the ``Photo`` for one Flickr photo and record an Item.

    Fetches the photo's details with ``flickr.photos.getInfo``, stores them
    on the ``Photo`` whose ``photo_id`` matches (creating it if needed), and
    for a newly created photo also stores the converted EXIF data from
    ``flickr.photos.getExif``.

    ``license`` is stored as the photo's ``cc_license``; ``timestamp`` is
    passed on to ``Item.objects.create_or_update``.

    Returns the result of ``Item.objects.create_or_update``.
    """
    info = flickr.photos.getInfo(photo_id=photo_id, secret=secret)["photo"]

    # Untitled photos fall back to their Flickr id as the title.
    if info["title"]["_content"]:
        title = smart_unicode(info["title"]["_content"])
    else:
        title = photo_id

    # One mapping feeds both the creation defaults and the update path, so
    # the two can no longer drift apart (``cc_license`` used to be missing
    # from the defaults, leaving new photos without a license).
    # NOTE(review): info["originalsecret"] raises KeyError if the API
    # response omits it -- confirm it is always present for this account.
    fields = dict(
        server_id=utils.safeint(info["server"]),
        farm_id=utils.safeint(info["farm"]),
        secret=secret,
        o_secret=smart_unicode(info["originalsecret"]),
        taken_by=smart_unicode(info["owner"]["path_alias"]),
        cc_license=license,
        title=title,
        description=smart_unicode(info["description"]["_content"]),
        comment_count=utils.safeint(info["comments"]["_content"]),
        date_uploaded=datetime.datetime.fromtimestamp(
            utils.safeint(info["dates"]["posted"])),
        date_updated=datetime.datetime.fromtimestamp(
            utils.safeint(info["dates"]["lastupdate"])),
    )

    log.debug("Handling photo: %r (taken %s)", title, timestamp)
    photo, created = Photo.objects.get_or_create(
        photo_id=str(photo_id),
        defaults=fields,
    )
    if created:
        # EXIF is only fetched once, when the photo is first stored.
        photo.exif = _convert_exif(
            flickr.photos.getExif(photo_id=photo_id, secret=secret))
    else:
        for name, value in fields.items():
            setattr(photo, name, value)
    photo.save()

    return Item.objects.create_or_update(
        instance=photo,
        timestamp=timestamp,
        tags=_convert_tags(info["tags"]),
        source=__name__,
    )
def _handle_photo(flickr, photo_id, secret, license, timestamp):
    """
    Store one Flickr photo as a ``Photo`` and register it as an Item.

    Looks the photo up with ``flickr.photos.getInfo``. A ``Photo`` with the
    matching ``photo_id`` is created from those details if none exists, in
    which case its EXIF data is fetched and converted as well; an existing
    ``Photo`` has the same fields overwritten. The photo is saved either way.

    Returns the result of ``Item.objects.create_or_update``.
    """
    details = flickr.photos.getInfo(photo_id=photo_id, secret=secret)["photo"]
    dates = details["dates"]

    # Every column taken from the API response, gathered in one place.
    values = dict(
        server_id=utils.safeint(details["server"]),
        farm_id=utils.safeint(details["farm"]),
        secret=secret,
        taken_by=smart_unicode(details["owner"]["username"]),
        cc_license=license,
        title=smart_unicode(details["title"]["_content"]),
        description=smart_unicode(details["description"]["_content"]),
        comment_count=utils.safeint(details["comments"]["_content"]),
        date_uploaded=datetime.datetime.fromtimestamp(
            utils.safeint(dates["posted"])),
        date_updated=datetime.datetime.fromtimestamp(
            utils.safeint(dates["lastupdate"])),
    )

    log.debug("Handling photo: %r (taken %s)" % (values["title"], timestamp))

    photo, is_new = Photo.objects.get_or_create(
        photo_id=str(photo_id), defaults=values)
    if not is_new:
        # Existing row: overwrite each stored column with the fresh value.
        for column, value in values.items():
            setattr(photo, column, value)
    else:
        raw_exif = flickr.photos.getExif(photo_id=photo_id, secret=secret)
        photo.exif = _convert_exif(raw_exif)
    photo.save()

    item_tags = _convert_tags(details["tags"])
    return Item.objects.create_or_update(
        instance=photo,
        timestamp=timestamp,
        tags=item_tags,
        source=__name__,
    )
def update_photo(self, flickr):
    """
    Collect the user's new public photos into ``self.incoming["photo"]``.

    ``self.incoming["photo"]`` is reset to an empty list and then filled
    with one dict per photo, built from ``flickr.people.getPublicPhotos``
    and ``flickr.photos.getInfo``. Collection ends when the requested page
    number exceeds the page count reported by Flickr, or as soon as a photo
    taken before the last recorded ``Photo`` update is seen.

    Returns None.
    """
    last_update_date = Item.objects.get_last_update_of_model(Photo)
    log.debug("Last update date: %s", last_update_date)

    # Map license id -> license URL.
    license_info = flickr.photos.licenses.getInfo()
    licenses = dict(
        (entry["id"], smart_unicode(entry["url"]))
        for entry in license_info["licenses"]["license"])

    page = 1
    photo_list = self.incoming["photo"] = list()
    while True:
        log.debug("Fetching page %s of photos", page)
        resp = flickr.people.getPublicPhotos(
            user_id=settings.FLICKR_USER_ID,
            extras="license,date_taken",
            per_page="500",
            page=str(page))
        photos = resp["photos"]
        if page > photos["pages"]:
            log.debug("Ran out of photos; stopping.")
            return

        for photodict in photos["photo"]:
            timestamp = utils.parsedate(str(photodict["datetaken"]))
            if timestamp < last_update_date:
                log.debug(
                    "Hit an old photo (taken %s; last update was %s); stopping.",
                    timestamp, last_update_date)
                # ``break`` would only leave this ``for`` loop and paging
                # would carry on; return so that "stopping" really stops.
                return

            photo_id = smart_unicode(photodict["id"])
            cc_license = licenses[photodict["license"]]
            secret = smart_unicode(photodict["secret"])
            info = flickr.photos.getInfo(
                photo_id=photo_id, secret=secret)["photo"]
            photo_list.append({
                'photo_id': photo_id,
                'cc_license': cc_license,
                'secret': secret,
                'server_id': utils.safeint(info["server"]),
                'farm_id': utils.safeint(info["farm"]),
                'taken_by': smart_unicode(info["owner"]["username"]),
                'title': smart_unicode(info["title"]["_content"]),
                'description': smart_unicode(info["description"]["_content"]),
                'comment_count': utils.safeint(info["comments"]["_content"]),
                'date_uploaded': datetime.datetime.fromtimestamp(
                    utils.safeint(info["dates"]["posted"])),
                'date_updated': datetime.datetime.fromtimestamp(
                    utils.safeint(info["dates"]["lastupdate"])),
                'tags': self.convert_tags(info["tags"]),
                'timestamp': timestamp,
                'photoset': None,
            })

        page += 1
def _tags_for_track(artist_name, track_name):
    """
    Get the top tags for a track. Also fetches tags for the artist. Only
    includes tags that break a certain threshold of usage, defined by
    settings.LASTFM_TAG_USAGE_THRESHOLD (which defaults to 15).

    Returns a set of slugified tag names, or "" if a request fails with
    error code 408. Any other HttpLib2Error is re-raised.
    """
    urls = [
        ARTIST_TAGS_URL % (urllib.quote(artist_name)),
        TRACK_TAGS_URL % (urllib.quote(artist_name), urllib.quote(track_name)),
    ]
    # The threshold does not change between tags; look it up once.
    threshold = getattr(settings, 'LASTFM_TAG_USAGE_THRESHOLD', 15)

    tags = set()
    for url in urls:
        log.debug("Fetching tags from %r", url)
        try:
            xml = utils.getxml(url)
        except HttpLib2Error as e:
            # Use getattr so an error object without a ``code`` attribute is
            # re-raised as itself instead of being masked by AttributeError.
            if getattr(e, "code", None) == 408:
                return ""
            raise

        for t in xml.getiterator("tag"):
            count = utils.safeint(t.find("count").text)
            if count >= threshold:
                tags.add(slugify(smart_unicode(t.find("name").text)))

    # The collected tags were previously built and then dropped.
    return tags
def update_photoset(self, flickr):
    """
    Collect the user's photosets into ``self.incoming["photoset"]``.

    Reads the user's photos URL from ``flickr.people.getInfo`` and the
    photoset list from ``flickr.photosets.getList``, then resets
    ``self.incoming["photoset"]`` to a list holding one dict per photoset.
    Each dict's ``timestamp`` is the local time at which it was built.

    Returns None.
    """
    person = flickr.people.getInfo(user_id=settings.FLICKR_USER_ID)["person"]
    base_url = smart_unicode(person["photosurl"]["_content"])

    listing = flickr.photosets.getList(user_id=settings.FLICKR_USER_ID)
    sets = listing["photosets"]

    collected = list()
    self.incoming["photoset"] = collected
    for entry in sets["photoset"]:
        set_id = smart_unicode(entry["id"])
        collected.append({
            'photoset_id': set_id,
            'timestamp': datetime.datetime.now(),
            'url': "%s/sets/%s/" % (base_url, set_id),
            'secret': smart_unicode(entry["secret"]),
            'server_id': utils.safeint(entry["server"]),
            'farm_id': utils.safeint(entry["farm"]),
            'title': smart_unicode(entry["title"]["_content"]),
            'description': smart_unicode(entry["description"]["_content"]),
        })
def update():
    """
    Import the configured Flickr user's recent public photos.

    Pages through ``flickr.people.getPublicPhotos`` for
    ``settings.FLICKR_USER_ID`` and passes every photo to ``_handle_photo``.
    The import ends when the requested page number exceeds the page count
    reported by Flickr, or as soon as a photo taken before the last recorded
    ``Photo`` update is seen.
    """
    flickr = FlickrClient(settings.FLICKR_API_KEY)

    # Preload the list of licenses, keyed by license id.
    license_info = flickr.photos.licenses.getInfo()
    licenses = dict(
        (entry["id"], smart_unicode(entry["url"]))
        for entry in license_info["licenses"]["license"])

    # Handle update by pages until we see photos we've already handled.
    last_update_date = Item.objects.get_last_update_of_model(Photo)
    page = 1
    while True:
        log.debug("Fetching page %s of photos", page)
        resp = flickr.people.getPublicPhotos(
            user_id=settings.FLICKR_USER_ID,
            extras="license,date_taken",
            per_page="500",
            page=str(page))
        photos = resp["photos"]
        if page > photos["pages"]:
            log.debug("Ran out of photos; stopping.")
            return

        for photodict in photos["photo"]:
            timestamp = utils.parsedate(str(photodict["datetaken"]))
            if timestamp < last_update_date:
                log.debug(
                    "Hit an old photo (taken %s; last update was %s); stopping.",
                    timestamp, last_update_date)
                # ``break`` would only leave this ``for`` loop and the
                # remaining pages would still be fetched; return instead.
                # NOTE(review): the cut-off compares the *taken* date --
                # confirm that is the intended stop condition.
                return

            photo_id = utils.safeint(photodict["id"])
            license_url = licenses[photodict["license"]]
            secret = smart_unicode(photodict["secret"])
            _handle_photo(flickr, photo_id, secret, license_url, timestamp)

        page += 1
for url in urls: tags.update(_tags_for_url(url)) def _tags_for_url(url): tags = set() try: xml = utils.getxml(url) except HttpLib2Error, e: if e.code == 408: return "" else: raise except SyntaxError: return "" for t in xml.getiterator("tag"): count = utils.safeint(t.find("count").text) if count >= getattr(settings, 'LASTFM_TAG_USAGE_THRESHOLD', 15): tag = slugify(smart_unicode(t.find("name").text)) tags.add(tag[:50]) return tags # Memoize tags to avoid unnecessary API calls. _tag_cache = {} _tags_for_url = memoize(_tags_for_url, _tag_cache, 1) @transaction.commit_on_success def _handle_track(artist_name, artist_mbid, track_name, track_mbid, url, timestamp, tags): t = Track( artist_name = artist_name, track_name = track_name,
tags.update(_tags_for_url(url)) def _tags_for_url(url): tags = set() try: xml = utils.getxml(url) except HttpLib2Error, e: if e.code == 408: return "" else: raise except SyntaxError: return "" for t in xml.getiterator("tag"): count = utils.safeint(t.find("count").text) if count >= getattr(settings, 'LASTFM_TAG_USAGE_THRESHOLD', 15): tag = slugify(smart_unicode(t.find("name").text)) tags.add(tag[:50]) return tags # Memoize tags to avoid unnecessary API calls. _tag_cache = {} _tags_for_url = memoize(_tags_for_url, _tag_cache, 1) @transaction.commit_on_success def _handle_track(artist_name, artist_mbid, track_name, track_mbid, url, timestamp, tags):