def post(self):
    """Create a FeaturedArtwork from the posted JSON, rejecting duplicates.

    Request params:
      json: artwork dict (title, byline, imageUri, detailsUri, publishDate in
            epoch milliseconds, optional attribution/thumbUri).
      crop: JSON array of floats describing the crop rectangle.
    Aborts with 409 if artwork already exists for the publish date.
    """
    artwork_json = json.loads(self.request.get('json'))
    # publishDate arrives as epoch milliseconds.
    publish_date = (datetime.datetime
        .utcfromtimestamp(artwork_json['publishDate'] / 1000)
        .date())
    # BUGFIX: the filter string was 'publish_date=' (no space before the
    # operator); the GAE datastore filter syntax used everywhere else in this
    # file is 'publish_date ='. Also prefer `is not None` over `!= None`.
    if FeaturedArtwork.all().filter('publish_date =', publish_date).get() is not None:
        webapp2.abort(409, message='Artwork already exists for this date.')
    crop_tuple = tuple(float(x) for x in json.loads(self.request.get('crop')))
    new_image_url, new_thumb_url = maybe_process_image(
        artwork_json['imageUri'],
        crop_tuple,
        publish_date.strftime('%Y%m%d') + ' '
            + artwork_json['title'] + ' '
            + artwork_json['byline'])
    # Fall back to the client-supplied thumbnail when processing produced none.
    if not new_thumb_url and 'thumbUri' in artwork_json:
        new_thumb_url = artwork_json['thumbUri']
    new_artwork = FeaturedArtwork(
        title=artwork_json['title'],
        byline=artwork_json['byline'],
        attribution=artwork_json['attribution'] if 'attribution' in artwork_json else None,
        image_url=new_image_url,
        thumb_url=new_thumb_url,
        details_url=artwork_json['detailsUri'],
        publish_date=publish_date)
    new_artwork.save()
    self.response.set_status(200)
def process_html(self, url, html):
    """Parse a wikiart-style artwork page and save it as a FeaturedArtwork.

    Writes HTTP 500 with a short message when required fields are missing;
    otherwise stores the artwork for the requested publishDate and sets 200.
    """
    page = BeautifulSoup(html)
    # Drop any URL fragment, then append the analytics campaign tag.
    details_url = (re.sub(r'#.+', '', url, re.I | re.S)
                   + '?utm_source=Muzei&utm_campaign=Muzei')
    title = page.find(itemprop='name').get_text()
    author = page.find(itemprop='author').get_text()
    year_node = page.find(itemprop='dateCreated')
    byline = author
    if year_node:
        byline = byline + ', ' + year_node.get_text()
    image_url = page.find(id='paintingImage')['href']
    if not (title and author and image_url):
        self.response.out.write('Could not parse HTML')
        self.response.set_status(500)
        return
    # publishDate is passed as epoch milliseconds.
    millis = int(self.request.get('publishDate'))
    publish_date = datetime.datetime.utcfromtimestamp(millis / 1000).date()
    label = publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline
    image_url, thumb_url = maybe_process_image(image_url, NO_CROP_TUPLE, label)
    # Create and persist the artwork entry.
    artwork = FeaturedArtwork(
        title=title,
        byline=byline,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    artwork.save()
    self.response.set_status(200)
def process_html(self, url, html):
    """Extract artwork fields from a wikiart page and persist them."""
    doc = BeautifulSoup(html)
    # Remove any fragment and append the analytics campaign parameters.
    base = re.sub(r"#.+", "", url, re.I | re.S)
    details_url = base + "?utm_source=Muzei&utm_campaign=Muzei"
    title = doc.select("h1 span")[0].get_text()
    author = doc.find(itemprop="author").get_text()
    created = doc.find(itemprop="dateCreated")
    byline = author + ((", " + created.get_text()) if created else "")
    image_url = doc.find(id="paintingImage")["href"]
    missing = not title or not author or not image_url
    if missing:
        self.response.out.write("Could not parse HTML")
        self.response.set_status(500)
        return
    # The publishDate parameter is epoch milliseconds.
    stamp = int(self.request.get("publishDate")) / 1000
    publish_date = datetime.datetime.utcfromtimestamp(stamp).date()
    image_url, thumb_url = maybe_process_image(
        image_url,
        NO_CROP_TUPLE,
        publish_date.strftime("%Y%m%d") + " " + title + " " + byline,
    )
    # Create and save the artwork entry.
    FeaturedArtwork(
        title=title,
        byline=byline,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date,
    ).save()
    self.response.set_status(200)
def post(self):
    """Create a FeaturedArtwork from the posted JSON, rejecting duplicates.

    Request params:
      json: artwork dict (title, byline, imageUri, detailsUri, publishDate in
            epoch milliseconds, optional attribution/thumbUri).
      crop: JSON array of floats describing the crop rectangle.
    Aborts with 409 if artwork already exists for the publish date.
    """
    artwork_json = json.loads(self.request.get("json"))
    # publishDate arrives as epoch milliseconds.
    publish_date = datetime.datetime.utcfromtimestamp(artwork_json["publishDate"] / 1000).date()
    # BUGFIX: the filter string was "publish_date=" (no space before the
    # operator); the GAE datastore filter syntax used everywhere else in this
    # file is "publish_date =". Also prefer `is not None` over `!= None`.
    if FeaturedArtwork.all().filter("publish_date =", publish_date).get() is not None:
        webapp2.abort(409, message="Artwork already exists for this date.")
    crop_tuple = tuple(float(x) for x in json.loads(self.request.get("crop")))
    new_image_url, new_thumb_url = backroomarthelper.maybe_process_image(
        artwork_json["imageUri"],
        crop_tuple,
        publish_date.strftime("%Y%m%d") + " " + artwork_json["title"] + " " + artwork_json["byline"],
    )
    # Fall back to the client-supplied thumbnail when processing produced none.
    if not new_thumb_url and "thumbUri" in artwork_json:
        new_thumb_url = artwork_json["thumbUri"]
    new_artwork = FeaturedArtwork(
        title=artwork_json["title"],
        byline=artwork_json["byline"],
        attribution=artwork_json["attribution"] if "attribution" in artwork_json else None,
        image_url=new_image_url,
        thumb_url=new_thumb_url,
        details_url=artwork_json["detailsUri"],
        publish_date=publish_date,
    )
    new_artwork.save()
    self.response.set_status(200)
def get(self):
    """Cron/admin handler: pre-schedule FeaturedArtwork rows for upcoming dates.

    Chooses random, not-recently-used entries from the bundled
    lt-artworks.json for every date in the lookahead window that has no
    artwork yet. Supports ?dry-run=true (no datastore writes) and
    ?output=html (renders the chosen artworks via get_html).
    """
    ARTWORKS = json.loads(open(os.path.join(os.path.split(__file__)[0], 'lt-artworks.json')).read())
    # ARTWORKS = filter(lambda a: '_stars' in a and a['_stars'] >= 1, ARTWORKS)

    # Fetch latest 300 artworks (for blacklisting)
    latest_artworks = (FeaturedArtwork.all()
        .order('-publish_date')
        .fetch(300))

    # List dates for which artwork exists
    dates_with_existing_art = set(a.publish_date for a in latest_artworks)

    # List target dates that we want artwork for, but for which no artwork
    # exists. range starts at -1 so yesterday's slot gets backfilled too.
    target_dates = [date.today() + timedelta(days=n) for n in range(-1, LOOKAHEAD_DAYS)]
    target_dates = [d for d in target_dates if d not in dates_with_existing_art]

    # Create a blacklist of keys to avoid repeats
    blacklist = set(artwork_key(a.details_url) for a in latest_artworks)
    logging.debug('starting blacklist size: %d' % len(blacklist))

    chosen_artworks = []
    for target_date in target_dates:
        # Pick from available artworks, excluding artwork in the blacklist
        random_artwork = None
        while True:
            # Guard: the candidate pool shrinks as artworks are chosen.
            if len(ARTWORKS) == 0:
                logging.error('Ran out of artworks to choose from, cannot continue')
                return
            random_artwork = random.choice(ARTWORKS)
            key = artwork_key(random_artwork['detailsUri'])
            if key not in blacklist:
                # Once chosen, remove it from the list of artworks to choose next
                ARTWORKS.remove(random_artwork)
                chosen_artworks.append(random_artwork)
                break

        target_details_url = str(random_artwork['detailsUri'])
        logging.debug('%(date)s: setting to %(url)s'
            % dict(url=target_details_url, date=target_date))

        # Store the new artwork (skipped entirely in dry-run mode)
        if self.request.get('dry-run', '') != 'true':
            new_artwork = FeaturedArtwork(
                title=random_artwork['title'],
                byline=random_artwork['byline'],
                attribution=random_artwork['attribution'],
                image_url=random_artwork['imageUri'],
                thumb_url=random_artwork['thumbUri'],
                details_url=random_artwork['detailsUri'],
                publish_date=target_date)
            new_artwork.save()

    if self.request.get('output', '') == 'html':
        self.response.out.write(get_html(artworks_json=json.dumps(chosen_artworks)))

    # Finish up
    logging.debug('done')
def process_html(self, url, html):
    """Parse artwork metadata out of the fetched page and store it."""
    markup = BeautifulSoup(html)
    details_url = re.sub(r'#.+', '', url, re.I | re.S) \
        + '?utm_source=Muzei&utm_campaign=Muzei'
    title = markup.find(itemprop='name').get_text()
    author = markup.find(itemprop='author').get_text()
    created_el = markup.find(itemprop='dateCreated')
    if created_el:
        byline = author + ', ' + created_el.get_text()
    else:
        byline = author
    image_url = markup.find(id='paintingImage')['href']
    # Bail out with a server error when any required field is absent.
    if not (title and author and image_url):
        self.response.out.write('Could not parse HTML')
        self.response.set_status(500)
        return
    # publishDate request parameter is epoch milliseconds.
    publish_date = datetime.datetime.utcfromtimestamp(
        int(self.request.get('publishDate')) / 1000).date()
    slug = publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline
    image_url, thumb_url = maybe_process_image(image_url, NO_CROP_TUPLE, slug)
    # create the artwork entry
    record = FeaturedArtwork(
        title=title,
        byline=byline,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    record.save()
    self.response.set_status(200)
def post(self):
    """Store a new FeaturedArtwork described by the posted JSON and crop."""
    payload = json.loads(self.request.get('json'))
    crop = tuple(float(v) for v in json.loads(self.request.get('crop')))
    # publishDate arrives as epoch milliseconds.
    publish_date = datetime.datetime.utcfromtimestamp(
        payload['publishDate'] / 1000).date()
    base_name = (publish_date.strftime('%Y%m%d') + ' ' + payload['title']
                 + ' ' + payload['byline'])
    image_url, thumb_url = maybe_process_image(
        payload['imageUri'], crop, base_name)
    if not thumb_url:
        # Fall back to the client-supplied thumbnail when none was produced.
        thumb_url = payload.get('thumbUri', thumb_url)
    FeaturedArtwork(
        title=payload['title'],
        byline=payload['byline'],
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=payload['detailsUri'],
        publish_date=publish_date).save()
    self.response.set_status(200)
def post(self):
    """Create a FeaturedArtwork from the posted JSON, rejecting duplicates.

    Request params:
      json: artwork dict (title, byline, imageUri, detailsUri, publishDate in
            epoch milliseconds, optional attribution/thumbUri).
      crop: JSON array of floats describing the crop rectangle.
    Aborts with 409 if artwork already exists for the publish date.
    """
    artwork_json = json.loads(self.request.get('json'))
    # publishDate arrives as epoch milliseconds.
    publish_date = (datetime.datetime.utcfromtimestamp(
        artwork_json['publishDate'] / 1000).date())
    # BUGFIX: the filter string was 'publish_date=' (no space before the
    # operator); the GAE datastore filter syntax used everywhere else in this
    # file is 'publish_date ='. Also prefer `is not None` over `!= None`.
    if FeaturedArtwork.all().filter('publish_date =', publish_date).get() is not None:
        webapp2.abort(409, message='Artwork already exists for this date.')
    crop_tuple = tuple(
        float(x) for x in json.loads(self.request.get('crop')))
    new_image_url, new_thumb_url = backroomarthelper.maybe_process_image(
        artwork_json['imageUri'],
        crop_tuple,
        publish_date.strftime('%Y%m%d') + ' '
            + artwork_json['title'] + ' '
            + artwork_json['byline'])
    # Fall back to the client-supplied thumbnail when processing produced none.
    if not new_thumb_url and 'thumbUri' in artwork_json:
        new_thumb_url = artwork_json['thumbUri']
    new_artwork = FeaturedArtwork(
        title=artwork_json['title'],
        byline=artwork_json['byline'],
        attribution=artwork_json['attribution'] if 'attribution' in artwork_json else None,
        image_url=new_image_url,
        thumb_url=new_thumb_url,
        details_url=artwork_json['detailsUri'],
        publish_date=publish_date)
    new_artwork.save()
    self.response.set_status(200)
def add_art_from_external_details_url(publish_date, url):
    """Scrape an artwork details page (wikiart.org / metmuseum.org) and store
    it as the FeaturedArtwork for publish_date.

    Aborts with 409 if the date already has artwork, 400 for fetch errors or
    unrecognized hosts, 500 when required fields cannot be parsed.
    Returns the saved FeaturedArtwork entity.
    """
    if FeaturedArtwork.all().filter('publish_date =', publish_date).get() is not None:
        webapp2.abort(409, message='Artwork already exists for this date.')
    result = urlfetch.fetch(url)
    if result.status_code < 200 or result.status_code >= 300:
        webapp2.abort(400, message='Error processing URL: HTTP %d. Content: %s'
            % (result.status_code, result.content))
    soup = BeautifulSoup(result.content)
    attribution = None

    if re.search(r'wikiart.org', url, re.I) or re.search(r'wikipaintings.org', url, re.I):
        attribution = 'wikiart.org'
        details_url = (re.sub(r'#.+', '', url, re.I | re.S)
            + '?utm_source=Muzei&utm_campaign=Muzei')
        title = soup.select('h1 span')[0].get_text()
        author = soup.find(itemprop='author').get_text()
        completion_year_el = soup.find(itemprop='dateCreated')
        byline = author + ((', ' + completion_year_el.get_text())
            if completion_year_el else '')
        image_url = soup.find(id='paintingImage')['href']

    elif re.search(r'metmuseum.org', url, re.I):
        attribution = 'metmuseum.org'
        details_url = (re.sub(r'[#?].+', '', url, re.I | re.S)
            + '?utm_source=Muzei&utm_campaign=Muzei')
        title = soup.find('h2').get_text()
        author = ''
        try:
            author = unicode(soup.find(text='Artist:').parent.next_sibling).strip()
        except AttributeError:
            # BUGFIX: was a bare `except:`; only tolerate the missing-label
            # case (find() returning None makes `.parent` raise AttributeError).
            pass
        author = re.sub(r'\s*\(.*', '', author)
        completion_year_el = None
        try:
            completion_year_el = unicode(soup.find(text='Date:').parent.next_sibling).strip()
        except AttributeError:
            pass
        byline = author + ((', ' + completion_year_el) if completion_year_el else '')
        image_url = soup.find('a', class_='download').attrs['href']

    else:
        webapp2.abort(400, message='Unrecognized URL')

    if not title or not author or not image_url:
        webapp2.abort(500, message='Could not parse HTML')

    image_url, thumb_url = maybe_process_image(image_url, NO_CROP_TUPLE,
        publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline)

    # create the artwork entry
    new_artwork = FeaturedArtwork(
        title=title,
        byline=byline,
        attribution=attribution,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    new_artwork.save()
    return new_artwork
def get(self): ARTWORKS = json.loads( open(os.path.join(os.path.split(__file__)[0], 'lt-artworks.json')).read()) # Fetch latest 300 artworks (for blacklisting) latest_artworks = ( FeaturedArtwork.all().order('-publish_date').fetch(300)) # List dates for which artwork exists dates_with_existing_art = set(a.publish_date for a in latest_artworks) # List target dates that we want artwork for, but for which no artwork exists target_dates = [ date.today() + timedelta(days=n) for n in range(-1, LOOKAHEAD_DAYS) ] target_dates = [ d for d in target_dates if d not in dates_with_existing_art ] # Create a blacklist of keys to avoid repeats blacklist = set(artwork_key(a.details_url) for a in latest_artworks) self.response.out.write('starting blacklist size: %d<br>' % len(blacklist)) for target_date in target_dates: # Pick from available artworks, excluding artwork in the blacklist random_artwork = None while True: random_artwork = random.choice(ARTWORKS) key = artwork_key(random_artwork['detailsUri']) if key not in blacklist: # Once chosen, add to the blacklist to avoid repeats within the lookahead blacklist.add(key) break target_details_url = str(random_artwork['detailsUri']) self.response.out.write( '%(date)s: setting to <b>%(url)s</b><br>' % dict(url=target_details_url, date=target_date)) # Store the new artwork new_artwork = FeaturedArtwork( title=random_artwork['title'], byline=random_artwork['byline'], attribution=random_artwork['attribution'], image_url=random_artwork['imageUri'], thumb_url=random_artwork['thumbUri'], details_url=random_artwork['detailsUri'], publish_date=target_date) new_artwork.save() # Finish up self.response.out.write('done<br>')
def post(self):
    """Save the artwork from the posted JSON, deriving a thumbnail if absent."""
    artwork = json.loads(self.request.get('json'))
    if 'thumbUri' in artwork:
        thumb = artwork['thumbUri']
    else:
        # Default thumbnail: the wikiart "BlogSmall" rendition of the image.
        thumb = artwork['imageUri'] + '!BlogSmall.jpg'
    # publishDate arrives as epoch milliseconds.
    when = datetime.datetime.utcfromtimestamp(
        artwork['publishDate'] / 1000).date()
    entity = FeaturedArtwork(
        title=artwork['title'],
        byline=artwork['byline'],
        image_url=artwork['imageUri'],
        thumb_url=thumb,
        details_url=artwork['detailsUri'],
        publish_date=when)
    entity.save()
    self.response.set_status(200)
def post(self):
    """Save the artwork from the posted JSON; the thumbnail is optional."""
    data = json.loads(self.request.get('json'))
    # publishDate arrives as epoch milliseconds.
    publish_date = datetime.datetime.utcfromtimestamp(
        data['publishDate'] / 1000).date()
    record = FeaturedArtwork(
        title=data['title'],
        byline=data['byline'],
        image_url=data['imageUri'],
        thumb_url=data.get('thumbUri'),  # None when the client sent no thumbnail
        details_url=data['detailsUri'],
        publish_date=publish_date)
    record.save()
    self.response.set_status(200)
def process_html(self, url, html):
    """Parse a wikiart.org or metmuseum.org artwork page and store it.

    Writes HTTP 500 for unrecognized URLs or unparseable pages; on success
    saves the FeaturedArtwork and echoes its JSON representation.
    """
    soup = BeautifulSoup(html)
    if re.search(r'wikiart.org', url, re.I):
        details_url = re.sub(r'#.+', '', url, re.I | re.S) + '?utm_source=Muzei&utm_campaign=Muzei'
        title = soup.select('h1 span')[0].get_text()
        author = soup.find(itemprop='author').get_text()
        completion_year_el = soup.find(itemprop='dateCreated')
        byline = author + ((', ' + completion_year_el.get_text())
            if completion_year_el else '')
        image_url = soup.find(id='paintingImage')['href']
    elif re.search(r'metmuseum.org', url, re.I):
        details_url = re.sub(r'[#?].+', '', url, re.I | re.S) + '?utm_source=Muzei&utm_campaign=Muzei'
        title = soup.find('h2').get_text()
        # ROBUSTNESS: the 'Artist:' / 'Date:' labels are not always present;
        # tolerate a missing node (find() returns None -> AttributeError) the
        # same way the other scraping paths in this file do, instead of
        # crashing with an unhandled exception.
        author = ''
        try:
            author = unicode(
                soup.find(text='Artist:').parent.next_sibling).strip()
        except AttributeError:
            pass
        author = re.sub(r'\s*\(.*', '', author)
        completion_year_el = None
        try:
            completion_year_el = unicode(
                soup.find(text='Date:').parent.next_sibling).strip()
        except AttributeError:
            pass
        byline = author + (
            (', ' + completion_year_el) if completion_year_el else '')
        image_url = soup.find('a', class_='download').attrs['href']
    else:
        self.response.out.write('Unrecognized URL')
        self.response.set_status(500)
        return
    if not title or not author or not image_url:
        self.response.out.write('Could not parse HTML')
        self.response.set_status(500)
        return
    # publishDate request parameter is epoch milliseconds.
    publish_date = (datetime.datetime.utcfromtimestamp(
        int(self.request.get('publishDate')) / 1000).date())
    image_url, thumb_url = maybe_process_image(
        image_url, NO_CROP_TUPLE,
        publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline)
    # create the artwork entry
    new_artwork = FeaturedArtwork(title=title,
        byline=byline,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    new_artwork.save()
    self.response.set_status(200)
    self.response.out.write(json.dumps(artwork_dict(new_artwork)))
def get(self): ARTWORKS = json.loads(open(os.path.join(os.path.split(__file__)[0], 'lt-artworks.json')).read()) # Fetch latest 300 artworks (for blacklisting) latest_artworks = (FeaturedArtwork.all() .order('-publish_date') .fetch(300)) # List dates for which artwork exists dates_with_existing_art = set(a.publish_date for a in latest_artworks) # List target dates that we want artwork for, but for which no artwork exists target_dates = [date.today() + timedelta(days=n) for n in range(-1, LOOKAHEAD_DAYS)] target_dates = [d for d in target_dates if d not in dates_with_existing_art] # Create a blacklist of keys to avoid repeats blacklist = set(artwork_key(a.details_url) for a in latest_artworks) self.response.out.write('starting blacklist size: %d<br>' % len(blacklist)) for target_date in target_dates: # Pick from available artworks, excluding artwork in the blacklist random_artwork = None while True: random_artwork = random.choice(ARTWORKS) key = artwork_key(random_artwork['detailsUri']) if key not in blacklist: # Once chosen, add to the blacklist to avoid repeats within the lookahead blacklist.add(key) break target_details_url = str(random_artwork['detailsUri']) self.response.out.write('%(date)s: setting to <b>%(url)s</b><br>' % dict(url=target_details_url, date=target_date)) # Store the new artwork new_artwork = FeaturedArtwork( title=random_artwork['title'], byline=random_artwork['byline'], attribution=random_artwork['attribution'], image_url=random_artwork['imageUri'], thumb_url=random_artwork['thumbUri'], details_url=random_artwork['detailsUri'], publish_date=target_date) new_artwork.save() # Finish up self.response.out.write('done<br>')
def process_html(self, url, html):
    """Parse a wikiart.org or metmuseum.org artwork page and store it.

    Writes HTTP 500 for unrecognized URLs or unparseable pages; on success
    saves the FeaturedArtwork and echoes its JSON representation.
    """
    soup = BeautifulSoup(html)
    if re.search(r'wikiart.org', url, re.I):
        details_url = re.sub(r'#.+', '', url, re.I | re.S) + '?utm_source=Muzei&utm_campaign=Muzei'
        title = soup.select('h1 span')[0].get_text()
        author = soup.find(itemprop='author').get_text()
        completion_year_el = soup.find(itemprop='dateCreated')
        byline = author + ((', ' + completion_year_el.get_text()) if completion_year_el else '')
        image_url = soup.find(id='paintingImage')['href']
    elif re.search(r'metmuseum.org', url, re.I):
        details_url = re.sub(r'[#?].+', '', url, re.I | re.S) + '?utm_source=Muzei&utm_campaign=Muzei'
        title = soup.find('h2').get_text()
        # ROBUSTNESS: the 'Artist:' / 'Date:' labels are not always present;
        # tolerate a missing node (find() returns None -> AttributeError) the
        # same way the other scraping paths in this file do, instead of
        # crashing with an unhandled exception.
        author = ''
        try:
            author = unicode(soup.find(text='Artist:').parent.next_sibling).strip()
        except AttributeError:
            pass
        author = re.sub(r'\s*\(.*', '', author)
        completion_year_el = None
        try:
            completion_year_el = unicode(soup.find(text='Date:').parent.next_sibling).strip()
        except AttributeError:
            pass
        byline = author + ((', ' + completion_year_el) if completion_year_el else '')
        image_url = soup.find('a', class_='download').attrs['href']
    else:
        self.response.out.write('Unrecognized URL')
        self.response.set_status(500)
        return
    if not title or not author or not image_url:
        self.response.out.write('Could not parse HTML')
        self.response.set_status(500)
        return
    # publishDate request parameter is epoch milliseconds.
    publish_date = (datetime.datetime
        .utcfromtimestamp(int(self.request.get('publishDate')) / 1000)
        .date())
    image_url, thumb_url = maybe_process_image(image_url, NO_CROP_TUPLE,
        publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline)
    # create the artwork entry
    new_artwork = FeaturedArtwork(
        title=title,
        byline=byline,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    new_artwork.save()
    self.response.set_status(200)
    self.response.out.write(json.dumps(artwork_dict(new_artwork)))
def post(self):
    """Process the posted artwork image and store the FeaturedArtwork."""
    data = json.loads(self.request.get('json'))
    label = data['title'] + ' ' + data['byline']
    image_url, thumb_url = maybe_process_image(data['imageUri'], label)
    if not thumb_url and 'thumbUri' in data:
        # Fall back to the client-supplied thumbnail.
        thumb_url = data['thumbUri']
    # publishDate arrives as epoch milliseconds.
    publish_date = datetime.datetime.utcfromtimestamp(
        data['publishDate'] / 1000).date()
    FeaturedArtwork(
        title=data['title'],
        byline=data['byline'],
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=data['detailsUri'],
        publish_date=publish_date).save()
    self.response.set_status(200)
def post(self):
    """Persist a FeaturedArtwork built from the posted JSON and crop."""
    body = json.loads(self.request.get("json"))
    crop = tuple(float(v) for v in json.loads(self.request.get("crop")))
    # publishDate is supplied in epoch milliseconds.
    publish_date = datetime.datetime.utcfromtimestamp(
        body["publishDate"] / 1000).date()
    caption = (publish_date.strftime("%Y%m%d") + " " + body["title"]
               + " " + body["byline"])
    image_url, thumb_url = maybe_process_image(body["imageUri"], crop, caption)
    if not thumb_url and "thumbUri" in body:
        # Fall back to the client-supplied thumbnail.
        thumb_url = body["thumbUri"]
    entity = FeaturedArtwork(
        title=body["title"],
        byline=body["byline"],
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=body["detailsUri"],
        publish_date=publish_date)
    entity.save()
    self.response.set_status(200)
def post(self):
    """Scrape an external artwork URL and store it for the given publishDate.

    Aborts 409 on duplicate dates, 400 for fetch failures / unknown hosts,
    500 when required fields cannot be parsed. On success writes the saved
    artwork as JSON.
    """
    # publishDate request parameter is epoch milliseconds.
    publish_date = (datetime.datetime
        .utcfromtimestamp(int(self.request.get('publishDate')) / 1000)
        .date())
    if FeaturedArtwork.all().filter('publish_date =', publish_date).get() is not None:
        webapp2.abort(409, message='Artwork already exists for this date.')
    url = self.request.get('externalArtworkUrl')
    result = urlfetch.fetch(url)
    if result.status_code < 200 or result.status_code >= 300:
        webapp2.abort(400, message='Error processing URL: HTTP %d. Content: %s'
            % (result.status_code, result.content))
    soup = BeautifulSoup(result.content)
    attribution = None
    if re.search(r'wikiart.org', url, re.I):
        attribution = 'wikiart.org'
        details_url = (re.sub(r'#.+', '', url, re.I | re.S)
            + '?utm_source=Muzei&utm_campaign=Muzei')
        title = soup.select('h1 span')[0].get_text()
        author = soup.find(itemprop='author').get_text()
        completion_year_el = soup.find(itemprop='dateCreated')
        byline = author + ((', ' + completion_year_el.get_text())
            if completion_year_el else '')
        image_url = soup.find(id='paintingImage')['href']
    elif re.search(r'metmuseum.org', url, re.I):
        attribution = 'metmuseum.org'
        details_url = (re.sub(r'[#?].+', '', url, re.I | re.S)
            + '?utm_source=Muzei&utm_campaign=Muzei')
        title = soup.find('h2').get_text()
        author = ''
        try:
            author = unicode(soup.find(text='Artist:').parent.next_sibling).strip()
        except AttributeError:
            # BUGFIX: was a bare `except:`; only tolerate the missing-label
            # case (find() returning None makes `.parent` raise AttributeError).
            pass
        author = re.sub(r'\s*\(.*', '', author)
        completion_year_el = None
        try:
            completion_year_el = unicode(soup.find(text='Date:').parent.next_sibling).strip()
        except AttributeError:
            pass
        byline = author + ((', ' + completion_year_el) if completion_year_el else '')
        image_url = soup.find('a', class_='download').attrs['href']
    else:
        webapp2.abort(400, message='Unrecognized URL')
    if not title or not author or not image_url:
        webapp2.abort(500, message='Could not parse HTML')
    image_url, thumb_url = maybe_process_image(image_url, NO_CROP_TUPLE,
        publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline)
    # create the artwork entry
    new_artwork = FeaturedArtwork(
        title=title,
        byline=byline,
        attribution=attribution,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    new_artwork.save()
    self.response.set_status(200)
    self.response.out.write(json.dumps(artwork_dict(new_artwork)))
def add_art_from_external_details_url(publish_date, url):
    """Scrape an artwork details page (wikiart.org / metmuseum.org) and store
    it as the FeaturedArtwork for publish_date.

    Aborts with 409 if the date already has artwork, 400 for fetch errors or
    unrecognized hosts, 500 when required fields cannot be parsed.
    Returns the saved FeaturedArtwork entity.
    """
    if FeaturedArtwork.all().filter('publish_date =', publish_date).get() is not None:
        webapp2.abort(409, message='Artwork already exists for this date.')
    result = urlfetch.fetch(url)
    if result.status_code < 200 or result.status_code >= 300:
        webapp2.abort(400, message='Error processing URL: HTTP %d. Content: %s'
            % (result.status_code, result.content))
    soup = BeautifulSoup(result.content, 'html.parser')
    attribution = None

    if re.search(r'wikiart.org', url, re.I) or re.search(r'wikipaintings.org', url, re.I):
        attribution = 'wikiart.org'
        details_url = (re.sub(r'#.+', '', url, re.I | re.S)
            + '?utm_source=Muzei&utm_campaign=Muzei')
        title = soup.find('h1').get_text()
        author = soup.find('a', class_='artist-name').get_text()
        completion_year = None
        try:
            completion_year = unicode(soup.find(text='Date:').parent.next_sibling).strip()
        except AttributeError:
            # BUGFIX: was a bare `except:`; only tolerate the missing-label
            # case (find() returning None makes `.parent` raise AttributeError).
            pass
        byline = author + ((', ' + completion_year) if completion_year else '')
        image_url = get_wikiart_image_url(soup)

    elif re.search(r'metmuseum.org', url, re.I):
        attribution = 'metmuseum.org'
        details_url = (re.sub(r'[#?].+', '', url, re.I | re.S)
            + '?utm_source=Muzei&utm_campaign=Muzei')
        title = soup.find('h2').get_text()
        author = ''
        try:
            author = unicode(soup.find(text='Artist:').parent.next_sibling).strip()
        except AttributeError:
            pass
        author = re.sub(r'\s*\(.*', '', author)
        completion_year = None
        try:
            completion_year = unicode(soup.find(text='Date:').parent.next_sibling).strip()
        except AttributeError:
            pass
        byline = author + ((', ' + completion_year) if completion_year else '')
        image_url = soup.find('a', class_='download').attrs['href']

    else:
        webapp2.abort(400, message='Unrecognized URL')

    if not title or not author or not image_url:
        webapp2.abort(500, message='Could not parse HTML')

    image_url, thumb_url = maybe_process_image(image_url, NO_CROP_TUPLE,
        publish_date.strftime('%Y%m%d') + ' ' + title + ' ' + byline)

    # create the artwork entry
    new_artwork = FeaturedArtwork(
        title=title.strip(),
        byline=byline.strip(),
        attribution=attribution,
        image_url=image_url,
        thumb_url=thumb_url,
        details_url=details_url,
        publish_date=publish_date)
    new_artwork.save()
    return new_artwork
def get(self): ARTWORKS = json.loads( open(os.path.join(os.path.split(__file__)[0], 'lt-artworks.json')).read()) # ARTWORKS = filter(lambda a: '_stars' in a and a['_stars'] >= 1, ARTWORKS) # Fetch latest 300 artworks (for blacklisting) latest_artworks = ( FeaturedArtwork.all().order('-publish_date').fetch(300)) # List dates for which artwork exists dates_with_existing_art = set(a.publish_date for a in latest_artworks) # List target dates that we want artwork for, but for which no artwork exists target_dates = [ date.today() + timedelta(days=n) for n in range(-1, LOOKAHEAD_DAYS) ] target_dates = [ d for d in target_dates if d not in dates_with_existing_art ] # Create a blacklist of keys to avoid repeats blacklist = set(artwork_key(a.details_url) for a in latest_artworks) logging.debug('starting blacklist size: %d' % len(blacklist)) chosen_artworks = [] for target_date in target_dates: # Pick from available artworks, excluding artwork in the blacklist random_artwork = None while True: if len(ARTWORKS) == 0: logging.error( 'Ran out of artworks to choose from, cannot continue') return random_artwork = random.choice(ARTWORKS) key = artwork_key(random_artwork['detailsUri']) if key not in blacklist: # Once chosen, remove it from the list of artworks to choose next ARTWORKS.remove(random_artwork) chosen_artworks.append(random_artwork) break target_details_url = str(random_artwork['detailsUri']) logging.debug('%(date)s: setting to %(url)s' % dict(url=target_details_url, date=target_date)) # Store the new artwork if self.request.get('dry-run', '') != 'true': new_artwork = FeaturedArtwork( title=random_artwork['title'], byline=random_artwork['byline'], attribution=random_artwork['attribution'], image_url=random_artwork['imageUri'], thumb_url=random_artwork['thumbUri'], details_url=random_artwork['detailsUri'], publish_date=target_date) new_artwork.save() if self.request.get('output', '') == 'html': self.response.out.write( get_html(artworks_json=json.dumps(chosen_artworks))) 
# Finish up logging.debug('done')