from boto3.dynamodb.conditions import Key

from extractor.query import Query
from utils.utils import parseTime

# Default window width; presumably seconds (60 * 60) -- confirm against the
# unit returned by parseTime.
an_hour = 60 * 60
# Evaluated once, at import time.
default_start_time = parseTime("03-03-2016 12:00")


class ByHourQuery(Query):
    """Query narrowed to a ``timestamp`` window.

    The key condition built by ``Query.__init__`` is AND-ed with
    ``Key('timestamp').between(start_time, start_time + interval)``.
    DynamoDB's BETWEEN includes both bounds.
    """

    def __init__(self, tag, limit=100, plugins=None,
                 start_time=default_start_time, interval=an_hour):
        """Build the windowed key condition.

        Args:
            tag: Forwarded unchanged to ``Query.__init__``.
            limit: Forwarded to ``Query.__init__``; note that ``self.limit``
                is reset to ``None`` right afterwards.
            plugins: Forwarded to ``Query.__init__``. ``None`` (the default)
                means a fresh empty list, so instances never share one
                default list object.
            start_time: Lower bound of the ``timestamp`` window. Must support
                ``start_time + interval``.
            interval: Width of the window, added to ``start_time`` to obtain
                the upper bound.
        """
        if plugins is None:
            plugins = []
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as ByHourQuery itself is subclassed.
        super(ByHourQuery, self).__init__(tag, limit, plugins)

        # Both attributes are reset after the base initializer has run.
        # NOTE(review): presumably this disables the base class's row cap
        # and result filter for windowed queries -- confirm against Query.
        self.limit = None
        self.filter = None

        self.start_time = start_time
        self.start_timestamp = start_time
        self.interval = interval
        # Upper bound of the window ("send_timestamp" is the existing
        # public attribute name and is kept for compatibility).
        self.send_timestamp = self.start_timestamp + self.interval
        self.conditions = self.conditions & Key('timestamp').between(
            self.start_timestamp, self.send_timestamp)

    def get_results(self):
        """Run the query, keep paging, and return ``self.restructured()``.

        Paging continues while ``self.lastKey`` is truthy and the cap in
        ``self.limit`` has not been reached. A ``None`` limit, which is what
        ``__init__`` sets, means "no cap".
        """
        self._query_()
        # Comparing an int with None raises TypeError on Python 3 and is
        # always False on Python 2, so None has to be handled explicitly.
        while self.lastKey and (
                self.limit is None or self.resultCount < self.limit):
            self._query_()
        return self.restructured()
from boto3.dynamodb.conditions import Key

from extractor.query import Query
from utils.utils import parseTime

# Default window width; presumably seconds (60 * 60) -- confirm against the
# unit returned by parseTime.
an_hour = 60 * 60
# Evaluated once, at import time.
default_start_time = parseTime("03-03-2016 12:00")


class ByHourQuery(Query):
    """Query narrowed to a ``timestamp`` window.

    The key condition built by ``Query.__init__`` is AND-ed with
    ``Key('timestamp').between(start_time, start_time + interval)``.
    DynamoDB's BETWEEN includes both bounds.
    """

    def __init__(self, tag, limit=100, plugins=None,
                 start_time=default_start_time, interval=an_hour):
        """Build the windowed key condition.

        Args:
            tag: Forwarded unchanged to ``Query.__init__``.
            limit: Forwarded to ``Query.__init__``; note that ``self.limit``
                is reset to ``None`` right afterwards.
            plugins: Forwarded to ``Query.__init__``. ``None`` (the default)
                means a fresh empty list, so instances never share one
                default list object.
            start_time: Lower bound of the ``timestamp`` window. Must support
                ``start_time + interval``.
            interval: Width of the window, added to ``start_time`` to obtain
                the upper bound.
        """
        if plugins is None:
            plugins = []
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as ByHourQuery itself is subclassed.
        super(ByHourQuery, self).__init__(tag, limit, plugins)

        # Both attributes are reset after the base initializer has run.
        # NOTE(review): presumably this disables the base class's row cap
        # and result filter for windowed queries -- confirm against Query.
        self.limit = None
        self.filter = None

        self.start_time = start_time
        self.start_timestamp = start_time
        self.interval = interval
        # Upper bound of the window ("send_timestamp" is the existing
        # public attribute name and is kept for compatibility).
        self.send_timestamp = self.start_timestamp + self.interval
        self.conditions = self.conditions & Key('timestamp').between(
            self.start_timestamp, self.send_timestamp)

    def get_results(self):
        """Run the query, keep paging, and return ``self.restructured()``.

        Paging continues while ``self.lastKey`` is truthy and the cap in
        ``self.limit`` has not been reached. A ``None`` limit, which is what
        ``__init__`` sets, means "no cap".
        """
        self._query_()
        # Comparing an int with None raises TypeError on Python 3 and is
        # always False on Python 2, so None has to be handled explicitly.
        while self.lastKey and (
                self.limit is None or self.resultCount < self.limit):
            self._query_()
        return self.restructured()
{'start': '2015-08-1', 'end': '2015-09-1'}, {'start': '2015-11-1', 'end': '2016-12-1'} ] places = [ {'name': "NewYork", 'id': ".skCPTpTVr.Q3WKW"}, {'name': "Boston", 'id': "5MvaTZJTUbx1uPnP"}, {'name': "Beijing", 'id': "vQ6vOjpTU7_QE6S8"}, {'name': "Hongkong", 'id': "JAJiM7JTU78IjzqC"} ] results = {} for place in places: place_results = results[place.get('name')] = {} for time in times: query = Flickr({ 'place_id': place.get('id'), 'sort': 'interestingness-desc', 'per_page': 100, # 'text': 'landscape', 'min_taken_date': parseTime(time.get('start')), 'max_taken_date': parseTime(time.get('end')) }) # place_results[time.get('start')] = query.results # The above yields a lot of unused json. Below just the title, and image url. place_results[time.get('start')] = [{'title': r.get('title'),'url': r.get('url')} for r in query.results] with open("kini.json", 'w') as outfile: json.dump(results, outfile, indent=4, sort_keys=True)