def experiment_update_all_listings():
    """
    Update all listings of the example data set.

    Reads the data in "../../example-data" into a ``DataStore``, passes all
    stored listings to ``EbayConnector.update_listings``, merges the result
    back into the data store and writes the listings. Progress messages are
    printed to standard output.
    """
    from clair.coredata import DataStore
    from clair.network import EbayConnector

    # Single-argument ``print(...)`` produces the same output as the print
    # statement under Python 2 and is also valid Python 3 syntax.
    print("====================================================================")
    print(" Updating all listings! ")
    print("====================================================================")

    ds = DataStore()
    ec = EbayConnector(relative("../../example-data/python-ebay.apikey"))
    ds.read_data(relative("../../example-data"))

    print("Updating {} listings...".format(len(ds.listings)))
    listings_upd = ec.update_listings(ds.listings)
    ds.merge_listings(listings_upd)
    ds.write_listings()

    print("finished")
class DaemonMain(object):
    """
    Main object for operation without GUI (daemon mode).

    Holds the server connection, the data store and the product recognizers,
    and executes the tasks stored in ``self.data.tasks`` when they are due.
    """

    def __init__(self, conf_dir, data_dir, data_store=None):
        """
        Create the daemon's working objects.

        Parameters
        ----------
        conf_dir : str
            Directory that contains the file "python-ebay.apikey".

        data_dir : str
            Directory that ``run_daemon`` passes to the data store and to
            the recognizers for reading.

        data_store : DataStore, optional
            Existing data store to work on. If ``None``, a new ``DataStore``
            is created.
        """
        self.data_dir = data_dir
        self.server = EbayConnector(path.join(conf_dir, "python-ebay.apikey"))
        self.data = DataStore() if data_store is None else data_store
        self.recognizers = RecognizerController()

    def compute_next_due_time(self, curr_time, recurrence_pattern,
                              add_random=False):
        """
        Compute next due time for recurrent tasks.

        Parameters
        ----------
        curr_time : datetime
            Start time of the recurrence. Current time should be used.

        recurrence_pattern: str
            How often should the task be executed? Case insensitive.
            One of:

                * "m", "month", "monthly"
                * "w", "week", "weekly"
                * "d", "day", "daily"
                * "h", "hour", "hourly"

        add_random: bool
            If ``True``, add a random amount of time to the computed due
            time, to avoid load spikes. The random amount is at most half
            of the recurrence interval (15 days for "monthly").

            If ``False``, the computed times are at the start of the
            interval, for example at 00:00 o'clock for "daily" recurrence.

        Returns
        -------
        datetime
            The new due time

        Raises
        ------
        ValueError
            If ``recurrence_pattern`` is not one of the known patterns.
        """
        bymonthday = None
        byweekday = None
        byhour = None

        # ``rand_max`` must be an integer: it is the upper bound that is
        # passed to ``randint``.
        recurrence_pattern = recurrence_pattern.lower()
        if recurrence_pattern in ["m", "month", "monthly"]:
            freq = dateutil.rrule.MONTHLY
            byhour = 0
            bymonthday = 1
            rand_max = 15 * 24 * 60 * 60  # sec - 15 days
        elif recurrence_pattern in ["w", "week", "weekly"]:
            freq = dateutil.rrule.WEEKLY
            byhour = 0
            byweekday = 0
            rand_max = 84 * 60 * 60  # sec - 3.5 days (84 hours)
        elif recurrence_pattern in ["d", "day", "daily"]:
            freq = dateutil.rrule.DAILY
            byhour = 0
            rand_max = 12 * 60 * 60  # sec - 12 hours
        elif recurrence_pattern in ["h", "hour", "hourly"]:
            freq = dateutil.rrule.HOURLY
            rand_max = 30 * 60  # sec - 30 minutes
        else:
            raise ValueError("Unkown recurrence_pattern: "
                             + str(recurrence_pattern))

        # Two occurrences are enough: the first one may coincide with
        # ``curr_time``, and ``after`` only returns strictly later times.
        rule = dateutil.rrule.rrule(
            freq=freq, dtstart=curr_time, count=2,
            bymonthday=bymonthday, byweekday=byweekday,
            byhour=byhour, byminute=0, bysecond=0, cache=True)
        new_time = rule.after(curr_time)

        # Add random component.
        if add_random:
            rand_secs = randint(0, rand_max)
            new_time += timedelta(seconds=rand_secs)

        return new_time

    def compute_next_wakeup_time(self):
        """
        Compute time when application needs to wake up to execute next task.

        Loops over all tasks in ``self.data.tasks``. If there are no tasks,
        the returned wake up time is 9999-12-31.

        Returns
        -------
        datetime, float

            * Time when next task is due
            * Number of seconds to sleep until the next task is due.
              Never negative; 0 if a task is already due.
        """
        wakeup_time = datetime(9999, 12, 31)  # The last possible month
        for task in self.data.tasks:
            wakeup_time = min(task.due_time, wakeup_time)

        sleep_interval = wakeup_time - datetime.utcnow()
        sleep_sec = max(sleep_interval.total_seconds(), 0.)

        return wakeup_time, sleep_sec

    def execute_search_task(self, task):
        """
        Search for new listings. Executes a search task.

        The found listings are annotated with the task's ID, expected
        products and server, and are then merged into ``self.data``.

        Parameters
        ----------
        task : SearchTask
            The search task that is executed.

        Returns
        -------
        list
            The contents of the "id" column of the found listings.
        """
        assert isinstance(task, SearchTask)
        logging.debug("Executing search task: '{id}'".format(id=task.id))

        # Get new listings from server
        lst_found = self.server.find_listings(
            keywords=task.query_string,
            n_listings=task.n_listings,
            price_min=task.price_min, price_max=task.price_max,
            currency=task.currency)
        # Fill in additional information, mainly for product recognition
        lst_found["search_tasks"].fill([task.id])
        lst_found["expected_products"].fill(task.expected_products)
        lst_found["server"] = task.server

        # Sane handling of listings that are found by multiple search tasks.
        # Get IDs of listings that have already been found by other tasks
        common_ids = list(set(lst_found.index).intersection(
            set(self.data.listings.index)))
        for idx in common_ids:
            # Union of "search_tasks" list between existing and new listings
            tasks = sorted(set(lst_found["search_tasks"][idx]
                               + self.data.listings["search_tasks"][idx]))
            lst_found["search_tasks"][idx] = tasks
            # Union of "expected_products" list between existing and new
            # listings
            prods = sorted(set(
                lst_found["expected_products"][idx]
                + self.data.listings["expected_products"][idx]))
            lst_found["expected_products"][idx] = prods

        self.data.merge_listings(lst_found)

        return list(lst_found["id"])

    def execute_update_task(self, task):
        """
        Download the complete information of known listings.
        Executes an update task.

        The updated listings are merged into ``self.data``. Afterwards the
        recognizers try to recognize products in the updated listings.

        Parameters
        ----------
        task : UpdateTask
            The update task that is executed. ``task.listings`` selects the
            listings that are updated.

        Returns
        -------
        list
            The contents of the "id" column of the updated listings.
        """
        assert isinstance(task, UpdateTask)
        logging.debug("Executing update task: '{id}'".format(id=task.id))

        # Download the listings
        lst_update = self.data.listings.ix[task.listings]
        lst_update = self.server.update_listings(lst_update)
        lst_update["server"] = task.server
        self.data.merge_listings(lst_update)

        # Recognize products
        self.recognizers.recognize_products(lst_update.index,
                                            self.data.listings)

        return list(lst_update["id"])

    def execute_tasks(self):
        """
        Execute the due tasks in ``self.data.tasks``.

        Recurrent tasks get a new (randomized) due time after execution.
        Single shot tasks (``recurrence_pattern`` is ``None``) are removed
        after execution.

        Raises
        ------
        TypeError
            If a due task is neither a ``SearchTask`` nor an ``UpdateTask``.
        """
        logging.info("Executing due tasks.")
        now = datetime.utcnow()
        dead_tasks = []
        for itask, task in enumerate(self.data.tasks):
            # Test if task is due
            if task.due_time > now:
                continue

            logging.info("Executing task: {}".format(task.id))
            # Search for new listings
            if isinstance(task, SearchTask):
                self.execute_search_task(task)
            # Update known listings
            elif isinstance(task, UpdateTask):
                self.execute_update_task(task)
            else:
                raise TypeError("Unknown task type:" + str(type(task)) +
                                "\ntask:\n" + str(task))

            # Mark non-recurrent tasks for removal
            if task.recurrence_pattern is None:
                dead_tasks.append(itask)
            # Compute new due time for recurrent tasks
            else:
                task.due_time = self.compute_next_due_time(
                    datetime.utcnow(), task.recurrence_pattern, True)

        # Remove dead (non recurrent) tasks, after they have been executed.
        # Delete from the back, so that the remaining indexes stay valid.
        for itask in reversed(dead_tasks):
            del self.data.tasks[itask]

    def create_final_update_tasks(self):
        """
        Create tasks that update the listing information shortly after the
        auctions end. We want to know the final price of each auction.

        20 auctions are updated at once. Each created task is due 30 minutes
        after the latest "time" value of its group of listings. The listings
        are flagged in column "final_update_pending", so that no second
        update task is created for them.
        """
        logging.info("Creating update tasks, to get final prices.")
        if len(self.data.listings) == 0:
            return

        # Create administration information if it doesn't exist
        try:
            self.data.listings["final_update_pending"]
        except KeyError:
            self.data.listings["final_update_pending"] = 0.0

        # Get listings where final price is unknown and
        #     where no final update is pending.
        # Note! Three-valued logic: 1., 0., nan
        where_no_final = (
            (self.data.listings["final_price"] != True) &
            (self.data.listings["final_update_pending"] != True))
        no_final = self.data.listings[where_no_final]
        no_final = no_final.sort("time")
        if len(no_final) == 0:
            return

        # Group listings into groups of 20 (max for Ebay get-items request)
        n_group = 20
        group_nums = [ne // n_group for ne in range(len(no_final))]
        groups = no_final.groupby(group_nums)

        # Create one update task for each group
        update_tasks = []
        id_start = "update-"
        for i, group in groups:
            latest_time = group["time"].max()
            due_time = latest_time + timedelta(minutes=30)
            listing_ids = group["id"]
            task = UpdateTask(
                id=id_start + due_time.isoformat() + "-" + str(i),
                due_time=due_time, server=None, recurrence_pattern=None,
                listings=listing_ids)
            update_tasks.append(task)

        self.data.add_tasks(update_tasks)

        # Remember the listings for which update tasks were just created
        self.data.listings["final_update_pending"][where_no_final] = True

    def run_daemon(self, nloops=-1):
        """
        Simple main loop that downloads listings.

        Reads the data and the recognizers from ``self.data_dir``. Then,
        in each cycle: sleeps until the next task is due, executes the due
        tasks, creates the update tasks for final prices, and writes
        listings and tasks.

        To run a daemon from the command line call::
            CommandLineHandler.daemon_main()

        Parameters
        ----------
        nloops : int
            Number of cycles in the main loop. -1 means: loop infinitely.
        """
        # Only load listings from one month in past and one month in future
        date_start = datetime.utcnow() - timedelta(days=30)
        date_end = datetime.utcnow() + timedelta(days=30)
        self.data.read_data(self.data_dir, date_start, date_end)
        self.recognizers.read_recognizers(self.data_dir)

        self.create_final_update_tasks()

        # A negative ``nloops`` never reaches 0, so the loop runs forever.
        while nloops:
            # Sleep until a task is due
            next_due_time, sleep_secs = self.compute_next_wakeup_time()
            logging.info("Sleeping until: {}".format(next_due_time))
            time.sleep(sleep_secs)

            self.execute_tasks()
            self.create_final_update_tasks()
            self.data.write_listings()
            self.data.write_tasks()

            nloops -= 1