def process_item(self, item):
    """
    Apply the user function to one source document and wrap the result.

    The user function ``self.ufn`` is called inside
    ``Timeout(seconds=self.timeout)``.  Any ``Exception`` raised there is
    logged with its traceback and replaced by an ``{"error": <message>}``
    payload instead of propagating.

    Args:
        item: source document; it is indexed by the source key field and
            the source last-updated field.

    Returns:
        dict containing, in order: the target key (copied from the source
        key), the target last-updated value (first entry of
        ``self.source.lu_func`` applied to the source value), optionally
        ``_process_time`` (difference of the ``time()`` readings taken
        around the call), and finally every field of the user function's
        output, which overrides earlier fields of the same name.
    """
    key, lu_field = self.source.key, self.source.lu_field
    self.logger.debug("Processing: {}".format(item[key]))

    time_start = time()
    try:
        with Timeout(seconds=self.timeout):
            processed = self.ufn(item)
    except Exception as e:
        # Best effort: a failing document is recorded, not raised.
        self.logger.error(traceback.format_exc())
        processed = {"error": str(e)}
    time_end = time()

    out = {
        self.target.key: item[key],
        self.target.lu_field: self.source.lu_func[0](item[lu_field]),
    }
    if self.store_process_time:
        out["_process_time"] = time_end - time_start

    # Merged last so the function's own fields take precedence.
    out.update(processed)
    return out
def process_item(self, item: List[Dict]) -> Dict[Tuple, Dict]:  # type: ignore
    """
    Process a group of source documents into one target document.

    ``self.unary_function`` receives the whole list and is called inside
    ``Timeout(seconds=self.timeout)``.  On success its result is copied and
    tagged ``"state": "successful"``; if any ``Exception`` is raised the
    traceback is logged and the result becomes
    ``{"error": <message>, "state": "failed"}``.

    Bookkeeping fields are then filled in only where the result does not
    already define them:

    * target key            -> source key of the first document
    * ``"<source key>s"``   -> source keys of every document in the group
    * target last-updated   -> maximum of the converted source values
    * ``_bt``               -> ``datetime.utcnow()`` at processing time

    ``_process_time`` is written (overriding any existing value) when
    ``self.store_process_time`` is truthy.

    Args:
        item: the documents of one group; each is indexed by the source
            key and last-updated fields.  An empty list is not supported
            (``max`` of an empty sequence raises ``ValueError``).

    Returns:
        A single flat dict as described above.
    """
    keys = [d[self.source.key] for d in item]
    self.logger.debug("Processing: {}".format(keys))

    time_start = time()
    try:
        with Timeout(seconds=self.timeout):
            # Copy before annotating so the state flag and bookkeeping
            # fields never leak into an object the unary function (or its
            # caller) may still hold a reference to.
            processed = dict(self.unary_function(item))
            processed["state"] = "successful"
    except Exception as e:
        # Best effort: a failing group is recorded, not raised.
        self.logger.error(traceback.format_exc())
        processed = {"error": str(e), "state": "failed"}
    time_end = time()

    last_updated = [
        self.source._lu_func[0](d[self.source.last_updated_field]) for d in item
    ]

    update_doc = {
        self.target.key: keys[0],
        f"{self.source.key}s": keys,
        self.target.last_updated_field: max(last_updated),
        # NOTE: naive UTC timestamp. ``utcnow`` is deprecated since Python
        # 3.12, but an aware datetime would change the stored value.
        "_bt": datetime.utcnow(),
    }
    # Fields produced by the unary function win over the defaults.
    for field, value in update_doc.items():
        processed.setdefault(field, value)

    if self.store_process_time:
        processed["_process_time"] = time_end - time_start

    return processed
def process_item(self, item: Dict):
    """
    Map a single source document through ``self.unary_function``.

    The function runs inside ``Timeout(seconds=self.timeout)``.  Its result
    is copied into a plain dict, tagged ``"state": "successful"`` and
    stripped of the source key and source last-updated fields.  If any
    ``Exception`` is raised the traceback is logged and the result becomes
    ``{"error": <message>, "state": "failed"}``.

    The returned document holds the target key (taken from the item's
    source key), the target last-updated value (first entry of
    ``self.source._lu_func`` applied to the item's last-updated value, or
    to ``datetime.utcnow()`` when the item has none), optionally
    ``_process_time``, and then the result fields, which override any
    earlier field of the same name.
    """
    source, target = self.source, self.target
    doc_label = item[source.key]
    self.logger.debug("Processing: {}".format(doc_label))

    started = time()
    try:
        with Timeout(seconds=self.timeout):
            result = dict(self.unary_function(item))
            result["state"] = "successful"
            # The key and last-updated fields are re-derived from the item
            # below, so drop whatever the function echoed back.
            for field in (source.key, source.last_updated_field):
                result.pop(field, None)
    except Exception as exc:
        self.logger.error(traceback.format_exc())
        result = {"error": str(exc), "state": "failed"}
    finished = time()

    convert = source._lu_func[0]
    document = {}
    document[target.key] = item[source.key]
    document[target.last_updated_field] = convert(
        item.get(source.last_updated_field, datetime.utcnow())
    )
    if self.store_process_time:
        document["_process_time"] = finished - started

    return {**document, **result}
def takes_too_long():
    """
    Sleep for two seconds inside a ``Timeout(seconds=1)`` block.

    NOTE(review): presumably ``Timeout`` interrupts the sleep once its
    one-second allowance is exceeded -- confirm against its implementation.
    """
    one_second_window = Timeout(seconds=1)
    with one_second_window:
        sleep(2)