def _download_events(self, input_events: List[tuple]) -> List[tuple]:
    """Download events in parallel, one worker per calendar.

    Each input tuple carries its calendar URL in the third slot; events are
    bucketed by that URL so a single worker handles one calendar's events.
    Returns the flattened list of result tuples from every worker.
    """
    self.logger.info("Downloading events...")
    logger.set_up_simple_logger(SIMPLE_LOGGER_PREFIX + __file__,
                                log_file=self.args.log_file,
                                log_level=self.args.log_level)
    started_at = datetime.now()

    # Bucket events by their calendar URL (third element of each tuple).
    events_per_calendar = defaultdict(list)
    for event in input_events:
        _, _, calendar_url = event
        events_per_calendar[calendar_url].append(event)

    calendar_count = len(events_per_calendar)
    worker_args = [
        (order, calendar_count, events, started_at, self.args.dry_run)
        for order, events in enumerate(events_per_calendar.values(), start=1)
    ]

    with multiprocessing.Pool(32) as pool:
        per_calendar_results = pool.map(
            DownloadEvents._download_events_process, worker_args)

    # Flatten the per-calendar result lists into one list of tuples.
    return [
        result_tuple
        for result_list in per_calendar_results
        for result_tuple in result_list
    ]
def _download_calendars(self, input_calendars: List[dict]) -> List[tuple]:
    """Download all calendar pages in parallel, one worker per website base.

    Returns the list of per-calendar result tuples in input order.
    """
    self.logger.info("Downloading calendars...")
    logger.set_up_simple_logger(SIMPLE_LOGGER_PREFIX + __file__,
                                log_file=self.args.log_file,
                                log_level=self.args.log_level)
    started_at = datetime.now()

    calendar_count = len(input_calendars)
    worker_args = [
        (order, calendar_count, started_at, website_base, self.args.dry_run)
        for order, website_base in enumerate(input_calendars, start=1)
    ]

    with multiprocessing.Pool(32) as pool:
        return pool.map(DownloadCalendars._download_calendars_process,
                        worker_args)
def _parse_events(self, input_events: List[tuple]) -> List[tuple]:
    """Parse downloaded events in parallel.

    Each event tuple carries its calendar URL in the fourth slot; the
    matching website base is looked up and handed to the worker alongside
    the event. Returns the workers' result tuples in input order.
    """
    self.logger.info("Parsing events...")
    logger.set_up_simple_logger(SIMPLE_LOGGER_PREFIX + __file__,
                                log_file=self.args.log_file,
                                log_level=self.args.log_level)
    started_at = datetime.now()

    event_count = len(input_events)
    worker_args = []
    for order, event_tuple in enumerate(input_events, start=1):
        _, _, _, calendar_url = event_tuple
        website_base = utils.get_base_by_url(calendar_url)
        worker_args.append(
            (order, event_count, event_tuple, started_at, website_base))

    with multiprocessing.Pool(32) as pool:
        return pool.map(ParseEvents._parse_events_process, worker_args)
def _unify_types(self, input_events: dict, types_mapping: dict) -> List[tuple]:
    """Unify event type labels in parallel using the given mapping.

    Iterates the events dict in insertion order, handing each event record
    plus the shared mapping to a worker. Returns the workers' result tuples.
    """
    self.logger.info("Unifying events' types...")
    logger.set_up_simple_logger(SIMPLE_LOGGER_PREFIX + __file__,
                                log_file=self.args.log_file,
                                log_level=self.args.log_level)

    event_count = len(input_events)
    worker_args = [
        (order, event_count, event, types_mapping)
        for order, event in enumerate(input_events.values(), start=1)
    ]

    with multiprocessing.Pool(32) as pool:
        return pool.map(UnifyTypes._unify_types_process, worker_args)
def _extract_keywords(self, input_events: List[tuple],
                      keywords_dict: dict) -> List[tuple]:
    """Extract keywords for each event in parallel.

    Every worker gets one event plus the shared keyword dictionary.
    Returns the workers' result tuples in input order.
    """
    self.logger.info("Extracting events' keywords...")
    logger.set_up_simple_logger(SIMPLE_LOGGER_PREFIX + __file__,
                                log_file=self.args.log_file,
                                log_level=self.args.log_level)

    event_count = len(input_events)
    worker_args = [
        (order, event_count, event, keywords_dict)
        for order, event in enumerate(input_events, start=1)
    ]

    with multiprocessing.Pool(32) as pool:
        return pool.map(ExtractKeywords._extract_keywords_process, worker_args)
def _geocode_locations(self, input_events: List[tuple],
                       municipalities: List[dict]) -> List[dict]:
    """Geocode each event's location in parallel.

    Workers receive the event, the municipality list, and a per-URL lookup
    of calendars that provide default GPS coordinates. Returns the workers'
    result dicts in input order.
    """
    self.logger.info("Geocoding events' locations...")
    logger.set_up_simple_logger(SIMPLE_LOGGER_PREFIX + __file__,
                                log_file=self.args.log_file,
                                log_level=self.args.log_level)

    # Built once and shared by every worker argument tuple.
    calendars_with_default_gps = utils.get_base_dict_per_url(
        utils.get_base_with_default_gps())

    event_count = len(input_events)
    worker_args = [
        (order, event_count, event, municipalities, calendars_with_default_gps)
        for order, event in enumerate(input_events, start=1)
    ]

    with multiprocessing.Pool(32) as pool:
        return pool.map(GeocodeLocation._geocode_locations_process, worker_args)
def _find_duplicates(self, input_events: dict) -> List[dict]:
    """Find duplicate events, in parallel or for a single requested URL.

    When ``--event-url`` was given, only that event is checked (no pool);
    otherwise every event URL is handed to a worker together with the full
    events dict. Returns the workers' result dicts.
    """
    self.logger.info("Deduplicating events...")
    logger.set_up_simple_logger(SIMPLE_LOGGER_PREFIX + __file__,
                                log_file=self.args.log_file,
                                log_level=self.args.log_level)

    # Single-event mode: check just the requested URL, no pool needed.
    if self.args.event_url is not None:
        return [self._find_duplicates_process(
            (1, 1, self.args.event_url, input_events))]

    event_count = len(input_events)
    worker_args = [
        (order, event_count, event_url, input_events)
        for order, event_url in enumerate(input_events, start=1)
    ]

    with multiprocessing.Pool(32) as pool:
        return pool.map(DeduplicateEvents._find_duplicates_process,
                        worker_args)
def _parse_calendars(self, input_calendars: List[tuple]) -> dict:
    """Parse downloaded calendar pages in parallel.

    Each calendar tuple carries its URL in the second slot; the matching
    website base is looked up and handed to the worker. Every worker
    returns a ``{calendar_id: events_list}`` dict; all partial dicts are
    merged into one mapping which is returned.
    """
    self.logger.info("Parsing calendars...")
    logger.set_up_simple_logger(SIMPLE_LOGGER_PREFIX + __file__,
                                log_file=self.args.log_file,
                                log_level=self.args.log_level)
    started_at = datetime.now()

    calendar_count = len(input_calendars)
    worker_args = []
    for order, calendar_tuple in enumerate(input_calendars, start=1):
        _, calendar_url, _ = calendar_tuple
        website_base = utils.get_base_by_url(calendar_url)
        worker_args.append(
            (order, calendar_count, calendar_tuple, started_at, website_base))

    with multiprocessing.Pool(32) as pool:
        partial_results = pool.map(ParseCalendars._parse_calendars_process,
                                   worker_args)

    # Merge the per-worker {calendar_id: events_list} dicts into one mapping.
    events_to_insert: dict = {}
    for partial in partial_results:
        events_to_insert.update(partial)
    return events_to_insert