Exemplo n.º 1
0
    def _download_events(self, input_events: List[tuple]) -> List[tuple]:
        """Download events in parallel, one pool task per calendar URL.

        Events are grouped by the calendar URL held in the last slot of each
        3-tuple; every group becomes one task for
        ``DownloadEvents._download_events_process``.

        :param input_events: 3-tuples whose third item is the calendar URL
            (the first two items are not inspected here).
        :return: the per-task result lists concatenated into one flat list.
        """
        self.logger.info("Downloading events...")

        # The simple logger is set up before the pool is created.
        # NOTE(review): presumably so the worker processes can log -- confirm.
        logger.set_up_simple_logger(
            SIMPLE_LOGGER_PREFIX + __file__,
            log_file=self.args.log_file,
            log_level=self.args.log_level,
        )
        # A single timestamp is shared by every task of this run.
        timestamp = datetime.now()

        grouped = {}
        for event in input_events:
            _, _, calendar_url = event
            grouped.setdefault(calendar_url, []).append(event)

        calendar_count = len(grouped)
        # Task layout: (1-based position, number of calendars,
        # events of that calendar, timestamp, self.args.dry_run).
        tasks = [
            (position, calendar_count, calendar_events, timestamp,
             self.args.dry_run)
            for position, calendar_events in enumerate(grouped.values(),
                                                       start=1)
        ]

        with multiprocessing.Pool(32) as pool:
            per_calendar_results = pool.map(
                DownloadEvents._download_events_process, tasks)

        flattened = []
        for chunk in per_calendar_results:
            flattened.extend(chunk)
        return flattened
Exemplo n.º 2
0
    def _download_calendars(self, input_calendars: List[dict]) -> List[tuple]:
        """Download every calendar in parallel, one pool task per entry.

        :param input_calendars: entries handed one-by-one to
            ``DownloadCalendars._download_calendars_process``.
        :return: the list produced by ``Pool.map`` over the built tasks.
        """
        self.logger.info("Downloading calendars...")

        # The simple logger is set up before the pool is created.
        # NOTE(review): presumably so the worker processes can log -- confirm.
        logger.set_up_simple_logger(
            SIMPLE_LOGGER_PREFIX + __file__,
            log_file=self.args.log_file,
            log_level=self.args.log_level,
        )
        # A single timestamp is shared by every task of this run.
        timestamp = datetime.now()

        total = len(input_calendars)
        # Task layout: (1-based position, total, timestamp, calendar entry,
        # self.args.dry_run).
        tasks = [
            (position, total, timestamp, website_base, self.args.dry_run)
            for position, website_base in enumerate(input_calendars, start=1)
        ]

        with multiprocessing.Pool(32) as pool:
            downloaded = pool.map(
                DownloadCalendars._download_calendars_process, tasks)
        return downloaded
Exemplo n.º 3
0
    def _parse_events(self, input_events: List[tuple]) -> List[tuple]:
        """Parse events in parallel, one pool task per event tuple.

        :param input_events: 4-tuples whose last item is the calendar URL;
            the URL is passed to ``utils.get_base_by_url`` and the result is
            attached to the task.
        :return: the list produced by ``Pool.map`` over the built tasks.
        """
        self.logger.info("Parsing events...")

        # The simple logger is set up before the pool is created.
        # NOTE(review): presumably so the worker processes can log -- confirm.
        logger.set_up_simple_logger(
            SIMPLE_LOGGER_PREFIX + __file__,
            log_file=self.args.log_file,
            log_level=self.args.log_level,
        )
        # A single timestamp is shared by every task of this run.
        timestamp = datetime.now()

        total = len(input_events)
        tasks = []
        for position, event_tuple in enumerate(input_events, start=1):
            # Full unpacking is kept on purpose: it fails loudly when a
            # tuple does not have exactly four items.
            _, _, _, calendar_url = event_tuple
            base = utils.get_base_by_url(calendar_url)
            # Task layout: (1-based position, total, event tuple, timestamp,
            # result of utils.get_base_by_url).
            tasks.append((position, total, event_tuple, timestamp, base))

        with multiprocessing.Pool(32) as pool:
            parsed = pool.map(ParseEvents._parse_events_process, tasks)
        return parsed
Exemplo n.º 4
0
    def _unify_types(self, input_events: dict,
                     types_mapping: dict) -> List[tuple]:
        """Run ``UnifyTypes._unify_types_process`` over every event in parallel.

        :param input_events: mapping of event id to event data; only the
            values are forwarded to the workers.
        :param types_mapping: passed unchanged to every task.
        :return: the list produced by ``Pool.map`` over the built tasks.
        """
        self.logger.info("Unifying events' types...")

        # The simple logger is set up before the pool is created.
        # NOTE(review): presumably so the worker processes can log -- confirm.
        logger.set_up_simple_logger(
            SIMPLE_LOGGER_PREFIX + __file__,
            log_file=self.args.log_file,
            log_level=self.args.log_level,
        )

        total = len(input_events)
        # Task layout: (1-based position, total, event data, types mapping).
        tasks = [
            (position, total, input_events[event_id], types_mapping)
            for position, event_id in enumerate(input_events, start=1)
        ]

        with multiprocessing.Pool(32) as pool:
            unified = pool.map(UnifyTypes._unify_types_process, tasks)
        return unified
    def _extract_keywords(self, input_events: List[tuple],
                          keywords_dict: dict) -> List[tuple]:
        """Run ``ExtractKeywords._extract_keywords_process`` per event in parallel.

        :param input_events: events forwarded one-by-one to the workers.
        :param keywords_dict: passed unchanged to every task.
        :return: the list produced by ``Pool.map`` over the built tasks.
        """
        self.logger.info("Extracting events' keywords...")

        # The simple logger is set up before the pool is created.
        # NOTE(review): presumably so the worker processes can log -- confirm.
        logger.set_up_simple_logger(
            SIMPLE_LOGGER_PREFIX + __file__,
            log_file=self.args.log_file,
            log_level=self.args.log_level,
        )

        total = len(input_events)
        # Task layout: (1-based position, total, event, keywords dict).
        tasks = [
            (position, total, event, keywords_dict)
            for position, event in enumerate(input_events, start=1)
        ]

        with multiprocessing.Pool(32) as pool:
            extracted = pool.map(
                ExtractKeywords._extract_keywords_process, tasks)
        return extracted
    def _geocode_locations(self, input_events: List[tuple],
                           municipalities: List[dict]) -> List[dict]:
        """Run ``GeocodeLocation._geocode_locations_process`` per event in parallel.

        :param input_events: events forwarded one-by-one to the workers.
        :param municipalities: passed unchanged to every task.
        :return: the list produced by ``Pool.map`` over the built tasks.
        """
        self.logger.info("Geocoding events' locations...")

        # The simple logger is set up before the pool is created.
        # NOTE(review): presumably so the worker processes can log -- confirm.
        logger.set_up_simple_logger(
            SIMPLE_LOGGER_PREFIX + __file__,
            log_file=self.args.log_file,
            log_level=self.args.log_level,
        )

        # Computed once up front and shared by every task.
        bases_with_gps = utils.get_base_with_default_gps()
        default_gps_by_url = utils.get_base_dict_per_url(bases_with_gps)

        total = len(input_events)
        # Task layout: (1-based position, total, event, municipalities,
        # result of utils.get_base_dict_per_url).
        tasks = [
            (position, total, event, municipalities, default_gps_by_url)
            for position, event in enumerate(input_events, start=1)
        ]

        with multiprocessing.Pool(32) as pool:
            geocoded = pool.map(
                GeocodeLocation._geocode_locations_process, tasks)
        return geocoded
Exemplo n.º 7
0
    def _find_duplicates(self, input_events: dict) -> List[dict]:
        """Run the duplicate search for one event or for all of them.

        When ``self.args.event_url`` is set, only that URL is processed,
        in-process, and a one-element list is returned. Otherwise one pool
        task is created per key of ``input_events``.

        :param input_events: mapping keyed by event URL; the whole mapping is
            also handed to every task.
        :return: list with one ``_find_duplicates_process`` result per
            processed URL.
        """
        self.logger.info("Deduplicating events...")

        logger.set_up_simple_logger(
            SIMPLE_LOGGER_PREFIX + __file__,
            log_file=self.args.log_file,
            log_level=self.args.log_level,
        )

        # Single-event mode: no pool, positional counters fixed at 1 of 1.
        if self.args.event_url is not None:
            single_task = (1, 1, self.args.event_url, input_events)
            return [self._find_duplicates_process(single_task)]

        total = len(input_events)
        # Task layout: (1-based position, total, event URL, all events).
        tasks = [
            (position, total, event_url, input_events)
            for position, event_url in enumerate(input_events, start=1)
        ]

        with multiprocessing.Pool(32) as pool:
            duplicates = pool.map(
                DeduplicateEvents._find_duplicates_process, tasks)
        return duplicates
Exemplo n.º 8
0
    def _parse_calendars(self, input_calendars: List[tuple]) -> dict:
        """Parse calendars in parallel and merge the per-calendar results.

        :param input_calendars: 3-tuples whose middle item is the calendar
            URL; the URL is passed to ``utils.get_base_by_url`` and the result
            is attached to the task.
        :return: one dict combining the mappings returned by every
            ``ParseCalendars._parse_calendars_process`` call; when a key
            repeats, the value from the later task wins.
        """
        self.logger.info("Parsing calendars...")

        # The simple logger is set up before the pool is created.
        # NOTE(review): presumably so the worker processes can log -- confirm.
        logger.set_up_simple_logger(
            SIMPLE_LOGGER_PREFIX + __file__,
            log_file=self.args.log_file,
            log_level=self.args.log_level,
        )
        # A single timestamp is shared by every task of this run.
        timestamp = datetime.now()

        total = len(input_calendars)
        tasks = []
        for position, calendar_tuple in enumerate(input_calendars, start=1):
            # Full unpacking is kept on purpose: it fails loudly when a
            # tuple does not have exactly three items.
            _, calendar_url, _ = calendar_tuple
            base = utils.get_base_by_url(calendar_url)
            # Task layout: (1-based position, total, calendar tuple,
            # timestamp, result of utils.get_base_by_url).
            tasks.append((position, total, calendar_tuple, timestamp, base))

        with multiprocessing.Pool(32) as pool:
            per_calendar = pool.map(
                ParseCalendars._parse_calendars_process, tasks)

        merged = {}
        for mapping in per_calendar:
            for calendar_id, events_list in mapping.items():
                merged[calendar_id] = events_list
        return merged