def parse(self, parsing_context: ParsingContext) -> List[Event]:
    """Convert the Melkweg agenda JSON in *parsing_context* into events.

    The content is decoded as JSON and walked as a sequence of day entries,
    each exposing an ``events`` list. Only entries whose ``type`` equals
    ``'event'`` are turned into ``Event`` objects.

    :param parsing_context: provides the venue and the raw JSON ``content``.
    :return: the parsed events, in the order they appear in the content.
    """
    venue = parsing_context.venue
    source = venue.source_url
    days = json.loads(parsing_context.content)
    amsterdam = pytz.timezone("Europe/Amsterdam")

    results = []
    for day in days:
        # Filter the whole day first, then convert what is left.
        shows = [entry for entry in day['events'] if entry['type'] == 'event']
        for show in shows:
            raw_description = MelkwegParser._make_description(show)
            # 'date' is read as a Unix timestamp and localised to Amsterdam.
            starts_at = datetime.fromtimestamp(int(show['date']), amsterdam)
            name = show['name']
            thumbnail_url = (
                f'https://s3-eu-west-1.amazonaws.com/static.melkweg.nl/uploads/images/'
                f'scaled/agenda_thumbnail/{show["thumbnail"]}'
            )
            detail_url = f'https://www.melkweg.nl/nl/agenda/{show["slug"]}'
            parsed = Event(
                url=detail_url,
                title=name,
                # The description is capped at 1400 characters before sanitising.
                description=ParserUtil.sanitize_text(raw_description[:1400]),
                venue=venue,
                source=source,
                date_published=datetime.now(),
                when=starts_at,
                image_url=thumbnail_url,
            )
            results.append(parsed)

    logging.getLogger(__name__).info('parsed %d events melkweg', len(results))
    return results
def fixture_vera_event() -> Event:
    """Return a dummy Vera ``Event`` that takes place ten days from now.

    ``when`` is timezone-aware (Europe/Amsterdam); ``date_published`` is the
    naive current time.
    """
    amsterdam = pytz.timezone('Europe/Amsterdam')
    lead_time = timedelta(days=10)
    return Event(
        url='http://dummy-vera-event',
        description='Omschrijving',
        title='Vera Event titel',
        source='vera',
        date_published=datetime.now(),
        image_url='http://image-url-vera-event.jpg',
        venue=fixture_vera_venue(),
        when=datetime.now(amsterdam) + lead_time,
    )
def to_event(self, entity_map: Dict) -> Event:
    """Rebuild an ``Event`` from its stored mapping.

    The venue is resolved through ``self.venue_repository`` using the
    mapping's ``venue_id``; every other field is copied over as-is.

    :param entity_map: mapping with the keys ``url``, ``title``,
        ``description``, ``venue_id``, ``source``, ``date_published``,
        ``when`` and ``image_url``.
    :return: the reconstructed event.
    """
    # Keys are read in the same order the Event arguments are listed, so a
    # missing key surfaces before/after the venue lookup exactly as before.
    url = entity_map['url']
    title = entity_map['title']
    description = entity_map['description']
    venue = self.venue_repository.get_venue_for(entity_map['venue_id'])
    source = entity_map['source']
    date_published = entity_map['date_published']
    when = entity_map['when']
    image_url = entity_map['image_url']
    return Event(
        url=url,
        title=title,
        description=description,
        venue=venue,
        source=source,
        date_published=date_published,
        when=when,
        image_url=image_url,
    )
def to_entity(event: Event) -> Dict:
    """Flatten *event* into a plain mapping suitable for storage.

    The venue is reduced to its ``venue_id`` and the result additionally
    carries ``search_terms`` obtained from ``event.generate_search_terms()``.

    :param event: the event to convert.
    :return: a dict with the event's fields plus ``venue_id`` and
        ``search_terms``.
    """
    entity = {
        'url': event.url,
        'title': event.title,
        'description': event.description,
        'venue_id': event.venue.venue_id,
        'source': event.source,
        'date_published': event.date_published,
        'when': event.when,
        'image_url': event.image_url,
    }
    # Derived field, added last so key order matches the stored layout.
    entity['search_terms'] = event.generate_search_terms()
    return entity
def _transform(venue: Venue, tag: Tag) -> Event:
    """Turn one Vera programme element into an ``Event``.

    :param venue: venue the event belongs to; supplies ``source_url`` and
        ``timezone_short``.
    :param tag: parsed HTML element describing a single event.
    :return: the event; ``when`` falls back to ``datetime.min`` when the
        element has no date block and to the current time when the date
        text cannot be parsed.
    """
    source = venue.source_url
    url = tag.find('a', {'class': 'event-link'})['href']

    # Artist name: the heading's own text plus superscript text, or the URL
    # when no heading is present.
    heading = tag.find('h3', {'class': re.compile(r'artist|artist ')})
    if heading is None:
        artist = url
    else:
        own_text = ParserUtil.remove_children_text_from(heading, heading.text)
        with_sup = VeraParser._add_sup_text_from_text(heading, own_text)
        artist = ParserUtil.sanitize_text(with_sup)

    extra = VeraParser._find_extra(tag)

    pretitle = tag.find('h4', {'class': 'pretitle'})
    if pretitle is None:
        extra_title = ''
    else:
        extra_title = f'({ParserUtil.sanitize_text(pretitle.text)})'

    date_tag = tag.find('div', {'class': 'date'})
    if date_tag is None:
        when_date = datetime.min
    else:
        when = ParserUtil.sanitize_text(
            ParserUtil.remove_children_text_from(date_tag, date_tag.text))
        schedule = tag.find('div', {'class': 'schedule'}).text
        # Take the five characters that follow the 'start: ' marker.
        marker_at = schedule.find('start: ')
        when_time = schedule[marker_at + 7:marker_at + 12]
        when_date = dateparser.parse(
            f'{when} {when_time}{venue.timezone_short}', languages=['nl'])

    # The image URL sits inside the style attribute, from 'https' up to the
    # next single quote.
    style = tag.find('div', {'class': 'artist-image'})['style']
    https_at = style.find('https')
    image_url = style[https_at:style.find('\'', https_at + 4)]

    if when_date is None:
        when_date = datetime.now()

    title = f'{artist} {extra_title}'.strip()
    description = \
        f'{artist}{" with support" if extra != "" else ""} {extra}'.strip()
    return Event(
        url=url,
        title=title,
        description=description,
        venue=venue,
        source=source,
        date_published=datetime.now(),
        when=when_date,
        image_url=image_url,
    )
def _transform(venue: Venue, data: Dict) -> Event:
    """Build an ``Event`` from one Paradiso programme record.

    :param venue: venue the event belongs to; supplies ``source_url`` and
        ``timezone_short``.
    :param data: record with ``slug``, ``id``, ``title``, ``subtitle`` and
        ``start_date_time``.
    :return: the event; the title doubles as description when the subtitle
        is empty according to ``ParserUtil.not_empty``.
    """
    source = venue.source_url
    tz_short = venue.timezone_short
    detail_url = \
        f'https://www.paradiso.nl/en/program/{data["slug"]}/{data["id"]}'
    heading = data['title']
    subtitle = data['subtitle']
    if ParserUtil.not_empty(subtitle):
        summary = subtitle
    else:
        summary = heading
    # The venue's short timezone is appended so the parser sees an offset.
    starts_at = dateparser.parse(
        f'{data["start_date_time"]}{tz_short}', languages=['en'])
    return Event(
        url=detail_url,
        title=heading,
        description=summary,
        venue=venue,
        source=source,
        date_published=datetime.now(),
        when=starts_at,
    )
def _transform(venue: Venue, tag: Tag) -> Event:
    """Turn one agenda element into an ``Event``.

    :param venue: venue the event belongs to; supplies ``url``,
        ``source_url`` and ``timezone_short``.
    :param tag: parsed HTML element describing a single agenda entry.
    :return: the event; the title doubles as description when no
        ``span.small`` element is present.
    """
    date_text = tag.find('span', {'class': 'agenda-date'}).text
    date_text = date_text.replace('\n', '').strip()
    # Keep only what precedes the first '/' in the date text.
    day_part = date_text[:date_text.find('/')].strip()
    starts_at = dateparser.parse(
        f'{day_part}{venue.timezone_short}', languages=['nl'])

    title = tag.find('h3', {'class': 'agenda-title'}).text
    small = tag.find('span', {'class': 'small'})
    if small is None:
        description = title
    else:
        description = small.text

    link = tag.find('a', {'class': 'item-link'}).get('href')
    picture = f'{venue.url}/{tag.find("img").get("src")}'
    return Event(
        url=link,
        title=f'{title}',
        description=description,
        venue=venue,
        source=venue.source_url,
        date_published=datetime.now(),
        when=starts_at,
        image_url=picture,
    )
def _transform(venue: Venue, article: Tag) -> Event:
    """Turn one programme ``article`` element into an ``Event``.

    :param venue: venue the event belongs to; supplies ``url`` and
        ``source_url``.
    :param article: parsed HTML element describing a single programme item.
    :return: the event; ``when`` is read from the ``datetime`` attribute of
        the element's ``time`` tag in ISO format.
    """
    source = venue.source_url
    base_url = venue.url
    link = article.a.get('href')
    content = article.find('div', {'class': 'program__content'})
    figure = article.find('figure').img.get('data-src')
    time_tag = article.find('time')

    heading = content.h1
    if heading.find('span') is None:
        content_title = heading.text
    else:
        # Move the span's text to the end, separated by ' - '.
        content_title = heading.text.replace(heading.span.text, '') \
            + ' - ' + heading.span.text

    description = ParserUtil.stripped_text_or_default_if_empty(
        content.p, content_title)
    return Event(
        url=link,
        title=content_title,
        description=description,
        venue=venue,
        image_url=f'{base_url}{figure}',
        source=source,
        date_published=datetime.now(),
        when=datetime.fromisoformat(time_tag.get('datetime')),
    )
def _transform(venue: Venue, data: Dict) -> Event:
    """Build an ``Event`` from one programme record with a split date.

    :param venue: venue the event belongs to; supplies ``source_url`` and
        ``timezone_short``.
    :param data: record with ``link``, ``title``, ``image``, ``subtitle``,
        ``day``, ``month`` and ``year``.
    :return: the event; the time of day is fixed at ``00:00`` because the
        record is only read for its day, month and year.
    """
    source = venue.source_url
    tz_short = venue.timezone_short
    link = data['link']
    heading = data['title']
    picture = data['image']
    subtitle = data['subtitle']
    if ParserUtil.not_empty(subtitle):
        summary = subtitle
    else:
        summary = heading
    midnight = \
        f'{data["day"]} {data["month"]} {data["year"]} 00:00{tz_short}'
    starts_at = dateparser.parse(midnight, languages=['nl'])
    return Event(
        url=link,
        title=heading,
        description=summary,
        venue=venue,
        image_url=picture,
        source=source,
        date_published=datetime.now(),
        when=starts_at,
    )
def _transform(venue: Venue, tag: Tag) -> Event:
    """Turn one agenda link element into an ``Event``.

    :param venue: venue the event belongs to; supplies ``url`` and
        ``timezone_short``.
    :param tag: parsed HTML element whose ``href`` and ``title`` attributes
        describe a single event.
    :return: the event; the description comes from the subtitle block when
        present, otherwise from the details block.
    """
    link = tag.get('href')
    title = tag.get('title')

    subtitle_tag = tag.find('div', {'class': 'subtitle'})
    details_tag = tag.find('div', {'class': 'details'})
    if subtitle_tag is not None:
        description = subtitle_tag.text
    else:
        description = details_tag.text

    day_text = tag.find('div', {'class': 'date'}).text
    details_text = details_tag.text
    # Take the six characters that follow the 'Aanvang: ' marker.
    marker_at = details_text.find('Aanvang: ')
    start_time = details_text[marker_at + 9:marker_at + 15]
    starts_at = dateparser.parse(
        f'{day_text} {start_time}{venue.timezone_short}')

    # The image URL sits inside the style attribute, from 'https' through
    # the '.jpg' extension.
    style = tag.find('div', {'class': 'item-image'}).get('style')
    picture = style[style.find('https'):style.find('.jpg') + 4]

    # NOTE(review): source is venue.url here, while the other parsers in this
    # file pass venue.source_url — confirm whether that is intentional.
    return Event(
        url=link,
        title=f'{title}',
        description=description,
        venue=venue,
        source=venue.url,
        date_published=datetime.now(),
        when=starts_at,
        image_url=picture,
    )
# Entry-point script: when run directly, logs a "waiting" notice and then
# calls Event.listen().
from app.core.event import Event
from app.core.log import logger
# NOTE(review): the star import is presumably kept for its import-time side
# effects (e.g. making the event modules known before listening) — confirm
# before replacing it with explicit imports.
from app.events import *

if __name__ == '__main__':
    # Logged at WARNING level, presumably so it shows under the default
    # log configuration — confirm against app.core.log.
    logger.warning(" [*] Waiting for messages. To exit press CTRL+C")
    Event.listen()
def hello():
    """Send ``'world'`` on the ``'new_queue'`` queue and return ``"world"``."""
    reply = "world"
    Event.send('new_queue', 'world')
    return reply