def _parse_show(self, show_date, show_detail, show_time):
    """Build a Show from one listing's date and its detail/time elements.

    Args:
        show_date: Date value handed unchanged to the ``date_util`` parsers.
        show_detail: Element searched for performers, resources and
            ``<img>`` tags.
        show_time: Element whose ``text_content()`` is a comma-separated
            line; segments matching ``self.PRICE_OR_AGE`` are dropped
            before the remainder is parsed for times.

    Returns:
        The populated Show.
    """
    show = Show()

    raw_time_txt = show_time.text_content()
    time_txt = ','.join(
        [p for p in raw_time_txt.split(',') if not self.PRICE_OR_AGE.search(p)]
    )

    logger.debug('Show: %s - %s' % (time_txt, raw_time_txt.strip(' \n')))

    show.venue = self.venue()
    show.performers = self._parse_performers(show_detail)

    show.show_time = date_util.parse_show_time(show_date, time_txt)
    show.door_time = date_util.parse_door_time(show_date, time_txt)

    # TODO right now the below parsing doesn't work, so just skip these shows for now
    if not show.show_time and not show.door_time:
        # Fall back to the first thing TIME_RE recognises and treat it as
        # the door time.
        time_match = self.TIME_RE.search(time_txt)

        if time_match:
            show.door_time = date_util.parse_date_and_time(
                show_date, time_match.group('time')
            )

    show.resources.resource_uris = self.resource_extractor.extract_resources(
        show_detail, show_time
    )

    # TODO work could be done here to find larger images (sometimes the img's are enclosed in an anchor tag)
    for img_tag in show_detail.iter(tag='img'):
        src = img_tag.get('src')

        # An <img> without a src attribute yields None; the substring
        # checks below would raise TypeError on it, so move on.
        if src is None:
            continue

        # Skip the images that show the early shows, later shows, and the 5 years logo
        if not ('early' in src or 'later' in src or '5years' in src):
            show.resources.image_url = src
            break

    return show
def _parse_show(self, el):
    """Build a Show from a single event element of the listing page.

    Args:
        el: Element containing an ``.event-details`` node and, optionally,
            an ``.event-image img`` node.

    Returns:
        The populated Show, after ``date_util.adjust_fuzzy_years`` has been
        applied with ``self._parse_started``.
    """
    details = html_util.get_first_element(el, '.event-details')

    # The date is read from the <strong> element's own text; the time
    # parsers are given the full text of the details node.
    date_txt = html_util.get_first_element(details, 'strong').text
    time_txt = details.text_content()

    show = Show()
    show.venue = self.venue()

    name_el = html_util.get_first_element(details, '.event-name')
    performer_names = lang_util.parse_performers(name_el.text_content())
    show.performers = [Performer(name) for name in performer_names]

    show.show_time = date_util.parse_show_time(date_txt, time_txt)
    show.door_time = date_util.parse_door_time(date_txt, time_txt)

    resources = show.resources
    resources.resource_uris = self.resource_extractor.extract_resources(details)

    image = html_util.get_first_element(el, ".event-image img", optional=True)
    if image is not None:
        resources.image_url = image.get('src')

    date_util.adjust_fuzzy_years(show, self._parse_started)

    return show
def _parse_show(self, link):
    """Fetch an event page and build a Show from its ``eventDetail`` element.

    Args:
        link: URL of the event page; also used as the show's merge key and
            show URL.

    Returns:
        The populated Show.
    """
    event_doc = html_util.fetch_and_parse(link)

    event_detail = event_doc.get_element_by_id("eventDetail")

    title_txt = []
    found_h_el = False

    # Start parsing when we find the first h* element
    # Stop parsing if we found an h* element, but then encounter anything else
    for el in event_detail.getchildren():
        if el.tag in ("h1", "h2"):
            found_h_el = True

            heading_txt = el.text_content()
            if heading_txt:
                title_txt.append(heading_txt)
        elif found_h_el:
            break

    # Sample of the markup being parsed:
    #
    # <span id="timeDetail">
    #   Apr 24, 2010<br />
    #   upstairs<br />
    #   Doors @ 7 PM<br/>
    #   $15.00 Adv. / $20 at the Door<br />
    #   <a href="http://www.deadcellentertainment.tix.com/Schedule.asp?OrganizationNumber=2690" target="_blank">
    #     <img src="/images/buyticket.png" alt="Purchase Tickets" />
    #   </a>
    # </span>
    time_el = event_detail.get_element_by_id("timeDetail")

    # .text is only the text before the first child element (the date line
    # in the sample above); text_content() is the whole span.
    date_txt = time_el.text
    time_txt = time_el.text_content()

    show = Show()
    show.merge_key = link
    show.venue = self.venue()
    show.performers = [
        Performer(p) for p in lang_util.parse_performers("/".join(title_txt))
    ]
    # Each field is filled by its matching parser.
    show.show_time = date_util.parse_show_time(date_txt, time_txt)
    show.door_time = date_util.parse_door_time(date_txt, time_txt)
    show.resources.show_url = link
    show.resources.resource_uris = self.resource_extractor.extract_resources(event_detail)

    # NOTE(review): whether get_first_element can return None without
    # optional=True is not visible from here -- confirm against html_util.
    img = html_util.get_first_element(event_detail, "img")

    if img is not None:
        show.resources.image_url = img.get("src")

    return show
def _parse_show(self, link):
    """Fetch an event page and build a Show from its ``#details`` section.

    Args:
        link: URL of the event page; also used as the show's merge key and
            show URL.

    Returns:
        The populated Show.
    """
    page = html_util.fetch_and_parse(link)

    event = html_util.get_first_element(page, '#tfly-center-column-wide')
    details = html_util.get_first_element(event, '#details')

    # Sample of the markup being parsed:
    #
    # <div class="info">
    #   Sat, May 22, 2010<br />
    #   Doors: 6:00 PM / Show: 7:00 PM <br />
    #   $5.00<br />
    # </div>
    info = html_util.get_first_element(details, ".info")

    # The date is the text before the first child; the time line is the
    # tail text that follows that first child.
    date_txt = info.text
    first_child = info.getchildren()[0]
    time_txt = first_child.tail

    img = html_util.get_first_element(details, "img")

    # Collect performers from every heading, one heading level at a time.
    performers = [
        performer
        for heading_tag in ('h1', 'h2', 'h3', 'h4')
        for heading in details.iter(tag=heading_tag)
        for performer in self._parse_performers(heading)
    ]

    show = Show()
    show.merge_key = link
    show.venue = self.venue()
    show.performers = performers
    show.show_time = date_util.parse_show_time(date_txt, time_txt)
    show.door_time = date_util.parse_door_time(date_txt, time_txt)

    resources = show.resources
    resources.show_url = link
    resources.resource_uris = self.resource_extractor.extract_resources(event)

    if img is not None:
        resources.image_url = img.get('src')

    return show
def _parse_show(self, link, show_section):
    """Fetch a show page and build a Show from its ``.event-detail`` node.

    Args:
        link: URL of the show page; also used as the show's merge key and
            show URL.
        show_section: Not used by this method.

    Returns:
        The populated Show. ``soldout`` is True when a ``.sold-out``
        element is found inside the detail node.
    """
    page = html_util.fetch_and_parse(link)
    detail = html_util.get_first_element(page, "#content .event-detail")

    date_txt = html_util.get_first_element(detail, ".dates").text_content()
    time_txt = html_util.get_first_element(detail, ".times").text_content()

    sold_out_el = html_util.get_first_element(detail, '.sold-out', optional=True)
    first_img = html_util.get_first_element(detail, 'img', optional=True)

    # The image we want is generally the first one, but if the layout changes this may break
    image_url = first_img.get('src') if first_img is not None else None

    # Headings with no text content are ignored.
    performers = []
    for heading_tag in ('h1', 'h2', 'h3'):
        for heading in detail.iter(tag=heading_tag):
            if not heading.text_content():
                continue
            performers += self._parse_performers(heading)

    show = Show()
    show.merge_key = link
    show.venue = self.venue()
    show.performers = performers
    show.door_time = date_util.parse_door_time(date_txt, time_txt)
    show.show_time = date_util.parse_show_time(date_txt, time_txt)
    show.soldout = sold_out_el is not None

    resources = show.resources
    resources.show_url = link
    resources.image_url = image_url
    resources.resource_uris = self.resource_extractor.extract_resources(detail)

    return show
def _parse_show(self, link):
    """Fetch an event page and build a Show from its event-specific node.

    The event id is taken from the ``event_id`` group of ``self.IS_EVENT``
    matched against ``link`` and used to select the
    ``.tfly-event-id-<id>`` element.

    Args:
        link: URL of the event page; also used as the show's merge key and
            show URL.

    Returns:
        The populated Show, with headliners listed before supporting acts.
    """
    page = html_util.fetch_and_parse(link)

    link_match = self.IS_EVENT.match(link)
    event_id = int(link_match.group("event_id"))

    detail = html_util.get_first_element(page, ".tfly-event-id-%d" % event_id)

    # Dates and times are looked up on the whole document, not the
    # event-specific node.
    date_txt = html_util.get_first_element(page, ".dates").text_content()
    time_txt = html_util.get_first_element(page, ".times").text_content()

    img = html_util.get_first_element(detail, "img")

    # Each .headliners element is one performer as-is; each .supports
    # element is split into names by lang_util.parse_performers.
    headliners = [
        Performer(node.text_content(), headliner=True)
        for node in html_util.get_elements(detail, ".headliners")
    ]
    supports = [
        Performer(name, headliner=False)
        for node in html_util.get_elements(detail, ".supports")
        for name in lang_util.parse_performers(node.text_content())
    ]

    show = Show()
    show.merge_key = link
    show.venue = self.venue()
    show.performers = headliners + supports
    show.show_time = date_util.parse_show_time(date_txt, time_txt)
    show.door_time = date_util.parse_door_time(date_txt, time_txt)

    resources = show.resources
    resources.show_url = link
    resources.resource_uris = self.resource_extractor.extract_resources(detail)

    if img is not None:
        resources.image_url = img.get("src")

    return show