def import_day(self, day, month, year):
    """Import all events listed on the page for one calendar day.

    The page is fetched with ``self._get_html(self._url(day, month, year))``.
    One ``Event`` is built per title node and saved unless
    ``self.is_duplicate_event`` reports it as a duplicate.

    Nothing is caught here: a missing node (``IndexError``), a time that
    does not parse (``ValueError``) or an address without a five-digit
    postal code (``AttributeError`` from the unguarded regex match)
    propagates to the caller and stops the import of the remaining entries.
    NOTE(review): consider guarding the regex match.
    """
    # XPath queries relative to a title node. Details are read from the
    # second table inside the cell that contains the title's own table.
    title_query = "//td[@class='size2']/font[@color='#CD076A']/b"
    time_query = "./ancestor::table/parent::td/table[2]/tr/td[2]/font[1]/text()"
    venue_query = "./ancestor::table/parent::td/table[2]/tr[2]/td[2]/font/descendant::text()"
    address_query = "./ancestor::table/parent::td/table[2]/tr[2]/td[2]/text()[preceding-sibling::br]"

    page = fromstring(self._get_html(self._url(day, month, year)))
    for title in page.xpath(title_query):
        event = Event()
        event.name = title.text_content()

        # The page only carries the time of day; the date parts come from
        # the method arguments.
        start_text = title.xpath(time_query)[0]
        event.date_start = datetime.strptime(start_text, "%H:%M Uhr").replace(
            year=year, day=day, month=month)

        venue_name = title.xpath(venue_query)[0]
        address_line = title.xpath(address_query)[0].strip()
        # The city is whatever follows the five-digit postal code.
        city = re.search(".*[0-9]{5} (.*)$", address_line).group(1)

        geodata = GooglePlacesLookup.find_geo_data_for_venue(venue_name, city)
        event.location = Location.objects.get_or_create(
            name=geodata['name'],
            city=city,
            latitude=geodata['lat'],
            longitude=geodata['lon'],
        )[0]

        if self.is_duplicate_event(event):
            continue
        event.save()
def parse_tree(self, tree):
    """Import the events contained in an already parsed listing page.

    Every ``div`` carrying a ``Row`` class directly inside the ``Content``
    div is treated as one event entry. For each entry the name, venue,
    city, date and time are read, the venue is resolved through
    ``GooglePlacesLookup`` and a matching ``Location`` is fetched or
    created. Entries that ``self.is_duplicate_event`` does not report as
    duplicates are saved and tagged with the category from the fourth
    column.

    Import is best-effort per entry: any exception raised while handling
    an entry is logged together with the entry's markup and the loop moves
    on to the next entry.

    Args:
        tree: parsed document exposing an ``xpath`` method.

    Returns:
        List of the ``Event`` objects that were saved by this call
        (empty when nothing was imported).
    """
    # Template for the two text parts of a column div that are separated
    # by a <br>: "following" selects the text before the break,
    # "preceding" the text after it.
    br_divided_div_text = "./div[@class='%s']/text()[%s-sibling::br]"

    events = []
    for node in tree.xpath("//div[@class='Content']/div[contains(@class,'Row')]"):
        try:
            event = Event()
            event.name = node.xpath("./div[contains(@class, 'first ')]/a/text()")[0].strip()

            loc_name = node.xpath(br_divided_div_text % ("second", "following"))[0].strip()
            loc_city = node.xpath(br_divided_div_text % ("second", "preceding"))[0].strip()
            _logger.info('found event %s at location %s, %s' % (event.name, loc_name, loc_city))

            # The name returned by the lookup replaces the scraped venue name.
            geodata = GooglePlacesLookup.find_geo_data_for_venue(loc_name, loc_city)
            loc_name = geodata['name']
            lat = geodata['lat']
            lon = geodata['lon']
            location, created = Location.objects.get_or_create(
                name=loc_name, city=loc_city, latitude=lat, longitude=lon)
            if created:
                _logger.info("created new location %s" % location.__unicode__())
            event.location = location

            date_text = node.xpath(br_divided_div_text % ("third", "following"))[0].strip()
            time_text = node.xpath(br_divided_div_text % ("third", "preceding"))[0].strip()
            event.date_start = datetime.strptime(date_text + " " + time_text, "%d.%m.%Y %H:%M Uhr")

            if not self.is_duplicate_event(event):
                # Saved before the category is attached -- presumably so the
                # event has a primary key for the relation; confirm against
                # the Event model.
                event.save()
                category_name = node.xpath("./div[@class='fourth']")[0].text_content().strip()
                cat, created = Category.objects.get_or_create(name=category_name)
                event.categories.add(cat)
                cat.save()
                event.save()
                events.append(event)
        except Exception:
            # Deliberately broad: one malformed entry must not abort the
            # import of the remaining entries.
            _logger.exception("error importing node: %s" % etree.tostring(node))
    return events
def __init__(self):
    """Resolve the theatre's location once and keep it on the instance.

    Looks up "Badisches Staatstheater" in Karlsruhe through
    ``GooglePlacesLookup`` and stores the matching ``Location`` (fetched or
    created with the returned name and coordinates) as ``self.LOCATION``.
    """
    city = "Karlsruhe"
    # The query previously read "Badisches Staatsthater" (missing "e"),
    # which relied on the lookup tolerating the typo.
    geodata = GooglePlacesLookup.find_geo_data_for_venue("Badisches Staatstheater", city)
    self.LOCATION = Location.objects.get_or_create(
        name=geodata['name'],
        city=city,
        latitude=geodata['lat'],
        longitude=geodata['lon'],
    )[0]