def reviewdupes(self): conn = hub.getConnection() dupe_results = conn.queryAll(""" select date, venue_id, count(*) from event where date >= CURRENT_DATE group by venue_id, date having count(*) > 1 """) dupe_groups = [] limit = 5 for date, venue_id, count in dupe_results: possible_dupes = Event.selectBy(date=date, venueID=venue_id) if self.events_likely_dupes(possible_dupes): dupes = [] for dupe in possible_dupes: others = set(possible_dupes) others.remove(dupe) dupes.append((dupe, others)) dupe_groups.append(dupes) if len(dupe_groups) >= limit: break return dict(dupes=dupe_groups, dupe_count=len(dupe_results), artist_list=artist_list)
def import_to_db(self, event): flag_for_review = False venue_name = self.venue_name_fix(event['venue']['name']) try: v = Venue.byNameI(venue_name) except SQLObjectNotFound: v = Venue(name=venue_name, added_by=identity.current.user) flag_for_review = True if event['venue'].has_key('phone'): phone = event['venue']['phone'] if not len(phone) >= 8: phone = "503-" + phone p = validators.PhoneNumber() try: event['venue']['phone'] = p.to_python(phone) except: event['venue']['phone'] = None self._set_optional_fields(v, event['venue'], ("address", "phone", "zip_code", "url", "description")) event_name = self.event_name_fix(event['name']) event_date = event["date"] event_time = event.get("time") if event_time: event_time = event_time.lower() # check same venue, date, time db_events = Event.selectBy(date=event_date, time=event_time, venue=v) if db_events.count(): e = db_events[0] new_event = False else: # no time? still could be skippable, if event name is the same db_events = Event.selectBy(date=event_date, name=event_name, venue=v) if db_events.count(): e = db_events[0] new_event = False else: e = Event(venue=v, name=event_name, date=event_date, time=event_time, added_by=identity.current.user) new_event = True try: s = Source.byName(event["source"]) except SQLObjectNotFound: s = Source(name=event["source"]) flag_for_review = True e.addSource(s) self._set_optional_fields(e, event, ("cost", "ages", "url", "description", "ticket_url")) for artist in self.artists_clean(event['artists']): try: a = Artist.byNameI(artist) # artist was a similar, but now it's listed as having an event # so approve it if not a.approved: a.approved = datetime.now() except SQLObjectNotFound: a = Artist(name=artist, added_by=identity.current.user) flag_for_review = True if not e.id in [existing.id for existing in a.events]: a.addEvent(e) # flag all events from certain unreliable sources if event["source"] in ("ticketswest", "upcoming", "lastfm"): flag_for_review = True if new_event and not flag_for_review: e.approved = datetime.now() elif flag_for_review: e.approved = None return (new_event, flag_for_review)