def handle_import(user, data, processor_class, importer_class, logger=None):
    """
    Puts data through the processor and then uses the specified importer
    to put the data into the database.
    processor_class and importer_class should be references to their classes.
    Returns the log.
    """
    if logger is None:
        logger = APILogger()
    logger.log('notice', 'import',
               'Performing import as user ' + user.username)

    processor = processor_class(logger=logger)
    modules = processor.process(data)

    importer = importer_class(user, logger=logger)
    if modules is not None:
        for module in modules:
            importer.add_module_data(module)

    return logger
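
# Usage sketch (an assumption, not part of the import pipeline itself): a
# Django view could drive a whole POST import with a single call. The view
# function name below is illustrative; only handle_import,
# PostImportProcessor, APIImporter and APILogger come from this module.
def example_import_from_post(request):
    log = handle_import(
        request.user,
        request.POST,
        PostImportProcessor,
        APIImporter,
    )
    # the returned APILogger carries the notice/insert/update/delete/failed
    # entries accumulated during the import; how it is serialised for the
    # response is up to the caller
    return log
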
class APIImporter(object):
    """
    Takes a ModuleData and processes it, updating the database as required.
    Requires a user to verify their permissions.
    """

    def __init__(self, user, logger=None):
        self.user = user
        if logger is not None:
            self.logger = logger
        else:
            self.logger = APILogger()
        self.thing_list = Thing.objects.all()
        self.event_list = Event.objects.all()
        self.event_source_list = EventSourceTag.objects.all()

    def add_module_data(self, module):
        """
        Adds the data in module to the database. Performs a check to ensure
        that the user has the appropriate permission.
        Will not create Things above the module level.
        """
        try:
            module.is_valid()
        except DataValidationException as err:
            self.logger.log(
                'failed', 'module',
                'Module data not valid {0}'.format(err)
            )
            return

        path = module['path']
        # find the parent of the module
        try:
            parent_thing = self.thing_list.get(pathid=Thing.hash(path[:-1]))
        except Thing.DoesNotExist:
            self.logger.log(
                'failed', 'module',
                'The path {0} does not exist'.format(path[:-1])
            )
            return

        # check for permission
        if not parent_thing.can_be_edited_by(self.user.username):
            self.logger.log(
                'denied', 'module',
                'You do not have permission to modify {0}'.format(path[:-1])
            )
            return

        self.process_module_dict(module)

    def process_module_dict(self, module):
        """
        Processes a ModuleData dict and updates the database.
        """
        path = module['path']
        try:
            db_module = self.thing_list.get(
                pathid=Thing.hash(path + module['shortname'])
            )
        except Thing.DoesNotExist:
            db_module = None

        is_deleting_module = module.is_being_deleted()

        if db_module is None:
            if is_deleting_module:
                # wanted to delete it, doesn't exist, nothing to do
                return
            db_module = self.create_module(
                path, module['name'], module['shortname']
            )
            if db_module is None:
                # something went wrong creating the module (no need to report
                # it as the logger should already contain the details)
                return

        # check if we want to delete it
        if is_deleting_module:
            self.delete_module(db_module)
            return

        # create a list of child sources
        module_sources = []
        matching_source_tags = self.event_source_list.filter(
            thing=db_module, annotation='home'
        )
        for tag in matching_source_tags:
            # check it was imported via the api
            if 'importid' in tag.eventsource.metadata:
                module_sources.append(
                    (tag.eventsource, tag.eventsource.metadata['importid'])
                )

        for source in module['seriesList']:
            db_source = self.process_source_dict(
                module_sources, db_module, source
            )
            if db_source is not None:
                module_sources.append(
                    (db_source, db_source.metadata['importid'])
                )

    def process_source_dict(self, module_sources, db_module, source):
        """
        Processes a SeriesData dict and updates the database.
        """
        is_deleting_source = source.is_being_deleted()
        db_source = None
        # check if the source is already in the data
        for existing_source in module_sources:
            if existing_source[1] == source['externalid']:
                db_source = existing_source[0]
                break

        # update/add/delete the source
        if db_source is not None:
            if is_deleting_source:
                self.delete_source(db_source)
                return
            else:
                self.update_source(db_source, source)
        else:
            # doesn't exist
            if is_deleting_source:
                return
            else:
                db_source = self.add_source(db_module, source)

        for event in source['events']:
            self.process_event_dict(db_source, event)

        return db_source

    def process_event_dict(self, db_source, event):
        """
        Processes an EventData dict and updates the database.
        """
        if event.is_being_deleted():
            # delete it if it exists
            self.delete_event(db_source, event)
        else:
            self.add_or_update_event(db_source, event)

    def create_module(self, path, name, shortname):
        """
        Creates a module in the database.
        Will not create Things above module level, and will fail if the path
        is not unique.
        """
        # find the parent (remove end slash)
        try:
            parent = self.thing_list.get(pathid=Thing.hash(path[:-1]))
        except Thing.DoesNotExist:
            self.logger.log(
                'failed', 'module',
                'Could not find path {0}'.format(path[:-1])
            )
            return
        except Thing.MultipleObjectsReturned:
            self.logger.log(
                'failed', 'module',
                'Path {0} was not unique'.format(path[:-1])
            )
            return

        db_module = Thing(
            fullname=name, type='module', parent=parent, name=shortname
        )
        self.logger.log('insert', 'module', name)
        db_module.save()
        return db_module

    def delete_module(self, db_module):
        """
        Deletes a module from the database.
        Also deletes all event sources attached to the module.
        """
        self.logger.log('delete', 'module', db_module.fullname)
        # clear the module of all of its sources (and their events)
        matching_source_tags = self.event_source_list.filter(
            thing=db_module
        ).prefetch_related('eventsource')
        for tag in matching_source_tags:
            db_source = tag.eventsource
            self.delete_source(db_source)
        db_module.delete()

    def update_source(self, db_source, source):
        """
        Verifies that source and db_source differ, and updates db_source if
        they do.
        """
        has_name_data = 'name' in source
        has_lecturer_data = ('lecturer' in source
                             and source['lecturer'] is not None)
        has_location_data = ('location' in source
                             and source['location'] is not None)
        has_db_people = 'people' in db_source.metadata
        has_db_location = 'location' in db_source.metadata

        if has_lecturer_data and has_db_people:
            lecturer_changed = (
                db_source.metadata['people'] != source['lecturer']
            )
        else:
            lecturer_changed = has_lecturer_data and not has_db_people

        if has_location_data and has_db_location:
            location_changed = (
                db_source.metadata['location'] != source['location']
            )
        else:
            location_changed = has_location_data and not has_db_location

        name_changed = has_name_data and (db_source.title != source['name'])

        if name_changed or lecturer_changed or location_changed:
            db_source.title = source['name']
            if has_lecturer_data:
                db_source.metadata['people'] = source['lecturer']
            if has_location_data:
                db_source.metadata['location'] = source['location']
            db_source.save()
            self.logger.log('update', 'source', source['name'])

        return db_source

    def add_source(self, db_module, source):
        """
        Adds an event source to the database.
        Also adds the source tag to connect it to db_module, using the
        annotation 'home'.
        """
        db_source = EventSource(
            title=source['name'], sourcetype='importapi'
        )
        db_source.metadata['importid'] = source['externalid']
        if 'lecturer' in source and source['lecturer'] is not None:
            db_source.metadata['people'] = source['lecturer']
        if 'location' in source and source['location'] is not None:
            db_source.metadata['location'] = source['location']
        self.logger.log('insert', 'source', source['name'])
        db_source.save()

        # add the source tag
        source_tag = EventSourceTag(
            thing=db_module, eventsource=db_source, annotation='home'
        )
        self.logger.log(
            'insert', 'sourcetag', db_module.name + ' > ' + db_source.title
        )
        source_tag.save()
        return db_source

    def delete_source(self, db_source):
        """
        Deletes a source from the database.
        Will also delete all child events that have this as their source.
        For logging clarity, reports the deletion of the source before
        deletion of the children.
        """
        child_events = self.event_list.filter(source=db_source)
        self.logger.log('delete', 'source', db_source.title)
        for event in child_events:
            self.logger.log('delete', 'event', event.title)
            event.delete()
        db_source.delete()

    def delete_event(self, db_source, event):
        """
        Deletes the event specified.
        The event is identified by its uid (with the import- prefix) and its
        source.
        """
        event_uid = event.get_internal_id()
        try:
            matching_event = self.event_list.get(
                uid=event_uid, source=db_source
            )
        except Event.MultipleObjectsReturned:
            self.logger.log(
                'failed', 'event',
                'Multiple events found with uid {0}'.format(event_uid)
            )
            return
        except Event.DoesNotExist:
            return
        self.logger.log('delete', 'event', matching_event.title)
        matching_event.delete()

    def add_or_update_event(self, db_source, event):
        """
        Checks if an event exists (identified by uid with the import- prefix
        and by source) and updates it or adds it as appropriate.
        Will only update if the new data differs from the current data.
        """
        event_uid = event.get_internal_id()
        try:
            db_event = self.event_list.get(uid=event_uid, source=db_source)
        except Event.DoesNotExist:
            db_event = None
        except Event.MultipleObjectsReturned:
            self.logger.log(
                'failed', 'event',
                'Event uid {0} was not unique'.format(event_uid)
            )
            return

        start_time = TIMEZONE.localize(
            datetime.datetime.combine(event['date'], event['start'])
        )
        end_time = TIMEZONE.localize(
            datetime.datetime.combine(event['date'], event['end'])
        )

        # check for existence
        if db_event is None:
            # add a new event
            db_event = Event(
                start=start_time,
                end=end_time,
                title=event['name'],
                location=event['location'],
                uid=event_uid,
                source=db_source,
                status=0
            )
            db_event.metadata['people'] = event['lecturer']
            db_event.metadata['type'] = event['type']
            self.logger.log('insert', 'event', event['name'])
            db_event.save()
        else:
            # check if an update is required
            if ('people' not in db_event.metadata
                    or 'type' not in db_event.metadata
                    or db_event.start != start_time
                    or db_event.end != end_time
                    or db_event.title != event['name']
                    or db_event.location != event['location']
                    or db_event.uid != event_uid
                    or db_event.source != db_source
                    or db_event.status != 0
                    or db_event.metadata['people'] != event['lecturer']
                    or db_event.metadata['type'] != event['type']):
                # update it
                db_event.start = start_time
                db_event.end = end_time
                db_event.title = event['name']
                db_event.location = event['location']
                db_event.uid = event_uid
                db_event.source = db_source
                db_event.status = 0
                db_event.metadata['people'] = event['lecturer']
                db_event.metadata['type'] = event['type']
                self.logger.log('update', 'event', event['name'])
                db_event.save()

        return db_event
class PostImportProcessor(object):
    """
    Processes a POST request into ModuleData.
    """

    def __init__(self, logger=None):
        if logger is not None:
            self.logger = logger
        else:
            self.logger = APILogger()

    def process(self, post_data):
        """
        Reads POST data. The POST data accepts the following variables:

        REQ tripos          The identifier of the tripos
        REQ part            The identifier of the part
        OPT subject         The identifier of the subject (optional)
        REQ modulename      The display name of the module (used as the
                            identifier)
        REQ seriesid        The external id of the series to import
        REQ seriesname      The display name of the series
        OPT delete-series   If present, the series will be deleted
                            (if delete-series is present, none of the
                            following are used)
        REQ uniqueid        The external id of the event to modify
        OPT delete-event    If present, the event will be deleted
                            (if delete-event is present, none of the
                            following are used)
        REQ name            The display name of the event
        REQ date            The date of the event, in %Y-%m-%d format
                            (eg 2013-08-27)
        REQ start           The start time of the event, in %H:%M:%S format
                            (eg "11:00:00", "22:00:00")
        REQ end             The end time of the event, in %H:%M:%S format
        REQ location        The location of the event
        REQ type            The type of the event (which should be one of:
                            field trip, lecture, class, seminar, practical
                            but this is not enforced)
        REQ lecturer        A semi-colon separated list of people involved
                            (eg "Mr. Smith;Prof. Bloggs;Fred Smith")
                            Whitespace is removed from each end, so
                            "Mr. Smith; Prof. Bloggs" is also acceptable
        """
        try:
            module_tripos = post_data['tripos']
            module_part = post_data['part']
            if 'subject' in post_data:
                module_subject = post_data['subject']
            else:
                module_subject = None
            module_name = post_data['modulename']
            series_id = post_data['seriesid']
            series_name = post_data['seriesname']
            delete_series = 'delete-series' in post_data

            # follow the structure of the XML import object
            module = ModuleData(module_name)
            module['path'] = build_path_string(module_tripos, module_part,
                                               module_subject)
            module['seriesList'] = [SeriesData(series_id)]  # list of one series
            module['seriesList'][0]['name'] = series_name

            if delete_series:
                module['seriesList'][0]['delete'] = True
            else:
                event_id = post_data['uniqueid']
                is_deleting_event = 'delete-event' in post_data
                event = EventData(event_id)
                if is_deleting_event:
                    event['delete'] = True
                else:
                    event['name'] = post_data['name']
                    event['date'] = datetime.datetime.strptime(
                        post_data['date'], "%Y-%m-%d").date()
                    event['start'] = datetime.datetime.strptime(
                        post_data['start'], "%H:%M:%S").time()
                    event['end'] = datetime.datetime.strptime(
                        post_data['end'], "%H:%M:%S").time()
                    event['location'] = post_data['location']
                    event['type'] = post_data['type']
                    event['lecturer'] = [
                        i.strip() for i in post_data['lecturer'].split(';')
                    ]
                module['seriesList'][0]['events'] = [event]  # list of one
        except KeyError as err:
            # some data was missing
            self.logger.log('failed', 'post', 'Key Error {0}'.format(err))
            return
        except ValueError as err:
            # the time strings were not formatted correctly
            self.logger.log('failed', 'post', 'Value Error {0}'.format(err))
            return

        return [module]
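
# Sketch of the POST payload described in the docstring above, fed straight
# into PostImportProcessor.process. A plain dict stands in for Django's
# QueryDict here, which is an assumption; the keys and value formats follow
# the REQ/OPT list documented on process(), and all values are placeholders.
def example_post_payload():
    post_data = {
        'tripos': 'exampletripos',
        'part': 'examplepart',
        'modulename': 'Example Module',
        'seriesid': 'external-series-id',
        'seriesname': 'Example Series',
        'uniqueid': 'external-event-id',
        'name': 'Lecture 1',
        'date': '2013-08-27',
        'start': '11:00:00',
        'end': '12:00:00',
        'location': 'Lecture Theatre A',
        'type': 'lecture',
        'lecturer': 'Mr. Smith; Prof. Bloggs',
    }
    processor = PostImportProcessor()
    # returns a single-element list of ModuleData, or None on bad input
    return processor.process(post_data)
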
class XMLImportProcessor(object):
    """
    Processes XML files into a list of ModuleData.
    """

    def __init__(self, logger=None):
        if logger is not None:
            self.logger = logger
        else:
            self.logger = APILogger()
        self.parser = XML_PARSER

    def process(self, data):
        """
        Parses the xml in the data string and constructs an array of dicts
        representing the data.
        The XML file must match self.schema.
        """
        try:
            # defusedxml doesn't seem to use the parser, so
            # parse it first with defused to check for risks,
            # then parse it with lxml to check for schema
            # compliance
            XMLFromStringSecurity(data, self.parser)
            xml = XMLFromString(data, self.parser)
        except IOError as err:
            self.logger.log('failed', 'xml',
                            'Unable to read XML file: {0}'.format(err))
            return
        except (etree.XMLSyntaxError, DefusedXmlException, ParseError) as err:
            self.logger.log('failed', 'xml',
                            'XML file was not valid: {0}'.format(err))
            return

        modules = []
        # modules
        for xml_module in xml.findall('module'):
            module = self.process_xml_module_node_to_dict(xml_module)
            if module is None:
                return
            modules.append(module)
        return modules

    def process_xml_module_node_to_dict(self, xml_module):
        """
        Creates a ModuleData from a module node.
        """
        name = xml_module.find('name').text
        if name is None:
            name = ""
        module = ModuleData(name)

        xml_path_node = xml_module.find('path')
        tripos = xml_path_node.find('tripos').text
        part = xml_path_node.find('part').text
        if xml_path_node.find('subject') is not None:
            subject = xml_path_node.find('subject').text
        else:
            subject = None
        module['path'] = build_path_string(tripos, part, subject)

        series_list = []

        # check for delete node
        if xml_module.find('delete') is not None:
            # mark this module for removal and skip to the next
            module['delete'] = True
            return module

        # series
        for xml_series in xml_module.findall('series'):
            series = self.process_xml_series_node_to_dict(xml_series)
            if series is None:
                # series wasn't created properly
                return
            series_list.append(series)

        module['seriesList'] = series_list
        return module

    def process_xml_series_node_to_dict(self, xml_series):
        """
        Creates a SeriesData from a series node.
        """
        series = SeriesData(xml_series.find('uniqueid').text)
        series['name'] = xml_series.find('name').text

        # check for delete node
        if xml_series.find('delete') is not None:
            # mark this series for removal and skip to the next
            series['delete'] = True
            return series

        events = []

        # attempts to give a general location/lecturer group to the whole
        # series; will only assign anything if everything is the same
        locations = set()
        lecturer_groups = set()

        # events
        for xml_event in xml_series.findall('event'):
            event = self.process_xml_event_node_to_dict(xml_event)
            if event is None:
                # event wasn't created properly
                return
            if not event.is_being_deleted():
                locations.add(event['location'])
                lecturer_groups.add(';'.join(event['lecturer']))
            events.append(event)

        # check to see if the events all agreed on lecturer or location
        if len(lecturer_groups) == 1:
            series['lecturer'] = lecturer_groups.pop().split(';')
        if len(locations) == 1:
            series['location'] = locations.pop()

        series['events'] = events
        return series

    def process_xml_event_node_to_dict(self, xml_event):
        """
        Creates an EventData from an event node.
        """
        # check for delete node
        if xml_event.find('delete') is not None:
            # mark this event for removal and skip to the next
            event = EventData(xml_event.find('uniqueid').text, delete=True)
            return event

        # required info
        start = datetime.datetime.strptime(
            xml_event.find('start').text, "%H:%M:%S")
        date = datetime.datetime.strptime(
            xml_event.find('date').text, "%Y-%m-%d")
        event = EventData(xml_event.find('uniqueid').text,
                          name=xml_event.find('name').text,
                          date=date.date(),
                          start=start.time(),
                          type=xml_event.find('type').text)

        # check the event doesn't have id '-', as this is the
        # exporter's default id and won't be unique
        if event['externalid'] == XML_EXPORT_FAKE_ID:
            self.logger.log('failed', 'xml',
                            'Event had id {0}'.format(XML_EXPORT_FAKE_ID))
            return

        # event either has a duration or an end time
        if xml_event.find('duration') is None:
            event['end'] = datetime.datetime.strptime(
                xml_event.find('end').text, "%H:%M:%S").time()
            if event['end'] < event['start']:
                self.logger.log('failed', 'xml',
                                'Event ends before it begins')
                return
        else:
            duration = datetime.datetime.strptime(
                xml_event.find('duration').text, "%H:%M:%S")
            duration_delta = datetime.timedelta(hours=duration.hour,
                                                minutes=duration.minute,
                                                seconds=duration.second)
            end = start + duration_delta
            if end.day != start.day:
                self.logger.log(
                    'failed', 'xml',
                    'Event \'' + event['name'] + '\' is overnight!')
                return
            event['end'] = end.time()

        # event can have a location
        if (xml_event.find('location') is not None
                and xml_event.find('location').text is not None):
            event['location'] = xml_event.find('location').text
        else:
            event['location'] = ''

        # event can have a set of lecturers
        event['lecturer'] = [
            x.text for x in xml_event.findall('lecturer')
            if x.text is not None
        ]

        return event
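
# Minimal XML sketch using the node names this processor reads (module,
# path/tripos/part, series, and event with uniqueid/name/date/start/end,
# optional duration, location and lecturer). The root element name and the
# placeholder values are assumptions; whether this satisfies the full schema
# referenced by self.schema is not guaranteed.
def example_xml_import(user):
    example_xml = """\
<moduleList>
  <module>
    <name>Example Module</name>
    <path>
      <tripos>exampletripos</tripos>
      <part>examplepart</part>
    </path>
    <series>
      <uniqueid>external-series-id</uniqueid>
      <name>Example Series</name>
      <event>
        <uniqueid>external-event-id</uniqueid>
        <name>Lecture 1</name>
        <date>2013-08-27</date>
        <start>11:00:00</start>
        <end>12:00:00</end>
        <location>Lecture Theatre A</location>
        <lecturer>Mr. Smith</lecturer>
        <lecturer>Prof. Bloggs</lecturer>
        <type>lecture</type>
      </event>
    </series>
  </module>
</moduleList>
"""
    # parse the XML into ModuleData and hand it to the importer; returns the
    # populated APILogger, the same object handle_import always returns
    return handle_import(user, example_xml, XMLImportProcessor, APIImporter)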