示例#1
0
def handle_import(user, data, processor_class, importer_class, logger=None):
    """
    Puts data through the processor and then uses the specified
    importer to put the data into the database

    processor_class and importer_class should be references to
    their classes (not instances). The processor is constructed with
    the logger and its process() method is called with data; the
    importer is constructed with user and the logger, and every module
    returned by the processor is handed to its add_module_data() method.

    If logger is None a new APILogger is created.

    Returns the log
    """

    # identity check rather than ==, so that a logger object which
    # overrides __eq__ can never be mistaken for a missing logger
    if logger is None:
        logger = APILogger()

    logger.log('notice', 'import',
               'Performing import as user ' + user.username)

    processor = processor_class(logger=logger)
    modules = processor.process(data)

    importer = importer_class(user, logger=logger)

    # a processor hands back None when it could not process the data;
    # in that case there is nothing to import
    if modules is not None:
        for module in modules:
            importer.add_module_data(module)

    return logger
示例#2
0
class APIImporter(object):
    """
    Takes a ModuleData and processes it, updating the database
    as required. Requires a user to verify their permissions
    """

    def __init__(self, user, logger=None):
        """
        Stores the user whose permissions are checked and the logger
        (a new APILogger is created when none is supplied), and builds
        the querysets that the other methods filter on.
        """
        self.user = user
        if logger is not None:
            self.logger = logger
        else:
            self.logger = APILogger()

        self.thing_list = Thing.objects.all()
        self.event_list = Event.objects.all()
        self.event_source_list = EventSourceTag.objects.all()

    def add_module_data(self, module):
        """
        Adds the data in module to the database. Performs
        a check to ensure that user has the appropriate permission.
        Will not create Things above the module level.

        Every failure (invalid data, missing or ambiguous parent path,
        missing permission) is written to the logger and the method
        returns without touching the database.
        """
        try:
            module.is_valid()
        except DataValidationException as err:
            self.logger.log(
                'failed',
                'module',
                'Module data not valid {0}'.format(err)
            )
            return

        path = module['path']

        # find the parent of the module (the last character of the path
        # is dropped before hashing, as in create_module)
        try:
            parent_thing = self.thing_list.get(pathid=Thing.hash(path[:-1]))
        except Thing.DoesNotExist:
            self.logger.log(
                'failed',
                'module',
                'The path {0} does not exist'.format(path[:-1])
            )
            return
        except Thing.MultipleObjectsReturned:
            # same handling as create_module: report instead of crashing
            self.logger.log(
                'failed',
                'module',
                'Path {0} was not unique'.format(path[:-1])
            )
            return

        # check for permission
        if not parent_thing.can_be_edited_by(self.user.username):
            self.logger.log(
                'denied',
                'module',
                'You do not have permission to modify {0}'.format(path[:-1])
            )
            return

        self.process_module_dict(module)

    def process_module_dict(self, module):
        """
        Processes a ModuleData dict and updates the database

        Looks the module up by the hash of path + shortname. A missing
        module is created unless it is marked for deletion; an existing
        module marked for deletion is deleted. Otherwise every entry of
        module['seriesList'] is processed against the module's sources.
        """

        path = module['path']
        module_path = path + module['shortname']
        try:
            db_module = self.thing_list.get(pathid=Thing.hash(module_path))
        except Thing.DoesNotExist:
            db_module = None
        except Thing.MultipleObjectsReturned:
            # an ambiguous module cannot be updated safely
            self.logger.log(
                'failed',
                'module',
                'Path {0} was not unique'.format(module_path)
            )
            return

        is_deleting_module = module.is_being_deleted()

        if db_module is None:
            if is_deleting_module:
                # wanted to delete it, doesn't exist, nothing to do
                return
            db_module = self.create_module(
                path,
                module['name'],
                module['shortname']
            )
            if db_module is None:
                # something went wrong creating the module (no need to report
                # it as the logger should already contain the details)
                return

        # check if we want to delete it
        if is_deleting_module:
            self.delete_module(db_module)
            return

        # create a list of child sources as (source, importid) pairs
        module_sources = []
        matching_source_tags = self.event_source_list.filter(
            thing=db_module,
            annotation='home'
        )

        for tag in matching_source_tags:
            # check it was imported via the api
            if 'importid' in tag.eventsource.metadata:
                module_sources.append(
                    (tag.eventsource, tag.eventsource.metadata['importid'])
                )

        for source in module['seriesList']:
            db_source = self.process_source_dict(
                module_sources, db_module, source
            )
            # remember the source so that a later series with the same
            # external id in this module is matched against it
            if db_source is not None:
                module_sources.append(
                    (db_source, db_source.metadata['importid'])
                )

    def process_source_dict(self, module_sources, db_module, source):
        """
        Processes a SeriesData dict and updates the database

        module_sources is a list of (source, importid) pairs; the first
        pair whose importid equals source['externalid'] is treated as the
        existing database source.

        Returns the database source that was updated or added, or None
        when the series was marked for deletion.
        """

        is_deleting_source = source.is_being_deleted()
        db_source = None

        # check if the source is already in the data
        for existing_source in module_sources:
            if existing_source[1] == source['externalid']:
                db_source = existing_source[0]
                break

        # update/add/delete the source
        if db_source is not None:
            if is_deleting_source:
                self.delete_source(db_source)
                return
            else:
                self.update_source(db_source, source)
        else:
            # doesn't exist
            if is_deleting_source:
                return
            else:
                db_source = self.add_source(db_module, source)

        for event in source['events']:
            self.process_event_dict(db_source, event)

        return db_source

    def process_event_dict(self, db_source, event):
        """
        Processes a EventData dict and updates the database

        Events marked for deletion are deleted (if they exist); all
        other events are added or updated.
        """
        if event.is_being_deleted():
            # delete it if it exists
            self.delete_event(
                db_source,
                event
            )
        else:
            self.add_or_update_event(
                db_source,
                event
            )

    def create_module(self, path, name, shortname):
        """
        Creates a module in the database. Will not create Things above
        module level, and will fail if the path is not unique.

        Returns the saved Thing, or None (after logging the reason) when
        the parent path is missing or ambiguous.
        """

        # find the parent (remove end slash)
        try:
            parent = self.thing_list.get(pathid=Thing.hash(path[:-1]))
        except Thing.DoesNotExist:
            self.logger.log(
                'failed',
                'module',
                'Could not find path {0}'.format(path[:-1])
            )
            return
        except Thing.MultipleObjectsReturned:
            self.logger.log(
                'failed',
                'module',
                'Path {0} was not unique'.format(path[:-1])
            )
            return

        db_module = Thing(
            fullname=name,
            type='module',
            parent=parent,
            name=shortname
        )

        self.logger.log('insert', 'module', name)
        db_module.save()

        return db_module

    def delete_module(self, db_module):
        """
        Deletes a module from the database. Also deletes all event
        sources attached to the module.
        """

        self.logger.log('delete', 'module', db_module.fullname)

        # clear the module of all of its sources (and their events);
        # note that this is not restricted to 'home' annotated tags
        matching_source_tags = self.event_source_list.filter(
            thing=db_module
        ).prefetch_related('eventsource')

        for tag in matching_source_tags:
            db_source = tag.eventsource
            self.delete_source(db_source)

        db_module.delete()

    def update_source(self, db_source, source):
        """
        Verifies that source and db_source differ, and updates
        db_source if they do.

        Only the fields present in source are compared and written:
        'name' against the title, and non-None 'lecturer' / 'location'
        against the 'people' / 'location' metadata entries. The source
        is saved and an 'update' entry logged only when something
        changed.

        Returns db_source.
        """

        has_name_data = 'name' in source
        has_lecturer_data = ('lecturer' in source and
                             source['lecturer'] is not None)
        has_location_data = ('location' in source and
                             source['location'] is not None)

        has_db_people = 'people' in db_source.metadata
        has_db_location = 'location' in db_source.metadata

        # new data counts as a change when the database has no value yet
        if has_lecturer_data and has_db_people:
            lecturer_changed = (
                db_source.metadata['people'] != source['lecturer']
            )
        else:
            lecturer_changed = has_lecturer_data and not has_db_people

        if has_location_data and has_db_location:
            location_changed = (
                db_source.metadata['location'] != source['location']
            )
        else:
            location_changed = has_location_data and not has_db_location

        name_changed = has_name_data and (db_source.title != source['name'])

        if name_changed or lecturer_changed or location_changed:
            # only touch the title when a name was actually supplied;
            # reading source['name'] unconditionally would fail for a
            # source that only carries lecturer/location data
            if has_name_data:
                db_source.title = source['name']

            if has_lecturer_data:
                db_source.metadata['people'] = source['lecturer']
            if has_location_data:
                db_source.metadata['location'] = source['location']

            db_source.save()

            # the title equals source['name'] whenever a name was given
            self.logger.log('update', 'source', db_source.title)

        return db_source

    def add_source(self, db_module, source):
        """
        Adds an event source to the database. Also adds the
        source tag to connect it to db_module, using
        annotation 'home'

        Returns the saved event source.
        """

        db_source = EventSource(
            title=source['name'],
            sourcetype='importapi'
        )

        # the import id is what later imports use to find this source
        db_source.metadata['importid'] = source['externalid']

        if 'lecturer' in source and source['lecturer'] is not None:
            db_source.metadata['people'] = source['lecturer']

        if 'location' in source and source['location'] is not None:
            db_source.metadata['location'] = source['location']

        self.logger.log('insert', 'source', source['name'])
        db_source.save()

        # add the source tag
        source_tag = EventSourceTag(
            thing=db_module,
            eventsource=db_source,
            annotation='home'
        )
        self.logger.log(
            'insert',
            'sourcetag',
            db_module.name+' > '+db_source.title
        )
        source_tag.save()

        return db_source

    def delete_source(self, db_source):
        """
        Deletes a source from the database. Will also delete
        all child events that have this as their source. For
        logging clarity, reports the deletion of the source
        before deletion of the children.
        """

        child_events = self.event_list.filter(source=db_source)

        self.logger.log('delete', 'source', db_source.title)
        for event in child_events:
            self.logger.log('delete', 'event', event.title)
            event.delete()

        db_source.delete()

    def delete_event(self, db_source, event):
        """
        Deletes the event specified. The event is identified by
        its uid (with the import- prefix) and its source.

        A missing event is ignored; an ambiguous uid is logged as a
        failure and nothing is deleted.
        """

        event_uid = event.get_internal_id()

        try:
            matching_event = self.event_list.get(
                uid=event_uid,
                source=db_source
            )
        except Event.MultipleObjectsReturned:
            self.logger.log(
                'failed',
                'event',
                'Multiple events found with uid {0}'.format(event_uid)
            )
            return
        except Event.DoesNotExist:
            # nothing to delete
            return

        self.logger.log('delete', 'event', matching_event.title)
        matching_event.delete()

    def add_or_update_event(self, db_source, event):
        """
        Checks if an event exists identified (by uid with import-
        prefix and by source) and updates it or adds it as
        appropriate. Will only update if the new data differs from
        the current data.

        Returns the database event, or None when the uid was not unique
        within the source.
        """

        event_uid = event.get_internal_id()

        try:
            db_event = self.event_list.get(uid=event_uid, source=db_source)
        except Event.DoesNotExist:
            db_event = None
        except Event.MultipleObjectsReturned:
            self.logger.log(
                'failed',
                'event',
                'Event uid {0} was not unique'.format(event_uid)
            )
            return

        # combine the date with the start/end times and localize them
        start_time = TIMEZONE.localize(
            datetime.datetime.combine(event['date'], event['start'])
        )
        end_time = TIMEZONE.localize(
            datetime.datetime.combine(event['date'], event['end'])
        )

        # check for existence
        if db_event is None:
            # add a new event
            db_event = Event(
                start=start_time,
                end=end_time,
                title=event['name'],
                location=event['location'],
                uid=event_uid,
                source=db_source,
                status=0
            )
            db_event.metadata['people'] = event['lecturer']
            db_event.metadata['type'] = event['type']
            self.logger.log('insert', 'event', event['name'])
            db_event.save()
        else:
            # check if an update is required; the metadata key checks
            # come first so the later metadata lookups cannot fail
            if ('people' not in db_event.metadata or
                    'type' not in db_event.metadata or
                    db_event.start != start_time or
                    db_event.end != end_time or
                    db_event.title != event['name'] or
                    db_event.location != event['location'] or
                    db_event.uid != event_uid or
                    db_event.source != db_source or
                    db_event.status != 0 or
                    db_event.metadata['people'] != event['lecturer'] or
                    db_event.metadata['type'] != event['type']):
                # update it
                db_event.start = start_time
                db_event.end = end_time
                db_event.title = event['name']
                db_event.location = event['location']
                db_event.uid = event_uid
                db_event.source = db_source
                db_event.status = 0
                db_event.metadata['people'] = event['lecturer']
                db_event.metadata['type'] = event['type']
                self.logger.log('update', 'event', event['name'])
                db_event.save()

        return db_event
class PostImportProcessor(object):
    """
    Converts the fields of a POST request into ModuleData
    """
    def __init__(self, logger=None):
        self.logger = APILogger() if logger is None else logger

    def process(self, post_data):
        """
        Builds a single-element list of ModuleData from POST data.

        Recognised variables (REQ = required, OPT = optional):

        Module and series
        REQ     tripos           The identifier of the tripos
        REQ     part             The identifier of the part
        OPT     subject          The identifier of the subject
        REQ     modulename       The display name of the module (used as
                                    the identifier)
        REQ     seriesid         The external id of the series to import
        REQ     seriesname       The display name of the series
        OPT     delete-series    If present, the series will be deleted

        Event (ignored when delete-series is present)
        REQ     uniqueid         The external id of the event to modify
        OPT     delete-event     If present, the event will be deleted

        Event details (ignored when delete-event is present)
        REQ     name             The display name of the event
        REQ     date             The date of the event, in %Y-%m-%d format
                                    (eg 2013-08-27)
        REQ     start            The start time of the event, in %H:%M:%S
                                    format (eg "11:00:00", "22:00:00")
        REQ     end              The end time of the event, in %H:%M:%S
                                    format
        REQ     location         The location of the event
        REQ     type             The type of the event (expected to be one
                                    of: field trip, lecture, class, seminar,
                                    practical - but this is not enforced)
        REQ     lecturer         A semi-colon separated list of people
                                    involved (eg "Mr. Smith;Prof. Bloggs").
                                    Whitespace around each name is removed,
                                    so "Mr. Smith; Prof. Bloggs" also works

        Returns a list holding the one ModuleData, or None (after logging
        a 'failed' entry) when a required variable is missing or a date
        or time is badly formatted.
        """

        try:
            tripos = post_data['tripos']
            part = post_data['part']
            subject = post_data['subject'] if 'subject' in post_data else None
            module_name = post_data['modulename']

            series_id = post_data['seriesid']
            series_name = post_data['seriesname']
            wants_series_deleted = 'delete-series' in post_data

            # mirror the structure produced by the XML import
            module = ModuleData(module_name)
            module['path'] = build_path_string(tripos, part, subject)
            module['seriesList'] = [SeriesData(series_id)]  # exactly one
            series = module['seriesList'][0]
            series['name'] = series_name

            if wants_series_deleted:
                series['delete'] = True
            else:
                # exactly one event per request
                series['events'] = [self._event_from_post(post_data)]

        except KeyError as err:
            # a required variable was not supplied
            self.logger.log('failed', 'post', 'Key Error {0}'.format(err))
            return None
        except ValueError as err:
            # a date or time string did not match its format
            self.logger.log('failed', 'post', 'Value Error {0}'.format(err))
            return None

        return [module]

    def _event_from_post(self, post_data):
        """
        Builds the EventData described by post_data. KeyError and
        ValueError are left for the caller to handle.
        """
        event = EventData(post_data['uniqueid'])

        if 'delete-event' in post_data:
            event['delete'] = True
            return event

        parse = datetime.datetime.strptime

        event['name'] = post_data['name']
        event['date'] = parse(post_data['date'], "%Y-%m-%d").date()
        event['start'] = parse(post_data['start'], "%H:%M:%S").time()
        event['end'] = parse(post_data['end'], "%H:%M:%S").time()
        event['location'] = post_data['location']
        event['type'] = post_data['type']
        event['lecturer'] = [
            person.strip() for person in post_data['lecturer'].split(';')
        ]

        return event
class XMLImportProcessor(object):
    """
    Processes XML files into a list of ModuleData
    """
    def __init__(self, logger=None):
        if logger is not None:
            self.logger = logger
        else:
            self.logger = APILogger()

        self.parser = XML_PARSER

    def process(self, data):
        """
        Parses the xml in the data string and constructs
        a list of ModuleData representing the data.

        NOTE(review): the XML is presumably validated against a schema
        by self.parser (XML_PARSER) - confirm where it is configured.

        Returns the list of modules, or None (after logging a 'failed'
        entry) when the XML cannot be read or parsed or when any module
        could not be converted.
        """

        try:
            # defusedxml doesn't seem to use the parser, so
            # parse it first with defused to check for risks,
            # then parse it with lxml to check for schema
            # compliance
            XMLFromStringSecurity(data, self.parser)
            xml = XMLFromString(data, self.parser)
        except IOError as err:
            self.logger.log('failed', 'xml',
                            'Unable to read XML file: {0}'.format(err))
            return
        except (etree.XMLSyntaxError, DefusedXmlException, ParseError) as err:
            self.logger.log('failed', 'xml',
                            'XML file was not valid: {0}'.format(err))
            return

        modules = []

        # modules
        for xml_module in xml.findall('module'):
            module = self.process_xml_module_node_to_dict(xml_module)
            if module is None:
                # one bad module aborts the whole import
                return
            modules.append(module)

        return modules

    def process_xml_module_node_to_dict(self, xml_module):
        """
        Creates a ModuleData from a module node

        Returns None when one of the module's series could not be
        converted.
        """
        name = xml_module.find('name').text
        if name is None:
            # an empty name element has no text
            name = ""

        module = ModuleData(name)

        xml_path_node = xml_module.find('path')
        tripos = xml_path_node.find('tripos').text
        part = xml_path_node.find('part').text
        xml_subject_node = xml_path_node.find('subject')
        if xml_subject_node is not None:
            subject = xml_subject_node.text
        else:
            subject = None

        module['path'] = build_path_string(tripos, part, subject)

        # check for delete node
        if xml_module.find('delete') is not None:
            # mark this module for removal and skip to the next
            module['delete'] = True
            return module

        series_list = []

        # series
        for xml_series in xml_module.findall('series'):
            series = self.process_xml_series_node_to_dict(xml_series)
            if series is None:
                # series wasn't created properly
                return
            series_list.append(series)

        module['seriesList'] = series_list

        return module

    def process_xml_series_node_to_dict(self, xml_series):
        """
        Creates a SeriesData from a series node

        Returns None when one of the series' events could not be
        converted.
        """
        series = SeriesData(xml_series.find('uniqueid').text)
        series['name'] = xml_series.find('name').text

        # check for delete node
        if xml_series.find('delete') is not None:
            # mark this series for removal and skip to the next
            series['delete'] = True
            return series

        events = []

        # attempts to give a general location/lecturer
        # group to the whole series
        # will only assign anything if everything is the same
        locations = set()
        lecturer_groups = set()

        # events
        for xml_event in xml_series.findall('event'):
            event = self.process_xml_event_node_to_dict(xml_event)
            if event is None:
                # event wasn't created properly
                return
            if not event.is_being_deleted():
                locations.add(event['location'])
                # tuples are hashable and, unlike joining on ';', keep
                # an empty lecturer list empty and names intact
                lecturer_groups.add(tuple(event['lecturer']))

            events.append(event)

        # check to see if the events all agreed on lecturer or location
        if len(lecturer_groups) == 1:
            series['lecturer'] = list(lecturer_groups.pop())
        if len(locations) == 1:
            series['location'] = locations.pop()

        series['events'] = events

        return series

    def process_xml_event_node_to_dict(self, xml_event):
        """
        Creates an EventData from an event node

        Returns None (after logging a 'failed' entry) when a date or
        time cannot be parsed, when the event uses the exporter's
        placeholder id, when it ends before it begins or when its
        duration takes it past midnight.
        """
        # check for delete node
        if xml_event.find('delete') is not None:
            # mark this event for removal and skip to the next
            event = EventData(xml_event.find('uniqueid').text, delete=True)

            return event

        # required info
        try:
            start = datetime.datetime.strptime(
                xml_event.find('start').text, "%H:%M:%S")

            date = datetime.datetime.strptime(
                xml_event.find('date').text, "%Y-%m-%d")
        except (TypeError, ValueError) as err:
            # badly formatted (ValueError) or empty (TypeError) value
            self.logger.log('failed', 'xml', 'Value Error {0}'.format(err))
            return

        event = EventData(xml_event.find('uniqueid').text,
                          name=xml_event.find('name').text,
                          date=date.date(),
                          start=start.time(),
                          type=xml_event.find('type').text)

        # check the event doesn't have id '-', as this is the
        # exporter's default id and won't be unique
        if event['externalid'] == XML_EXPORT_FAKE_ID:
            self.logger.log('failed', 'xml',
                            'Event had id {0}'.format(XML_EXPORT_FAKE_ID))
            # like every other failure here, do not hand back an event
            # that has just been reported as failed
            return

        # event either has a duration or an end time
        xml_duration = xml_event.find('duration')
        try:
            if xml_duration is None:
                end_time = datetime.datetime.strptime(
                    xml_event.find('end').text, "%H:%M:%S").time()
                duration = None
            else:
                end_time = None
                duration = datetime.datetime.strptime(
                    xml_duration.text, "%H:%M:%S")
        except (TypeError, ValueError) as err:
            self.logger.log('failed', 'xml', 'Value Error {0}'.format(err))
            return

        if duration is None:
            event['end'] = end_time

            if event['end'] < event['start']:
                self.logger.log('failed', 'xml', 'Event ends before it begins')
                return
        else:
            duration_delta = datetime.timedelta(hours=duration.hour,
                                                minutes=duration.minute,
                                                seconds=duration.second)
            # start carries strptime's default date, so a changed day
            # means the event ran past midnight
            end = start + duration_delta
            if end.day != start.day:
                self.logger.log(
                    'failed', 'xml',
                    'Event \'' + event['name'] + '\' is overnight!')
                return

            event['end'] = end.time()

        # event can have a location
        xml_location = xml_event.find('location')
        if xml_location is not None and xml_location.text is not None:
            event['location'] = xml_location.text
        else:
            event['location'] = ''

        # event can have a set of lecturers
        event['lecturer'] = [
            x.text for x in xml_event.findall('lecturer') if x.text is not None
        ]

        return event