Exemplo n.º 1
0
    def test07_process_source(self):
        """Read a reachable ResourceSync source and check that its sitemap is written to file."""
        # Start without sitemaps left over from an earlier run, otherwise the
        # final isfile() check could pass without anything being written.
        # ignore_errors covers the "folder does not exist yet" case without
        # swallowing unrelated exceptions such as KeyboardInterrupt, which the
        # former bare 'except: pass' did.
        shutil.rmtree("rs/destination/d6/sitemaps", ignore_errors=True)

        # Drop Config and DestinationMap so both are read afresh from the test files.
        Config.__set_config_filename__("test-files/config.txt")
        Config().__drop__()
        DestinationMap.__set_map_filename__("test-files/desmap.txt")
        DestinationMap().__drop__()
        des.reporter.reset_instance()
        Config().__set_prop__(Config.key_use_netloc, "False")
        Config().__set_prop__(Config.key_audit_only, "False")
        DestinationMap().__set_destination__(
            "http://localhost:8000/rs/source/s6", "rs/destination/d6")
        # The SitemapWriter listener is what is expected to save the sitemap.
        des.processor.processor_listeners.append(SitemapWriter())
        base_uri = "http://localhost:8000/rs/source/s6/"

        sdproc = Sodesproc(base_uri)
        sdproc.read_source()
        self.assertEqual(200, sdproc.source_status)
        self.assertEqual(Status.document, sdproc.status)
        self.assertTrue(
            os.path.isfile(
                "rs/destination/d6/sitemaps/.well-known/resourcesync"))
Exemplo n.º 2
0
    def test_02_change(self):
        """Baseline-sync source s1, change one resource and sync the resulting changelist.

        The changelist run is expected to report exactly one updated resource.
        """
        source_uri = "http://localhost:8000/rs/source/s1"
        for key in (Config.key_use_netloc, Config.key_audit_only):
            Config().__set_prop__(key, "False")
        DestinationMap().__set_destination__(source_uri, "rs/destination/d1")

        # Empty destination, freshly generated resource list.
        __clear_destination__("d1")
        __clear_sources_xml__("s1")
        __create_resourcelist__("s1")

        logger.debug("\n=========== create ==============\n")
        baseline = Relisync(source_uri + "/resourcelist.xml")
        baseline.process_source()
        self.assertEqual(0, len(baseline.exceptions))
        self.assertEqual(Status.processed, baseline.status)

        __change_resource__("s1", "resource1.txt")
        __create_changelist__("s1")

        logger.debug("\n=========== change ==============\n")
        incremental = Chanlisync(source_uri + "/changelist.xml")
        incremental.process_source()

        self.assertEqual(0, len(incremental.exceptions))
        self.assertEqual(Status.processed, incremental.status)
        reporter = des.reporter.instance()
        self.assertEqual(4, len(reporter.sync_status))

        # Only the entry at index 3 is inspected; its 'same' count is expected to be unset.
        change_status = reporter.sync_status[3]
        self.assertIsNone(change_status.same)
        self.assertEqual(0, change_status.created)
        self.assertEqual(1, change_status.updated)
        self.assertEqual(0, change_status.deleted)
        self.assertEqual(0, change_status.to_delete)
        self.assertIsNone(change_status.exception)

        reporter.sync_status_to_file("logs/incremental-change.csv")
Exemplo n.º 3
0
    def test01_no_destination_no_connection(self):
        """Exercise the failure paths of Relisync: no destination, then no connection."""
        uri = "http://bla.com"
        Config().__set_prop__(Config.key_use_netloc, "False")
        DestinationMap().__remove_destination__(uri)
        des.reporter.reset_instance()

        logger.debug("\n============ no destination =============\n")
        # No destination mapped: processing returns with a single exception.
        unmapped = Relisync(uri)
        self.assertEqual(Status.init, unmapped.status)
        unmapped.process_source()
        self.assertEqual(1, len(unmapped.exceptions))
        self.assertEqual("No destination for %s" % uri, unmapped.exceptions[0])
        self.assertEqual(Status.processed_with_exceptions, unmapped.status)

        # Destination mapped, but no connection: ends up in a caught exception.
        DestinationMap().__set_destination__(uri, "destination_x")
        logger.debug("\n============destination, no connection =============\n")
        unreachable = Relisync(uri)
        unreachable.process_source()
        self.assertEqual(1, len(unreachable.exceptions))
        self.assertEqual(Status.processed_with_exceptions, unreachable.status)
        reporter = des.reporter.instance()
        self.assertEqual(2, len(reporter.sync_status))
        self.assertIsNotNone(reporter.sync_status[0].exception)

        # Net location 'bla.com' used as destination: still no connection.
        Config().__set_prop__(Config.key_use_netloc, "True")
        DestinationMap().__remove_destination__(uri)
        logger.debug("\n=========== using netloc, still no connection ==============\n")
        via_netloc = Relisync(uri)
        via_netloc.process_source()
        self.assertEqual(1, len(via_netloc.exceptions))
        self.assertEqual(Status.processed_with_exceptions, via_netloc.status)
        self.assertEqual(3, len(reporter.sync_status))
        self.assertIsNotNone(reporter.sync_status[1].exception)
Exemplo n.º 4
0
    def test04_process_baseline_netloc(self):
        """Baseline-sync source s1 with the net location of the uri used as destination."""
        source_uri = "http://localhost:8000/rs/source/s1"
        Config().__set_prop__(Config.key_use_netloc, "True")
        Config().__set_prop__(Config.key_audit_only, "False")
        DestinationMap().__remove_destination__(source_uri)

        __clear_sources_xml__("s1")
        __create_resourcelist__("s1")

        # An existing 'localhost:8000' folder means one sync status (audit only)
        # is expected, otherwise two (audit plus update). The count is only
        # logged, not asserted, because it depends on whether the test is run
        # individually or in a group.
        if os.path.isdir("localhost:8000"):
            logger.debug("Expecting only audit")
        else:
            logger.debug("Expecting update")

        logger.debug("\n=========================\n")
        relisync = Relisync(source_uri + "/resourcelist.xml")
        relisync.process_source()

        self.assertEqual(0, len(relisync.exceptions))
        self.assertEqual(Status.processed, relisync.status)

        des.reporter.instance().sync_status_to_file("logs/baseline-netloc.csv")
Exemplo n.º 5
0
    def test_03_change_delete(self):
        """Baseline-sync source s2, then sync a changelist holding one update and one deletion.

        A second run of the same changelist is expected to report no changes.
        """
        source_uri = "http://localhost:8000/rs/source/s2"
        changelist_uri = source_uri + "/changelist.xml"
        for key in (Config.key_use_netloc, Config.key_audit_only):
            Config().__set_prop__(key, "False")
        DestinationMap().__set_destination__(source_uri, "rs/destination/d2")

        # Clean destination; the source gets one extra resource that is deleted later on.
        __clear_destination__("d2")
        __clear_sources_xml__("s2")
        __add_resource__("s2", "added.txt")
        __create_resourcelist__("s2")

        logger.debug("\n=========== create ==============\n")
        baseline = Relisync(source_uri + "/resourcelist.xml")
        baseline.process_source()
        self.assertEqual(0, len(baseline.exceptions))
        self.assertEqual(Status.processed, baseline.status)

        __change_resource__("s2", "resource2.txt")
        __delete_resource__("s2", "added.txt")
        __create_changelist__("s2")

        des.reporter.reset_instance()
        logger.debug("\n=========== update + delete ==============\n")
        first_run = Chanlisync(changelist_uri)
        first_run.process_source()

        self.assertEqual(0, len(first_run.exceptions))
        self.assertEqual(Status.processed, first_run.status)

        reporter = des.reporter.instance()
        reporter.sync_status_to_file("logs/incremental-change-delete.csv")
        self.assertEqual(2, len(reporter.sync_status))
        changed = reporter.sync_status[1]
        self.assertIsNone(changed.same)
        self.assertEqual(0, changed.created)
        self.assertEqual(1, changed.updated)
        self.assertEqual(1, changed.deleted)
        self.assertEqual(1, changed.to_delete)
        self.assertIsNone(changed.exception)

        des.reporter.reset_instance()
        logger.debug("\n=========== no change ==============\n")
        second_run = Chanlisync(changelist_uri)
        second_run.process_source()

        self.assertEqual(0, len(second_run.exceptions))
        self.assertEqual(Status.processed, second_run.status)

        reporter = des.reporter.instance()
        self.assertEqual(1, len(reporter.sync_status))
        unchanged = reporter.sync_status[0]
        self.assertIsNone(unchanged.same)
        self.assertEqual(0, unchanged.created)
        self.assertEqual(0, unchanged.updated)
        self.assertEqual(0, unchanged.deleted)
        self.assertEqual(0, unchanged.to_delete)
        self.assertIsNone(unchanged.exception)
Exemplo n.º 6
0
    def test04__drop__(self):
        """Dropping a Config clears the class-level instance so another file can be loaded."""
        Config.__set_config_filename__("test-files/config.txt")
        first = Config()
        self.assertIsNotNone(first)
        self.assertIsNone(first.prop("this_is"))

        first.__drop__()
        self.assertIsNone(Config.__instance__)

        # After the drop a Config created from the alternative file is a
        # different object and carries the alternative property.
        Config.__set_config_filename__("test-files/alt-config.txt")
        second = Config()
        self.assertIsNotNone(second)
        self.assertNotEqual(first, second)
        self.assertEqual("a_test", second.prop("this_is"))
        second.__drop__()
Exemplo n.º 7
0
    def __init__(self, config_filename="conf/config.txt"):
        """
        Create a Runner using the configuration file denoted by config_filename.

        Logging is configured from the file named by the configuration property
        Config.key_logging_configuration_file, which defaults to "conf/logging.conf".

        :param config_filename: path of the configuration file
        :raises FileNotFoundError: when raised while creating the Config, or when the
            logging configuration file does not exist
        :return: None
        """
        try:
            Config.__set_config_filename__(config_filename)
            config = Config()
        except FileNotFoundError as err:
            # Logging is not configured yet, so report on stdout before re-raising.
            print(err)
            raise err

        logging_conf = config.prop(Config.key_logging_configuration_file, "conf/logging.conf")
        # logging.config.fileConfig raises "KeyError: 'formatters'" if the configuration file
        # does not exist. A FileNotFoundError is less confusing in that case.
        if not os.path.isfile(logging_conf):
            raise FileNotFoundError("Logging configuration file not found: " + logging_conf)
        logging.config.fileConfig(logging_conf)

        self.logger = logging.getLogger(__name__)
        self.pid = os.getpid()
        self.sources = None
        self.exceptions = []

        startup_messages = (
            "Started %s with pid %d" % (__file__, self.pid),
            "Configured %s from '%s'" % (self.__class__.__name__, config_filename),
            "Configured logging from '%s'" % logging_conf,
        )
        for message in startup_messages:
            self.logger.info(message)
        self.__inject_dependencies__(config)
Exemplo n.º 8
0
    def test_inject_dependencies(self):
        """Two configured processor listeners end up in des.processor.processor_listeners."""
        listener_names = ", ".join([
            "des.processor_listener.SitemapWriter",
            "des.processor.ProcessorListener",
        ])
        Config.__set_config_filename__("test-files/config.txt")
        Config().__set_prop__(Config.key_des_processor_listeners, listener_names)

        # The runner is only constructed; nothing is run.
        runner = DesRunner()
        self.assertEqual(2, len(des.processor.processor_listeners))
Exemplo n.º 9
0
    def test04_list_prop(self):
        """list_prop returns the three entries of 'test_list' in order."""
        Config.__set_config_filename__("test-files/config.txt")
        values = Config().list_prop("test_list")

        self.assertEqual(3, len(values))
        for index, expected in enumerate(("foo.bar", "bar.foo", "foo.bar.baz")):
            self.assertEqual(expected, values[index])
Exemplo n.º 10
0
    def run(self, sources, task="discover", once=False):
        """
        Run the DesRunner. A running application can be stopped by creating a file named 'stop' in the directory
        the runner was started from.
        Source urls are read from the file given in param 'sources'. This file is read each time a full round
        of synchronizing has taken place, so source urls can be extended or changed without restarting the application.

        Sources are mapped to the destinations given in the file denoted by the configuration parameter
        "location_mapper_destination_file".

        :param sources: the file containing source urls
        :param task: the task to run.
                        - If source urls can all be discovered by reading the .well-known/resourcesync
                            on each source, use 'wellknown'.
                        - If all source urls point to capability lists, use 'capability'.
                        - If source urls are heterogeneous, use 'discover'.
        :param once: True for exploring source urls once and then exit, False otherwise
        :return:
        """
        while True:
            # (Re)read the list of source urls.
            self.logger.info("Reading source urls from '%s'" % sources)
            self.__read_sources_doc__(sources)

            # Point the url --> destination map at the configured file; new mappings may
            # have been configured since the previous round.
            map_filename = Config().prop(Config.key_location_mapper_destination_file, "conf/desmap.txt")
            DestinationMap.__set_map_filename__(map_filename)
            # Dropping forces a fresh read from file.
            DestinationMap().__drop__()
            # Set the root of the destination folder if configured.
            DestinationMap().set_root_folder(Config().prop(Config.key_destination_root))

            # Process all urls, then report.
            self.__do_task__(task)
            self.__do_report__(task)

            # To continue or not to continue.
            if once or self.__stop__():
                break

            pause = Config().int_prop(Config.key_sync_pause)
            self.logger.info("Going to sleep for %d seconds." % pause)
            self.logger.debug("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")
            time.sleep(pause)

            # A stop may have been requested while sleeping.
            if once or self.__stop__():
                break
Exemplo n.º 11
0
    def test03_process_baseline(self):
        """Baseline-sync source s1 twice: the first run creates 3 resources, the second finds 3 the same."""
        source_uri = "http://localhost:8000/rs/source/s1"
        resourcelist_uri = source_uri + "/resourcelist.xml"
        for key in (Config.key_use_netloc, Config.key_audit_only):
            Config().__set_prop__(key, "False")
        DestinationMap().__set_destination__(source_uri, "rs/destination/d1")

        __clear_destination__("d1")
        __clear_sources_xml__("s1")
        __create_resourcelist__("s1")
        des.reporter.reset_instance()

        logger.debug("\n=========== create ==============\n")
        create_run = Relisync(resourcelist_uri)
        create_run.process_source()

        self.assertEqual(0, len(create_run.exceptions))
        self.assertEqual(Status.processed, create_run.status)
        reporter = des.reporter.instance()
        # sync_status count: 1 for audit, 1 for create. expected 2
        self.assertEqual(2, len(reporter.sync_status))
        created = reporter.sync_status[0]
        self.assertEqual(0, created.same)
        self.assertEqual(3, created.created)
        self.assertEqual(0, created.updated)
        self.assertEqual(0, created.deleted)
        self.assertEqual(0, created.to_delete)
        self.assertIsNone(created.exception)

        logger.debug("\n============ update =============\n")
        update_run = Relisync(resourcelist_uri)
        update_run.process_source()

        self.assertEqual(0, len(update_run.exceptions))
        self.assertEqual(Status.processed, update_run.status)
        reporter = des.reporter.instance()
        # sync_status count: 1 for audit, 1 for create (both from previous run),
        # 1 for audit, no update. expected 3
        self.assertEqual(3, len(reporter.sync_status))
        audited = reporter.sync_status[2]
        self.assertEqual(3, audited.same)
        self.assertEqual(0, audited.created)
        self.assertEqual(0, audited.updated)
        self.assertEqual(0, audited.deleted)
        self.assertEqual(0, audited.to_delete)
        self.assertIsNone(audited.exception)
Exemplo n.º 12
0
    def test08_try_robots_with_netloc(self):
        """With use_netloc on and no mapped destination, discovery of 'loc2' yields a Reliproc."""
        DestinationMap().__remove_destination__("http://localhost:8000/rs/source/discover/")
        Config().__set_prop__(Config.key_use_netloc, "True")

        discoverer = Discoverer("http://localhost:8000/rs/source/discover/loc2")
        processor = discoverer.get_processor()

        self.assertIsInstance(processor, proc.Reliproc)
        processor.read_source()
Exemplo n.º 13
0
    def test02_process_audit(self):
        """Process source s1 in audit-only mode: exactly one sync status is reported."""
        source_uri = "http://localhost:8000/rs/source/s1"
        Config().__set_prop__(Config.key_use_netloc, "False")
        Config().__set_prop__(Config.key_audit_only, "True")
        DestinationMap().__set_destination__(source_uri, "rs/destination/d1")

        __clear_destination__("d1")
        __clear_sources_xml__("s1")
        __create_resourcelist__("s1")
        des.reporter.reset_instance()

        logger.debug("\n=========================\n")
        audit_run = Relisync(source_uri + "/resourcelist.xml")
        audit_run.process_source()

        self.assertEqual(0, len(audit_run.exceptions))
        self.assertEqual(Status.processed, audit_run.status)

        reporter = des.reporter.instance()
        self.assertEqual(1, len(reporter.sync_status))
        reporter.sync_status_to_file("logs/audit.csv")
Exemplo n.º 14
0
 def set_config(self, config_file, section):
     """
     Set the config file and section, and create the config manager for them.

     :param config_file: file name of the config file
     :param section: the section within the config file
     """
     self.config_file, self.section = config_file, section
     # Instantiate the config manager from the values just stored.
     self.config = Config(self.config_file, self.section)
Exemplo n.º 15
0
    def test02_new(self):
        """A new Config remembers its filename and exposes the properties of the test file."""
        config_filename = "test-files/config.txt"
        Config.__set_config_filename__(config_filename)
        config = Config()

        self.assertEqual(config_filename, config._config_filename)
        self.assertEqual(config_filename, Config.__get_config_filename__())

        logging_conf = config.prop(Config.key_logging_configuration_file)
        self.assertEqual("logging.conf", logging_conf)
        desmap_file = config.prop(Config.key_location_mapper_destination_file)
        self.assertEqual("test-files/desmap.txt", desmap_file)
Exemplo n.º 16
0
Arquivo: sync.py Projeto: EHRI/resydes
    def process_source(self):
        """
        Look up the destination for self.uri and synchronize to it.

        Without a destination nothing is synchronized: the problem is logged, appended to
        self.exceptions and reported. self.status is set according to self.has_exceptions().
        """
        use_netloc = Config().boolean_prop(Config.key_use_netloc, False)
        # Only the destination is needed here; the base uri of the mapping is ignored.
        _, destination = DestinationMap().find_destination(self.uri, netloc=use_netloc, infix="resources")

        if destination is not None:
            self.__synchronize__(destination)
        else:
            message = "No destination for %s" % self.uri
            self.logger.debug(message)
            self.exceptions.append(message)
            des.reporter.instance().log_status(
                self.uri,
                exception="No destination specified and use of net location prohibited.")

        if self.has_exceptions():
            self.status = Status.processed_with_exceptions
        else:
            self.status = Status.processed
Exemplo n.º 17
0
 def event_sitemap_received(self, uri, capability, text):
     """
     Save the text of a received sitemap at the local path mapped to its uri.

     When no local path can be found for the uri, a warning is logged and nothing is written.

     :param uri: the uri the sitemap was received from
     :param capability: capability of the sitemap; only used in log messages here
     :param text: the sitemap text to write
     """
     use_netloc = Config().boolean_prop(Config.key_use_netloc, False)
     # Only the local path is needed; the base uri of the mapping is ignored.
     _, local_path = DestinationMap().find_local_path(
         uri, netloc=use_netloc, infix=SITEMAP_FOLDER)
     if local_path is None:
         # Logger.warn is a deprecated alias of Logger.warning.
         self.logger.warning("Could not save %s. No local path for %s" %
                             (capability, uri))
         return

     folder = os.path.dirname(local_path)
     # A bare file name has an empty dirname and os.makedirs("") raises
     # FileNotFoundError, so only create folders when there is one to create.
     if folder:
         os.makedirs(folder, exist_ok=True)
     with open(local_path, "w") as sitemap_file:
         sitemap_file.write(text)
     self.logger.debug("Saved %s '%s'" % (capability, local_path))
Exemplo n.º 18
0
 def sync_status_to_file(self, filename=None):
     """
     Write self.sync_status as csv: one header line, then one line per item.

     :param filename: file to write to. If None, the name is taken from the configuration
         property Config.key_sync_status_report_file, defaulting to "sync-status.csv".
     """
     if filename is None:
         filename = Config().prop(Config.key_sync_status_report_file,
                                  "sync-status.csv")
     header = "date,uri,in_sync,incremental,audit,same,created,updated,deleted,to_delete,exception,origin"
     # The with-statement closes the report file on leaving the block.
     with open(filename, 'w') as report:
         report.write("%s\n" % header)
         report.writelines("%s\n" % entry for entry in self.sync_status)
     self.logger.info("Wrote %d source statuses to audit file %s" %
                      (len(self.sync_status), filename))
Exemplo n.º 19
0
    def test03_boolean_prop(self):
        """boolean_prop reflects the stored value and falls back to the given default."""
        Config.__set_config_filename__("test-files/config.txt")
        cfg = Config()
        key = Config.key_use_netloc

        # Value as read from the test configuration file.
        self.assertFalse(cfg.boolean_prop(key))

        cfg.__set_prop__(key, str(True))
        self.assertTrue(cfg.boolean_prop(key))

        cfg.__set_prop__(key, str(False))
        self.assertFalse(cfg.boolean_prop(key))

        # For an unknown key the supplied default is returned.
        self.assertTrue(cfg.boolean_prop("no_key", True))
        self.assertFalse(cfg.boolean_prop("no_key", False))
Exemplo n.º 20
0
def setUpModule():
    """Start an HTTP server on port 8000 in a daemon thread and reset Config and DestinationMap."""
    global server
    server = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
    # Daemon thread, so the serving thread does not keep the process alive.
    server_thread = threading.Thread(target=server.serve_forever)
    server_thread.daemon = True
    logger.debug("Starting server at http://localhost:8000/")
    server_thread.start()

    proc.processor_listeners.append(SitemapWriter())

    # Drop both instances after pointing them at the test files.
    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()
    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    DestinationMap().__drop__()
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/discover/", "rs/destination/discover")
Exemplo n.º 21
0
Arquivo: sync.py Projeto: EHRI/resydes
    def __synchronize__(self, destination):
        """
        Synchronize self.uri with the given destination, using the client from des.desclient.instance().

        A ClientFatalError raised while synchronizing is logged, reported through the client and
        appended to self.exceptions; it is not re-raised.

        :param destination: the destination that self.uri is mapped to
        """
        config = Config()
        checksum = config.boolean_prop(Config.key_use_checksum, True)
        audit_only = config.boolean_prop(Config.key_audit_only, True)
        # Deletion is only allowed when actually synchronizing, not when auditing.
        allow_deletion = not audit_only

        desclient = des.desclient.instance()
        try:
            desclient.set_mappings((self.uri, destination))
            self.do_synchronize(desclient, allow_deletion, audit_only)
        except ClientFatalError as err:
            # Logger.warn is a deprecated alias; Logger.warning is the supported method.
            self.logger.warning("EXCEPTION while syncing %s" % self.uri, exc_info=True)
            desclient.log_status(exception=err)
            self.exceptions.append(err)
        finally:
            # A side effect (or a bug ;) is messing around with the
            # class-level property Client.checksum. Make sure it is always set to initial value before the next
            # source is processed.
            desclient.checksum = checksum
Exemplo n.º 22
0
def instance():
    """
    Return the module-wide DesClient, creating it from the configuration on first use.

    resync.Client is a somewhat heavy class. Desclient inherits and is adapted to be used during one run of
    resyncing several sources. For convenience: grab the one instance from here.
    :return: an instance of Desclient
    """
    global _instance
    logger = logging.getLogger(__name__)
    if _instance is not None:
        return _instance

    config = Config()
    # Constructor parameters of the resync Client.
    use_checksum = config.boolean_prop(Config.key_use_checksum, True)
    verbose = False
    # Auditing only means a dry run for client.baseline_or_audit.
    dryrun = config.boolean_prop(Config.key_audit_only, True)

    _instance = DesClient(use_checksum, verbose, dryrun)
    logger.debug("Created a new %s [checksum=%s, verbose=%s, dryrun=%s]"
                 % (_instance.__class__.__name__, use_checksum, verbose, dryrun))
    return _instance
Exemplo n.º 23
0
 def setUp(self):
     """Point Config at the test configuration file and drop the current instance."""
     test_config = "test-files/config.txt"
     Config.__set_config_filename__(test_config)
     Config().__drop__()
Exemplo n.º 24
0
    def test01_new(self):
        """Creating a Config from a non-existing file raises FileNotFoundError."""
        Config.__set_config_filename__("test-files/no-config.txt")
        self.assertRaises(FileNotFoundError, Config)
Exemplo n.º 25
0
    def base_line(self, unzipdir):
        """
        Synchronize the unzipped contents of a resource dump with the local resources.

        Not fully implemented: after parsing the manifest and comparing it with the
        resources on disk, the method raises NotImplementedError.

        :param unzipdir: the directory of the unzipped packed contents.
        :raises NotImplementedError: whenever manifest parsing and comparison succeed
        :return:
        """
        manifest_file_name = os.path.join(unzipdir, "manifest.xml")
        try:
            sitemap = Sitemap()
            manifest_doc = sitemap.parse_xml(fh=manifest_file_name)
            # the manifest_doc is a resync.resource_container.ResourceContainer
            capability = manifest_doc.capability
            # NOTE(review): assert statements are stripped when Python runs with -O,
            # which would disable this check and the destination check below.
            assert capability == CAPA_RESOURCEDUMP_MANIFEST, "Capability is not %s but %s" % (
                CAPA_RESOURCEDUMP_MANIFEST, capability)
            self.status = Status.parsed
            self.__inform_sitemap_received__(capability, manifest_file_name)

            config = Config()
            netloc = config.boolean_prop(Config.key_use_netloc, False)
            base_uri, destination = DestinationMap().find_destination(
                self.pack_uri, netloc=netloc)
            assert destination is not None, "Found no destination folder in DestinationMap"
            # Build the list of resources currently on disk for this mapping.
            mapper = Mapper((base_uri, destination))
            rlb = ResourceListBuilder(mapper=mapper)
            dst_resource_list = rlb.from_disk()
            # Compares on uri
            same, updated, deleted, created = dst_resource_list.compare(
                manifest_doc)

            # This exception is not caught by the handlers below, so it propagates
            # to the caller and the final status assignment is skipped.
            raise NotImplementedError("This class is not fully implemented.")

            # NOTE(review): the statements from here to the except clauses are
            # unreachable; they look like scaffolding for the unfinished implementation.
            print(len(same), len(updated), len(deleted), len(created))

            print("same")
            for resource in same:
                print(resource)
            print("updated")
            for resource in updated:
                print(resource)
            print("deleted")
            for resource in deleted:
                print(resource)
            print("created")
            for resource in created:
                print(resource)
                base_uri, local_path = DestinationMap().find_local_path(
                    resource.uri)
                print(base_uri, local_path)

        except AssertionError as err:
            # Wrong capability or no destination found.
            self.logger.debug("%s Error: %s" % (self.pack_uri, str(err)))
            self.status = Status.parse_error
            self.exceptions.append(err)
        except SitemapParseError as err:
            # NOTE(review): this handler logs self.source_uri while the rest of the
            # method uses self.pack_uri - confirm the attribute exists on this class.
            self.logger.debug("%s Unreadable source: %s" %
                              (self.source_uri, str(err)))
            self.status = Status.parse_error
            self.exceptions.append(err)

        # Only reached after one of the handled exceptions above; this overwrites
        # the parse_error status set in the handler.
        self.status = Status.processed_with_exceptions if self.has_exceptions(
        ) else Status.processed
Exemplo n.º 26
0
 def setUp(self):
     """Reset Config, register a SitemapWriter as dump listener and map the source to d7."""
     source_uri = "http://localhost:8000/rs/source"
     Config.__set_config_filename__("test-files/config.txt")
     Config().__drop__()
     des.dump.dump_listeners.append(SitemapWriter())
     DestinationMap().__set_destination__(source_uri, "rs/destination/d7")
Exemplo n.º 27
0
 def setUp(self):
     """Drop Config and DestinationMap after pointing them at the test files; reset the desclient."""
     config_file = "test-files/config.txt"
     desmap_file = "test-files/desmap.txt"

     Config.__set_config_filename__(config_file)
     Config().__drop__()
     DestinationMap.__set_map_filename__(desmap_file)
     DestinationMap().__drop__()
     des.desclient.reset_instance()