Пример #1
0
    def run(self, cfg: Optional[DictConfig]):
        """Clear the graph db.

        .. warning::

            **task.force = true** must be passed. Without it a warning panel and
            the task help are printed and :class:`HelpException` is raised.

        With ``task.only_edges`` set, only ``driver.clear_relationships()`` is
        called; otherwise ``driver.clear()`` is called.

        :param cfg: the configuration file
        :return: None
        :raises HelpException: if ``cfg.task.force`` is falsy
        """
        driver = self.get_driver(cfg)
        logger.info("clearing database...")

        # Guard clause: refuse to touch the database unless explicitly forced.
        if not cfg.task.force:
            warning_panel = Panel(
                "This task will delete the entire database\n"
                "Please use `task.force=true` to force db clearing.",
                style="white on red",
                title="warning",
                expand=False,
            )
            print(warning_panel)
            self.help()
            raise HelpException(
                "This task will delete the entire database"
                " Please use task.force=true to force db clearing."
            )

        # Pick the clearing operation, then run it.
        if cfg.task.only_edges:
            clear_db = driver.clear_relationships
        else:
            clear_db = driver.clear
        clear_db()
Пример #2
0
def init_config_store():
    """Register the configuration schemas with the ``ConfigStore`` singleton.

    Stores the root ``config`` node, the ``default`` entries of the
    ``aquarium`` and ``neo`` groups, and one ``task`` group entry per item in
    ``Task.registered_tasks`` (logging each registration).

    :return: None
    """
    store = ConfigStore.instance()
    store.store(name="config", node=Config)

    # Connection schemas, registered in this order under the name "default".
    default_nodes = (
        ("aquarium", AquariumConnection),
        ("neo", NeoConnetion),
    )
    for group, node in default_nodes:
        store.store(group=group, name="default", node=node)
    # NOTE: a "job" group (node=Job) is currently not registered.

    for task_name, task_cls in Task.registered_tasks.items():
        message = "Registering task {} ({})".format(task_name, task_cls.__name__)
        logger.info(message)
        store.store(group="task", name=task_name, node=task_cls)
Пример #3
0
    def get_aq(cfg: DictConfig) -> AqSession:
        """Create an :class:`AqSession <pydent.aqsession.AqSession>` from the
        configuration.

        The session is built from ``cfg.aquarium.user``,
        ``cfg.aquarium.password`` and ``cfg.aquarium.uri`` (in that order).

        :param cfg: The configuration file
        :return: The :class:`AqSession <pydent.aqsession.AqSession>`
        """
        logger.info("Initializing Aquarium Driver...")
        aquarium = cfg.aquarium
        return AqSession(aquarium.user, aquarium.password, aquarium.uri)
Пример #4
0
    def get_driver(cfg: DictConfig) -> AquariumETLDriver:
        """Create an :class:`AquariumETLDriver <aqneodriver.driver.AquariumETLDriver>`
        from the configuration.

        The driver is built from ``cfg.neo.uri``, ``cfg.neo.user`` and
        ``cfg.neo.password`` (in that order).

        :param cfg: The configuration file
        :return: The :class:`AquariumETLDriver <aqneodriver.driver.AquariumETLDriver>`
        """
        logger.info("Initializing Neo4j Driver...")
        neo = cfg.neo
        return AquariumETLDriver(neo.uri, neo.user, neo.password)
Пример #5
0
    def run(self, cfg: DictConfig):
        """Build inventory nodes for the samples already present in the graph db.

        Sample ids are read from the graph db with
        ``MATCH (n:Sample) RETURN n.id`` (limited to ``cfg.task.query.n_items``
        rows when that value is greater than -1). The matching samples are then
        looked up with ``aq.Sample.find`` and converted to a payload by
        ``aq_inventory_to_cypher``. When ``cfg.task.create_nodes`` is set the
        payload is written through ``driver.pool(n_cpus).write``.

        :param cfg: the configuration
        :return: None
        :raises ValueError: if ``cfg.task.query`` is empty
        """
        if not cfg.task.query:
            raise ValueError("Query must be provided.")
        driver, aq = self.sessions(cfg)
        with Progress(
            "[progress.description]{task.description}",
            BarColumn(),
            "[progress.percentage]{task.percentage:>3.0f}%",
            TimeRemainingColumn(),
            TransferSpeedColumn(),
            DownloadColumn(),
        ) as progress:
            # Fall back to the machine's cpu count when n_jobs is unset/0.
            n_cpus = cfg.task.n_jobs or os.cpu_count()

            # TASK 0
            logger.info("Requesting Aquarium inventory...")

            match_query = "MATCH (n:Sample) RETURN n.id"
            if cfg.task.query.n_items > -1:
                match_query += " LIMIT {}".format(cfg.task.query.n_items)
            results = driver.read(match_query)
            sample_ids = [r[0] for r in results]

            models = aq.Sample.find(sample_ids)
            logger.info("Found {} samples in graph db".format(len(models)))

            node_payload = aq_inventory_to_cypher(aq, models)

            if cfg.task.strict:

                # Strict mode: propagate every write error.
                def error_callback(e: Exception):
                    raise e

            else:
                error_callback = self.catch_constraint_error

            task1 = progress.add_task("adding nodes...")
            # TODO: indicate when creation is skipped
            if cfg.task.create_nodes:
                n_nodes = len(node_payload)
                # Go through the Progress API instead of indexing
                # ``progress.tasks`` by task id: the list position only matches
                # the id while no task has been removed.
                progress.update(task1, total=n_nodes)
                driver.pool(n_cpus).write(
                    node_payload,
                    callback=lambda _: progress.update(task1, advance=1),
                    chunksize=cfg.task.chunksize,
                    error_callback=error_callback,
                )
                # Force the bar to 100% once the write call has returned.
                progress.update(task1, completed=n_nodes)
Пример #6
0
    def run(self, cfg: Optional[DictConfig] = None):
        """Update the graphdb using some Aquarium sample query.

        The last ``cfg.task.query.n_samples`` samples (optionally restricted to
        the user whose login is ``cfg.task.query.user``) are requested from
        Aquarium and converted to a payload by ``aq_samples_to_cypher``. When
        ``cfg.task.create_nodes`` is set the payload is written through
        ``driver.pool(n_cpus).write``.

        Although ``cfg`` defaults to ``None`` in the signature, the body reads
        ``cfg.task`` unconditionally, so a configuration is required.

        :param cfg: the configuration
        :return: None
        :raises ValueError: if ``cfg.task.query`` is empty, or if
            ``cfg.task.query.user`` does not match any Aquarium user
        """
        # Validate before opening any session so a bad config fails fast.
        if not cfg.task.query:
            raise ValueError("Query must be provided.")

        driver, aq = self.sessions(cfg)

        logger.info("Requesting Aquarium inventory...")
        query = {}
        if cfg.task.query.user:
            users = aq.User.where({"login": cfg.task.query.user})
            if not users:
                # Previously surfaced as an opaque IndexError from ``[0]``.
                raise ValueError(
                    "No Aquarium user found with login '{}'.".format(
                        cfg.task.query.user
                    )
                )
            query["user_id"] = users[0].id
        n_samples = cfg.task.query.n_samples
        models = aq.Sample.last(n_samples, query)

        with Progress(
            "[progress.description]{task.description}",
            BarColumn(),
            "[progress.percentage]{task.percentage:>3.0f}%",
            TimeRemainingColumn(),
            TransferSpeedColumn(),
            DownloadColumn(),
        ) as progress:
            # Fall back to the machine's cpu count when n_jobs is unset/0.
            n_cpus = cfg.task.n_jobs or os.cpu_count()

            task0 = progress.add_task(
                "[blue]collecting Aquarium samples[/blue]", total=100
            )

            if cfg.task.strict:

                # Strict mode: propagate every write error.
                def error_callback(e: Exception):
                    raise e
            else:
                error_callback = self.catch_constraint_error

            # The write bar is only shown when nodes will actually be created.
            if cfg.task.create_nodes:
                task1 = progress.add_task(
                    "[red]writing nodes to [bold]neo4j[/bold]...[/red] ([green]cpus: {cpus}[/green])".format(
                        cpus=n_cpus
                    )
                )
            else:
                task1 = None

            with infinite_task_context(progress, task0) as callback:
                node_payload = aq_samples_to_cypher(
                    aq,
                    models,
                    new_node_callback=callback
                )

            if cfg.task.create_nodes:
                n_nodes = len(node_payload)
                # Go through the Progress API instead of indexing
                # ``progress.tasks`` by task id: the list position only matches
                # the id while no task has been removed.
                progress.update(task1, total=n_nodes)
                driver.pool(n_cpus).write(
                    node_payload,
                    callback=lambda x: progress.update(task1, advance=len(x)),
                    chunksize=cfg.task.chunksize,
                    error_callback=error_callback,
                )
                # Force the bar to 100% once the write call has returned.
                progress.update(task1, completed=n_nodes)
Пример #7
0
 def wrapped(self, cfg, **kwargs):
     """Call the wrapped callable ``f`` under this object's log level.

     ``f(self, cfg, **kwargs)`` is invoked inside
     ``log_level_context(self.log_level)``, with an info message logged
     before and after the call.

     :param cfg: forwarded to ``f``
     :param kwargs: forwarded to ``f``
     :return: whatever ``f`` returns
     """
     with log_level_context(self.log_level):
         start_message = "running {}".format(str(self))
         logger.info(start_message)
         outcome = f(self, cfg, **kwargs)
         # str(self) is evaluated again on purpose, after ``f`` has run.
         done_message = "completed {}".format(str(self))
         logger.info(done_message)
     return outcome