Example #1
0
def run_lexmapr(input_file):
    """Run the LexMapr pipeline on an uploaded file and return its output.

    :param input_file: uploaded file object exposing ``chunks()`` and a
        filename via ``str()`` — presumably a Django upload; confirm at
        the caller
    :return: contents of the LexMapr TSV output, or a generic error
        message string if the pipeline raised
    """
    # LexMapr wants real on-disk paths, so materialize the upload inside
    # a temporary directory that is removed automatically on exit.
    with TemporaryDirectory() as tmp_dir:
        tmp_input_path = tmp_dir + "/" + str(input_file)
        tmp_output_path = tmp_dir + "/output.tsv"

        # Write the upload in chunks to avoid holding it all in memory.
        # "wb" (not "ab"): the file is fresh, so truncate-write is the
        # correct mode and avoids surprises if the path ever pre-exists.
        with open(tmp_input_path, "wb") as tmp_input_fp:
            for input_file_chunk in input_file.chunks():
                tmp_input_fp.write(input_file_chunk)

        try:
            run(
                Namespace(input_file=tmp_input_path,
                          config="envo_foodon_config.json",
                          format="basic",
                          output=tmp_output_path,
                          version=False,
                          bucket=True))
        except Exception:
            # Details are deliberately hidden from the user; the original
            # bound the exception to an unused name.
            return "Oops! Something went wrong"

        # Read the result before the temporary directory is deleted.
        with open(tmp_output_path, "r") as tmp_output_fp:
            ret = tmp_output_fp.read()

    return ret
Example #2
0
def run_lexmapr(job_id):
    """Execute a ``PipelineJob`` object.

    Runs the original LexMapr pipeline using parameters set by the
    object, and in this function.

    The object's ``complete`` field is set to ``True`` whether or not
    execution succeeds; on failure ``err`` is set and ``err_msg``
    records the exception text.

    :param str job_id: ``id`` value of ``PipelineJob`` object
    """
    job = PipelineJob.objects.get(id=job_id)

    try:
        run(
            Namespace(input_file=job.input_file.path,
                      config=None,
                      full=None,
                      output=job.output_file.path,
                      version=False,
                      bucket=False,
                      no_cache=False,
                      profile="ifsac"))
    except Exception as e:
        # Record the failure on the job instead of propagating it.
        job.err = True
        job.err_msg = str(e)

    # Marked finished regardless of outcome, then persisted.
    job.complete = True
    job.save()
Example #3
0
    def run_pipeline_with_args(bucket=None):
        """Run the pipeline against the small_simple test fixture.

        ``bucket`` is forwarded unchanged to the pipeline; every other
        argument is a fixed default.
        """
        # Single shared input fixture used by all tests.
        input_path = os.path.join(ROOT, "tests", "test_input", "small_simple.csv")

        args = argparse.Namespace(
            input_file=input_path,
            config=None,
            full=None,
            output=None,
            version=False,
            bucket=bucket,
            no_cache=False,
            profile=None,
        )
        pipeline.run(args)
Example #4
0
    def run_pipeline_with_args(input_file, config=None):
        """Run pipeline with some default arguments.

        ``input_file`` must be specified. ``config`` can be specified,
        but otherwise is ``None`` by default; all remaining pipeline
        arguments are fixed below.
        """
        pipeline.run(
            argparse.Namespace(input_file=input_file,
                               config=config,
                               format="basic",
                               output=None,
                               version=False))
Example #5
0
    def test_pipeline_with_files(self):
        """Compare actual pipeline.run outputs to expected outputs.

        For each (expected output, input) pair in ``self.test_files``,
        run the pipeline on the input and compare the produced file's
        contents to the expected output file. A single AssertionError
        is raised at the end listing all failed files, so one mismatch
        does not hide the others.
        """
        # Names of expected-output files whose contents did not match.
        failures = []
        # Iterate over all expected outputs
        for expected_output in self.test_files:
            # Path of expected output file
            expected_output_path = os.path.join(
                os.path.dirname(__file__),
                "output/" + expected_output + ".tsv")
            # Input file name for this case (renamed: the original
            # shadowed the builtins ``input`` and ``format``).
            input_name = self.test_files[expected_output][0]
            input_path = os.path.join(os.path.dirname(__file__),
                                      "input/" + input_name + ".csv")
            # Format value
            output_format = self.test_files[expected_output][1]
            # mkstemp returns an OPEN os-level file descriptor; close it
            # immediately so it is not leaked (the original discarded it).
            fd, actual_output_path = tempfile.mkstemp()
            os.close(fd)
            # Run pipeline.run using input_path and actual_output_path;
            # an anonymous attribute-bag stands in for argparse.Namespace.
            pipeline.run(
                type(
                    "", (object, ), {
                        "input_file": input_path,
                        "output": actual_output_path,
                        "format": output_format,
                        "config": None
                    })())
            # Get actual_output_path contents
            with open(actual_output_path, "r") as actual_output_file:
                actual_output_contents = actual_output_file.read()
            # Get expected_output_path contents
            with open(expected_output_path, "r") as expected_output_file:
                expected_output_contents = expected_output_file.read()
            try:
                # Compare expected output with actual output
                self.assertMultiLineEqual(expected_output_contents,
                                          actual_output_contents)
            except AssertionError as e:
                # Report now, keep going so every failure is collected.
                print(e)
                failures.append(expected_output)
        if failures:
            print("Failed files:")
            for failure in failures:
                print(failure)
            raise AssertionError
Example #6
0
    def test_pipeline_with_files(self):
        """Compare actual pipeline.run outputs to expected outputs.

        For each expected-output / argument pair in ``self.test_files``,
        the pipeline is run and the resulting file's contents are
        compared with the expected file's contents. A single
        AssertionError listing every failed file is raised at the end,
        so one mismatch does not hide the others.
        """
        # Expected-output paths whose contents did not match.
        failures = []
        for expected_name, pipeline_args in self.test_files.items():
            # Where the expected output for this case lives.
            expected_path = os.path.join(ROOT, "tests", "test_output",
                                         expected_name + ".tsv")
            # Scratch file for the pipeline's actual output.
            actual_path = os.path.join(self.tmp_dir, "actual_output.tsv")
            # Per-case arguments override these defaults.
            merged_args = {"full": True, "bucket": False}
            merged_args.update(pipeline_args)
            pipeline.run(argparse.Namespace(
                input_file=merged_args["input"],
                config=None,
                full=merged_args["full"],
                output=actual_path,
                version=False,
                bucket=merged_args["bucket"],
                no_cache=False,
                profile=None))
            # Read back both files for comparison.
            with open(actual_path, "r") as fp:
                actual_contents = fp.read()
            with open(expected_path, "r") as fp:
                expected_contents = fp.read()
            try:
                self.assertMultiLineEqual(expected_contents, actual_contents)
            except AssertionError as err:
                # Report now, keep going so every failure is collected.
                print(err)
                failures.append(expected_path)
        if failures:
            print("Failed files:")
            for failed in failures:
                print(failed)
            raise AssertionError