Пример #1
0
    def test_from_lines_strict_raises_on_error(self):
        """
        Under ``Strict`` stringency, parsing must raise on the first
        validation error instead of collecting it.

        The input repeats the same header key, and the raised exception is
        expected to carry the duplicate-keys error type.
        """
        duplicated = ["#key1 value"] * 2
        with self.assertRaises(MafFormatException) as raised:
            MafHeader.from_lines(
                lines=duplicated,
                validation_stringency=ValidationStringency.Strict,
            )

        failure = raised.exception
        self.assertIn("Multiple header lines", str(failure))
        self.assertEqual(failure.tpe,
                         MafValidationErrorType.HEADER_DUPLICATE_KEYS)
Пример #2
0
 def test_from_lines_default_to_basic(self):
     """
     A header with a version line but no annotation line parses without
     errors: ``annotation()`` is None, yet a scheme with an annotation
     spec is still returned.
     """
     header = MafHeader.from_lines(
         lines=[TestMafHeader.__version_line, "#key1 value", "#key2 value"],
         validation_stringency=ValidationStringency.Silent,
     )
     error_count = len(header.validation_errors)
     self.assertEqual(error_count, 0)
     self.assertIsNone(header.annotation())
     self.assertIsNotNone(header.scheme())
     self.assertIsNotNone(header.scheme().annotation_spec())
Пример #3
0
    def test_from_lines_no_sort_order(self):
        """
        A header with only the version and annotation lines (no sort-order
        line) parses without errors and reports the ``Unsorted`` sort order.
        """
        lines = [TestMafHeader.__version_line, TestMafHeader.__annotation_line]
        header = MafHeader.from_lines(
            lines=lines, validation_stringency=ValidationStringency.Silent)

        # assertEqual (rather than assertTrue on a comparison) reports the
        # actual number of errors when the check fails.
        self.assertEqual(len(header.validation_errors), 0)
        self.assertEqual(header.sort_order().name(),
                         sort_order.Unsorted.name())
Пример #4
0
    def test_from_lines_supported_sort_order(self):
        """
        For every sort order returned by ``sort_order.SortOrder.all()``, a
        header declaring that sort order parses without errors and reports
        the same sort order name back.
        """
        for so in sort_order.SortOrder.all():
            lines = [
                TestMafHeader.__version_line,
                TestMafHeader.__annotation_line,
                "%s%s %s" % (MafHeader.HeaderLineStartSymbol,
                             MafHeader.SortOrderKey, so.name()),
            ]
            header = MafHeader.from_lines(
                lines=lines, validation_stringency=ValidationStringency.Silent)

            # assertEqual reports the actual error count on failure; the
            # message identifies which sort order was being checked.
            self.assertEqual(len(header.validation_errors), 0,
                             "sort order: %s" % so.name())
            self.assertEqual(header.sort_order().name(), so.name())
Пример #5
0
    def test_from_lines_unsupported_annotation(self):
        """
        Checks the HEADER_UNSUPPORTED_ANNOTATION_SPEC error in two situations.

        1. A header built from the version and annotation spec of
           ``GdcV1_0_0_BasicScheme``: exactly one such error is expected,
           while both the annotation and the scheme are still available.
        2. Headers whose annotation value is not a known spec: exactly one
           such error is expected, the annotation is still available, but no
           scheme is returned.
        """
        scheme = GdcV1_0_0_BasicScheme()
        lines = [
            "%s%s %s" % (MafHeader.HeaderLineStartSymbol, MafHeader.VersionKey,
                         scheme.version()),
            "%s%s %s" % (
                MafHeader.HeaderLineStartSymbol,
                MafHeader.AnnotationSpecKey,
                scheme.annotation_spec(),
            ),
        ]
        header = MafHeader.from_lines(
            lines=lines, validation_stringency=ValidationStringency.Silent)

        # assertEqual reports the actual error count when the check fails.
        self.assertEqual(len(header.validation_errors), 1)
        self.assertEqual(
            header.validation_errors[0].tpe,
            MafValidationErrorType.HEADER_UNSUPPORTED_ANNOTATION_SPEC,
        )
        self.assertIsNotNone(header.annotation())
        self.assertIsNotNone(header.scheme())

        bad_annotation_lines = [
            "#%s not_annotation" % MafHeader.AnnotationSpecKey,
            "#%s %sx" % (MafHeader.AnnotationSpecKey,
                         TestMafHeader.AnnotationSpec),
        ]
        for line in bad_annotation_lines:
            lines = [TestMafHeader.__version_line, line]
            header = MafHeader.from_lines(
                lines=lines, validation_stringency=ValidationStringency.Silent)

            # The offending line is passed as the failure message so a
            # failing iteration can be identified.
            self.assertEqual(len(header.validation_errors), 1, line)
            self.assertEqual(
                header.validation_errors[0].tpe,
                MafValidationErrorType.HEADER_UNSUPPORTED_ANNOTATION_SPEC,
            )
            self.assertIsNotNone(header.annotation())
            self.assertIsNone(header.scheme())
Пример #6
0
 def test_from_lines_misformatted_line(self):
     """
     A line that lacks the header start symbol is left out of the header
     (two records remain from three input lines) and is reported as a
     single HEADER_LINE_MISSING_START_SYMBOL error.
     """
     malformed = "key1 value1"
     header = MafHeader.from_lines(
         lines=[
             TestMafHeader.__version_line,
             TestMafHeader.__annotation_line,
             malformed,
         ],
         validation_stringency=ValidationStringency.Silent,
     )
     self.assertEqual(len(header), 2)

     errors = header.validation_errors
     self.assertEqual(len(errors), 1)
     self.assertEqual(
         errors[0].tpe,
         MafValidationErrorType.HEADER_LINE_MISSING_START_SYMBOL,
     )
Пример #7
0
    def test_from_lines_missing_version(self):
        """
        A header with an annotation line but no version line yields exactly
        one HEADER_MISSING_VERSION error and ``version()`` is None; a scheme
        with both a version and an annotation spec is still returned.
        """
        lines = [TestMafHeader.__annotation_line, "#key1 value", "#key2 value"]
        header = MafHeader.from_lines(
            lines=lines, validation_stringency=ValidationStringency.Silent)

        # assertEqual reports the actual error count when the check fails.
        self.assertEqual(len(header.validation_errors), 1)
        self.assertEqual(
            header.validation_errors[0].tpe,
            MafValidationErrorType.HEADER_MISSING_VERSION,
        )
        self.assertIsNone(header.version())
        self.assertIsNotNone(header.scheme())
        self.assertIsNotNone(header.scheme().version())
        self.assertIsNotNone(header.scheme().annotation_spec())
Пример #8
0
    def test_record_validation_error(self):
        """
        Writes a record containing an invalid value with a ``Lenient`` writer
        and checks that nothing goes to stdout, that the expected error names
        appear on stderr, and that the written file holds ``None`` in place
        of the invalid value.
        """
        import os

        scheme = TestMafWriter.TestScheme()
        fd, path = tempfile.mkstemp()
        # mkstemp returns an open OS-level descriptor and never deletes the
        # file.  The writer reopens the file by path, so release the
        # descriptor right away and remove the file when the test finishes.
        os.close(fd)
        self.addCleanup(os.remove, path)

        # Create the header
        header_lines = (MafHeader.scheme_header_lines(scheme) +
                        ["#key1 value1", "#key2 value2"] +
                        ["str1\tNone\tstr2"])
        header = MafHeader.from_lines(
            lines=header_lines,
            validation_stringency=ValidationStringency.Silent)

        # Create the record
        values = ["string2", "error", "string1"]
        record_line = MafRecord.ColumnSeparator.join(values)
        record = MafRecord.from_line(
            line=record_line,
            scheme=scheme,
            line_number=1,
            validation_stringency=ValidationStringency.Silent,
        )

        # Write the header, and the record twice (once through ``+=`` and
        # once through ``write``)
        with captured_output() as (stdout, stderr):
            writer = MafWriter.from_path(
                header=header,
                validation_stringency=ValidationStringency.Lenient,
                path=path,
            )
            writer += record
            writer.write(record)
            writer.close()
        stdout_lines = stdout.getvalue().rstrip('\r\n').split("\n")
        stderr_lines = stderr.getvalue().rstrip('\r\n').split("\n")
        self.assertListEqual(stdout_lines, [''])

        # The errors that should be written to stderr
        errors = [
            "HEADER_UNSUPPORTED_VERSION",
            "HEADER_UNSUPPORTED_ANNOTATION_SPEC",
            "RECORD_COLUMN_WITH_NO_VALUE",
            "RECORD_COLUMN_WITH_NO_VALUE",
        ]
        self.assertListEqualAndIn(errors, stderr_lines)

        # The second column should be None
        err_record_line = record_line.replace("error", "None")
        self.assertListEqual(read_lines(path),
                             header_lines + [err_record_line, err_record_line])
Пример #9
0
    def test_from_lines_unsupported_version(self):
        """
        Headers whose version value is not a known version yield exactly one
        HEADER_UNSUPPORTED_VERSION error; ``version()`` is still available
        but no scheme is returned.
        """
        bad_version_lines = [
            "#%s not_version" % MafHeader.VersionKey,
            "#%s %sx" % (MafHeader.VersionKey, TestMafHeader.Version),
        ]
        for line in bad_version_lines:
            lines = [line, TestMafHeader.__annotation_line]
            header = MafHeader.from_lines(
                lines=lines, validation_stringency=ValidationStringency.Silent)

            # assertEqual reports the actual error count on failure; the
            # offending line is passed as the message to identify the
            # failing iteration.
            self.assertEqual(len(header.validation_errors), 1, line)
            self.assertEqual(
                header.validation_errors[0].tpe,
                MafValidationErrorType.HEADER_UNSUPPORTED_VERSION,
            )
            self.assertIsNotNone(header.version())
            self.assertIsNone(header.scheme())
Пример #10
0
    def test_from_lines_duplicate_keys(self):
        """
        Two header lines sharing the same key yield exactly one
        HEADER_DUPLICATE_KEYS error whose text mentions the duplicated key.
        """
        lines = [
            TestMafHeader.__version_line,
            TestMafHeader.__annotation_line,
            "#dupkey value",
            "#dupkey value",
        ]
        header = MafHeader.from_lines(
            lines=lines, validation_stringency=ValidationStringency.Silent)

        # assertEqual reports the actual error count when the check fails.
        self.assertEqual(len(header.validation_errors), 1)
        self.assertIn("dupkey", str(header.validation_errors[0]))
        self.assertEqual(
            header.validation_errors[0].tpe,
            MafValidationErrorType.HEADER_DUPLICATE_KEYS,
        )
Пример #11
0
    def test_close(self):
        """
        Checks that ``close()`` flushes pending output to the file and closes
        the underlying handle, so that a later write on the handle raises
        ``ValueError``.
        """
        import os

        fd, path = tempfile.mkstemp()
        # mkstemp returns an open OS-level descriptor and never deletes the
        # file.  The writer reopens the file by path, so release the
        # descriptor right away and remove the file when the test finishes.
        os.close(fd)
        self.addCleanup(os.remove, path)

        lines = [
            TestMafWriter.__version_line,
            TestMafWriter.__annotation_line,
            "#key1 value1",
            "#key2 value2",
            TestMafWriter.__keys_line,
        ]
        header = MafHeader.from_lines(lines=lines)
        writer = MafWriter.from_path(header=header, path=path)
        # Write through the private handle on purpose: the text must reach
        # the file once the writer is closed.
        writer._handle.write("LAST")  # Naughty
        writer.close()
        self.assertListEqual(read_lines(path), lines + ["LAST"])

        with self.assertRaises(ValueError):
            writer._handle.write("Oh no")
Пример #12
0
 def test_from_lines_missing_annotation(self):
     """
     A header carrying the ``NoRestrictionsScheme`` version but no
     annotation line yields a single HEADER_MISSING_ANNOTATION_SPEC
     error; neither an annotation nor a scheme is returned.
     """
     version_line = "%s%s %s" % (
         MafHeader.HeaderLineStartSymbol,
         MafHeader.VersionKey,
         NoRestrictionsScheme.version(),
     )
     header = MafHeader.from_lines(
         lines=[version_line, "#key1 value", "#key2 value"],
         validation_stringency=ValidationStringency.Silent,
     )

     errors = header.validation_errors
     self.assertEqual(len(errors), 1)
     self.assertEqual(
         errors[0].tpe,
         MafValidationErrorType.HEADER_MISSING_ANNOTATION_SPEC,
     )
     self.assertIsNone(header.annotation())
     self.assertIsNone(header.scheme())
Пример #13
0
    def test_from_lines_supported_annotation(self):
        """
        A header built from the version and annotation spec of
        ``GdcV1_0_0_ProtectedScheme`` parses without errors, and both the
        header and the scheme it resolves report that version and
        annotation spec.
        """
        scheme = GdcV1_0_0_ProtectedScheme()
        lines = [
            "%s%s %s" % (MafHeader.HeaderLineStartSymbol, MafHeader.VersionKey,
                         scheme.version()),
            "%s%s %s" % (
                MafHeader.HeaderLineStartSymbol,
                MafHeader.AnnotationSpecKey,
                scheme.annotation_spec(),
            ),
        ]
        header = MafHeader.from_lines(
            lines=lines, validation_stringency=ValidationStringency.Silent)

        # assertEqual reports the actual error count when the check fails.
        self.assertEqual(len(header.validation_errors), 0)
        self.assertEqual(header.version(), scheme.version())
        self.assertEqual(header.annotation(), scheme.annotation_spec())
        self.assertEqual(header.scheme().version(), scheme.version())
        self.assertEqual(header.scheme().annotation_spec(),
                         scheme.annotation_spec())
Пример #14
0
    def test_from_lines_unsupported_sort_order(self):
        """
        A sort-order line with an unknown value yields exactly one
        HEADER_UNSUPPORTED_SORT_ORDER error, and the header reports the
        ``Unsorted`` sort order.
        """
        lines = [
            TestMafHeader.__version_line,
            TestMafHeader.__annotation_line,
            "%s%s %s" % (
                MafHeader.HeaderLineStartSymbol,
                MafHeader.SortOrderKey,
                "not-a-sort-order",
            ),
        ]
        header = MafHeader.from_lines(
            lines=lines, validation_stringency=ValidationStringency.Silent)

        # assertEqual reports the actual error count when the check fails.
        self.assertEqual(len(header.validation_errors), 1)
        self.assertEqual(
            header.validation_errors[0].tpe,
            MafValidationErrorType.HEADER_UNSUPPORTED_SORT_ORDER,
        )
        self.assertEqual(header.sort_order().name(),
                         sort_order.Unsorted.name())
Пример #15
0
    def add_records(self):
        """
        Writes the same record twice (once through ``+=`` and once through
        ``write``) and checks that the file holds the header lines followed
        by the two record lines.

        NOTE(review): the name does not start with ``test``, so default
        unittest discovery will not collect this method -- confirm whether
        that is intentional.
        """
        import os

        scheme = TestMafWriter.TestScheme()
        fd, path = tempfile.mkstemp()
        # mkstemp returns an open OS-level descriptor and never deletes the
        # file.  The writer reopens the file by path, so release the
        # descriptor right away and remove the file when the test finishes.
        os.close(fd)
        self.addCleanup(os.remove, path)

        header_lines = MafHeader.scheme_header_lines(scheme) + [
            "#key1 value1",
            "#key2 value2",
        ]
        header = MafHeader.from_lines(lines=header_lines)
        writer = MafWriter.from_path(header=header, path=path)
        values = ["string2", "3.14", "string1"]
        record_line = MafRecord.ColumnSeparator.join(values)
        record = MafRecord.from_line(line=record_line,
                                     scheme=scheme,
                                     line_number=1)
        writer += record
        writer.write(record)
        writer.close()

        self.assertListEqual(read_lines(path),
                             header_lines + [record_line, record_line])
Пример #16
0
    def test_with_sorting(self):
        """
        Writes three records through a writer created with
        ``assume_sorted=False`` and a header declaring the ``Coordinate``
        sort order, then reads the file back and checks the sort order in
        the header and the start/end positions of the records.
        """
        import os

        scheme = TestMafWriter.TestCoordinateScheme()
        fd, path = tempfile.mkstemp()
        # mkstemp returns an open OS-level descriptor and never deletes the
        # file.  The writer reopens the file by path, so release the
        # descriptor right away and remove the file when the test finishes.
        os.close(fd)
        self.addCleanup(os.remove, path)

        # Create the header
        header_lines = (MafHeader.scheme_header_lines(scheme) +
                        ["#key1 value1", "#key2 value2"] + [
                            "%s%s %s" % (
                                MafHeader.HeaderLineStartSymbol,
                                MafHeader.SortOrderKey,
                                Coordinate().name(),
                            )
                        ] + ["\t".join(scheme.column_names())])
        header = MafHeader.from_lines(
            lines=header_lines,
            validation_stringency=ValidationStringency.Silent)

        # Write the header and three records
        writer = MafWriter.from_path(
            header=header,
            validation_stringency=ValidationStringency.Lenient,
            path=path,
            assume_sorted=False,
        )
        writer += TestMafWriter.DummyRecord("chr1", 2, 2)
        writer += TestMafWriter.DummyRecord("chr1", 3, 3)
        writer += TestMafWriter.DummyRecord("chr1", 4, 4)
        writer.close()

        # Read everything back; close the reader even if iteration fails.
        reader = MafReader.reader_from(path=path, scheme=scheme)
        try:
            header = reader.header()
            records = [rec for rec in reader]
        finally:
            reader.close()

        self.assertEqual(header.sort_order().name(), Coordinate.name())

        self.assertListEqual([r["Start_Position"].value for r in records],
                             [2, 3, 4])
        self.assertListEqual([r["End_Position"].value for r in records],
                             [2, 3, 4])
Пример #17
0
    def test_gz_support(self):
        """
        Writes a header to a path ending in ``.gz`` and checks that the lines
        read back match the input, that the writer's header renders to the
        same text, and that nothing is printed to stdout or stderr.
        """
        import os

        fd, path = tempfile.mkstemp(suffix=".gz")
        # mkstemp returns an open OS-level descriptor and never deletes the
        # file.  The writer reopens the file by path, so release the
        # descriptor right away and remove the file when the test finishes.
        os.close(fd)
        self.addCleanup(os.remove, path)

        lines = [
            TestMafWriter.__version_line,
            TestMafWriter.__annotation_line,
            "#key1 value1",
            "#key2 value2",
            TestMafWriter.__keys_line,
        ]
        with captured_output() as (stdout, stderr):
            header = MafHeader.from_lines(lines=lines)
            writer = MafWriter.from_path(header=header, path=path)
            writer.close()
            self.assertListEqual(read_lines(path), lines)
            self.assertEqual(
                str(writer.header()) + "\n" + TestMafWriter.__keys_line,
                "\n".join(lines),
            )
        stdout_lines = stdout.getvalue().rstrip('\r\n').split("\n")
        stderr_lines = stderr.getvalue().rstrip('\r\n').split("\n")
        self.assertListEqual(stdout_lines, [''])
        self.assertListEqual(stderr_lines, [''])
Пример #18
0
    def __init__(self, lines,
                 closeable=None,
                 validation_stringency=None,
                 scheme=None):
        """ Initializes a MAF reader and reads in the header and column
        definitions.

        If no scheme is provided, the scheme will be determined from the
        version and annotation pragmas in the header, and matched against the
        known set of schemes.  If the scheme is not recognized, then the
        column names will determine a custom scheme and no assumption is made
        about the values of each column.

        Construction consumes the leading header lines (those starting with
        ``MafHeader.HeaderLineStartSymbol``) and the line that follows them,
        which is taken to be the column-name line.  Every validation error
        found along the way (from the header, or from comparing the column
        names with the scheme) is appended to ``self.validation_errors`` and
        finally handed to ``MafValidationError.process_validation_errors``.

        :param lines: the lines (iterable) from the MAF file.
        :param closeable: any closeable object (has a ``close()`` method) that
        will be closed when ``close()`` is called.
        :param validation_stringency: the validation stringency; ``Silent``
        is used when ``None`` is given.
        :param scheme: a scheme that should be used to override the scheme in
        the header.
        """
        self.__iter = iter(lines)
        self.__closeable = closeable
        # fall back to Silent when the caller did not choose a stringency
        self.validation_stringency = \
            ValidationStringency.Silent if (validation_stringency is None) \
                else validation_stringency
        self.__logger = Logger.get_logger(self.__class__.__name__)
        self.validation_errors = list()

        # look-ahead state; presumably maintained by ``__next_line__()``,
        # which is defined outside this block -- TODO confirm
        self.__next_line = None
        self.__line_number = 0

        def add_error(error):
            # collect the error; it is processed once at the end of __init__
            self.validation_errors.append(error)

        # read in the header lines: keep advancing while the current line
        # starts with the header symbol; the first line that does not (or
        # None) is left in ``self.__next_line`` for the column-name step
        header_lines = list()
        while True:
            self.__next_line__()
            if self.__next_line is not None \
                    and self.__next_line.startswith(MafHeader.HeaderLineStartSymbol):
                header_lines.append(self.__next_line)
            else:
                break
        self.__header = \
            MafHeader.from_lines(
                lines=header_lines,
                validation_stringency=self.validation_stringency)

        # carry the header's validation errors over to the reader
        for error in self.__header.validation_errors:
            add_error(error)

        # get the column names: the first non-header line, split on the
        # record column separator; then advance past it
        if self.__next_line is not None:
            column_names = self.__next_line.split(MafRecord.ColumnSeparator)
            self.__next_line__()
        else:
            column_names = None

        # update the scheme (``__update_scheme__`` is defined outside this
        # block; the code below relies on it having set ``self.__scheme``)
        self.__update_scheme__(scheme=scheme, column_names=column_names)

        # validate the column names against the scheme
        if column_names is not None:
            # match the column names against the scheme
            scheme_column_names = self.__scheme.column_names()
            if len(column_names) != len(scheme_column_names):
                # a count mismatch is reported once; names are not compared
                add_error(MafValidationError(
                    MafValidationErrorType.SCHEME_MISMATCHING_NUMBER_OF_COLUMN_NAMES,
                    "Found '%d' columns but expected '%d'" %
                    (len(column_names), len(scheme_column_names)),
                    line_number=self.__line_number - 1
                ))
            else:
                # same count: report every position whose name differs
                # (positions in the message are 1-based)
                for i, (column_name, scheme_column_name) in \
                        enumerate(zip(column_names, scheme_column_names)):
                    if column_name != scheme_column_name:
                        add_error(MafValidationError(
                            MafValidationErrorType.SCHEME_MISMATCHING_COLUMN_NAMES,
                            "Found column with name '%s' but expected '%s' for "
                            "the '%d'th column" %
                            (column_name, scheme_column_name, i + 1),
                            line_number=self.__line_number - 1
                        ))
        else:
            # the input ended before any column-name line was found
            add_error(MafValidationError(
                MafValidationErrorType.HEADER_MISSING_COLUMN_NAMES,
                "Found no column names",
                line_number=self.__line_number+1
            ))

        # process validation errors so far
        MafValidationError.process_validation_errors(
            validation_errors=self.validation_errors,
            validation_stringency=self.validation_stringency,
            name=self.__class__.__name__,
            logger=self.__logger
        )
Пример #19
0
    def __test_from_lines_lenient_or_silent(self, validation_stringency):
        """
        Checks that all errors are either printed out (Lenient) or not
        (Silent), and that a header is returned *without* the header lines
        that caused an error.

        The header is parsed with a logger that writes to a temporary file;
        the contents of that file are then compared with the expected
        duplicate-key messages.

        :param validation_stringency: either ``ValidationStringency.Silent``
        or ``ValidationStringency.Lenient``.
        """
        self.assertIn(
            validation_stringency,
            [ValidationStringency.Silent, ValidationStringency.Lenient],
        )
        lines = [
            TestMafHeader.__version_line,
            TestMafHeader.__annotation_line,
            "#key1 value1",
            "#key1 value2",
            "#key2 value3",
            "#key2 value4",
        ]
        expected_lines = [
            "Multiple header lines with key 'key1' found",
            "Multiple header lines with key 'key2' found",
        ]

        err_stream = tempfile.NamedTemporaryFile(delete=False, mode="w")
        err_file_name = err_stream.name
        try:
            # Always close the stream and remove the temporary file, even
            # when parsing raises.
            try:
                logger = Logger.get_logger(err_file_name, stream=err_stream)
                header = MafHeader.from_lines(
                    lines=lines,
                    validation_stringency=validation_stringency,
                    logger=logger)
            finally:
                err_stream.close()

            with open(err_file_name, "r") as reader:
                actual_lines = reader.readlines()
        finally:
            os.remove(err_file_name)

        if validation_stringency == ValidationStringency.Lenient:
            self.assertEqual(len(actual_lines), len(expected_lines))
            for actual, expected in zip(actual_lines, expected_lines):
                self.assertIn(expected, actual)
        else:
            self.assertEqual(len(actual_lines), 0)

        # Only the first line for each duplicated key is kept.
        self.assertEqual(len(header), 4)
        self.assertListEqual(
            list(header.keys()),
            [
                MafHeader.VersionKey, MafHeader.AnnotationSpecKey, "key1",
                "key2"
            ],
        )
        self.assertEqual(
            [str(record.value) for record in header.values()],
            [
                TestMafHeader.Version, TestMafHeader.AnnotationSpec, "value1",
                "value3"
            ],
        )
        # NOTE(review): zip stops at the shorter input, so only the first
        # three of the four records are type-checked here -- confirm whether
        # the fourth record should be checked as well.
        for record, clzz in zip(
                header.values(),
                [MafHeaderVersionRecord, MafHeaderRecord, MafHeaderRecord]):
            self.assertIsInstance(record, clzz)