def run_main(subcommand, lines, main_args=None, subcommand_args=None, to_stdout=False):
    """Run ``main`` for one subcommand against a temporary input file.

    ``lines`` are written to a temporary input file via ``tmp_file``; the
    command line is assembled as
    ``main_args + [subcommand, "--input", <in>] [+ "--output", <out>] + subcommand_args``
    and executed with stdout/stderr captured.

    :param subcommand: name of the subcommand to place on the command line
    :param lines: lines used to populate the temporary input file
    :param main_args: optional arguments placed before the subcommand; the
        caller's list is copied, never modified
    :param subcommand_args: optional arguments appended after the
        input/output options
    :param to_stdout: when true, ``--output`` is not passed; the temporary
        output file is still read back, so ``out_lines`` reflects whatever
        that file contains
    :return: tuple ``(out_lines, stdout, stderr)``; each element is a list of
        lines without trailing line terminators (an empty captured stream
        yields ``[""]``)
    """
    in_fh, in_fn = tmp_file(lines=lines)
    in_fh.close()

    out_fd, out_fn = tempfile.mkstemp()
    # mkstemp() hands back an already-open OS-level descriptor; only the path
    # is needed here, so close it immediately instead of leaking it.
    os.close(out_fd)

    try:
        # Work on a copy so the caller's list is not extended as a side effect.
        args = list(main_args) if main_args else []
        args.extend([subcommand, "--input", str(in_fn)])
        if not to_stdout:
            args.extend(["--output", str(out_fn)])
        if subcommand_args:
            args.extend(subcommand_args)

        with captured_output() as (stdout, stderr):
            main(args=args)

        with open(out_fn, "r") as fh:
            out_lines = [line.rstrip("\r\n") for line in fh]

        stdout_lines = stdout.getvalue().rstrip('\r\n').split("\n")
        stderr_lines = stderr.getvalue().rstrip('\r\n').split("\n")
    finally:
        # Remove the temporary files even when main() raises.
        os.remove(in_fn)
        os.remove(out_fn)

    return (out_lines, stdout_lines, stderr_lines)
def test_sorter_with_sort_order_args(self):
    """Build a MafSorter with a fasta index and exercise it via __test_sorter.

    A chromosome listed in the index ("chr5") must be accepted, while one
    that is not listed ("1") must raise ValueError.
    """
    # One fasta-index line per contig. Every element needs its own trailing
    # comma: without it Python concatenates adjacent string literals and two
    # contigs collapse into a single malformed line.
    lines = [
        "chr1\t248956422\t112\t70\t71",
        "chr2\t242193529\t252513167\t70\t71",
        "chr3\t198295559\t498166716\t70\t71",
        "chr4\t190214555\t699295181\t70\t71",
        "chr5\t181538259\t892227221\t70\t71",
        "chr6\t170805979\t1076358996\t70\t71",
        "chr7\t159345973\t1249605173\t70\t71",
        "chr8\t145138636\t1411227630\t70\t71",
        "chr9\t138394717\t1558439788\t70\t71",
        "chr10\t133797422\t1698811686\t70\t71",
    ]
    fd, fn = tmp_file(lines=lines)
    try:
        sorter = MafSorter(
            sort_order_name=BarcodesAndCoordinate.name(),
            max_objects_in_ram=100,
            fasta_index=fn,
        )
        self.__test_sorter(sorter=sorter, chromosome="chr5")
        with self.assertRaises(ValueError):
            self.__test_sorter(sorter=sorter, chromosome="1")
    finally:
        # Clean up the temporary index even when an assertion fails.
        fd.close()
        os.remove(fn)
def test_with_contigs(self):
    """Check the Coordinate sort key when a fasta index defines the contigs.

    Records on contigs listed in the index are compared via __test_diff; a
    record on a contig missing from the index must raise ValueError when its
    key is computed.
    """
    # One fasta-index line per contig. Every element needs its own trailing
    # comma: without it Python concatenates adjacent string literals and two
    # contigs collapse into a single malformed line.
    lines = [
        "chr1\t248956422\t112\t70\t71",
        "chr2\t242193529\t252513167\t70\t71",
        "chr3\t198295559\t498166716\t70\t71",
        "chr4\t190214555\t699295181\t70\t71",
        "chr5\t181538259\t892227221\t70\t71",
        "chr6\t170805979\t1076358996\t70\t71",
        "chr7\t159345973\t1249605173\t70\t71",
        "chr8\t145138636\t1411227630\t70\t71",
        "chr9\t138394717\t1558439788\t70\t71",
        "chr10\t133797422\t1698811686\t70\t71",
    ]
    fd, fn = tmp_file(lines=lines)
    try:
        sort_order = Coordinate(fasta_index=fn)
        sort_key = sort_order.sort_key()
        r1 = TestCoordinateKey.DummyRecord("chr1", 1, 2)
        r2 = TestCoordinateKey.DummyRecord("chr10", 1, 3)
        r3 = TestCoordinateKey.DummyRecord("no-chr", 1, 3)

        # both have contigs defined
        self.__test_diff(r1, r2, sort_key=sort_key)

        # contig undefined
        with self.assertRaises(ValueError):
            sort_key(r3)
    finally:
        # Clean up the temporary index even when an assertion fails.
        fd.close()
        os.remove(fn)
def __sort(self, lines, extra_args, test_func, to_stdout=False):
    """Run ``main`` on a temporary input file and hand the results to ``test_func``.

    :param lines: lines used to populate the temporary input file
    :param extra_args: arguments appended after the input/output options
    :param test_func: callable invoked as
        ``test_func(out_lines=..., stdout=..., stderr=...)``; each value is a
        list of lines without trailing line terminators
    :param to_stdout: when true, ``--output`` is not passed; the temporary
        output file is still read back
    """
    in_fh, in_fn = tmp_file(lines=lines)
    in_fh.close()

    out_fd, out_fn = tempfile.mkstemp()
    # mkstemp() hands back an already-open OS-level descriptor; only the path
    # is needed here, so close it immediately instead of leaking it.
    os.close(out_fd)

    try:
        # NOTE(review): the subcommand is "validate" although this helper is
        # named __sort -- confirm that this is intended.
        main_args = ["validate", "--input", str(in_fn)]
        if not to_stdout:
            main_args.extend(["--output", str(out_fn)])
        main_args.extend(extra_args)

        with captured_output() as (stdout, stderr):
            main(args=main_args)

        with open(out_fn, "r") as fh:
            out_lines = [line.rstrip("\r\n") for line in fh]

        stdout_lines = stdout.getvalue().rstrip('\r\n').split("\n")
        stderr_lines = stderr.getvalue().rstrip('\r\n').split("\n")

        test_func(out_lines=out_lines, stdout=stdout_lines, stderr=stderr_lines)
    finally:
        # Remove the temporary files even when main() or test_func() raises.
        os.remove(in_fn)
        os.remove(out_fn)
def test_empty_file(self):
    """On an empty file, peeking and reading give "" and the line number is 0."""
    handle, path = tmp_file(lines=[])
    reader = LineReader(fh=handle)

    # Probes are invoked strictly in this order.
    expectations = (
        (reader.peek_line, ""),
        (reader.read_line, ""),
        (reader.line_number, 0),
    )
    for probe, expected in expectations:
        self.assertEqual(probe(), expected)

    reader.close()
    os.remove(path)
def test_single_line(self):
    """Walk a one-line file: peek, read, then verify the end-of-input behaviour."""
    text = "A single line"
    handle, path = tmp_file(lines=[text])
    reader = LineReader(fh=handle)

    # Probes are invoked strictly in this order; the line number only moves
    # from 0 to 1 once the single line has been read.
    expectations = (
        (reader.line_number, 0),
        (reader.peek_line, text),
        (reader.read_line, text),
        (reader.line_number, 1),
        (reader.peek_line, ""),
        (reader.read_line, ""),
        (reader.line_number, 1),
    )
    for probe, expected in expectations:
        self.assertEqual(probe(), expected)

    reader.close()
    os.remove(path)
def test_multiple_line(self):
    """Iterate a three-line file, checking each line, its number and the next peek."""
    expected = ["A few", "good", "lines"]
    handle, path = tmp_file(lines=expected)
    reader = LineReader(fh=handle)

    seen = 0
    for position, actual in enumerate(reader, start=1):
        self.assertEqual(actual, expected[position - 1])
        self.assertEqual(position, reader.line_number())
        # While lines remain, peeking must expose the upcoming one.
        if position < len(expected):
            self.assertEqual(reader.peek_line(), expected[position])
        seen += 1

    self.assertEqual(seen, len(expected))
    reader.close()
    os.remove(path)
def test_with_fasta_index(self):
    """Run the "sort" subcommand on the test MAF and inspect the sorted output.

    Checks the record count, the presence of the sort-order pragma, the total
    line count, and that a chr13 record never directly follows a chr1 record.
    """
    # change the order of chromosomes!
    index_lines = [
        "chr13\t114364328\t2106716512\t70\t71",
        "chr1\t248956422\t112\t70\t71",
    ]
    fd, fn = tmp_file(lines=index_lines)
    # NOTE(review): the index file path is never handed to the subcommand
    # below -- confirm whether an option carrying it is missing.

    lines, header, records = self.read_test_maf()
    subcommand_args = [
        "--version", GdcV1_0_0_PublicScheme.version(),
        "--annotation", GdcV1_0_0_PublicScheme.annotation_spec(),
    ]
    out_lines, stdout, stderr = run_main(
        subcommand="sort",
        lines=lines,
        subcommand_args=subcommand_args,
    )

    # Check that we have the same # of records
    out_records = [
        out_line
        for out_line in out_lines
        if not out_line.startswith(("#", "Hugo_Symbol"))
    ]
    self.assertEqual(len(out_records), len(records))

    # Check that we added the sort pragma
    sort_order_line = "%s%s %s" % (
        MafHeader.HeaderLineStartSymbol,
        MafHeader.SortOrderKey,
        BarcodesAndCoordinate.name(),
    )
    self.assertTrue(sort_order_line in out_lines)

    scheme = find_scheme(
        version=GdcV1_0_0_PublicScheme.version(),
        annotation=GdcV1_0_0_PublicScheme.annotation_spec(),
    )

    # we should see chr13 before chr1
    self.assertEqual(len(out_lines) - 1, len(lines))  # added the pragma
    # NOTE(review): the flag is overwritten for every record, so only a chr13
    # record immediately after a chr1 record is detected -- confirm whether a
    # sticky flag was intended.
    previous_was_chr1 = False
    for out_line in out_lines:
        if out_line.startswith(MafHeader.HeaderLineStartSymbol):
            continue
        record = MafRecord.from_line(line=out_line, scheme=scheme)
        chromosome = record["Chromosome"]
        self.assertFalse(chromosome == "chr13" and previous_was_chr1)
        previous_was_chr1 = chromosome == "chr1"

    fd.close()
    os.remove(fn)
def test_from_line_reader_ok(self):
    """Parse a five-line header through a LineReader and verify keys and values."""
    header_lines = [
        TestMafHeader.__version_line,
        TestMafHeader.__annotation_line,
        TestMafHeader.__sort_order_line,
        "#key1 value1",
        "#key2 value2",
    ]
    handle, path = tmp_file(header_lines)
    reader = LineReader(handle)
    header = MafHeader.from_line_reader(
        line_reader=reader,
        validation_stringency=ValidationStringency.Silent,
    )
    handle.close()

    self.assertTrue(len(header.validation_errors) == 0)
    self.assertTrue(len(header) == 5)

    # Keys must come back in the order the lines were supplied.
    expected_keys = [
        MafHeader.VersionKey,
        MafHeader.AnnotationSpecKey,
        MafHeader.SortOrderKey,
        "key1",
        "key2",
    ]
    self.assertEqual(list(header.keys()), expected_keys)

    expected_values = [
        TestMafHeader.Version,
        TestMafHeader.AnnotationSpec,
        Coordinate.name(),
        "value1",
        "value2",
    ]
    actual_values = [str(entry.value) for entry in header.values()]
    self.assertEqual(actual_values, expected_values)

    self.assertEqual(header.version(), TestMafHeader.Version)
    os.remove(path)