def test_full_LOC(self):
    """LOC should behave as expected when initialized with rich data.

    Builds a LineOrientedConstructor over tab-delimited lines twice:
    once in strict mode, where calling it must raise FieldError, and once
    in non-strict mode, where calling it must return a ``rec`` instance
    holding the expected field values.
    """
    data = [
        "abc\t def",
        " 3 \t n",
        " abc \txyz\n\n",
        "x\t5",
        "fgh ",
        "x\t3 ",
    ]

    class rec(MappedRecord):
        Required = {"abc": []}

    maps = {"abc": list_adder, "x": int_setter, "fgh": bool_setter}
    label_splitter = DelimitedSplitter("\t")
    constructor = rec

    # Strict mode must reject this data.
    # NOTE(review): presumably triggered by the "3" label, which has no
    # handler in ``maps`` -- confirm against LineOrientedConstructor.
    strict = True
    loc_bad = LineOrientedConstructor(data, label_splitter, maps, constructor, strict)
    self.assertRaises(FieldError, loc_bad)

    # Non-strict mode must accept the same data.
    strict = False
    loc_good = LineOrientedConstructor(data, label_splitter, maps, constructor, strict)
    result = loc_good()
    # Use the TestCase assertion instead of a bare ``assert`` so the check
    # still runs under ``python -O`` and reports a useful failure message.
    self.assertIsInstance(result, rec)
    self.assertEqual(
        result,
        {"abc": ["def", "xyz"], "3": "n", "fgh": False, "x": 3},
    )
def test_splitter(self):
    """StrictFieldWrapper should split each line with the splitter it is given."""
    last_colon = DelimitedSplitter(":", -1)
    wrapper = StrictFieldWrapper(["label", "count"], last_colon)

    # (input line, expected mapping of field name -> value)
    cases = [
        ("n:k:n:a:sd ", {"label": "n:k:n:a", "count": "sd"}),
        ("nknasd:", {"label": "nknasd", "count": ""}),
    ]
    for line, expected in cases:
        self.assertEqual(wrapper(line), expected)

    # An empty line cannot supply both fields, so the strict wrapper must fail.
    with self.assertRaises(FieldError):
        wrapper("")
def test_parsers(self):
    """DelimitedSplitter should return function with correct behavior"""
    by_default = DelimitedSplitter()
    on_whitespace = DelimitedSplitter(None)
    first_semi = DelimitedSplitter(";")
    two_semis = DelimitedSplitter(";", 2)
    every_semi = DelimitedSplitter(";", None)
    final_semi = DelimitedSplitter(";", -1)
    final_two_semis = DelimitedSplitter(";", -2)

    # Splitter built with no arguments.
    for line, expected in (
        ("a b c", ["a", "b c"]),
        ("abc", ["abc"]),
        (" ", []),
    ):
        self.assertEqual(by_default(line), expected)

    # Passing None as the delimiter must match the no-argument splitter.
    self.assertEqual(by_default("a b c"), on_whitespace("a b c"))

    # Explicit delimiter with different split counts, counted from either end.
    for splitter, line, expected in (
        (first_semi, " a ; b ; c d", ["a", "b ; c d"]),
        (two_semis, " a ; b ; c d", ["a", "b", "c d"]),
        (every_semi, " a ; b ; c;;d;e ;", ["a", "b", "c", "", "d", "e", ""]),
        (final_semi, " a ; b ; c;;d;e ;", ["a ; b ; c;;d;e", ""]),
        (final_two_semis, " a ; b ; c;;d;e ;", ["a ; b ; c;;d", "e", ""]),
        (final_two_semis, "", []),
        (final_two_semis, "x", ["x"]),
        (final_two_semis, "x;", ["x", ""]),
    ):
        self.assertEqual(splitter(line), expected)
def DefaultDelimitedSplitter(delimiter):
    """Return a splitter that pads single-field results with an empty string.

    The returned callable runs ``DelimitedSplitter(delimiter=delimiter)`` on
    a line. When that yields exactly one field, an empty string is appended
    to the same list. Results of any other length are returned unchanged.
    """
    split = DelimitedSplitter(delimiter=delimiter)

    def pad_single_field(line):
        fields = split(line)
        if len(fields) != 1:
            return fields
        # Lone label with no value: add the empty value slot in place.
        fields.append("")
        return fields

    return pad_single_field
__email__ = "*****@*****.**"
__status__ = "Development"

# Module-level aliases for the unbound str methods.
maketrans = str.maketrans
strip = str.strip
rstrip = str.rstrip


def ll_start(line):
    """Return True when line opens with the '>>' LocusLink record marker."""
    record_marker = ">>"
    return line.startswith(record_marker)


# Record finder keyed on the '>>' start-of-record test above.
LLFinder = LabeledRecordFinder(ll_start)

# Shared field splitters for the delimiters used in this module.
pipes = DelimitedSplitter("|", None)
first_pipe = DelimitedSplitter("|")
commas = DelimitedSplitter(",", None)
first_colon = DelimitedSplitter(":", 1)

accession_wrapper = FieldWrapper(["Accession", "Gi", "Strain"], pipes)


def _read_accession(line):
    """Parse an accession line laid out as Accession | Gi | Strain.

    Returns a MappedRecord built from the wrapped fields.
    """
    fields = accession_wrapper(line)
    return MappedRecord(fields)


rell_wrapper = FieldWrapper(["Description", "Id", "IdType", "Printable"], pipes)
result[key] = [val] labels.append(key) return result, labels def is_clustal_seq_line(line): """Returns True if line starts with a non-blank character but not 'CLUSTAL'. Useful for filtering other lines out of the file. """ return (line and (not line[0].isspace()) and (not line.startswith("CLUSTAL")) and (not line.startswith("MUSCLE"))) last_space = DelimitedSplitter(None, -1) def delete_trailing_number(line): """Deletes trailing number from a line. WARNING: does not preserve internal whitespace when a number is removed! (converts each whitespace run to a single space). Returns the original line if it didn't end in a number. """ pieces = line.split() try: int(pieces[-1]) return " ".join(pieces[:-1]) except ValueError: # no trailing numbers return line