def __init__(self, data, vlevel=1, virtual=False, version=None):
    """Create a line instance from its field data.

    Parameters:
      data (dict, list, str): the content of the line. A dict is copied
        as-is into the internal field storage (API private path, nothing
        else is done). A string is split on ``gfapy.Line.SEPARATOR``
        (for comment lines ``gfapy.Line._init_comment_data`` prepares the
        data instead); the resulting sequence is used to initialize the
        positional fields and the tags.
      vlevel (int): validation level; the record type specific validation
        is run only if it is >= 1.
      virtual (bool): stored in ``_virtual``.
      version: stored in ``_version``; if the ``version`` attribute is
        ``None`` it is computed from the first element of the data,
        otherwise it is validated.

    Raises:
      gfapy.AssertionError: if the class is ``gfapy.Line`` itself.
      gfapy.RuntimeError: if the version is still undetermined after the
        fields have been initialized.
    """
    self.vlevel = vlevel
    self._virtual = virtual
    self._datatype = {}
    self._data = {}
    self._gfa = None
    self._version = version
    self._refs = {}

    if self.__class__ == gfapy.Line:
        raise gfapy.AssertionError("Line subclass unknown")

    # API private initialization using dict: no parsing, no validation
    if isinstance(data, dict):
        self._data.update(data)
        return

    # public initialization using list (or tab-separated string)
    if self.__class__ == gfapy.line.Comment:
        fields = gfapy.Line._init_comment_data(data)
    elif isinstance(data, str):
        fields = data.split(gfapy.Line.SEPARATOR)
    else:
        fields = data

    if self.version is not None:
        self._validate_version()
    else:
        self._compute_version(fields[0])

    self._initialize_positional_fields(fields)
    self._initialize_tags(fields)
    if self.vlevel >= 1:
        self._validate_record_type_specific_info()

    if self.version is None:
        raise gfapy.RuntimeError(
            "version could not be determined, " +
            "record_type={}".format(self.record_type))
def _remove_junctions(self, jntag):
    """Replace junction segments by direct edges between their neighbours.

    For each segment carrying the junction tag, an edge is added for every
    combination of an entry of the tag's "L" list with an entry of its "R"
    list; the overlap length of the edge is the length of the junction
    segment sequence. The junction segment is then disconnected.

    The tag value is expected to be a dict ``{"L": [...], "R": [...]}``
    whose lists contain ``[segment_name, orientation]`` pairs, which is
    the layout written by ``_link_duplicated_last``.

    Parameters:
      jntag (str): name of the junction tag; ``None`` selects ``"jn"``.

    Raises:
      gfapy.AssertionError: if the version of the graph is neither
        ``"gfa1"`` nor ``"gfa2"``.
    """
    if jntag is None:
        jntag = "jn"
    for s in self.segments:
        jndata = s.get(jntag)
        if not jndata:
            continue
        ln = len(s.sequence)
        # "L" and "R" are lists of [name, orientation] pairs, not dicts:
        # iterate them directly (lists have no items() method).
        for m1, dir1 in jndata["L"]:
            for m2, dir2 in jndata["R"]:
                if self._version == "gfa1":
                    l = gfapy.line.edge.Link(
                        [m1, dir1, m2, dir2, "{}M".format(ln)])
                    self.add_line(l)
                elif self._version == "gfa2":
                    m1ln = len(self.segment(m1).sequence)
                    m2ln = len(self.segment(m2).sequence)
                    r1 = (dir1 == "-")
                    r2 = (dir2 == "-")
                    # Each segment's begin/end pair must be selected by
                    # that segment's own orientation flag, otherwise the
                    # interval is inconsistent when r1 != r2.
                    # NOTE(review): for the second segment one might expect
                    # the overlap at the start when dir2 is "+"; confirm
                    # the begin/end choice against the GFA2 specification.
                    l = gfapy.line.edge.GFA2(
                        ["*", m1 + dir1, m2 + dir2,
                         "0" if r1 else str(m1ln - ln),
                         str(ln) if r1 else str(m1ln) + "$",
                         "0" if r2 else str(m2ln - ln),
                         str(ln) if r2 else str(m2ln) + "$",
                         str(ln) + "M"])
                    self.add_line(l)
                else:
                    raise gfapy.AssertionError()
        s.disconnect()
def _link_duplicated_last(self, merged, last, is_reversed, jntag):
    """Annotate a junction on ``last`` and link ``merged`` to it temporarily.

    The junction tag of ``last`` (a dict ``{"L": [], "R": []}``, created
    if missing or empty) gets a ``[merged.name, orientation]`` entry: in
    "R" with orientation "-" if ``is_reversed``, in "L" with orientation
    "+" otherwise. A link from ``merged`` to ``last`` carrying the tag
    ``co:Z:temporary`` is then added to the graph.

    Parameters:
      merged: the merged segment (``name`` and ``sequence`` are read).
      last: the segment to annotate (``name`` and ``sequence`` are read).
      is_reversed (bool): selects the orientation of ``last`` in the link
        and the list of the junction tag which is updated.
      jntag (str): name of the junction tag; ``None`` selects ``"jn"``.

    Raises:
      gfapy.AssertionError: if the version of the graph is neither
        ``"gfa1"`` nor ``"gfa2"``.
    """
    # annotate junction
    if jntag is None:
        jntag = "jn"
    if not last.get(jntag):
        last.set(jntag, {"L": [], "R": []})
    if is_reversed:
        last.get(jntag)["R"].append([merged.name, "-"])
    else:
        last.get(jntag)["L"].append([merged.name, "+"])

    # create temporary link
    ln = len(last.sequence)
    last_orient = "-" if is_reversed else "+"
    if self._version == "gfa1":
        tmp_link = gfapy.line.edge.Link(
            [merged.name, "+", last.name, last_orient,
             "{}M".format(ln), "co:Z:temporary"])
        self.add_line(tmp_link)
    elif self._version == "gfa2":
        mln = len(merged.sequence)
        tmp_link = gfapy.line.edge.GFA2(
            ["*", merged.name + "+",
             # the segment name is an attribute of ``last``; the original
             # referenced an undefined ``last_name`` variable here
             last.name + last_orient,
             str(mln - ln), "{}$".format(mln),
             str(ln - 1) if is_reversed else "0",        # on purpose fake
             "{}$".format(ln) if is_reversed else "1",   # on purpose fake
             "{}M".format(ln), "co:Z:temporary"])
        self.add_line(tmp_link)
    else:
        raise gfapy.AssertionError()
def add_line(self, gfa_line):
    """Add a line to a GFA instance.

    Note:
      append() is an alias to this method

    The line is handed over to the version specific implementation
    selected by the version of the GFA instance ("gfa1", "gfa2" or not
    yet determined). Nothing is done if ``gfa_line`` is ``None``.

    Parameters:
      gfa_line (str, Line): a line instance or a string, containing a
        line of a GFA file (if a string, a line instance is constructed
        using the string)

    Raises:
      gfapy.error.VersionError : If a wrong line type is used, for the GFA
        version
      gfapy.error.FormatError : If the content of the line string is not
        valid
    """
    if gfa_line is None:
        return
    version = self._version
    if version is None:
        self.__add_line_unknown_version(gfa_line)
    elif version == "gfa1":
        self.__add_line_GFA1(gfa_line)
    elif version == "gfa2":
        self.__add_line_GFA2(gfa_line)
    else:
        raise gfapy.AssertionError("This point should never be reached")
def __add_line_GFA1(self, gfa_line):
    """Add a line to a GFA instance whose version is GFA1.

    A string is first converted into a line instance using the validation
    level of the GFA instance; for strings starting with "S" no version
    is passed to the constructor, for all others ``version="gfa1"`` is
    passed. Header lines are merged into the header of the GFA instance;
    lines with record type S, L, P, C or # are connected to it.

    Parameters:
      gfa_line (str, Line): the line to add.

    Raises:
      gfapy.VersionError: if the line is an instance of a GFA2 specific
        class, a header declaring a VN different from "1.0" (only checked
        if the validation level is > 0), or a GFA2 segment.
      gfapy.AssertionError: if the record type is none of H, S, L, P, C, #.
    """
    if isinstance(gfa_line, str):
        if gfa_line[0] == "S":
            gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel)
        else:
            gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel,
                                  version="gfa1")
    elif gfa_line.__class__ in gfapy.Lines.GFA2Specific:
        raise gfapy.VersionError(
            "Version: 1.0 ({})\n".format(self._version_explanation) +
            "Cannot add instance of incompatible line type " +
            str(type(gfa_line)))

    rt = gfa_line.record_type
    if rt == "H":
        if self._vlevel > 0 and gfa_line.VN and gfa_line.VN != "1.0":
            raise gfapy.VersionError(
                "Header line specified wrong version ({})\n".format(
                    gfa_line.VN) +
                "Line: {}\n".format(gfa_line) +
                "File version: 1.0 ({})".format(self._version_explanation))
        self.header._merge(gfa_line)
    elif rt == "S":
        if gfa_line.version == "gfa2":
            raise gfapy.VersionError(
                "Version: 1.0 ({})\n".format(self._version_explanation) +
                "GFA2 segment found: {}".format(gfa_line))
        gfa_line.connect(self)
    elif rt in ["L", "P", "C", "#"]:
        gfa_line.connect(self)
    else:
        # ``rt`` was not defined in the original, so this branch raised a
        # NameError instead of the intended assertion error.
        raise gfapy.AssertionError(
            "Invalid record type {}. This should never happen".format(rt))
def _backreference_keys(self, ref, key_in_ref):
    """Return the names of the fields which may hold a backreference to ref.

    Parameters:
      ref: the referring line; only its ``record_type`` is inspected.
      key_in_ref: only used in the error message.

    Returns:
      list of str: ``["sets"]`` for record type U, ``["paths"]`` for O,
      ``["sid1", "sid2"]`` for S.

    Raises:
      gfapy.AssertionError: for any other record type.
    """
    # built on each call, so that the caller always gets a fresh list
    keys_by_record_type = (
        ("U", ["sets"]),
        ("O", ["paths"]),
        ("S", ["sid1", "sid2"]),
    )
    for record_type, keys in keys_by_record_type:
        if ref.record_type == record_type:
            return keys
    raise gfapy.AssertionError("Bug found, please report\n" +
                               "ref: {}\n".format(ref) +
                               "key_in_ref: {}".format(key_in_ref))
def _push_item_on_se_path(self, path, prev_edge, item):
    """Push an item of the group on the captured path under construction.

    What is done depends on the class of ``item.line``: GFA2 segments and
    GFA2 edges are pushed using the dedicated helper methods; for ordered
    groups the captured path of the group is computed and its items are
    pushed one by one (inverted and in reverse order if the orientation
    of ``item`` is not "+").

    Parameters:
      path (list): the path computed so far.
      prev_edge (bool): flag handed over to the segment helper; the
        returned flag is False after a segment, True after an edge and
        the flag computed for the subpath after an ordered group.
      item: the oriented item to push (``line``, ``orient`` and ``name``
        are read).

    Returns:
      tuple: the path and the updated ``prev_edge`` flag.

    Raises:
      gfapy.RuntimeError: if the reference is unresolved (a string or a
        virtual unknown line) or if the item is not connected.
      gfapy.AssertionError: if the captured path of an ordered group is
        empty.
      gfapy.TypeError: if the class of the item line is not supported.
    """
    line = item.line

    def require_connected():
        if not line.is_connected():
            raise gfapy.RuntimeError(
                "Captured path cannot be computed; item is not connected\n" +
                "Line: {}\n".format(self) +
                "Item: {}".format(line))

    if isinstance(line, str):
        raise gfapy.RuntimeError(
            "Captured path cannot be computed; a reference has not been resolved\n" +
            "Line: {}\n".format(self) +
            "Unresolved reference: {} (String found)".format(line))

    if isinstance(line, gfapy.line.segment.GFA2):
        require_connected()
        self._push_segment_on_se_path(path, prev_edge, item)
        return path, False

    if isinstance(line, gfapy.line.edge.GFA2):
        require_connected()
        if path:
            self._push_nonfirst_edge_on_se_path(path, item)
        else:
            self._push_first_edge_on_se_path(path, self.items)
        return path, True

    if isinstance(line, gfapy.line.group.Ordered):
        require_connected()
        subpath, prev_edge_subpath = line._compute_captured_path()
        if not subpath:
            raise gfapy.AssertionError()
        if item.orient == "+":
            subpath_items = iter(subpath)
        else:
            # lazily inverted, i.e. right before the item is pushed
            subpath_items = (subpath_item.inverted()
                             for subpath_item in reversed(subpath))
        for subpath_item in subpath_items:
            path, prev_edge = self._push_item_on_se_path(
                path, prev_edge, subpath_item)
        # the flag of the subpath replaces the one of the last pushed item
        return path, prev_edge_subpath

    if isinstance(line, gfapy.line.unknown.Unknown):
        raise gfapy.RuntimeError(
            "Captured path cannot be computed; a reference has not been resolved\n" +
            "Line: {}\n".format(self) +
            "Unresolved reference: {} (Virtual unknown line)".format(
                item.name))

    raise gfapy.TypeError(
        "Line: {}\t".format(self) +
        "Cannot compute captured path:\t" +
        "Error: items of type {} are not supported\t"
        .format(line.__class__.__name__) +
        "Unsupported item: {}".format(item))
def _refkey_for_s(self, snum):
    """Return the name of the gaps collection to use for a segment.

    The result depends on the orientations of ``sid1`` and ``sid2``; if
    both are equal it also depends on ``snum``.

    Parameters:
      snum (int): compared with 1 when the two orientations are equal.

    Returns:
      str: ``"gaps_L"`` or ``"gaps_R"``.

    Raises:
      gfapy.AssertionError: if the orientations are not a combination of
        "+" and "-".
    """
    orients = (self.sid1.orient, self.sid2.orient)
    if orients == ("+", "+"):
        if snum == 1:
            return "gaps_R"
        return "gaps_L"
    if orients == ("+", "-"):
        return "gaps_R"
    if orients == ("-", "+"):
        return "gaps_L"
    if orients == ("-", "-"):
        if snum == 1:
            return "gaps_L"
        return "gaps_R"
    raise gfapy.AssertionError("Bug found, please report\n" +
                               "snum: {}".format(snum))
def _push_segment_on_se_path(self, path, prev_edge, oriented_segment):
    """Append an oriented segment to the captured path under construction.

    If the path is empty the segment is simply appended. If the last path
    element is a GFA2 segment and ``prev_edge`` is set, the segment is
    only checked and not appended; if ``prev_edge`` is not set, the edge
    found by ``_find_edge_from_path_to_segment`` is appended before the
    segment. If the last element is a GFA2 edge, the contiguity is checked
    before appending.

    Parameters:
      path (list): the path computed so far; modified in place.
      prev_edge (bool): see above.
      oriented_segment: the segment to append.

    Raises:
      gfapy.AssertionError: if the last element of the path is neither a
        GFA2 segment nor a GFA2 edge.
    """
    if not path:
        path.append(oriented_segment)
        return

    tail = path[-1].line
    if isinstance(tail, gfapy.line.segment.GFA2):
        if prev_edge:
            self._check_s_is_as_expected(path, oriented_segment)
            # do not add segment, as it is already there
            return
        connecting_edge = self._find_edge_from_path_to_segment(
            path, oriented_segment)
        path.append(connecting_edge)
    elif isinstance(tail, gfapy.line.edge.GFA2):
        self._check_s_to_e_contiguity(path, oriented_segment)
    else:
        raise gfapy.AssertionError()
    path.append(oriented_segment)
def _initialize_positional_fields(self, strings):
    """Initialize the positional fields from the elements of the line.

    Parameters:
      strings (list): the elements of the line; the first one is the
        record type, the following ones are assigned, in order, to the
        fields listed in ``POSFIELDS``.

    Raises:
      gfapy.FormatError: if the record type does not match the one of the
        class (not checked if ``RECORD_TYPE`` is a newline), or, at
        validation level >= 1, if there are less elements than positional
        fields.
      gfapy.AssertionError: if the version is not set.
    """
    found_record_type = strings[0]
    if self.RECORD_TYPE != "\n" and found_record_type != self.RECORD_TYPE:
        raise gfapy.FormatError(
            "Record type of records of " +
            "class {} must be {} ({} found)".format(
                self.__class__, self.RECORD_TYPE, found_record_type))

    if self.version is None:
        raise gfapy.AssertionError("Bug found, please report\n" +
                                   "strings: {}".format(repr(strings)))

    if self.vlevel >= 1:
        n_found = len(strings) - 1
        if n_found < self._n_positional_fields:
            raise gfapy.FormatError(
                "{} positional fields expected, ".format(
                    self._n_positional_fields) +
                "{} found\n{}".format(n_found, repr(strings)))

    # position 0 is the record type, thus the fields start at position 1
    for position, fieldname in enumerate(self.POSFIELDS, 1):
        datatype = self.__class__.DATATYPE[fieldname]
        self._init_field_value(fieldname, datatype, strings[position],
                               errmsginfo=strings)
def _unregister_line(self, gfa_line):
    """Remove a line from the records collections of the GFA instance.

    The key under which the line is looked up depends on the
    ``STORAGE_KEY`` of its class: ``"name"`` (the name of the line, or
    its ``id()`` if the name is a placeholder), ``"external"`` (the
    ``id()`` of the line in the sub-collection of its external name; the
    sub-collection is removed too once it is empty) or anything else
    (the ``id()`` of the line).

    Parameters:
      gfa_line: the line to remove.

    Raises:
      gfapy.AssertionError: if the line is a header line.
    """
    self._api_private_check_gfa_line(gfa_line, "unregister_line")
    rt = gfa_line.record_type
    if rt == "H":
        raise gfapy.AssertionError("Bug found, please report\n" +
                                   "gfa_line: {}".format(gfa_line))
    records_of_type = self._records[rt]
    storage_key = gfa_line.__class__.STORAGE_KEY

    if storage_key == "external":
        subkey = gfa_line.external.name
        bucket = records_of_type[subkey]
        bucket.pop(id(gfa_line))
        if not bucket:
            # drop the sub-collection, once it is empty
            self._records[rt].pop(subkey)
        return

    if storage_key == "name":
        key = gfa_line.name
        if gfapy.is_placeholder(key):
            key = id(gfa_line)
    else:
        key = id(gfa_line)
    records_of_type.pop(key)