Пример #1
0
 def __init__(self, data, vlevel=1, virtual=False, version=None):
     """Create a line from a private dict or from its list/string form.

     Parameters:
       data (dict, list, str): a dict is merged as-is into the internal
         field storage (API-private path: nothing is parsed or validated
         here); otherwise the fields of the record, given as a list or as
         a string which is split on ``gfapy.Line.SEPARATOR``
       vlevel (int): validation level; the record type specific
         validation is run only if it is >= 1
       virtual (bool): stored in ``_virtual``
       version: stored in ``_version``; if None, the version is computed
         from the first element of the data

     Raises:
       gfapy.AssertionError : if the class is gfapy.Line itself
       gfapy.RuntimeError : if the version is still None after the
         fields have been initialized
     """
     # instance state
     self.vlevel = vlevel
     self._virtual = virtual
     self._datatype = {}
     self._data = {}
     self._gfa = None
     self._version = version
     self._refs = {}
     if self.__class__ == gfapy.Line:
         raise gfapy.AssertionError("Line subclass unknown")

     # API private initialization using dict
     if isinstance(data, dict):
         self._data.update(data)
         return

     # public initialization using list (or tab-separated string)
     if self.__class__ == gfapy.line.Comment:
         data = gfapy.Line._init_comment_data(data)
     elif isinstance(data, str):
         data = data.split(gfapy.Line.SEPARATOR)

     if self.version is not None:
         self._validate_version()
     else:
         self._compute_version(data[0])

     self._initialize_positional_fields(data)
     self._initialize_tags(data)
     if self.vlevel >= 1:
         self._validate_record_type_specific_info()

     if self.version is None:
         message = ("version could not be determined, " +
                    "record_type={}".format(self.record_type))
         raise gfapy.RuntimeError(message)
Пример #2
0
 def _remove_junctions(self, jntag):
   if jntag is None:
     jntag = "jn"
   for s in self.segments:
     jndata = s.get(jntag)
     if jndata:
       ln = len(s.sequence)
       for m1, dir1 in jndata["L"].items():
         for m2, dir2 in jndata["R"].items():
           if self._version == "gfa1":
             l = gfapy.line.edge.Link([m1,dir1,m2,dir2,"{}M".format(ln)])
             self.add_line(l)
           elif self._version == "gfa2":
             m1ln = len(self.segment(m1).sequence)
             m2ln = len(self.segment(m2).sequence)
             r1 = (dir1 == "-")
             r2 = (dir2 == "-")
             l = gfapy.line.edge.GFA2(["*", m1+dir1, m2+dir2,
                "0" if r1 else str(m1ln-ln),
                str(ln) if r1 else str(m1ln)+"$",
                "0" if r2 else str(m2ln-ln),
                str(ln) if r1 else str(m2ln)+"$",
                str(ln)+"M"])
             self.add_line(l)
           else:
             raise gfapy.AssertionError()
       s.disconnect()
Пример #3
0
 def _link_duplicated_last(self, merged, last, is_reversed, jntag):
   """Annotate ``last`` as a junction of ``merged`` and link them temporarily.

   The junction tag of ``last`` is created if it is not set, then the
   pair [name of merged, orientation] is appended to its ``"R"`` list
   (orientation "-", if ``is_reversed``) or to its ``"L"`` list
   (orientation "+", otherwise). Finally an edge from ``merged`` to
   ``last``, marked with the comment tag ``co:Z:temporary``, is added to
   the GFA instance.

   Parameters:
     merged : segment line; its name and sequence are read
     last : segment line; its name, sequence and junction tag are used
     is_reversed (bool): whether ``last`` is linked in reverse orientation
     jntag (str): name of the junction tag; ``"jn"`` is used if None

   Raises:
     gfapy.AssertionError : if the version of the GFA instance is neither
       "gfa1" nor "gfa2"
   """
   # annotate junction
   if jntag is None:
     jntag = "jn"
   if not last.get(jntag):
     last.set(jntag, {"L":[],"R":[]})
   if is_reversed:
     last.get(jntag)["R"].append([merged.name, "-"])
   else:
     last.get(jntag)["L"].append([merged.name, "+"])
   # create temporary link
   ln = len(last.sequence)
   if self._version == "gfa1":
     tmp_link = gfapy.line.edge.Link([merged.name, "+",
         last.name, "-" if is_reversed else "+",
         "{}M".format(ln), "co:Z:temporary"])
     self.add_line(tmp_link)
   elif self._version == "gfa2":
     mln = len(merged.sequence)
     # the second reference is the name of `last` followed by its
     # orientation (the original code used an undefined name here)
     tmp_link = gfapy.line.edge.GFA2(["*", merged.name+"+",
       last.name+("-" if is_reversed else "+"),
       str(mln - ln), "{}$".format(mln),
       str(ln-1) if is_reversed else "0", # on purpose fake
       "{}$".format(ln) if is_reversed else "1", # on purpose fake
       "{}M".format(ln), "co:Z:temporary"])
     self.add_line(tmp_link)
   else:
     raise gfapy.AssertionError()
Пример #4
0
    def add_line(self, gfa_line):
        """Add a line to a GFA instance.

        Nothing is done if ``gfa_line`` is None. Otherwise the line is
        passed to the handler matching the version of the GFA instance
        ("gfa1", "gfa2" or not yet known).

        Note:
          append() is an alias to this method

        Parameters:
          gfa_line (str, Line): a line instance or a string, containing a
            line of a GFA file (if a string, a line instance is constructed
            using the string)

        Raises:
          gfapy.error.VersionError : If a wrong line type is used, for the
            GFA version
          gfapy.error.FormatError : If the content of the line string is
            not valid
        """
        if gfa_line is None:
            return
        version = self._version
        # select the version specific handler first, call it afterwards
        if version == "gfa1":
            handler = self.__add_line_GFA1
        elif version == "gfa2":
            handler = self.__add_line_GFA2
        elif version is None:
            handler = self.__add_line_unknown_version
        else:
            raise gfapy.AssertionError("This point should never be reached")
        handler(gfa_line)
Пример #5
0
 def __add_line_GFA1(self, gfa_line):
     """Add a line to a GFA instance whose version is GFA1.

     Strings are converted to line instances first. Segment strings are
     constructed without specifying a version, other strings with version
     "gfa1"; segments whose version turns out to be "gfa2" are rejected
     afterwards. Header lines are merged into the header of the GFA
     instance, all other supported lines are connected to it.

     Parameters:
       gfa_line (str, Line): a line instance or a string containing a
         line of a GFA file

     Raises:
       gfapy.VersionError : if the line is an instance of a GFA2 specific
         class, if the line is a GFA2 segment, or (validation level > 0
         only) if a header line specifies a version different from "1.0"
       gfapy.AssertionError : if the record type is not one of
         H, S, L, P, C, #
     """
     if isinstance(gfa_line, str):
         if gfa_line[0] == "S":
             gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel)
         else:
             gfa_line = gfapy.Line(gfa_line,
                                   vlevel=self._vlevel,
                                   version="gfa1")
     elif gfa_line.__class__ in gfapy.Lines.GFA2Specific:
         raise gfapy.VersionError(
             "Version: 1.0 ({})\n".format(self._version_explanation) +
             "Cannot add instance of incompatible line type " +
             str(type(gfa_line)))
     if gfa_line.record_type == "H":
         if self._vlevel > 0 and gfa_line.VN and gfa_line.VN != "1.0":
             raise gfapy.VersionError(
                 "Header line specified wrong version ({})\n".format(
                     gfa_line.VN) + "Line: {}\n".format(gfa_line) +
                 "File version: 1.0 ({})".format(self._version_explanation))
         self.header._merge(gfa_line)
     elif gfa_line.record_type == "S":
         if gfa_line.version == "gfa2":
             raise gfapy.VersionError(
                 "Version: 1.0 ({})\n".format(self._version_explanation) +
                 "GFA2 segment found: {}".format(gfa_line))
         gfa_line.connect(self)
     elif gfa_line.record_type in ["L", "P", "C", "#"]:
         gfa_line.connect(self)
     else:
         # report the offending record type (the original code formatted
         # an undefined name, which hid this error behind a NameError)
         raise gfapy.AssertionError(
             "Invalid record type {}. This should never happen".format(
                 gfa_line.record_type))
0
 def _backreference_keys(self, ref, key_in_ref):
     if ref.record_type == "U":
         return ["sets"]
     elif ref.record_type == "O":
         return ["paths"]
     elif ref.record_type == "S":
         return ["sid1", "sid2"]
     else:
         raise gfapy.AssertionError("Bug found, please report\n" +
                                    "ref: {}\n".format(ref) +
                                    "key_in_ref: {}".format(key_in_ref))
Пример #7
0
 def _push_item_on_se_path(self, path, prev_edge, item):
     """Push an item on the captured path, depending on the type of its line.

     Parameters:
       path (list): the captured path computed so far
       prev_edge (bool): flag which is set to False after a segment is
         pushed, to True after an edge is pushed
       item : oriented item; its ``line`` attribute selects the action and
         its ``orient`` attribute is read if the line is an ordered group

     Returns:
       tuple : (path, prev_edge), i.e. the path passed as argument and the
         updated flag

     Raises:
       gfapy.RuntimeError : if the item is an unresolved reference (a
         string or a virtual unknown line) or its line is not connected
       gfapy.AssertionError : if the captured path of an ordered group
         item is empty
       gfapy.TypeError : if the line of the item is of any other type
     """
     if isinstance(item.line, str):
         # the reference was never replaced by a line instance
         raise gfapy.RuntimeError(
             "Captured path cannot be computed; a reference has not been resolved\n"
             + "Line: {}\n".format(self) +
             "Unresolved reference: {} (String found)".format(item.line))
     elif isinstance(item.line, gfapy.line.segment.GFA2):
         if not item.line.is_connected():
             raise gfapy.RuntimeError(
                 "Captured path cannot be computed; item is not connected\n"
                 + "Line: {}\n".format(self) + "Item: {}".format(item.line))
         # the return value is not used: the helper appends to path in place
         self._push_segment_on_se_path(path, prev_edge, item)
         prev_edge = False
     elif isinstance(item.line, gfapy.line.edge.GFA2):
         if not item.line.is_connected():
             raise gfapy.RuntimeError(
                 "Captured path cannot be computed; item is not connected\n"
                 + "Line: {}\n".format(self) + "Item: {}".format(item.line))
         if not path:
             # NOTE(review): for an edge on an empty path the helper gets
             # self.items instead of item - presumably it needs the
             # following items to orient the first edge; confirm
             self._push_first_edge_on_se_path(path, self.items)
         else:
             self._push_nonfirst_edge_on_se_path(path, item)
         prev_edge = True
     elif isinstance(item.line, gfapy.line.group.Ordered):
         if not item.line.is_connected():
             raise gfapy.RuntimeError(
                 "Captured path cannot be computed; item is not connected\n"
                 + "Line: {}\n".format(self) + "Item: {}".format(item.line))
         # nested ordered group: push the items of its own captured path
         subpath, prev_edge_subpath = item.line._compute_captured_path()
         if not subpath:
             raise gfapy.AssertionError()
         if item.orient == "+":
             for subpath_item in subpath:
                 path, prev_edge = self._push_item_on_se_path(
                     path, prev_edge, subpath_item)
         else:
             # any other orientation: traverse the subpath backwards,
             # with each of its items inverted
             for subpath_item in reversed(subpath):
                 path, prev_edge = self._push_item_on_se_path(
                     path, prev_edge, subpath_item.inverted())
         # the flag computed for the subpath replaces the one resulting
         # from the recursive calls above
         prev_edge = prev_edge_subpath
     elif isinstance(item.line, gfapy.line.unknown.Unknown):
         raise gfapy.RuntimeError(
             "Captured path cannot be computed; a reference has not been resolved\n"
             + "Line: {}\n".format(self) +
             "Unresolved reference: {} (Virtual unknown line)".format(
                 item.name))
     else:
         raise gfapy.TypeError("Line: {}\t".format(self) +
                               "Cannot compute captured path:\t" +
                               "Error: items of type {} are not supported\t"
                               .format(item.line.__class__.__name__) +
                               "Unsupported item: {}".format(item))
     return path, prev_edge
Пример #8
0
 def _refkey_for_s(self, snum):
     a = [self.sid1.orient, self.sid2.orient]
     if a == ["+", "+"]:
         return "gaps_R" if (snum == 1) else "gaps_L"
     elif a == ["+", "-"]:
         return "gaps_R"
     elif a == ["-", "+"]:
         return "gaps_L"
     elif a == ["-", "-"]:
         return "gaps_L" if (snum == 1) else "gaps_R"
     else:
         raise gfapy.AssertionError("Bug found, please report\n" +
                                    "snum: {}".format(snum))
Пример #9
0
 def _push_segment_on_se_path(self, path, prev_edge, oriented_segment):
     if path:
         if isinstance(path[-1].line, gfapy.line.segment.GFA2):
             if prev_edge:
                 self._check_s_is_as_expected(path, oriented_segment)
                 return  # do not add segment, as it is already there
             else:
                 path.append(
                     self._find_edge_from_path_to_segment(
                         path, oriented_segment))
         elif isinstance(path[-1].line, gfapy.line.edge.GFA2):
             self._check_s_to_e_contiguity(path, oriented_segment)
         else:
             raise gfapy.AssertionError()
     path.append(oriented_segment)
Пример #10
0
 def _initialize_positional_fields(self, strings):
     if strings[0] != self.RECORD_TYPE and self.RECORD_TYPE != "\n":
         raise gfapy.FormatError(
             "Record type of records of " +
             "class {} must be {} ({} found)".format(
                 self.__class__, self.RECORD_TYPE, strings[0]))
     if self.version is None:
         raise gfapy.AssertionError("Bug found, please report\n" +
                                    "strings: {}".format(repr(strings)))
     if (self.vlevel >= 1) and (len(strings) - 1 <
                                self._n_positional_fields):
         raise gfapy.FormatError(
             "{} positional fields expected, ".format(
                 self._n_positional_fields) +
             "{} found\n{}".format(len(strings) - 1, repr(strings)))
     for i, n in enumerate(self.POSFIELDS):
         self._init_field_value(n,
                                self.__class__.DATATYPE[n],
                                strings[i + 1],
                                errmsginfo=strings)
Пример #11
0
 def _unregister_line(self, gfa_line):
     self._api_private_check_gfa_line(gfa_line, "unregister_line")
     rt = gfa_line.record_type
     if rt == "H":
         raise gfapy.AssertionError("Bug found, please report\n" +
                                    "gfa_line: {}".format(gfa_line))
     collection = self._records[rt]
     storage_key = gfa_line.__class__.STORAGE_KEY
     if storage_key == "name":
         name = gfa_line.name
         if gfapy.is_placeholder(name):
             name = id(gfa_line)
         collection.pop(name)
     elif storage_key == "external":
         subkey = gfa_line.external.name
         collection = collection[subkey]
         collection.pop(id(gfa_line))
         if not collection:
             self._records[rt].pop(subkey)
     else:
         collection.pop(id(gfa_line))