def __init__(self, *args, vlevel=1, version=None):
    """Set up an empty GFA container and optionally load lines into it.

    Parameters
    ----------
    *args
        At most one positional argument: a string (which is split on
        newline characters) or a list. Each resulting element is handed
        to ``add_line``; afterwards the line queue is processed and,
        if ``vlevel >= 1``, ``validate`` is called.
    vlevel : int
        Validation level; must be an integer >= 0.
    version : str, optional
        ``'gfa1'``, ``'gfa2'`` or ``None``. When ``None`` the version is
        left undetermined and ``'gfa2'`` is stored as the initial guess.

    Raises
    ------
    gfapy.ArgumentError
        If **vlevel** is not an integer or is negative, if more than one
        positional argument is given, or if the positional argument is
        neither a string nor a list.
    gfapy.VersionError
        If **version** is not one of the accepted values.
    """
    # --- argument validation -------------------------------------------
    if not isinstance(vlevel, int):
        raise gfapy.ArgumentError(
            "vlevel is not an integer ({})".format(vlevel))
    if vlevel < 0:
        raise gfapy.ArgumentError(
            "vlevel is not a positive integer ({})".format(vlevel))
    if version not in ("gfa1", "gfa2", None):
        raise gfapy.VersionError(
            "GFA version unknown ({})".format(version))

    # --- record storage ------------------------------------------------
    self._vlevel = vlevel
    self._max_int_name = 0
    self._records = defaultdict(dict)
    header = gfapy.line.Header(["H"], vlevel=vlevel)
    self._records["H"] = header
    # The header is connected before the other record tables exist,
    # exactly as in the original statement order.
    header.connect(self)
    for record_type in ("S", "P", "F", "\n", "E", "U", "G", "O", "C",
                        "L", "#"):
        self._records[record_type] = {}

    # --- bookkeeping state ---------------------------------------------
    self._segments_first_order = False
    self._progress = None
    self._default = {"count_tag": "RC", "unit_length": 1}
    self._line_queue = []

    # --- version -------------------------------------------------------
    if version is not None:
        self._version = version
        self._version_explanation = "set during initialization"
        self._version_guess = version
        self._validate_version()
    else:
        self._version = None
        self._version_explanation = None
        self._version_guess = "gfa2"

    # --- optional initial content --------------------------------------
    if len(args) > 1:
        raise gfapy.ArgumentError("Wrong number of arguments for Gfa()" +
                                  "({})".format(len(args)))
    if not args:
        return

    source = args[0]
    if isinstance(source, str):
        lines = source.split("\n")
    elif isinstance(source, list):
        lines = source
    else:
        raise gfapy.ArgumentError("Cannot create a Gfa" +
                                  " instance from an object of type {}"
                                  .format(type(source)))
    for line in lines:
        self.add_line(line)
    self.process_line_queue()
    if vlevel >= 1:
        self.validate()
def set_datatype(self, fieldname, datatype):
    """
    Record the datatype to be used for a tag.

    Changing the datatype of a tag that already holds a value may leave
    that value invalid (call **validate_field** if necessary).

    Parameters
    ----------
    fieldname : str
      Name of the tag (the tag does not need to exist yet).
    datatype : gfapy.Field.FIELD_DATATYPE
      The datatype to associate with the tag.

    Raises
    ------
    gfapy.RuntimeError
      If **fieldname** is a predefined tag and **datatype** differs from
      its current datatype.
    gfapy.FormatError
      If **fieldname** is not predefined, is not a valid custom tag name
      and the validation level is greater than 0.
    gfapy.ArgumentError
      If **datatype** is not a valid datatype for tags.
    """
    predefined = self._is_predefined_tag(fieldname)
    if predefined:
        # Predefined tags may only be "set" to the datatype they already have.
        if self.get_datatype(fieldname) != datatype:
            message = "Cannot set the datatype of {} to {}\n".format(
                fieldname, datatype)
            message += "The datatype of a predefined tag cannot be changed"
            raise gfapy.RuntimeError(message)
    else:
        name_ok = self._is_valid_custom_tagname(fieldname)
        if not name_ok and self.vlevel > 0:
            raise gfapy.FormatError(
                "{} is not a valid custom tag name".format(fieldname))

    if datatype not in gfapy.Field.TAG_DATATYPE:
        raise gfapy.ArgumentError("Unknown datatype: {}".format(datatype))

    self._datatype[fieldname] = datatype
def _check_ref_connection(self, item):
    """Ensure an item belongs to the same GFA object as this group.

    Parameters
    ----------
    item
        Object exposing ``item.line.gfa``; that value is compared with
        this group's ``_gfa``.

    Raises
    ------
    gfapy.ArgumentError
        If ``item.line.gfa`` differs from ``self._gfa``.
    """
    if item.line.gfa == self._gfa:
        return
    # Each part of the message sits on its own line; the item
    # representation previously ran straight into the explanation text.
    raise gfapy.ArgumentError(
        "Line: {}\n".format(self) +
        "Item: {}\n".format(repr(item)) +
        "The item added to the group must be connected\n" +
        "to the same GFA object as the group")
def other_end(self, segment_end, tolerant=False):
    """The other segment end involved in the alignment represented by the edge.

    Note:
      The result is meaningful only for dovetails overlaps (GFA1 L lines
      or GFA2 E lines representing dovetail overlaps).

    Parameters:
      segment_end (`gfapy.segment_end.SegmentEnd`) : one of the two
        segment ends involved in the alignment represented by the edge
      tolerant (bool) : if True, return None instead of raising when
        **segment_end** is neither the from-end nor the to-end of the edge

    Returns:
      gfapy.segment_end.SegmentEnd : ``to_end`` if **segment_end** equals
        ``from_end``, otherwise ``from_end`` if it equals ``to_end``;
        None if it matches neither and **tolerant** is set

    Raises:
      gfapy.error.ArgumentError: if **segment_end** matches neither end of
        the edge and **tolerant** is not set
    """
    # from_end is checked first, so an edge whose two ends are equal
    # yields to_end.
    if self.from_end == segment_end:
        return self.to_end
    if self.to_end == segment_end:
        return self.from_end
    if tolerant:
        return None
    raise gfapy.ArgumentError(
        "Segment end '{}' not found\n".format(repr(segment_end)) +
        "(from={};to={})".format(repr(self.from_end), repr(self.to_end)))
def __new__(cls, *args, **kargs):
    """Create an instance of an alignment field class.

    Exactly one positional argument is expected. Depending on its value:

    - ``None`` or a placeholder: a ``gfapy.AlignmentPlaceholder`` is returned;
    - a ``gfapy.CIGAR`` or ``gfapy.Trace``: the argument itself is returned;
    - a ``str``: delegated to ``Alignment._from_string``;
    - a ``list``: delegated to ``Alignment._from_list``.

    Keyword arguments are forwarded to the delegated constructors.

    Raises:
      gfapy.ArgumentError: if no positional argument is given, if more
        than one is given (and the first is not None/a placeholder), or
        if the argument is of an unsupported type.
    """
    # Guard the args[0] accesses below: a call without positional
    # arguments used to fail with a bare IndexError.
    if not args:
        raise gfapy.ArgumentError("The Alignment() constructor requires "+
            "a single positional argument, {} found".format(len(args)))
    value = args[0]
    # The placeholder check deliberately precedes the argument-count check,
    # as in the original order of tests.
    if value is None or gfapy.is_placeholder(value):
        return gfapy.AlignmentPlaceholder()
    if len(args) > 1:
        raise gfapy.ArgumentError("The Alignment() constructor requires "+
            "a single positional argument, {} found".format(len(args)))
    if isinstance(value, gfapy.CIGAR) or isinstance(value, gfapy.Trace):
        # Already an alignment object: hand it back unchanged.
        return value
    if isinstance(value, str):
        return Alignment._from_string(*args, **kargs)
    if isinstance(value, list):
        return Alignment._from_list(*args, **kargs)
    raise gfapy.ArgumentError("Cannot create an alignment "+
        "from an instance of the class {}".format(type(value)))
def __init__(self, verbose_level = 1, channel = sys.stderr, prefix = "#"):
    """Initialise the logger state.

    Parameters:
      verbose_level (int) : stored as ``_verbose_level``
      channel : object providing a callable ``write`` attribute
        (default: ``sys.stderr``); stored as ``_channel``
      prefix (str) : stored as ``_pfx``

    Raises:
      gfapy.ArgumentError: if **verbose_level** is not an int
      gfapy.TypeError: if **channel** has no usable ``write`` method
    """
    # Progress reporting starts disabled.
    self._progress = False

    if not isinstance(verbose_level, int):
        raise gfapy.ArgumentError("verbose_level must be an Integer")

    writer = getattr(channel, "write", None)
    if not writer or not callable(channel.write):
        raise gfapy.TypeError("channel must provide a 'write' method")

    self._channel = channel
    self._pfx = prefix
    self._verbose_level = verbose_level
    self._data = {}
def __add_line_unknown_version(self, gfa_line):
    """Add a line while the GFA version has not been determined yet.

    The record type (first character of a string, or ``record_type`` of a
    ``gfapy.Line``) decides what happens:

    - ``#``: the line is connected immediately;
    - ``H``: merged into the header; a non-empty VN tag fixes the version
      and the queued lines are processed;
    - ``S``: the version is taken from the parsed segment line, the queue
      is processed and the line is connected;
    - ``E``, ``F``, ``G``, ``U``, ``O``: the version is set to ``gfa2``,
      the queue is processed and the line is connected;
    - ``L``, ``C``, ``P``: the version guess becomes ``gfa1`` and the line
      is queued;
    - anything else: the line is queued.

    Raises:
      gfapy.ArgumentError: if **gfa_line** is neither a string nor a
        ``gfapy.Line``.
    """
    from_string = isinstance(gfa_line, str)
    if from_string:
        record_type = gfa_line[0]
    elif isinstance(gfa_line, gfapy.Line):
        record_type = gfa_line.record_type
    else:
        raise gfapy.ArgumentError(
            "Only strings and gfapy.Line instances can be added")

    if record_type == "#":
        if from_string:
            gfa_line = gfapy.Line(gfa_line, dialect=self._dialect)
        gfa_line.connect(self)
        return

    if record_type == "H":
        if from_string:
            gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel,
                                  dialect=self._dialect)
        self.header._merge(gfa_line)
        version_tag = gfa_line.VN
        if not version_tag:
            # Header without version information: nothing else to do yet.
            return
        if version_tag == "1.0":
            self._version = "gfa1"
        elif version_tag == "2.0":
            self._version = "gfa2"
        else:
            self._version = version_tag
        self._version_explanation = "specified in header VN tag"
        if self._vlevel > 0:
            self._validate_version()
        self.process_line_queue()
        return

    if record_type == "S":
        if from_string:
            gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel,
                                  dialect=self._dialect)
        self._version = gfa_line.version
        self._version_explanation = \
            "implied by: syntax of S {} line".format(gfa_line.name)
        self.process_line_queue()
        gfa_line.connect(self)
        return

    if record_type in ("E", "F", "G", "U", "O"):
        # The version is fixed before parsing so that it can be passed on
        # to the line constructor.
        self._version = "gfa2"
        self._version_explanation = \
            "implied by: presence of a {} line".format(record_type)
        if from_string:
            gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel,
                                  version=self._version,
                                  dialect=self._dialect)
        self.process_line_queue()
        gfa_line.connect(self)
        return

    if record_type in ("L", "C", "P"):
        self._version_guess = "gfa1"
    # Version still unknown: keep the line for later processing.
    self._line_queue.append(gfa_line)
def _check_ref_class(self, item):
    """Reject items whose class cannot be a member of this group.

    Accepted classes are GFA2 edges, GFA2 segments, gaps, ordered groups
    and the class of this group itself; the comparison is on the exact
    class of **item** (``item.__class__``).

    Raises:
      gfapy.ArgumentError: if the class of **item** is not accepted.
    """
    item_class = item.__class__
    accepted_classes = (
        gfapy.line.edge.GFA2,
        gfapy.line.segment.GFA2,
        gfapy.line.gap.Gap,
        gfapy.line.group.Ordered,
        self.__class__,
    )
    if item_class in accepted_classes:
        return
    raise gfapy.ArgumentError(
        "Line: {}\n".format(self) +
        "Cannot add items of class {}\n".format(item_class.__name__) +
        "Only GFA2 edges, segments, gaps, groups[*] " +
        "can be added\n(* = unordered groups to unordered groups only).")
def __init__(self, *args):
    """Initialise an oriented line from its line and orientation.

    Accepted call forms:

    - two arguments: ``(line, orientation)``;
    - one string: the last character is the orientation, everything
      before it is the line;
    - one list: element 0 is the line, element 1 the orientation;
    - one ``OrientedLine``: nothing is done and the method returns
      immediately.

    Raises:
      gfapy.ArgumentError: for a single argument of any other type, or
        for any other number of arguments.
    """
    argc = len(args)
    if argc == 2:
        self.__line, self.__orient = args
    elif argc == 1:
        value = args[0]
        if isinstance(value, OrientedLine):
            # Nothing is assigned for an existing OrientedLine.
            return
        if isinstance(value, str):
            self.__line = value[:-1]
            self.__orient = value[-1]
        elif isinstance(value, list):
            self.__line = value[0]
            self.__orient = value[1]
        else:
            raise gfapy.ArgumentError("Cannot create an OrientedLine" +
                                      " instance from an object of type {}"
                                      .format(type(value)))
    else:
        raise gfapy.ArgumentError(
            "Wrong number of arguments for OrientedLine()")
    self.__editable = True
def __init__(self, *args):
    """Initialise a segment end from a segment and an end type.

    Accepted call forms:

    - two arguments: ``(segment, end_type)``;
    - one string: the last character is the end type, everything before
      it is the segment;
    - one list of exactly two elements: ``[segment, end_type]``;
    - one ``SegmentEnd``: nothing is done and the method returns
      immediately.

    Raises:
      gfapy.ArgumentError: for a list whose size is not 2, for a single
        argument of any other type, or for any other number of arguments.
    """
    argc = len(args)
    if argc == 2:
        self.__segment, self.__end_type = args
        return
    if argc != 1:
        raise gfapy.ArgumentError(
            "Wrong number of arguments for SegmentEnd()")

    value = args[0]
    if isinstance(value, SegmentEnd):
        # Nothing is assigned for an existing SegmentEnd.
        return
    if isinstance(value, str):
        self.__segment = value[:-1]
        self.__end_type = value[-1]
    elif isinstance(value, list):
        if len(value) != 2:
            raise gfapy.ArgumentError(
                "Cannot create a SegmentEnd " +
                " from a list of size {}".format(len(value)))
        self.__segment = value[0]
        self.__end_type = value[1]
    else:
        raise gfapy.ArgumentError(
            "Cannot create an SegmentEnd " +
            " from an object of type {}".format(type(value)))
def enable_progress(self, part = 0.1):
    """Turn on progress output for long running methods.

    Parameters:
      part (float between 0 and 1) : granularity of the output;
        if part = 0, output at every call of progress_log();
        if 0 < part < 1, output once per part of the total progress
        (e.g. 0.001 = log every 0.1% progress);
        if part = 1, output only total elapsed time at the end of the
        computation.

    Raises:
      gfapy.ArgumentError: if **part** is smaller than 0 or larger than 1.
    """
    out_of_range = part < 0 or part > 1
    if out_of_range:
        raise gfapy.ArgumentError("part must be in range [0..1]")

    self._progress = True
    self._part = part

    if self._verbose_level > 0:
        # Announce the change on the configured channel.
        notice = "{} Progress logging enabled\n".format(self._pfx)
        self._channel.write(notice)
def _select_distribute_end(self, links_distribution_policy,
                           segment_name, factor):
    """Choose the segment end whose links are to be distributed.

    Parameters:
      links_distribution_policy : one of ``self.LINKS_DISTRIBUTION_POLICY``
      segment_name : name passed to ``self.segment`` to look up the segment
      factor : forwarded to ``_auto_select_distribute_end``

    Returns:
      None if the policy is ``"off"``; the policy itself if it is ``"L"``
      or ``"R"``; otherwise the result of ``_auto_select_distribute_end``
      called with the factor, the number of dovetails at the L end, the
      number at the R end, and whether the policy is ``"equal"``.

    Raises:
      gfapy.ArgumentError: if the policy is not an accepted value.
    """
    policy = links_distribution_policy
    if policy not in self.LINKS_DISTRIBUTION_POLICY:
        raise gfapy.ArgumentError(
            "Unknown links distribution policy {}\n".format(policy) +
            "accepted values are: {}".format(
                ", ".join(self.LINKS_DISTRIBUTION_POLICY)))

    if policy == "off":
        return None
    if policy in ("L", "R"):
        # An explicitly requested end is returned as-is.
        return policy

    seg = self.segment(segment_name)
    n_right = len(seg.dovetails_of_end("R"))
    n_left = len(seg.dovetails_of_end("L"))
    equal_only = policy == "equal"
    return self._auto_select_distribute_end(
        factor, n_left, n_right, equal_only)
def _connectivity(self):
    """
    Compute the connectivity of the segment from its dovetail overlaps.

    The number of dovetails at the L end and at the R end are passed to
    ``_connectivity_symbols``, whose result is returned.

    Returns
    -------
    (conn_symbol, conn_symbol) list
      Connectivity symbols of the L and R end of the segment,
      respectively. As described by the original documentation, with
      *n* the number of links to an end, the symbol is ``M`` if
      *n* > 1, otherwise *n*.

    Raises
    ------
    gfapy.ArgumentError
      If the segment is not connected to a GFA instance.
    """
    if self.is_connected():
        n_left = len(self.dovetails_L)
        n_right = len(self.dovetails_R)
        return self._connectivity_symbols(n_left, n_right)
    raise gfapy.ArgumentError(
        "Cannot compute the connectivity of {}\n".format(self) +
        "Segment is not connected to a GFA instance")
def multiply(self, segment, factor, copy_names=None,
             conserve_components=True, distribute=None,
             track_origin=False, origin_tag="or", extended=False):
    """Multiply a segment by a given factor.

    The multiplication operation is implemented as described in
    Gonnella and Kurtz (2016).

    Parameters:
      segment (Line, str): the segment to multiply
      factor (int): the multiplication factor; if 0, the segment is
        deleted; if 1, nothing is done; if > 1, the multiplication is
        performed
      copy_names (list, None): an optional list of strings, the names of
        the copies which will result from the multiplication; the length
        of this list must be equal to factor - 1; if no list is specified,
        the names are computed automatically, adding (or incrementing) an
        integer as suffix to the segment name, until enough non-previously
        used names are found
      conserve_components (bool): if True, the removal of segments in the
        case where factor == 0 is only done if it does not split an
        existing connected component (thereby only dovetail overlaps are
        considered)
      extended : if True, then dovetail distribution and track origin are
        turned on by default
      distribute (str, None) : select an end for which the dovetail
        overlaps are distributed (see Gonnella and Kurtz, 2016); if
        ``auto`` (the default if extended is set), an end is selected
        automatically, trying to maximize the number of links which can be
        deleted; if ``off`` (the default if extended is not set), no
        distribution is performed; if ``L`` or ``R``, links of the
        specified end are distributed; if ``equal``, an end is selected
        (if any), for which the number of links is equal to the factor
        (if none, links are not distributed; if both, then ``R`` is used)
      track_origin (bool): if True, the name of the original segment (or
        the content of its own origin tag, if any) is stored in a tag in
        the copies (default: False)
      origin_tag (str): the tag where to store the origin information,
        if track_origin is set (default: ``or``)

    Returns:
      the object on which the method was called (``self``)

    Raises:
      gfapy.ArgumentError: if **factor** is negative
    """
    if extended:
        if distribute is None:
            distribute = "auto"
        track_origin = True

    if factor < 0:
        raise gfapy.ArgumentError("Mulitiplication factor must be >= 0" +
                                  " ({} found)".format(factor))

    if factor == 0:
        # A cut segment is kept when components must be conserved. The
        # guard previously also required factor == 1, which can never hold
        # in this branch, so conserve_components had no effect.
        if conserve_components and self.is_cut_segment(segment):
            return self
        self.rm(segment)
        return self

    if factor == 1:
        return self

    s, sn = self._segment_and_segment_name(segment)
    if track_origin and not s.get(origin_tag):
        # Only set the origin if the segment does not carry one already.
        s.set(origin_tag, sn)
    self.__divide_segment_and_connection_counts(s, factor)
    if copy_names is None:
        copy_names = self._compute_copy_names(sn, factor)
    for cn in copy_names:
        self.__clone_segment_and_connections(s, cn)
    if distribute:
        self._distribute_links(distribute, sn, copy_names, factor)
    return self