def _to_gfa1_a(self):
    """List of the field content of the line in GFA1.

    The list starts with the alignment type, followed by name and
    orientation of the two oriented segments (ordered according to
    ``_is_sid1_from()``), the position (containments only), the overlap,
    an ``ID`` tag built from the edge ID (unless it is a placeholder) and
    all the tags of the line.

    Returns
    -------
    list
        The GFA1 fields of the line.

    Raises
    ------
    gfapy.RuntimeError
        If the edge represents an internal overlap (alignment type "I")
        or if the overlap does not validate as GFA1.
    """
    at = self._alignment_type
    if at == "I":
        raise gfapy.RuntimeError(
            "Conversion of edge line from GFA2 to GFA1 failed\n" +
            "Edge represents an internal overlap:\n" +
            "Edge line: {}\n".format(str(self)))
    a = [at]
    if self._is_sid1_from():
        ol1 = self.get("sid1")
        ol2 = self.get("sid2")
    else:
        ol1 = self.get("sid2")
        ol2 = self.get("sid1")
    a.extend([ol1.name, ol1.orient, ol2.name, ol2.orient])
    if at == "C":
        a.append(str(self.pos))
    try:
        self.overlap.validate(version="gfa1")
    except Exception as err:
        # Only real errors are translated; KeyboardInterrupt/SystemExit
        # propagate untouched and the original cause stays attached.
        raise gfapy.RuntimeError(
            "Conversion of edge line from GFA2 to GFA1 failed\n" +
            "Overlap is invalid or not compatible with GFA1\n" +
            "Edge line: {}\n".format(str(self))) from err
    a.append(str(self.overlap))
    if not gfapy.is_placeholder(self.eid):
        a.append(gfapy.Field._to_gfa_tag(self.eid, "ID", datatype="Z"))
    for fn in self.tagnames:
        a.append(self.field_to_s(fn, tag=True))
    return a
def _push_item_on_se_path(self, path, prev_edge, item):
    """Append one oriented group item to the captured path being built.

    The action depends on the type of the line referenced by the item:
    segments and edges are pushed using the dedicated helpers, ordered
    groups are expanded recursively through their own captured path
    (reversed and inverted if the item orientation is not "+").

    Parameters
    ----------
    path : list
        The path computed so far.
    prev_edge : bool
        State flag handed over between consecutive pushes; it is set to
        False after a segment and to True after an edge.
    item : oriented line
        The item to push; ``item.line`` is the referenced line.

    Returns
    -------
    tuple
        The pair ``(path, prev_edge)`` after the item has been processed.

    Raises
    ------
    gfapy.RuntimeError
        If the reference is unresolved (string or virtual unknown line)
        or the referenced line is not connected.
    gfapy.AssertionError
        If the captured path of a nested ordered group is empty.
    gfapy.TypeError
        If the referenced line has an unsupported type.
    """
    target = item.line

    def require_connected():
        # Shared precondition of the segment, edge and ordered group cases.
        if not target.is_connected():
            raise gfapy.RuntimeError(
                "Captured path cannot be computed; item is not connected\n" +
                "Line: {}\n".format(self) +
                "Item: {}".format(target))

    if isinstance(target, str):
        raise gfapy.RuntimeError(
            "Captured path cannot be computed; a reference has not been resolved\n" +
            "Line: {}\n".format(self) +
            "Unresolved reference: {} (String found)".format(target))

    if isinstance(target, gfapy.line.segment.GFA2):
        require_connected()
        self._push_segment_on_se_path(path, prev_edge, item)
        return path, False

    if isinstance(target, gfapy.line.edge.GFA2):
        require_connected()
        if path:
            self._push_nonfirst_edge_on_se_path(path, item)
        else:
            self._push_first_edge_on_se_path(path, self.items)
        return path, True

    if isinstance(target, gfapy.line.group.Ordered):
        require_connected()
        subpath, prev_edge_subpath = target._compute_captured_path()
        if not subpath:
            raise gfapy.AssertionError()
        if item.orient == "+":
            nested_items = iter(subpath)
        else:
            # Lazily invert the elements while walking the subpath backwards.
            nested_items = (elem.inverted() for elem in reversed(subpath))
        for nested in nested_items:
            path, prev_edge = self._push_item_on_se_path(
                path, prev_edge, nested)
        return path, prev_edge_subpath

    if isinstance(target, gfapy.line.unknown.Unknown):
        raise gfapy.RuntimeError(
            "Captured path cannot be computed; a reference has not been resolved\n" +
            "Line: {}\n".format(self) +
            "Unresolved reference: {} (Virtual unknown line)".format(
                item.name))

    raise gfapy.TypeError(
        "Line: {}\t".format(self) +
        "Cannot compute captured path:\t" +
        "Error: items of type {} are not supported\t".format(
            target.__class__.__name__) +
        "Unsupported item: {}".format(item))
def induced_segments_set(self):
    """Segments induced by the items of the group, without duplicates.

    Segments contribute themselves, edges contribute the lines of their
    ``sid1`` and ``sid2``, ordered groups contribute the lines of their
    captured segments and unordered groups contribute their own induced
    segments set. The first occurrence of each object (by identity) is
    kept, in order of appearance.

    Returns
    -------
    list
        The induced segments.

    Raises
    ------
    gfapy.RuntimeError
        If the line is not connected or an item is an unresolved
        reference (string or virtual unknown line).
    gfapy.TypeError
        If an item has an unsupported type.
    """
    if not self.is_connected():
        raise gfapy.RuntimeError(
            "Induced set cannot be computed\n" +
            "Line is not connected to a GFA instance\n" +
            "Line: {}".format(self))
    segments_set = []
    for item in self.items:
        if isinstance(item, str):
            # The item itself is the unresolved name: a str has no
            # ``line`` attribute, so it must be formatted directly.
            raise gfapy.RuntimeError(
                "Induced set cannot be computed; a reference has not been resolved\n" +
                "Line: {}\n".format(self) +
                "Unresolved reference: {} (String found)".format(item))
        elif isinstance(item, gfapy.line.segment.GFA2):
            self._check_induced_set_elem_connected(item)
            segments_set.append(item)
        elif isinstance(item, gfapy.line.edge.GFA2):
            self._check_induced_set_elem_connected(item)
            for sl in [item.sid1.line, item.sid2.line]:
                self._check_induced_set_elem_connected(sl)
                segments_set.append(sl)
        elif isinstance(item, gfapy.line.group.Ordered):
            self._check_induced_set_elem_connected(item)
            subset = item.captured_segments
            assert (subset)
            for elem in subset:
                segments_set.append(elem.line)
        elif isinstance(item, gfapy.line.group.Unordered):
            self._check_induced_set_elem_connected(item)
            subset = item.induced_segments_set
            assert (subset)
            for elem in subset:
                segments_set.append(elem)
        elif isinstance(item, gfapy.line.Unknown):
            raise gfapy.RuntimeError(
                "Induced set cannot be computed; a reference has not been resolved\n" +
                "Line: {}\n".format(self) +
                "Unresolved reference: {} (Virtual unknown line)".format(
                    item.name))
        else:
            raise gfapy.TypeError(
                "Line: {}\t".format(self) +
                "Cannot compute induced set:\t" +
                "Error: items of type {} are not supported\t".format(
                    item.__class__.__name__) +
                "Unsupported item: {}".format(item))
    # Deduplicate by object identity, keeping the first occurrence.
    seen_ids = set()
    unique_segments = []
    for segment in segments_set:
        if id(segment) not in seen_ids:
            seen_ids.add(id(segment))
            unique_segments.append(segment)
    return unique_segments
def induced_edges_set(self):
    """Edges induced by the induced segments set of the group.

    Returns
    -------
    object
        The result of ``_compute_induced_edges_set`` applied to
        ``self.induced_segments_set``.

    Raises
    ------
    gfapy.RuntimeError
        If the line is not connected to a GFA instance.
    """
    if self.is_connected():
        segments = self.induced_segments_set
        return self._compute_induced_edges_set(segments)
    raise gfapy.RuntimeError(
        "Induced set cannot be computed\n" +
        "Line is not connected to a GFA instance\n" +
        "Line: {}".format(self))
def connect(self, gfa):
    """
    Connect the line to a GFA instance

    If the GFA reports a duplicate of the line, the result of handling
    it is returned (substitution of a virtual line, or processing of a
    non-unique line). Otherwise the line stores the GFA instance,
    initializes its references and is registered in the GFA.

    Parameters
    ----------
    gfa : GFA
        the GFA instance

    Returns
    -------
    object
        None if the line was registered; otherwise whatever the
        duplicate handler returns.

    Raises
    ------
    gfapy.RuntimeError
        If the line is already connected.
    """
    if self.is_connected():
        raise gfapy.RuntimeError(
            "Line {} is already connected to a GFA instance".format(self))
    duplicate = gfa._search_duplicate(self)
    if not duplicate:
        self._gfa = gfa
        self._initialize_references()
        self._gfa._register_line(self)
        return None
    if duplicate.virtual:
        return self._substitute_virtual_line(duplicate)
    return self._process_not_unique(duplicate)
def line(self, line):
    """Store a new line in the instance, if the instance is editable.

    Raises
    ------
    gfapy.RuntimeError
        If the instance is not editable.
    """
    if not self.__editable:
        raise gfapy.RuntimeError(
            "gfapy.OrientedLine instance cannot be edited ({})".format(
                self))
    self.__line = line
def to_version(self, version, raise_on_failure=True):
    """
    Convert the line to the selected GFA version.

    The conversion is delegated to the method ``_to_<version>_a`` of the
    line, which provides the list of fields used to construct the
    converted line.

    Parameters
    ----------
    version : str
        The target version; must be one of gfapy.VERSIONS.
    raise_on_failure : bool
        If the conversion method yields no fields: raise an exception
        (True, default) or return None (False).

    Returns
    -------
    gfapy.Line
        Conversion to the selected version; the line itself if it has
        already that version; None if the conversion is not possible and
        **raise_on_failure** is False.

    Raises
    ------
    gfapy.VersionError
        If the version is unknown, or the record type cannot be
        converted and **raise_on_failure** is True.
    gfapy.RuntimeError
        If the converted line cannot be constructed.
    """
    if version == self._version:
        return self
    if version not in gfapy.VERSIONS:
        raise gfapy.VersionError("Version unknown ({})".format(version))
    fields = getattr(self, "_to_" + version + "_a")()
    if fields:
        try:
            return gfapy.Line(fields, version=version, vlevel=self.vlevel)
        except Exception as err:
            # Keep the original failure attached as the cause and let
            # KeyboardInterrupt/SystemExit pass through.
            raise gfapy.RuntimeError(
                "Conversion to {} failed\n".format(version) +
                "Line: {}".format(str(self))) from err
    if raise_on_failure:
        raise gfapy.VersionError(
            "Records of type {} ".format(self.record_type) +
            "cannot be converted from version {} ".format(
                self._version) +
            "to version {}".format(version))
    return None
def _to_gfa2_a(self):
    """
    Field content of the segment line in GFA2.

    The length is obtained once using ``try_get_length()``; the ``LN``
    tag is not copied to the output.

    Returns
    -------
    list of str
        A list of GFA2 field strings.

    Raises
    ------
    gfapy.RuntimeError
        If no length information is available for the segment.
    """
    try:
        length = self.try_get_length()
    except gfapy.NotFoundError as err:
        raise gfapy.RuntimeError(
            "Conversion of GFA1 segment line to GFA2 failed\n" +
            "GFA2 requires to specify a length\n" +
            "No length information available in the GFA1 segment:\n" +
            "Segment line: {}".format(str(self))) from err
    a = [
        "S",
        self.field_to_s("name", tag=False),
        # Reuse the length computed above instead of querying it again
        # outside of the try block.
        str(length),
        self.field_to_s("sequence", tag=False),
    ]
    a.extend(self.field_to_s(fn, tag=True)
             for fn in self.tagnames if fn != "LN")
    return a
def captured_path(self):
    """Captured path of the group.

    Returns
    -------
    object
        The first element of the result of ``_compute_captured_path()``.

    Raises
    ------
    gfapy.RuntimeError
        If the line is not connected to a GFA instance.
    """
    if self.is_connected():
        computed = self._compute_captured_path()
        return computed[0]
    raise gfapy.RuntimeError(
        "Captured path cannot be computed\n" +
        "Line is not connected to a GFA instance\n" +
        "Line: {}".format(self))
def orient(self, orient):
    """Store a new orientation in the instance, if the instance is editable.

    Raises
    ------
    gfapy.RuntimeError
        If the instance is not editable.
    """
    if not self.__editable:
        raise gfapy.RuntimeError(
            "gfapy.OrientedLine instance cannot be edited ({})".format(
                self))
    self.__orient = orient
def set_datatype(self, fieldname, datatype):
    """
    Set the datatype of a tag.

    If an existing tag datatype is changed, its content may become
    invalid (call **validate_field** if necessary).

    Parameters
    ----------
    fieldname : str
        The field name (it is not required that the field exists already)
    datatype : gfapy.Field.FIELD_DATATYPE
        The datatype.

    Raises
    ------
    gfapy.RuntimeError
        If **fieldname** is a predefined tag and **datatype** differs
        from its datatype.
    gfapy.FormatError
        If **fieldname** is neither a predefined tag nor a valid custom
        tag name, and the validation level is greater than 0.
    gfapy.ArgumentError
        If **datatype** is not a valid datatype for tags.
    """
    predefined = self._is_predefined_tag(fieldname)
    if predefined and self.get_datatype(fieldname) != datatype:
        raise gfapy.RuntimeError(
            "Cannot set the datatype of {} to {}\n".format(
                fieldname, datatype) +
            "The datatype of a predefined tag cannot be changed")
    if (not predefined
            and not self._is_valid_custom_tagname(fieldname)
            and self.vlevel > 0):
        raise gfapy.FormatError(
            "{} is not a valid custom tag name".format(fieldname))
    if datatype not in gfapy.Field.TAG_DATATYPE:
        raise gfapy.ArgumentError("Unknown datatype: {}".format(datatype))
    self._datatype[fieldname] = datatype
def _set_existing_field(self, fieldname, value, set_reference=False):
    """Set (or delete, if **value** is None) the content of a field.

    For a line which belongs to a GFA instance, reference-related fields
    can be changed only if **set_reference** is set. If the field is the
    one under which the line is stored in the GFA (the storage key, or
    the name field when the storage key is "name"), the line is
    unregistered before and registered again after the change.

    At validation level 3 or higher the value is validated *before* the
    line is unregistered, so that a validation failure does not leave
    the line missing from the GFA instance.

    Parameters
    ----------
    fieldname : str
        The name of the field.
    value : object
        The new value; None removes the field from the line data.
    set_reference : bool
        Allow to change reference-related fields of connected lines.

    Raises
    ------
    gfapy.RuntimeError
        If a reference-related field of a connected line is changed and
        **set_reference** is False.
    """
    cls = self.__class__
    renaming_connected = False
    if self._gfa:
        if not set_reference and \
                (fieldname in cls.REFERENCE_FIELDS or
                 fieldname in cls.BACKREFERENCE_RELATED_FIELDS):
            raise gfapy.RuntimeError(
                "The value of field '{}' cannot be changed, ".format(
                    fieldname) +
                "as the line belongs to a GFA instance")
        if (fieldname == cls.STORAGE_KEY) or \
                (cls.STORAGE_KEY == "name" and
                 fieldname == cls.NAME_FIELD):
            renaming_connected = True
    if value is not None and self.vlevel >= 3:
        # Everything that can fail is done while the line is still
        # registered in the GFA.
        self._field_or_default_datatype(fieldname, value)
        gfapy.Field._validate_gfa_field(
            value, self._field_datatype(fieldname), fieldname)
    if renaming_connected:
        self._gfa._unregister_line(self)
    if value is None:
        self._data.pop(fieldname, None)
    else:
        self._data[fieldname] = value
    if renaming_connected:
        self._gfa._register_line(self)
def __init__(self, data, vlevel=1, virtual=False, version=None):
    """Initialize the line from a dict, a list or a tab-separated string.

    Parameters
    ----------
    data : dict, list or str
        A dict is used as-is to fill the line data (API private
        initialization, no parsing or validation). Otherwise the data is
        the list of fields of the line, or a string which is split at
        gfapy.Line.SEPARATOR.
    vlevel : int
        The validation level; with level 1 or higher the record type
        specific validations are run on parsed data.
    virtual : bool
        Stored as the virtual flag of the line.
    version : str
        The GFA version; if None it is computed from the record type.

    Raises
    ------
    gfapy.AssertionError
        If the class is gfapy.Line itself and not one of its subclasses.
    gfapy.RuntimeError
        If the version cannot be determined.
    """
    self.vlevel = vlevel
    self._virtual = virtual
    self._datatype = {}
    self._data = {}
    self._gfa = None
    self._version = version
    self._refs = {}
    line_class = self.__class__
    if line_class == gfapy.Line:
        raise gfapy.AssertionError("Line subclass unknown")

    # API private initialization using dict
    if isinstance(data, dict):
        self._data.update(data)
        return

    # public initialization using list (or tab-separated string)
    if line_class == gfapy.line.Comment:
        data = gfapy.Line._init_comment_data(data)
    elif isinstance(data, str):
        data = data.split(gfapy.Line.SEPARATOR)

    if self.version is not None:
        self._validate_version()
    else:
        self._compute_version(data[0])

    self._initialize_positional_fields(data)
    self._initialize_tags(data)
    if self.vlevel >= 1:
        self._validate_record_type_specific_info()
    if self.version is None:
        raise gfapy.RuntimeError(
            "version could not be determined, " +
            "record_type={}".format(self.record_type))
def connect(self, gfa):
    """Store the GFA instance, if the line is the header of that instance.

    Parameters
    ----------
    gfa : GFA
        The GFA instance.

    Raises
    ------
    gfapy.RuntimeError
        If the line is not the ``header`` of **gfa**.
    """
    if gfa.header is self:
        self._gfa = gfa
        return
    raise gfapy.RuntimeError(
        "gfapy.line.Header instances cannot be connected\n" +
        "Use gfa.add_line(this_line) to add the information\n" +
        "contained in this header line to the header of a GFA instance."
    )
def diffscript(self, other, selfvar):
    """Script of method calls which apply the differences to **other**.

    Each element yielded by ``self.diff(other)`` is translated into
    ``set``, ``set_datatype`` or ``delete`` calls on the variable named
    **selfvar**. Single quotes in field names, datatypes and values are
    escaped with a backslash.

    Parameters
    ----------
    other : object
        The line to compare with (passed to ``self.diff``).
    selfvar : str
        The name of the variable to use in the generated statements.

    Returns
    -------
    str
        The generated statements, one per line.

    Raises
    ------
    gfapy.RuntimeError
        If the lines have a different record type or GFA version.
    """
    def quoted(text):
        # Escape single quotes, as the text is put in a '...' literal.
        return text.replace("'", "\\'")

    def set_call(name, content):
        return "{0}.set('{1}', '{2}')".format(
            selfvar, quoted(name), quoted(content))

    def set_datatype_call(name, datatype):
        return "{0}.set_datatype('{1}', '{2}')".format(
            selfvar, quoted(name), quoted(datatype))

    outscript = []
    for diffitem in self.diff(other):
        kind = diffitem[0]
        if kind == "incompatible":
            if diffitem[1] == "record_type":
                raise gfapy.RuntimeError(
                    "Cannot compute conversion script: different record type\n" +
                    "Line: {}\n".format(self) +
                    "Other: {}\n".format(other) +
                    "{0} != {1}".format(diffitem[2], diffitem[3]))
            elif diffitem[1] == "version":
                raise gfapy.RuntimeError(
                    "Cannot compute conversion script: different GFA version\n" +
                    "Line: {}\n".format(self) +
                    "Other: {}\n".format(other) +
                    "{0} != {1}".format(diffitem[2], diffitem[3]))
        elif kind == "different":
            if diffitem[1] == "positional_field":
                outscript.append(set_call(diffitem[2], diffitem[4]))
            elif diffitem[1] == "tag":
                # Elements 3/4 and 5/6 are datatype/value pairs; the
                # second pair is the one written to the script.
                if diffitem[3] != diffitem[5]:
                    outscript.append(
                        set_datatype_call(diffitem[2], diffitem[5]))
                if diffitem[4] != diffitem[6]:
                    outscript.append(set_call(diffitem[2], diffitem[6]))
        elif kind == "exclusive":
            if diffitem[1] == ">":
                if diffitem[2] == "tag":
                    outscript.append(
                        set_datatype_call(diffitem[3], diffitem[4]))
                    outscript.append(set_call(diffitem[3], diffitem[5]))
            elif diffitem[1] == "<":
                if diffitem[2] == "tag":
                    outscript.append("{0}.delete('{1}')".format(
                        selfvar, quoted(diffitem[3])))
    return "\n".join(outscript)
def _api_private_check_gfa_line(self, gfa_line, callermeth):
    """Check the line argument passed to an API private method.

    Parameters
    ----------
    gfa_line : gfapy.Line
        The line to check; it must be a gfapy.Line whose ``_gfa`` is
        this instance.
    callermeth : str
        The name of the calling method, used in the error messages.

    Raises
    ------
    gfapy.TypeError
        If **gfa_line** is not an instance of gfapy.Line.
    gfapy.RuntimeError
        If the ``_gfa`` of **gfa_line** is not this instance.
    """
    if not isinstance(gfa_line, gfapy.Line):
        raise gfapy.TypeError(
            "Note: {} is API private, ".format(callermeth) +
            "do not call it directly\n" +
            # Fill the placeholder with the name of the actual class.
            "Error: line class is {} and not gfapy.Line".format(
                gfa_line.__class__.__name__))
    elif gfa_line._gfa is not self:
        raise gfapy.RuntimeError(
            "Note: {} is API private, ".format(callermeth) +
            "do not call it directly\n" +
            "Error: line.gfa is not the expected instance of gfapy.Gfa\n" +
            repr(gfa_line.gfa) + " != " + repr(self))
def set(self, fieldname, value):
    """Set the value of a field.

    The generic Line.set() method is overwritten for comments, in order
    to disallow tags: only the fields "content" and "spacer" can be set.

    Raises
    ------
    gfapy.RuntimeError
        If **fieldname** is not "content" or "spacer".
    """
    if fieldname not in ("content", "spacer"):
        raise gfapy.RuntimeError("Tags of comment lines cannot be set")
    return super().set(fieldname, value)
def _set_existing_field(self, fieldname, value, set_reference=False):
    """Set a field, refusing to edit the VN tag of a connected header.

    The VN tag can still be set if it has no value yet or if the line is
    not connected; all other fields are handled by the superclass.

    Raises
    ------
    gfapy.RuntimeError
        If the VN tag has a value and the line is connected.
    """
    editing_version = (fieldname == "VN"
                       and self.get("VN") is not None
                       and self.is_connected())
    if editing_version:
        raise gfapy.RuntimeError(
            "The value of the header tag VN cannot be edited\n" +
            "For version conversion use to_gfa1 or to_gfa2")
    super()._set_existing_field(fieldname, value,
                                set_reference=set_reference)
def randomly_orient_invertible(self, segment):
    '''Selects a random orientation for an invertible segment.

    For the definition of invertible segment, see Gonnella and Kurtz
    (2016).

    Parameters
    ----------
    segment : gfapy.Line or str
        The segment (its ``name`` is used) or the segment name.

    Raises
    ------
    gfapy.RuntimeError
        If the segment does not have links to the same or equivalent
        segments at both ends.
    '''
    segment_name = segment.name if isinstance(segment, gfapy.Line) \
        else segment
    if self._segment_same_links_both_ends(segment_name):
        self._randomly_orient_proven_invertible_segment(segment_name)
        return
    raise gfapy.RuntimeError(
        "Only segments with links to the same or " +
        "equivalent segments at both ends can be randomly oriented")
def set(self, fieldname, value):
    """Set the value of a field.

    If a datatype for a new custom tag is not set, the default for the
    value assigned to the field will be used (e.g. J for Hashes, i for
    Integer, etc).

    Parameters
    ----------
    fieldname : str
        The name of the field to set.
        (positional field, predefined tag (uppercase) or custom tag
        (lowercase))
    value : object
        The value to assign.

    Raises
    ------
    gfapy.RuntimeError
        If the line is virtual and **fieldname** is not an existing
        field, a predefined tag or an alias.
    gfapy.FormatError
        If **fieldname** is not a valid predefined or custom tag name
        (and the validation level is not 0).

    Returns
    -------
    object
        For existing fields, predefined tags and tags with a datatype
        already set: the result of ``_set_existing_field``. For a newly
        created custom tag: the stored value. None if **value** is None
        and the tag has no datatype set.
    """
    line_class = self.__class__
    if fieldname in self._data or self._is_predefined_tag(fieldname):
        return self._set_existing_field(fieldname, value)
    if fieldname in line_class.FIELD_ALIAS:
        # Resolve the alias and start over with the real field name.
        return self.set(line_class.FIELD_ALIAS[fieldname], value)
    if self.virtual:
        raise gfapy.RuntimeError("Virtual lines do not have tags")
    if self.vlevel != 0 and not self._is_valid_custom_tagname(fieldname):
        raise gfapy.FormatError(
            "{} is not a positional field,".format(fieldname) +
            "an existing tag, an alias, a predefined tag or a valid custom tag\n" +
            "positional fields: {}\n".format(", ".join(
                self.positional_fieldnames)) +
            "existing tags: {}\n".format(", ".join(self.tagnames)) +
            "aliases: {}\n".format(", ".join(
                line_class.FIELD_ALIAS.keys())) +
            "predefined tags: {}\n".format(", ".join(
                line_class.PREDEFINED_TAGS)))
    self._define_field_methods(fieldname)
    if self._datatype.get(fieldname, None) is not None:
        return self._set_existing_field(fieldname, value)
    if value is not None:
        default_datatype = gfapy.Field._get_default_gfa_tag_datatype(value)
        self._datatype[fieldname] = default_datatype
        self._data[fieldname] = value
        return self._data[fieldname]
    return None
def register_extension(cls, references=()):
    """Register the class as extension record type of GFA2.

    The definitions of the class are checked and completed: datatypes of
    the positional fields (POSFIELDS may be an OrderedDict mapping field
    names to datatypes, which is then replaced by the list of its keys)
    and of the tags in TAGS_DATATYPE, if present. The record type is
    then added to the known extensions and to the GFA2-specific record
    types.

    Parameters
    ----------
    references : iterable of (field, klass, refkey)
        For each element: **field** is added to the reference fields of
        the class, **refkey** to the dependent lines of **klass**, and
        the triple to the reference initializers of the class.
        The default is an immutable empty tuple, so that no mutable
        object is shared between calls.

    Raises
    ------
    gfapy.RuntimeError
        If no datatype is defined for a positional field, or RECORD_TYPE
        is not defined.
    """
    # check the definitions
    if isinstance(cls.POSFIELDS, OrderedDict):
        for fieldname, datatype in cls.POSFIELDS.items():
            cls.DATATYPE[fieldname] = datatype
        cls.POSFIELDS = list(cls.POSFIELDS.keys())
    else:
        for posfield in cls.POSFIELDS:
            if posfield not in cls.DATATYPE:
                raise gfapy.RuntimeError(
                    "Extension {} ".format(str(cls)) +
                    "defines no datatype for the positional field {}".
                    format(posfield))
    if hasattr(cls, "TAGS_DATATYPE"):
        for fieldname, datatype in cls.TAGS_DATATYPE.items():
            cls.DATATYPE[fieldname] = datatype
    if not cls.RECORD_TYPE:
        raise gfapy.RuntimeError(
            "Extension {} ".format(str(cls)) +
            "does not define the RECORD_TYPE constant")
    if cls.NAME_FIELD is not None:
        gfapy.lines.finders.Finders.RECORDS_WITH_NAME.append(
            cls.RECORD_TYPE)
    for field, klass, refkey in references:
        if field not in cls.REFERENCE_FIELDS:
            if not cls.REFERENCE_FIELDS:
                # Rebind to a new list before the first append.
                cls.REFERENCE_FIELDS = []
            cls.REFERENCE_FIELDS.append(field)
        if refkey not in klass.DEPENDENT_LINES:
            klass.DEPENDENT_LINES.append(refkey)
            klass._define_reference_getters()
        if cls.REFERENCE_INITIALIZERS is None:
            cls.REFERENCE_INITIALIZERS = []
        cls.REFERENCE_INITIALIZERS.append((field, klass, refkey))
    cls._apply_definitions()
    gfapy.Line.EXTENSIONS[cls.RECORD_TYPE] = cls
    gfapy.Line.RECORD_TYPE_VERSIONS["specific"]["gfa2"].append(
        cls.RECORD_TYPE)
def captured_path(self):
    """Alternating list of the segment names and links of the path.

    If the number of links equals the number of segment names, the last
    link and the first segment name are appended at the end.

    Returns
    -------
    list
        Elements of ``segment_names`` interleaved with elements of
        ``links``.

    Raises
    ------
    gfapy.RuntimeError
        If the line is not connected to a GFA instance.
    """
    if not self.is_connected():
        raise gfapy.RuntimeError(
            "Captured path cannot be computed\n" +
            "Line is not connected to a GFA instance\n" +
            "Line: {}".format(self))
    names = self.segment_names
    path = []
    for idx, segment_name in enumerate(names[:-1]):
        path.extend((segment_name, self.links[idx]))
    path.append(names[-1])
    if len(names) == len(self.links):
        # One link per segment name: append the last link and then the
        # first segment name again.
        path.extend((self.links[-1], names[0]))
    return path
def _compute_version(self, rt):
    """Set the version of the line according to its record type.

    The record type is looked up in Construction.RECORD_TYPE_VERSIONS:
    "generic" record types get the version "generic"; for "different"
    record types the VERSION of the class is used; for the others the
    version is the key of the "specific" table whose record types
    include **rt**, with "gfa2" as fallback if no version is set.

    Parameters
    ----------
    rt : str
        The record type.

    Raises
    ------
    gfapy.RuntimeError
        If the record type is in the "different" table and the class
        does not define VERSION.
    """
    versions_table = Construction.RECORD_TYPE_VERSIONS
    if rt in versions_table["generic"]:
        self._version = "generic"
        return
    if rt in versions_table["different"]:
        if not hasattr(self.__class__, "VERSION"):
            raise gfapy.RuntimeError(
                "GFA version not specified\n" +
                "Records of type {} ".format(rt) +
                "have different syntax according to the version")
        self._version = self.__class__.VERSION
        return
    for version_name, record_types in versions_table["specific"].items():
        if rt in record_types:
            self._version = version_name
            break
    if not self._version:
        self._version = "gfa2"
def disconnect(self):
    """
    Remove the line from the GFA instance it belongs to, if any.

    The Line instance itself will still exist, but all references from
    it to other lines are deleted, as well as references to it from
    other lines. Mandatory references are turned into their
    non-reference representations (e.g. segments references in the sid
    fields of E lines or in the from/to lines of L/C lines are changed
    into symbols).

    Raises
    ------
    gfapy.RuntimeError
        If the line is not connected to a GFA instance.
    """
    if not self.is_connected():
        raise gfapy.RuntimeError(
            "Line {} is not connected to a GFA instance".format(self))
    # The cleanup steps run in this order, each looked up just before
    # it is called.
    cleanup_steps = (
        "_remove_field_backreferences",
        "_remove_field_references",
        "_disconnect_dependent_lines",
        "_remove_nonfield_backreferences",
        "_remove_nonfield_references",
    )
    for step_name in cleanup_steps:
        getattr(self, step_name)()
    self._gfa._unregister_line(self)
    self._gfa = None
def _check_captured_path_elem_connected(self, item):
    """Check that an element of the captured path is connected.

    Parameters
    ----------
    item : object
        The element to check; it must provide ``is_connected()``.

    Raises
    ------
    gfapy.RuntimeError
        If the element is not connected.
    """
    if not item.is_connected():
        # The message names the captured path, as this check belongs to
        # the captured path computation.
        raise gfapy.RuntimeError("Cannot compute captured path\n" +
                                 "Non-connected element found\n" +
                                 "Item: {}\nLine: {}".format(item, self))
def _check_ref_not_self(self, item):
    """Check that the line referenced by an item is not the group itself.

    Parameters
    ----------
    item : object
        The item to check; ``item.line`` is compared to the group.

    Raises
    ------
    gfapy.RuntimeError
        If ``item.line`` equals the line itself.
    """
    refers_to_itself = (item.line == self)
    if not refers_to_itself:
        return
    raise gfapy.RuntimeError(
        "Line: {}\n".format(self) +
        "Item is the line itself\n" +
        "A group is not allowed to refer to itself")