Python Alignment.annotations примеры использования

Язык программирования: Python

Пространство имен/Пакет: Bio.Align

Класс/Тип: Alignment

Метод/Функция: annotations

Примеров на hotexamples.com: 4

Python Alignment.annotations - 4 примера найдено. Это лучшие примеры Python кода для Bio.Align.Alignment.annotations, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Alignment(14)

infer_coordinates(9)

column_annotations(4)

annotations(4)

score(3)

misMatches(1)

thickEnd(1)

similarity(1)

repMatches(1)

nCount(1)

matches(1)

matrix(1)

itemRgb(1)

identity(1)

gaps(1)

gap_penalty(1)

extend_penalty(1)

thickStart(1)

Пример #1

Показать файл

 def create_alignment(self, line):
     """Parse one line of FASTA output and return an Alignment object.

     The line is split on whitespace into exactly 13 columns, read here as:
     query id, target id, percentage identity, alignment length, number of
     mismatches, number of gap opens, query start, query end, target start,
     target end, E-value, bit score, and the alignment itself encoded as a
     BTOP or CIGAR string (selected by ``self._alignment_representation``).

     The returned alignment holds the target as its first record and the
     query as its second; both sequences are created without sequence
     data, only with a length. The program, database, mismatches, evalue
     and bit_score values are stored in ``alignment.annotations``.

     Raises:
         ValueError: if ``self._alignment_representation`` is neither
             "BTOP" nor "CIGAR", or if a numeric column cannot be
             converted to a number.
         AssertionError: if the line does not have 13 columns, if the
             query id differs from ``self._query_id``, or if the reported
             percentage identity is inconsistent with the alignment
             length and the number of mismatches.
     """
     columns = line.split()
     assert len(columns) == 13
     annotations = {}
     annotations["program"] = self._program
     annotations["database"] = self._database
     if self._query_id is not None:
         assert columns[0] == self._query_id
     query_id = columns[0]
     target_id = columns[1]
     percentage_identity = float(columns[2])
     alignment_length = int(columns[3])
     mismatches = int(columns[4])
     matches = alignment_length - mismatches
     # Consistency check: the reported percentage must agree with the value
     # recomputed from the alignment length and the mismatch count.
     difference = abs(100 * matches / alignment_length - percentage_identity)
     assert difference < 0.015
     gap_opens = int(columns[5])  # converted for validation only; not stored
     # Start positions are decremented by one (presumably 1-based in the
     # file - TODO confirm); end positions are used as given.
     query_start = int(columns[6]) - 1
     query_end = int(columns[7])
     target_start = int(columns[8]) - 1
     target_end = int(columns[9])
     annotations["mismatches"] = mismatches
     annotations["evalue"] = float(columns[10])
     annotations["bit_score"] = float(columns[11])
     if self._alignment_representation == "BTOP":
         coordinates = self.parse_btop(columns[12])
     elif self._alignment_representation == "CIGAR":
         coordinates = self.parse_cigar(columns[12])
     else:
         # Without this branch ``coordinates`` would be unbound below and
         # the failure would surface as an opaque UnboundLocalError.
         raise ValueError(
             "Unsupported alignment representation: %r"
             % self._alignment_representation
         )
     # Row 0 holds the target coordinates, row 1 the query coordinates.
     coordinates[0, :] += target_start
     query_size = self._query_size
     if query_start < query_end:
         coordinates[1, :] += query_start
     else:
         # mapped to reverse strand
         coordinates[1, :] = coordinates[1, ::-1]
         coordinates[1, :] += query_size - query_start - 1
     query_sequence = Seq(None, length=query_size)
     query = SeqRecord(query_sequence, id=query_id)
     if self._query_description is not None:
         query.description = self._query_description
     target_sequence = Seq(None, length=target_end)
     target = SeqRecord(target_sequence, id=target_id)
     records = [target, query]
     alignment = Alignment(records, coordinates)
     alignment.annotations = annotations
     return alignment

Пример #2

Показать файл

Файл: maf.py Проект: MarkusPiotrowski/biopython

 def create_alignment(
     records,
     aligned_sequences,
     strands,
     annotations,
     column_annotations,
     score,
 ):
     """Assemble an Alignment object from the collected alignment data.

     Coordinates are inferred from ``aligned_sequences``. Each coordinate
     row is mirrored if the matching strand is "-", and then shifted by
     the start of the first defined range of the matching record's
     sequence. Each of ``annotations``, ``column_annotations`` and
     ``score`` is stored on the alignment only if it is not None.
     """
     coordinates = Alignment.infer_coordinates(aligned_sequences)
     for record, strand, positions in zip(records, strands, coordinates):
         # ``positions`` is modified in place; presumably it is a view into
         # ``coordinates``, so the changes land in the array itself.
         if strand == "-":
             positions[:] = positions[-1] - positions[0] - positions
         positions += record.seq.defined_ranges[0][0]
     alignment = Alignment(records, coordinates)
     optional_attributes = (
         ("annotations", annotations),
         ("column_annotations", column_annotations),
         ("score", score),
     )
     for attribute, value in optional_attributes:
         if value is not None:
             setattr(alignment, attribute, value)
     return alignment

Пример #3

Показать файл

    def parse(self, stream):
        """Parse the alignments in the stream, yielding them one by one.

        This is a generator working as a small state machine:

        1. ``identifiers is None``: skip blank lines and "#-----" separator
           lines until a "#=====" line opens the alignment metadata.
        2. ``sequences is None``: read the "# key: value" metadata lines
           (number of sequences, their identifiers, matrix, penalties,
           length, identity, similarity, gaps, score, ...) until a second
           "#=====" line closes the metadata.
        3. otherwise: read the alignment blocks; once the number of columns
           given by the "Length" metadata has been read, build and yield
           the Alignment object and return to state 1.

        The method reads ``self.align_format`` (compared with "srspair").

        Yields:
            Alignment objects; metadata is stored in ``annotations`` and a
            non-empty match line in ``column_annotations`` under the key
            "emboss_consensus".

        Raises:
            ValueError: on an unexpected or unparsable line, or if the
                number of sequences or the alignment length is missing.
            AssertionError: if the alignment blocks are inconsistent with
                the metadata or with each other.
        """
        if stream is None:
            # A plain return ends a generator cleanly; raising StopIteration
            # here would be converted into RuntimeError (PEP 479).
            return

        identifiers = None
        number_of_sequences = None
        annotations = {}
        for line in stream:
            line = line.rstrip("\r\n")
            if identifiers is None:
                # searching for alignment metadata start
                if not line:
                    continue
                elif line.startswith("#---------------------------------------"):
                    # may appear between alignments
                    continue
                elif line.startswith("#======================================="):
                    # found the alignment metadata start
                    identifiers = []
                    ncols = None
                    sequences = None
                else:
                    raise ValueError("Unexpected line: %s" % line)
            elif sequences is None:
                # parsing the alignment metadata
                if line == "#=======================================":
                    # reached the end of alignment metadata
                    if not identifiers:
                        raise ValueError("Number of sequences missing!")
                    if ncols is None:
                        raise ValueError("Length of alignment missing!")
                    # Initialize the per-alignment accumulators; setting
                    # ``sequences`` switches the parser to the sequence state.
                    sequences = [""] * number_of_sequences
                    aligned_sequences = [""] * number_of_sequences
                    consensus = ""
                    starts = [0] * number_of_sequences
                    column = 0
                    index = 0
                    continue
                if line.strip() == "#":
                    continue
                if not line.startswith("# "):
                    # The message must be formatted before the exception is
                    # built; applying % to the exception raises TypeError.
                    raise ValueError("Unexpected line: %s" % line)
                try:
                    key, value = line[2:].split(":", 1)
                except ValueError:
                    # An equal sign is used for Longest_Identity,
                    # Longest_Similarity, Shortest_Identity, and
                    # Shortest_Similarity, which are included if command line
                    # argument -nobrief was used.
                    key, value = line[2:].split(" = ", 1)
                if key == "Aligned_sequences":
                    number_of_sequences = int(value.strip())
                    assert len(identifiers) == 0
                    # Should now expect the record identifiers...
                    # These lines are consumed directly from the stream, so
                    # they still carry their line ending (stripped below).
                    for i, line in enumerate(stream):
                        if not line.startswith("# "):
                            raise ValueError("Unexpected line: %s" % line)
                        number, identifier = line[2:].split(":")
                        assert i + 1 == int(number)
                        identifiers.append(identifier.strip())
                        if len(identifiers) == number_of_sequences:
                            break
                elif key == "Matrix":
                    annotations["matrix"] = value.strip()
                elif key == "Gap_penalty":
                    annotations["gap_penalty"] = float(value.strip())
                elif key == "Extend_penalty":
                    annotations["extend_penalty"] = float(value.strip())
                elif key == "Length":
                    ncols = int(value.strip())
                elif key == "Identity":
                    # only the number before the "/" is stored
                    annotations["identity"] = int(value.strip().split("/")[0])
                elif key == "Similarity":
                    annotations["similarity"] = int(value.strip().split("/")[0])
                elif key == "Gaps":
                    annotations["gaps"] = int(value.strip().split("/")[0])
                elif key == "Score":
                    annotations["score"] = float(value.strip())
                # TODO:
                # The following are generated if the -nobrief command line
                # argument used. We could simply calculate them from the
                # alignment, but then we have to define what we mean by
                # "similar". For now, simply store them as an annotation.
                elif key == "Longest_Identity":
                    annotations["longest_identity"] = value.strip()
                elif key == "Longest_Similarity":
                    annotations["longest_similarity"] = value.strip()
                elif key == "Shortest_Identity":
                    annotations["shortest_identity"] = value.strip()
                elif key == "Shortest_Similarity":
                    annotations["shortest_similarity"] = value.strip()
                else:
                    raise ValueError("Failed to parse line '%s'" % line)
            else:
                # parse the sequences
                if not line:
                    # empty line
                    if index == number_of_sequences:
                        # reached the end of an alignment block
                        index = 0
                        if column == ncols:
                            # reached the end of the sequences
                            coordinates = Alignment.infer_coordinates(aligned_sequences)
                            records = []
                            n = len(sequences)
                            for i in range(n):
                                start = starts[i]
                                if start == 0:
                                    sequence = Seq(sequences[i])
                                else:
                                    coordinates[i, :] += start
                                    # create a partially defined sequence
                                    length = start + len(sequences[i])
                                    data = {start: sequences[i]}
                                    sequence = Seq(data, length=length)
                                record = SeqRecord(sequence, identifiers[i])
                                records.append(record)
                            alignment = Alignment(records, coordinates)
                            if annotations:
                                alignment.annotations = annotations
                            if consensus:
                                alignment.column_annotations = {
                                    "emboss_consensus": consensus
                                }
                            yield alignment
                            # reset the state for the next alignment
                            identifiers = None
                            annotations = {}
                    continue
                # The first 21 characters hold the identifier and the start
                # position; they are blank on the match line.
                prefix = line[:21].strip()
                if prefix == "":
                    # match line
                    consensus += line[21:71]
                else:
                    identifier, start = prefix.split(None, 1)
                    assert identifiers[index].startswith(identifier)
                    aligned_sequence, end = line[21:].split(None, 1)
                    start = int(start) - 1  # Python counting
                    end = int(end)
                    length = len(sequences[index])
                    sequence = aligned_sequence.replace("-", "")
                    if length == 0 and len(sequence) > 0:
                        # Record the start
                        starts[index] = start
                    else:
                        if self.align_format == "srspair" and len(sequence) == 0:
                            start += 1
                        assert start == starts[index] + length
                    assert end == start + len(sequence)
                    sequences[index] += sequence
                    aligned_sequences[index] += aligned_sequence
                    if index == 0:
                        column += len(aligned_sequence)
                    else:
                        assert column == len(aligned_sequences[index])
                    index += 1

Пример #4

Показать файл

    def parse(self, stream):
        """Parse the alignments in the stream, yielding them one by one.

        This is a generator. A "# STOCKHOLM 1.0" line resets the
        per-alignment state, and a "//" line builds and yields the
        Alignment object. In between, sequence lines and the "#=GF",
        "#=GC", "#=GS" and "#=GR" annotation lines are collected. Blank
        lines and any other line starting with "#" are ignored.

        Yields:
            Alignment objects with an ``annotations`` dictionary that may
            contain "references", "database references" and
            "nested domains"; the remaining collected annotations are
            handed to the ``AlignmentIterator._store_*`` helpers.

        Raises:
            ValueError: if a sequence line cannot be split into a name and
                an aligned sequence, or if aligned sequences differ in
                length.
            AssertionError: if an annotation line violates one of the
                checks made while reading it.
        """
        if stream is None:
            # A plain return ends a generator cleanly; raising StopIteration
            # here would be converted into RuntimeError (PEP 479).
            return

        for line in stream:
            line = line.strip()
            if not line:
                continue
            elif line == "# STOCKHOLM 1.0":
                # Starting a new alignment
                records = []
                aligned_sequences = []
                references = []
                reference_comments = []
                database_references = []
                nested_domains = []
                gf = defaultdict(list)
                gc = {}
                gs = defaultdict(lambda: {"DR": []})
                gr = defaultdict(dict)
                length = None
            elif line == "//":
                # Reached the end of the alignment.
                skipped_columns = []
                coordinates = Alignment.infer_coordinates(
                    aligned_sequences, skipped_columns
                )
                skipped_columns = set(skipped_columns)
                alignment = Alignment(records, coordinates)
                alignment.annotations = {}
                if references:
                    alignment.annotations["references"] = []
                    for reference in references:
                        # Convert the defaultdict to a plain dict and join
                        # the multi-line fields into single strings.
                        reference = dict(reference)
                        reference["title"] = " ".join(reference["title"])
                        reference["author"] = " ".join(reference["author"])
                        reference["location"] = " ".join(reference["location"])
                        alignment.annotations["references"].append(reference)
                if database_references:
                    alignment.annotations["database references"] = database_references
                if nested_domains:
                    alignment.annotations["nested domains"] = nested_domains
                rows, columns = alignment.shape
                AlignmentIterator._store_per_file_annotations(alignment, gf, rows)
                AlignmentIterator._store_per_column_annotations(
                    alignment, gc, columns, skipped_columns
                )
                AlignmentIterator._store_per_sequence_annotations(alignment, gs)
                AlignmentIterator._store_per_sequence_and_per_column_annotations(
                    alignment, gr
                )
                yield alignment
            elif not line.startswith("#"):
                # Sequence
                # Format: "<seqname> <sequence>"
                try:
                    seqname, aligned_sequence = line.split(None, 1)
                except ValueError:
                    # This might be someone attempting to store a zero length sequence?
                    raise ValueError(
                        "Could not split line into sequence name and aligned sequence:\n"
                        + line
                    ) from None
                if length is None:
                    length = len(aligned_sequence)
                elif length != len(aligned_sequence):
                    raise ValueError(
                        f"Aligned sequence {seqname} consists of {len(aligned_sequence)} letters, expected {length} letters)"
                    )
                # Both "." and "-" are treated as gap characters.
                aligned_sequence = aligned_sequence.replace(".", "-")
                sequence = aligned_sequence.replace("-", "")
                aligned_sequences.append(aligned_sequence)
                seq = Seq(sequence)
                record = SeqRecord(seq, id=seqname)
                records.append(record)
            elif line.startswith("#=GF "):
                # Generic per-File annotation, free text
                # Format: #=GF <feature> <free text>
                feature, text = line[5:].strip().split(None, 1)
                if feature == "RN":
                    # Start of a new reference; the fields that follow
                    # (RM, RT, RA, RL) are added to this reference.
                    assert text.startswith("[")
                    assert text.endswith("]")
                    number = int(text[1:-1])
                    reference = defaultdict(list)
                    reference["number"] = number
                    if reference_comments:
                        # RC lines read before this RN line belong to it.
                        reference["comment"] = " ".join(reference_comments)
                        reference_comments = []
                    references.append(reference)
                elif feature == "RM":
                    assert not reference["medline"]
                    reference["medline"] = text
                elif feature == "RT":
                    reference["title"].append(text)
                elif feature == "RA":
                    reference["author"].append(text)
                elif feature == "RL":
                    reference["location"].append(text)
                elif feature == "RC":
                    reference_comments.append(text)
                elif feature == "DR":
                    database_reference = {"reference": text}
                    database_references.append(database_reference)
                elif feature == "DC":
                    # comment on the most recent database reference
                    assert "comment" not in database_reference
                    database_reference["comment"] = text
                elif feature == "NE":
                    nested_domain = {"accession": text}
                    nested_domains.append(nested_domain)
                elif feature == "NL":
                    # location of the most recent nested domain
                    assert "location" not in nested_domain
                    nested_domain["location"] = text
                else:
                    # Each feature key could be used more than once,
                    # so store the entries as a list of strings.
                    gf[feature].append(text)
            elif line.startswith("#=GC "):
                # Generic per-Column annotation, exactly 1 char per column
                # Format: "#=GC <feature> <exactly 1 char per column>"
                # NOTE(review): with maxsplit=2, whitespace inside the markup
                # gives three fields and the unpacking raises ValueError.
                feature, text = line[5:].strip().split(None, 2)
                if feature not in gc:
                    gc[feature] = ""
                gc[feature] += text.strip()  # append to any previous entry
                # Might be interleaved blocks, so can't check length yet
            elif line.startswith("#=GS "):
                # Generic per-Sequence annotation, free text
                # Format: "#=GS <seqname> <feature> <free text>"
                try:
                    seqname, feature, text = line[5:].strip().split(None, 2)
                except ValueError:
                    # Free text can sometimes be empty, which a one line split throws an error for.
                    # See https://github.com/biopython/biopython/issues/2982 for more details
                    seqname, feature = line[5:].strip().split(None, 1)
                    text = ""
                if feature == "DR":
                    gs[seqname][feature].append(text)
                else:
                    assert feature not in gs[seqname]
                    gs[seqname][feature] = text
            elif line.startswith("#=GR "):
                # Generic per-Sequence AND per-Column markup
                # Format: "#=GR <seqname> <feature> <exactly 1 char per column>"
                terms = line[5:].split(None, 2)
                # ``seqname`` is the name most recently read from a sequence
                # or "#=GS" line; the markup must refer to that sequence.
                assert terms[0] == seqname
                feature = terms[1]
                gr[seqname][feature] = terms[2].strip()