Пример #1
0
def get_blocks(
    seqs: Dict[str, SeqRecord], genes: GFF, hints: Optional[GFF], pad: int,
    merge: bool, target_ids: Optional[Set[str]]
) -> Iterator[Tuple[str, SeqRecord, GFF, Optional[GFF]]]:
    """Yield one sub-sequence per padded (or merged) gene block.

    Features of type "gene" are grouped by sequence id; groups whose
    sequence id is absent from ``seqs`` are skipped. For each remaining
    sequence, blocks are produced from the target genes and each distinct
    ``(start, end)`` block is handed to ``make_new_subsequence``.

    Args:
        seqs: Sequences keyed by sequence id.
        genes: Annotation set from which "gene" features are selected.
        hints: Optional annotation set of hints; ``None`` means no hints,
            and ``None`` is then propagated through every ``fmap`` step.
        pad: Padding passed on to ``merge_overlapping`` / ``pad_features``.
        merge: When true, blocks come from ``merge_overlapping``; otherwise
            from ``pad_features``.
        target_ids: When given, only genes selected by
            ``subset_features_by_id`` seed blocks; ``None`` means all genes.

    Yields:
        Whatever ``make_new_subsequence`` returns for each unique block.

    Raises:
        ValueError: Re-raised if building the hint interval tree fails;
            the sequence id and its hints are printed first when hints
            were supplied.
    """

    hints_by_seqid = fmap(groupby_seqid, hints)
    genes_by_seqid = groupby_seqid(genes.select_type("gene"))

    for seqid, gene_features in genes_by_seqid.items():
        if seqid not in seqs:
            continue

        gene_itree = IntervalTree(gffrecords_to_intervals(gene_features))

        try:
            # Each step is a no-op (stays None) when no hints were given.
            seq_hints = fmap(lambda h: h.get(seqid, []), hints_by_seqid)
            hint_intervals = fmap(gffrecords_to_intervals, seq_hints)
            hint_itree = fmap(IntervalTree, hint_intervals)
        except ValueError as err:
            # Dump the offending hints to help diagnose the bad interval.
            if hints_by_seqid is not None:
                print(seqid, hints_by_seqid.get(seqid, []))
            raise err

        target_features: List[GFFRecord] = (
            gene_features
            if target_ids is None
            else subset_features_by_id(gene_features, target_ids)
        )

        build_blocks = merge_overlapping if merge else pad_features
        block_iter = build_blocks(target_features, pad)

        # Different genes can produce the same block; emit each only once.
        emitted: Set[Tuple[int, int]] = set()
        for start, end in block_iter:
            span = (start, end)
            if span in emitted:
                continue
            emitted.add(span)

            yield make_new_subsequence(
                seqid,
                start,
                end,
                gene_itree,
                hint_itree,
                gene_features,
                fmap(lambda h: h.get(seqid, []), hints_by_seqid),
                seqs[seqid],
            )
Пример #2
0
def split_gffs(gff: GFF, group_level: str) -> Tuple[GFF, GFF]:
    """Partition ``gff`` into kept and dropped annotation sets.

    Every feature of type ``group_level`` is inspected. A feature whose
    attributes carry a ``"should_exclude"`` custom key is routed to the
    dropped set; all others (including features with no attributes) are
    routed to the kept set. The records returned by
    ``gff.traverse_children`` and ``gff.traverse_parents`` for that feature
    are added to the same set, so a record reachable from both a kept and
    an excluded feature ends up in both.

    Args:
        gff: The annotation set to split.
        group_level: Feature type at which the keep/drop decision is made.

    Returns:
        ``(kept, dropped)``, each the result of ``prune_gff`` on its set.
    """
    keep: Set[GFF3Record] = set()
    drop: Set[GFF3Record] = set()

    for feature in gff.select_type(group_level):
        attrs = feature.attributes
        excluded = attrs is not None and "should_exclude" in attrs.custom

        bucket = drop if excluded else keep
        bucket.update(gff.traverse_children([feature]))
        bucket.update(gff.traverse_parents([feature]))

    return prune_gff(keep), prune_gff(drop)
Пример #3
0
def main():
    """Rewrite mRNA-style GFF records as helitron annotations.

    Reads GFF lines from ``args.infile`` (lines starting with ``#`` are
    skipped). For each record the ``Name`` attribute is moved into ``ID``
    and the custom attributes are cleared. Missing parents are then
    inferred, and for every "mRNA" feature with at least two children the
    following records are printed to ``args.outfile``:

    * a ``repeat_region`` (a deep copy of the mRNA record),
    * the mRNA itself, retyped as ``helitron`` and parented to the region,
    * its two flanking-region children, identified by ID suffixes
      ``".3"`` and ``".5.1"``.

    Raises:
        IndexError: If a qualifying mRNA has no child whose ID ends with
            ``".3"`` or ``".5.1"``.
    """
    args = cli(sys.argv[0], sys.argv[1:])

    rows = []
    for line in args.infile:
        if line.startswith("#"):
            continue
        record = GFFRecord.parse(line)
        # The input stores the identifier in Name; promote it to ID.
        record.attributes.id = record.attributes.name
        record.attributes.name = None
        record.attributes.custom = {}
        rows.append(record)

    gff = GFF(rows)
    gff.infer_missing_parents()

    # Running index shared by the repeat_region / helitron IDs of one locus.
    counter = 1

    for mrna in gff.select_type("mRNA"):
        # Both flanks are required, so skip records with fewer children.
        if len(mrna.children) < 2:
            continue

        region = deepcopy(mrna)
        region.type = "repeat_region"
        region.attributes.id = f"repeat_region{counter}"
        # Must be a list of terms like every other ontology_term below; a
        # trailing comma here would silently wrap it in a 1-tuple.
        region.attributes.ontology_term = ["SO:0000657"]

        mrna.type = "helitron"
        mrna.parents = [region]
        mrna.attributes.parent = [region.attributes.id]
        mrna.attributes.id = f"helitron{counter}"
        mrna.attributes.ontology_term = ["SO:0000544", "SO:helitron"]
        mrna.attributes.custom = {}

        # Flanks are recognised purely by the suffix of the child's ID.
        flank3 = [c for c in mrna.children
                  if c.attributes.id.endswith(".3")][0]
        flank5 = [c for c in mrna.children
                  if c.attributes.id.endswith(".5.1")][0]

        flank3.type = "three_prime_flanking_region"
        flank3.attributes.ontology_term = [
            "SO:0001417",
            "SO:three_prime_flanking_region",
            "SO:0000364",
            "SO:transposable_element_flanking_region"
        ]
        flank3.attributes.id = None
        flank3.attributes.parent = [mrna.attributes.id]

        flank5.type = "five_prime_flanking_region"
        flank5.attributes.ontology_term = [
            "SO:0001416",
            "SO:five_prime_flanking_region",
            "SO:0000364",
            "SO:transposable_element_flanking_region"
        ]
        flank5.attributes.id = None
        flank5.attributes.parent = [mrna.attributes.id]

        mrna.source = flank5.source

        print(region, file=args.outfile)
        print(mrna, file=args.outfile)
        # NOTE(review): the flank order is swapped on the minus strand,
        # presumably to keep output in coordinate order — confirm.
        if mrna.strand == Strand.MINUS:
            print(flank3, file=args.outfile)
            print(flank5, file=args.outfile)
        else:
            print(flank5, file=args.outfile)
            print(flank3, file=args.outfile)

        counter += 1

    return