示例#1
0
def get_non_absent_ref_tes(te_gff, absence_bed, sample, out, log):
    """Return reference TE features that no absence interval overlaps.

    Runs ``bedtools subtract -A -a te_gff -b absence_bed`` into a temporary
    GFF under ``out`` and converts every remaining feature line into an
    ``output.Insertion`` of type ``"reference"``. The temporary file is
    removed before returning, including when parsing fails.

    Args:
        te_gff: Path to the GFF of reference TE annotations (bedtools ``-a``).
        absence_bed: Path to the BED file given to bedtools as ``-b``
            (presumably the absence calls -- confirm against the caller).
        sample: Sample name embedded in each insertion name.
        out: Directory in which the temporary GFF is written.
        log: Passed through to ``mccutils.run_command_stdout``.

    Returns:
        A list of ``output.Insertion`` objects, one per remaining feature.
    """
    insertions = []
    tmp_gff = out + "/tmp.ref_nonabs.gff"
    command = ["bedtools", "subtract", "-A", "-a", te_gff, "-b", absence_bed]
    mccutils.run_command_stdout(command, tmp_gff, log=log)

    try:
        with open(tmp_gff, "r") as gff:
            for line in gff:
                # Drop the line terminator so it can never end up inside the
                # insertion name when the second attribute is the last field.
                line = line.rstrip("\r\n")

                # Only lines that *start* with "#" are comments/directives; a
                # "#" inside a feature line (e.g. in a TE identifier) must not
                # cause the feature to be discarded. Blank lines carry no
                # feature and would otherwise fail on the column lookups.
                if not line.strip() or line.startswith("#"):
                    continue

                # Splitting the attribute column on ";" as well as on tabs
                # puts the first attribute at index 8 and the second at 9.
                split_line = line.replace(";", "\t").split("\t")

                insert = output.Insertion(output.Temp())
                insert.chromosome = split_line[0]
                insert.start = int(split_line[3])
                insert.end = int(split_line[4])
                # The name comes from the value of the second attribute
                # (assumes that attribute holds the TE name -- TODO confirm).
                insert.name = split_line[9].split(
                    "=")[1] + "|reference|NA|" + sample + "|temp|nonab|"
                insert.strand = split_line[6]
                insert.type = "reference"

                insertions.append(insert)
    finally:
        # Clean up the intermediate file even if a line fails to parse.
        mccutils.remove(tmp_gff)

    return insertions
示例#2
0
def read_insertion_summary(infile, sample):
    """Parse an insertion summary table into ``output.Insertion`` objects.

    The first line of ``infile`` is treated as a header and ignored. Every
    other line must split into exactly 14 tab-separated columns:

        0 chromosome, 1 start (stored minus one), 2 end, 3 family,
        4 orientation text ("antisense" anywhere in it means strand "-"),
        5 class, 6 variant support, 7 frequency, 8 junction 1,
        9 junction 1 support, 10 junction 2, 11 junction 2 support,
        12 five-prime support, 13 three-prime support.

    Lines with a different column count, or with coordinates that fail the
    sanity check (end >= start, end > 0, start > -1), are reported on stdout
    and skipped. When both junction support counts are positive the junction
    positions replace start/end and the name gets an "sr|" suffix; otherwise
    the name gets an "rp|" suffix.

    Args:
        infile: Path to the insertion summary file.
        sample: Sample name embedded in each insertion name.

    Returns:
        A list of ``output.Insertion`` objects of type ``"non-reference"``.
    """
    malformed_msg = "<TEMP POST> Omitting malformed line from insertion summary results:"
    parsed = []

    with open(infile, "r") as summary:
        for line_index, raw_line in enumerate(summary):
            # The first line is the header.
            if line_index == 0:
                continue

            record = output.Insertion(output.Temp())
            fields = raw_line.split("\t")
            if len(fields) != 14:
                print(malformed_msg, raw_line)
                continue

            record.chromosome = fields[0]
            record.start = int(fields[1]) - 1
            record.end = int(fields[2])
            record.family = fields[3]
            record.name = "|".join(
                [record.family, "non-reference", fields[7], sample, "temp"]
            ) + "|"
            record.strand = "-" if "antisense" in fields[4] else "+"

            support = record.support_info.support
            support['class'].value = fields[5]
            support['variantsupport'].value = int(float(fields[6]))
            support['frequency'].value = float(fields[7])
            support['junction1'].value = int(fields[8])
            support['junction1support'].value = int(fields[9])
            support['junction2'].value = int(fields[10])
            support['junction2support'].value = int(fields[11])
            support['fiveprimesupport'].value = int(float(fields[12]))
            # The last column still carries the line terminator.
            support['threeprimesupport'].value = int(
                float(fields[13].replace("\n", "")))
            record.type = "non-reference"

            coordinates_ok = (
                record.end >= record.start
                and record.end > 0
                and record.start > -1
            )
            if not coordinates_ok:
                print(malformed_msg, raw_line)
                continue

            has_split_reads = (
                support['junction1support'].value > 0
                and support['junction2support'].value > 0
            )
            if has_split_reads:
                # Split-read evidence: the junctions define the interval.
                record.start = support['junction1'].value
                record.end = support['junction2'].value
                record.name = record.name + "sr|"
            else:
                # Read-pair evidence only.
                record.name = record.name + "rp|"

            parsed.append(record)

    return parsed