Python GenePredBasics.GenePredFile примеры использования

Язык программирования: Python

Класс/Тип: GenePredBasics

Метод/Функция: GenePredFile

Примеров на hotexamples.com: 2

Python GenePredBasics.GenePredFile - 2 примера найдено. Это лучшие примеры Python кода для GenePredBasics.GenePredFile, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

line_to_entry(10)

entry_to_line(5)

smooth_gaps(5)

GenePredEntry(4)

GenePredFile(2)

GenePredComparison(1)

GenePredLocusStream(1)

bed_to_genepred(1)

entry_to_fake_psl_line(1)

get_directionless_gpd_conversion(1)

Пример #1

Показать файл

Файл: nudge_psl_junctions_to_reference_output_gpd.py Проект: songjiajia2018/Manual-for-running-IDP-pipeline

def main():
    """Nudge PSL alignment junctions onto nearby reference junctions.

    Command-line entry point. Loads the reference genepred, indexes its
    exon-exon junctions per chromosome, then reads the PSL input line by
    line. Each valid PSL entry is converted to a genepred entry, has its
    gaps smoothed with ``--min_intron_size``, and is passed to ``nudge``
    together with the reference junctions of its target sequence.

    Output goes to stdout, one line per valid PSL entry: a fake PSL line
    when ``--output_fake_psl`` is given, otherwise a genepred line.
    Entries whose block arrays disagree in length are skipped with a
    warning on stderr.
    """
    parser = argparse.ArgumentParser(
        description='Use reference junctions when they are close',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--min_intron_size',
                        type=int,
                        default=68,
                        help="INT min intron size")
    parser.add_argument(
        '--min_local_support',
        type=int,
        default=0,
        help=
        "INT min number of junctions within search_size of a junction in order to count it"
    )
    parser.add_argument('--search_size',
                        type=int,
                        default=10,
                        help="INT search space for reference")
    parser.add_argument(
        '--output_fake_psl',
        help="FASTAFILE reference genome to make a fake PSL output")
    parser.add_argument('psl', help="PSLFILENAME or '-' for STDIN")
    parser.add_argument('reference_genepred',
                        help="FASTAFILENAME for reference genepred")
    args = parser.parse_args()

    # The genome is only needed to build fake PSL lines.
    genome = {}
    if args.output_fake_psl:
        genome = read_fasta_into_hash(args.output_fake_psl)

    # Read in the reference genepred first and index its junctions as
    # ref[chrom][previous exon end][next exon start + 1] = strand.
    gpf = GenePredBasics.GenePredFile(args.reference_genepred)
    ref = {}
    for wrapped in gpf.entries:
        e = wrapped.entry
        exon_count = len(e['exonStarts'])
        if exon_count <= 1:
            # Single-exon entries have no junctions.
            continue
        by_left_end = ref.setdefault(e['chrom'], {})
        for i in range(1, exon_count):
            right_starts = by_left_end.setdefault(e['exonEnds'][i - 1], {})
            # setdefault keeps the strand of the first entry seen for a
            # junction, matching an explicit "if not in" check.
            right_starts.setdefault(e['exonStarts'][i] + 1, e['strand'])
    # Stored all junctions as 1-base

    pf = GenericFileReader(args.psl)
    while True:
        line = pf.readline()
        if not line:
            break
        if line.startswith('#'):
            continue
        line = line.rstrip()
        pe = PSLBasics.line_to_entry(line)
        block_count = len(pe['blockSizes'])
        if (len(pe['tStarts']) != block_count
                or len(pe['qStarts']) != block_count):
            sys.stderr.write("WARNING invalid psl\n")
            continue
        genepred_line = PSLBasics.convert_entry_to_genepred_line(pe)
        ge = GenePredBasics.smooth_gaps(
            GenePredBasics.line_to_entry(genepred_line), args.min_intron_size)
        # Targets absent from the reference get an empty junction set.
        refjuns = ref.get(pe['tName'], {})
        new_ge = nudge(pe, ge, refjuns, args)
        # Single-argument print(...) behaves the same under the Python 2
        # print statement and the Python 3 print function.
        if args.output_fake_psl:
            print(GenePredBasics.entry_to_fake_psl_line(new_ge, genome))
        else:
            print(GenePredBasics.entry_to_line(new_ge))

Пример #2

Показать файл

def main():
    """Compare the entries of genepred file A against genepred file B.

    Command-line entry point. Parses the options, resolves the minimum
    reciprocal-overlap thresholds, loads both genepred files and calls
    ``check_B_entries`` once for every entry of A.

    The thresholds are held in ``overlap`` as ``[first, internal, last]``.
    ``--minoverlap`` sets all three; ``--minoverlap_first``,
    ``--minoverlap_internal`` and ``--minoverlap_last`` then override
    their own slot. Unset thresholds default to 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('a', nargs=1, help='FILENAME genepred file A')
    parser.add_argument('b', nargs=1, help='FILENAME genepred file B')
    parser.add_argument('--minexoncount',
                        nargs='?',
                        help='INT the minimum number of exons required.')
    parser.add_argument(
        '--minoverlap_internal',
        nargs='?',
        help=
        'FLOAT the fraction (0-1) of the required reciprocal overlap of an internal exon to call an exon a match.'
    )
    parser.add_argument(
        '--minoverlap_first',
        nargs='?',
        help=
        'FLOAT the fraction (0-1) of the required reciprocal overlap of the first exon to call an exon a match.'
    )
    parser.add_argument(
        '--minoverlap_last',
        nargs='?',
        help=
        'FLOAT the fraction (0-1) of the required reciprocal overlap of the last exon to call an exon a match.'
    )
    parser.add_argument(
        '--minoverlap',
        nargs='?',
        help=
        'FLOAT the fraction (0-1) of the required reciprocal overlap of any exon to call an exon a match.'
    )
    parser.add_argument(
        '--leftouterjoin',
        action='store_true',
        help=
        'Output the entry A regardless of whether a matching entry in B is found'
    )
    parser.add_argument('--output_a_not_in_b',
                        action='store_true',
                        help='Output entries that occur in A but not B')
    parser.add_argument(
        '--best_b_only',
        action='store_true',
        help=
        'Output only one entry of B for each A and try to pick the best based on reciprocal overlap'
    )
    parser.add_argument(
        '--allow_a_subset_of_b_fragments',
        action='store_true',
        help=
        'If A is just a subset of B, then call it as a match.  This means all exons of A found a conecutive match, but B could have more exons on either end.'
    )
    parser.add_argument(
        '--allow_any_fragments',
        action='store_true',
        help='If set, allow any partial match, not just the best')
    args = parser.parse_args()

    # Go through contingencies of overlap requirements and set them.
    # Order is [first exon, internal exons, last exon].
    overlap = [0, 0, 0]
    if args.minoverlap:
        overlap = [
            float(args.minoverlap),
            float(args.minoverlap),
            float(args.minoverlap)
        ]
    if args.minoverlap_first:
        # The first-exon threshold must come from --minoverlap_first;
        # reading --minoverlap_last here ignored the option and failed
        # with float(None) when --minoverlap_last was not given.
        overlap[0] = float(args.minoverlap_first)
    if args.minoverlap_last:
        overlap[2] = float(args.minoverlap_last)
    if args.minoverlap_internal:
        overlap[1] = float(args.minoverlap_internal)

    # Read the genepred files.
    gpdA = GenePredBasics.GenePredFile(args.a[0])
    gpdB = GenePredBasics.GenePredFile(args.b[0])

    # NOTE: entries are processed serially; an earlier multiprocessing
    # variant (a -p thread-count option with a worker pool) was disabled.
    for eA in gpdA.entries:
        check_B_entries(eA, gpdB, overlap, args)