Exemplo n.º 1
0
                    metavar="fasta_file",
                    type=argparse.FileType('r'),
                    help="FASTA file of the genome.")
# positional argument for the annotation file; argparse.FileType('r') makes
# argparse open the given path itself, so the parsed value is a readable file
# object rather than a filename string
parser.add_argument('scaffold_gff3',
                    metavar="gff3_file",
                    type=argparse.FileType('r'),
                    help="corresponding gff3 file of the genome.")

# parse the command line; the opened gff3 file is exposed as args.scaffold_gff3
args = parser.parse_args()

# load the genome sequences from the FASTA file handle supplied on the
# command line
genome_fasta = parse_fasta.get_all_sequences(args.genome_fasta, 'fasta')

# parse the 'exon' features of the gff3 file, then keep only the longest
# transcript as selected by pick_longest_mRNA
scaffold_gff3 = parse_gff3.pick_longest_mRNA(
    parse_gff3.parse_gff3(args.scaffold_gff3, 'exon'))

# walk every gene on every scaffold and slice the sequence of each of its
# exons out of the genome (no coverage file is read here)
for scaf in scaffold_gff3:
    for gene in scaffold_gff3[scaf]:
        gene_coords = scaffold_gff3[scaf][gene].coords
        # a first coordinate larger than the second flags the gene as being on
        # the reverse ("Crick") strand
        gene_on_crick = gene_coords[0] > gene_coords[1]

        # use the first mRNA key of the gene -- presumably the only one left
        # after pick_longest_mRNA; verify against parse_gff3
        tx = list(scaffold_gff3[scaf][gene].mRNAs.keys())[0]
        mrna_coords = scaffold_gff3[scaf][gene].mRNAs[tx].details['exon']

        # presumably accumulates the per-exon sequences of this gene
        exon_seq = ''
        for i in mrna_coords:
            # min()/max() order the coordinate pair so the slice always runs
            # left-to-right, whichever strand the exon is on
            # NOTE(review): assumes the coords are already 0-based, half-open
            # (Python slice style) rather than raw 1-based gff3 -- confirm
            temp = genome_fasta[scaf][min(i):max(i)]
            # reverse-strand exons are flipped to read in transcript direction
            if gene_on_crick: temp = reverse_complement(temp)
Exemplo n.º 2
0
def generate_relative_locations(n):
    """
    Return the midpoints of n equal-width slices of the interval 0..1.

    Slice k (counting from 0) covers k/n up to (k + 1)/n, so its midpoint is
    (k + 0.5) / n. The midpoints are returned in ascending order.

    e.g. n == 5 gives [0.1, 0.3, 0.5, 0.7, 0.9]; n == 0 gives [].
    """
    slice_indices = range(n)
    return [(k + 0.5) / n for k in slice_indices]


# read the genome sequences from the FASTA file handle given on the command
# line
genome_sequences = parse_fasta.get_all_sequences(args.genome_fasta, 'fasta')

# read coordinates of genes and exons from the .gff3 file ...
scaffold_gff3 = parse_gff3.parse_gff3(args.genome_gff3, 'exon')
# ... then keep only the longest transcript as selected by pick_longest_mRNA
scaffold_gff3 = parse_gff3.pick_longest_mRNA(scaffold_gff3)

# reverse lookup table, gene name -> scaffold it sits on, so that gene
# coordinates can later be fetched from scaffold_gff3 given only a gene name
# (if a gene name occurs on several scaffolds, the last scaffold seen wins)
gene_to_scaffold = {
    gene_name: scaffold_name
    for scaffold_name in scaffold_gff3
    for gene_name in scaffold_gff3[scaffold_name]
}

# print header row for results
print('Gene',
      'Intron relative location',
      'Scaffold',
      'Desired region',
      'Outer region',
      'Outer amplicon length',