Пример #1
0
def test_coordinate_dict_indexing():
    """Check the coordinate dictionary built from INPUT.

    Calls index_gene_scaffold_coordinates(INPUT), which returns four
    values, and compares the third one (the coordinate dictionary)
    against the expected ``coordinate_dict_test``.
    """
    # Only the coordinate dictionary matters here; the other three
    # returned values are unpacked (to keep the arity check) and ignored.
    (_next_gene_of,
     _previous_gene_of,
     coordinates,
     _genes) = index_gene_scaffold_coordinates(INPUT)
    assert_equal(coordinates, coordinate_dict_test)
Пример #2
0
def test_previous3_gene_is_as_expected_scaf_start():
    """Check the previous-gene lookup for GPLIN_000000100.

    Calls index_gene_scaffold_coordinates(INPUT) and asserts that the
    gene-to-previous-gene mapping holds "NA" for "GPLIN_000000100".
    """
    # NOTE(review): judging by the test name, this gene is presumably the
    # first one on its scaffold, hence no previous gene -- confirm
    # against the INPUT fixture.
    (_next_gene_of,
     previous_gene_of,
     _coordinates,
     _genes) = index_gene_scaffold_coordinates(INPUT)
    assert_equal(previous_gene_of["GPLIN_000000100"], "NA")
Пример #3
0
def test_next_is_as_expected1():
    """Check the next-gene lookup for GPLIN_000000500.

    Calls index_gene_scaffold_coordinates(INPUT) and asserts that the
    gene-to-next-gene mapping holds "GPLIN_000020100" for
    "GPLIN_000000500".
    """
    (next_gene_of,
     _previous_gene_of,
     _coordinates,
     _genes) = index_gene_scaffold_coordinates(INPUT)
    # NOTE(review): the expected next gene belongs to a different
    # scaffold; the original author questioned whether this expectation
    # is actually correct.
    assert_equal(next_gene_of["GPLIN_000000500"], "GPLIN_000020100")
Пример #4
0
            print("file not found: %s" % user_file)
            os._exit(0)
    logger.info(sys.version_info)
    logger.info("Command-line: %s", ' '.join(sys.argv))
    logger.info("Starting testing: %s", time.asctime())
    logger.info("converting gff file to coordinates file")
    get_starts_stops(gff_file, "temp_starts_stops.txt")
    coordinate_file = os.path.join("temp_starts_stops.txt")
    # index the genome with biopython
    logger.info("indexing genome")
    Genome_sequence = SeqIO.index(genome_sequence, "fasta")
    Genome_sequence_time = time.time()
    out = 'import genome file took, %.3f' % (Genome_sequence_time - start_time)
    # populate the dictionaries
    gene_to_next_gene, gene_to_previous_gene, coordinate_dict, \
            gene_list = index_gene_scaffold_coordinates(coordinate_file)

    logger.info(out)
    # open the gff outfile
    name_gff = options.out_file.split(".")[0] + "_upstream_" + str(
        upstream) + "bp.gff"
    gff_outfile = open(name_gff, "w")
    gff_outfile.write("#GFF file from intergenic regions.\n")

    for gene in gene_list:
        # now we need to extract the genic regions, not going into other genes
        final_start, final_stop, \
          direction = get_coordinate_of_interest(gene,
                                                 gene_to_next_gene,
                                                 gene_to_previous_gene,
                                                 coordinate_dict)