Exemplo n.º 1
0
def get_mutation_rep_time(mutation_file, sample_name):
    """Return replication timings of a sample's mutated nucleotides.

    For every chromosome listed by ``core.get_genome_file_names()`` the
    substitutions of ``sample_name`` are read from ``mutation_file``
    (restricted to ``core.FINAL_NUCL`` final nucleotides).  A replication
    timing is computed for each mutation whose three-nucleotide context is
    one of ``core.MOTIFS``.

    Progress (the sample name, then each chromosome as it is processed) is
    written to stdout.

    Args:
        mutation_file: passed through unchanged to ``core.read_mutations``.
        sample_name: name of the sample whose mutations are considered.

    Returns:
        List with one ``core.calculate_replication_timing`` result per
        mutation located in a motif of interest.  Results equal to -1 are
        kept in the list and also reported on stdout as uncalculatable.
    """
    sys.stdout.write("\nConsidering {0} sample: ".format(sample_name))
    mutation_rep_time = []
    # Chromosomes are handled one at a time to limit memory use: the whole
    # genome as a single str is ~3.1 GB.
    genome_file_names = core.get_genome_file_names()
    for chromosome in genome_file_names:
        sys.stdout.write(chromosome + ', ')
        sys.stdout.flush()
        # FIXME: read_mutations is called once per chromosome for the same
        # mutation file - inefficient.
        mutations_list = core.read_mutations(mutation_file,
                                             mutation_type='subs',
                                             chromosome=chromosome,
                                             sample_names=[sample_name],
                                             final_nucleotides=core.FINAL_NUCL)
        with open(genome_file_names[chromosome]) as genome_file:
            genome = genome_file.read()
        for _, mutation in mutations_list.iterrows():
            position = mutation['positionFrom']
            # Three characters centred on index ``position - 1``
            # (positionFrom appears to be 1-based - confirm against
            # core.read_mutations).
            motif = genome[position - 2: position + 1]
            if motif in core.MOTIFS:
                rep_time = core.calculate_replication_timing(chromosome,
                                                             position)
                if rep_time == -1:
                    print('\nuncalculatable replication time at {0}:{1}'
                          .format(chromosome, position))
                mutation_rep_time.append(rep_time)
        # Drop the chromosome text before the next one is read so that two
        # chromosomes are never held in memory at once.
        del genome
    return mutation_rep_time
Exemplo n.º 2
0
def get_motif_rep_time(chromosome):
    """Collect replication timings for every motif occurrence in a chromosome.

    The chromosome text is read from the file that
    ``core.get_genome_file_names()`` maps ``chromosome`` to.  Each motif in
    ``core.MOTIFS`` is then searched for in turn, overlapping occurrences
    included, and the timing of its central position is recorded.

    Returns:
        List of ``core.calculate_replication_timing`` results, grouped by
        motif in ``core.MOTIFS`` order and by ascending position within a
        motif.  Results equal to -1 stay in the list and are also reported
        on stdout.
    """
    path_by_chromosome = core.get_genome_file_names()
    with open(path_by_chromosome[chromosome], 'r') as genome_file:
        sequence = genome_file.read()

    timings = []
    for motif in core.MOTIFS:
        start = sequence.find(motif)
        while start != -1:
            # str.find gives the 0-based index of the motif's first
            # character: +1 moves to the central character, another +1
            # converts from 0-based to 1-based numbering.
            center = start + 2
            timing = core.calculate_replication_timing(chromosome, center)
            if timing == -1:
                print('\nuncalculatable replication time at {0}:{1}'
                      .format(chromosome, center))
            timings.append(timing)
            # Resume one character later so overlapping matches are found.
            start = sequence.find(motif, start + 1)
    return timings