def draw_all_plots(dataDir, outputDir):
    """Draw one bar chart per data file and save it into outputDir.

    Every file returned by core.get_only_files(dataDir) is read as one float
    per line. The first value of each file is discarded; the remaining values
    are drawn as bars of width 10 placed at x = 0, 10, 20, ...

    dataDir - directory with the input files
    outputDir - directory that receives the charts; each chart is saved under
        the base name of its data file
    """
    for fileName in core.get_only_files(dataDir):
        with open(fileName) as readFile:
            values = [float(line) for line in readFile]
        # The first value in every file is not part of the plot.
        heights = values[1:]
        # One bar per remaining value instead of a fixed count of 9, so a file
        # with a different number of values no longer makes plt.bar fail on a
        # length mismatch.
        positions = [10 * i for i in range(len(heights))]
        plt.bar(positions, heights, width=10)
        # NOTE(review): the output name keeps the data file's extension and
        # savefig picks the image format from it - confirm the inputs carry an
        # extension matplotlib can write.
        plt.savefig(os.path.join(outputDir, os.path.basename(fileName)))
        # pyplot keeps drawing on the current figure, so without clearing it
        # every later image would also contain the bars of all earlier files.
        plt.clf()
#!/usr/bin/python """ Splits replication timings of APOBEC-motif in bins with equal number of points in each bin""" import numpy as np from scipy import stats import sys import core if __name__ == '__main__': if len(sys.argv) != 3: sys.exit("Usage: {0} motifRepTimeDir outFile".format(sys.argv[0])) replication_timings = np.array([]) file_names = core.get_only_files(sys.argv[1]) for name in file_names: with open(name) as fin: l = np.array(map(float, fin)) l = l[l != -1] replication_timings = np.concatenate((replication_timings, l), axis=1) print "{0}:\n".format(name) temp = l[l < 0] print(len(temp)) sys.exit() borders = np.linspace(0, 1, num=core.BIN_QUANTITY + 1, endpoint=True) bin_borders = stats.mstats.mquantiles(replication_timings, borders) with open(sys.argv[2], 'w') as fout: fout.write('bin_start\tbin_end\motifs\n') for i in range(len(bin_borders) - 1): print "Bin #{0}".format(i) point_number = sum(map(lambda x: bin_borders[i] < x and
) frequency = 1.0 * attempts_in_bin["mutations"] / attempts_in_bin["motifs"] attempts_in_bin["frequency"] = pandas.Series(frequency, index=attempts_in_bin.index) return attempts_in_bin def estimate_conditional_probability(bin_borders, event_positions): """ P{event occurred in bin[i] | event occurred} dataDir - directory with data, should be splitted and normalized """ events_in_bin = core.split_to_bins(event_positions, bin_borders) conditional_probability = [0] * len(events_in_bin) number_of_events = sum(events_in_bin) for i in range(len(events_in_bin)): conditional_probability[i] = events_in_bin[i] / number_of_events return conditional_probability if __name__ == "__main__": if len(sys.argv) != 4: sys.exit("Usage: {0} motifRepTimeBins mutationRepTimeDir " "outDir".format(sys.argv[0])) OUT_DIR = sys.argv[3] motifs_in_bin = pandas.read_csv(sys.argv[1], sep="\t") mutation_rep_time_files = core.get_only_files(sys.argv[2]) for mutation_file in mutation_rep_time_files: with open(mutation_file) as f: replication_timings = map(float, f) frequency = calculate_frequency(motifs_in_bin, replication_timings) outFile = os.path.join(OUT_DIR, os.path.basename(mutation_file)) frequency.to_csv(outFile, sep="\t")