def get_response_content(fs):
    """
    Sample a path between two mutant counts and plot it with R.
    @param fs: form data; this function reads npop, selection_ratio,
    mutation_ab, mutation_ba, nmutants_initial, nmutants_final,
    ngenerations and imageformat
    @return: the image data produced by the R plotter
    """
    # build the two component transition matrices, checking each one
    drift_sel = pgmsinglesite.create_drift_selection_transition_matrix(
            fs.npop, fs.selection_ratio)
    MatrixUtil.assert_transition_matrix(drift_sel)
    mutation = pgmsinglesite.create_mutation_transition_matrix(
            fs.npop, fs.mutation_ab, fs.mutation_ba)
    MatrixUtil.assert_transition_matrix(mutation)
    # the combined transition matrix is the product of the two components
    transition = np.dot(drift_sel, mutation)
    sampled_path = PathSampler.sample_endpoint_conditioned_path(
            fs.nmutants_initial, fs.nmutants_final, fs.ngenerations,
            transition)
    # one table row per position along the sampled path
    rows = [
            [generation, count]
            for generation, count in enumerate(sampled_path)]
    table_string = RUtil.get_table_string(
            rows, ['generation', 'number.of.mutants'])
    script = get_ggplot()
    # let R draw the plot using the device for the requested image format
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_r_tikz_mi_plot_script(nsels, time_stats):
    """
    Build R code that plots mutual information curves against time.
    @param nsels: the number of mut.sel processes, shown in the plot title
    @param time_stats: a list of stats for each time point
    @return: the R plotting code as a string
    """
    # transpose so that each entry holds one statistic over all time points
    stat_columns = zip(*time_stats)
    mut_column = stat_columns[1]
    # the y range must cover column 6, column 2 and the mut column
    y_low = min(stat_columns[6] + mut_column)
    y_high = max(stat_columns[2] + mut_column)
    script = StringIO()
    # an empty plot (type "n") sets up the axes; the curves are added below
    print >> script, RUtil.mk_call_str(
            'plot', 'my.table$t', 'my.table$mut',
            type='"n"',
            ylim=RUtil.mk_call_str('c', y_low, y_high),
            xlab='"time"',
            ylab='"MI"',
            main='"MI for mut process and %d mut.sel processes"' % nsels)
    line_colors = ('red', 'blue', 'green', 'black', 'green', 'blue')
    # headers 1 through 6 are drawn, one color each
    for header_index, color in enumerate(line_colors, 1):
        column_name = 'my.table$%s' % g_time_stats_headers[header_index]
        print >> script, RUtil.mk_call_str(
                'lines', 'my.table$t', column_name, col='"%s"' % color)
    return script.getvalue()
def get_latex_documentbody(fs):
    """
    Produce the tikz code for one plot per statistic ratio.
    The original author marked this as obsolete in favor of pure R output.
    Note that this function also writes the R stderr text to stdout.
    @param fs: form data passed through to get_qmut_qsels
    @return: the tikz codes, one per line of output, as a single string
    """
    Q_mut, Q_sels = get_qmut_qsels(fs)
    # one row per selection process, one column per kind of ratio
    ratio_lists = get_statistic_ratios(Q_mut, Q_sels)
    ER_ratios, NSR_ratios, ER_NSR_ratios = ratio_lists
    rows = zip(ER_ratios, NSR_ratios, ER_NSR_ratios)
    column_headers = ('ER.ratio', 'NSR.ratio', 'ER.times.NSR.ratio')
    table_string = RUtil.get_table_string(rows, column_headers)
    nsels = len(Q_sels)
    # one R script per table column
    scripts = [get_r_tikz_script(nsels, name) for name in column_headers]
    plot_result = RUtil.run_plotter_multiple_scripts(
            table_string, scripts, 'tikz', width=3, height=2)
    retcode, r_out, r_err, tikz_code_list = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    # R succeeded, but its stderr is still reported on stdout
    print('R did not fail, but here is its stderr:')
    print(r_err)
    # the document body is just the tikz plots, one after another
    body = StringIO()
    for tikz_code in tikz_code_list:
        print >> body, tikz_code
    return body.getvalue()
def get_r_tikz_info_plot(nsels, time_stats):
    """
    Build R code that plots the informativeness statistics against time.
    The y axis is fixed to the interval from 0 to log(2).
    The generated code reads its data from the R variable my.table,
    so neither argument influences the output.
    @param nsels: unused; kept for a signature parallel to sibling plots
    @param time_stats: unused; previously transposed and then discarded
    @return: the R plotting code as a string
    """
    out = StringIO()
    ylim = RUtil.mk_call_str("c", 0, math.log(2))
    # an empty plot (type "n") sets up the axes; the curves are added below
    print >> out, RUtil.mk_call_str(
            "plot", "my.table$t", "my.table$info.mi.diag.approx",
            type='"n"',
            ylim=ylim,
            xlab='"time"',
            ylab='"info"',
            main='"informativeness with respect to MI"',
            )
    colors = ("red", "orange", "green", "blue", "black")
    plot_indices = (17, 18, 19, 20, 21)
    # each selected header becomes one colored line on the plot
    for c, plot_index in zip(colors, plot_indices):
        header = g_time_stats_headers[plot_index]
        print >> out, RUtil.mk_call_str(
                "lines", "my.table$t", "my.table$%s" % header,
                col='"%s"' % c)
    return out.getvalue()
def main(args):
    """
    Run the analysis for a range of sequence lengths and write a pdf.
    The stop positions are spaced evenly, with the first forced to 5
    and the last forced to g_nchar.
    @param args: parsed command line arguments; this function reads
    nlengths, nsamples and outfile
    @raise ValueError: if fewer than two lengths are requested
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    # With one length the spacing below would divide by zero,
    # and with none there would be no last position to overwrite.
    if args.nlengths < 2:
        raise ValueError('at least two sequence lengths are required')
    # get the end positions,
    # forcing the first end position to be 5
    # and the last end position to be g_nchar.
    incr = (g_nchar - 5) / float(args.nlengths - 1)
    stop_positions = [5 + int(i * incr) for i in range(args.nlengths)]
    # overwrite the last entry so truncation by int() cannot fall short
    stop_positions[-1] = g_nchar
    # run BEAST and create the R stuff
    table_string, scripts = get_table_string_and_scripts(
            stop_positions, args.nsamples)
    # create the comboscript
    out = StringIO()
    print >> out, 'library(ggplot2)'
    print >> out, 'par(mfrow=c(3,1))'
    for script in scripts:
        print >> out, script
    comboscript = out.getvalue()
    # create the R output image
    device_name = Form.g_imageformat_to_r_function['pdf']
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, comboscript, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    # write the image data
    with open(args.outfile, 'wb') as fout:
        fout.write(image_data)
def get_response_content(fs):
    """
    Plot mutual information against the log probability ratio.
    The x axis is sampled at 101 evenly spaced values.
    @param fs: form data; this function reads x_min, x_max, t
    and imageformat
    @return: the image data produced by the R plotter
    """
    mutual_info = ctmcmi.get_mutual_info_known_distn
    column_names = ['log.probability.ratio', 'mutual.information']
    rows = []
    for x in np.linspace(fs.x_min, fs.x_max, 101):
        # a fresh process object is built for every sampled x
        process = evozoo.AlternatingHypercube_d_1(3)
        params = np.array([x])
        distn = process.get_distn(params)
        Q = process.get_rate_matrix(params)
        rows.append([x, mutual_info(Q, distn, fs.t)])
    table_string = RUtil.get_table_string(rows, column_names)
    script = get_ggplot()
    # let R draw the plot using the device for the requested image format
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_r_tikz_corr_plot(nsels, time_stats):
    """
    Build R code that plots the correlation statistics against time.
    The y axis is fixed to the interval from -1 to 1.
    The generated code reads its data from the R variable my.table,
    so neither argument influences the output.
    @param nsels: unused; kept for a signature parallel to sibling plots
    @param time_stats: unused; previously transposed and then discarded
    @return: the R plotting code as a string
    """
    out = StringIO()
    ylim = RUtil.mk_call_str("c", -1, 1)
    # an empty plot (type "n") sets up the axes; the curves are added below
    print >> out, RUtil.mk_call_str(
            "plot", "my.table$t", "my.table$corr.mi.diag.approx",
            type='"n"',
            ylim=ylim,
            xlab='"time"',
            ylab='"correlation"',
            main='"correlation with mutual information"',
            )
    colors = ("red", "orange", "green", "blue", "black")
    plot_indices = (7, 8, 9, 10, 11)
    # each selected header becomes one colored line on the plot
    for c, plot_index in zip(colors, plot_indices):
        header = g_time_stats_headers[plot_index]
        print >> out, RUtil.mk_call_str(
                "lines", "my.table$t", "my.table$%s" % header,
                col='"%s"' % c)
    return out.getvalue()
def get_r_tikz_info_plot(nsels, time_stats):
    """
    Build R code that plots the informativeness statistics against time.
    The y axis is fixed to the interval from 0 to log(2).
    The generated code reads its data from the R variable my.table,
    so neither argument influences the output.
    @param nsels: unused; kept for a signature parallel to sibling plots
    @param time_stats: unused; previously transposed and then discarded
    @return: the R plotting code as a string
    """
    out = StringIO()
    ylim = RUtil.mk_call_str('c', 0, math.log(2))
    # an empty plot (type "n") sets up the axes; the curves are added below
    print >> out, RUtil.mk_call_str(
            'plot', 'my.table$t', 'my.table$info.mi.diag.approx',
            type='"n"',
            ylim=ylim,
            xlab='"time"',
            ylab='"info"',
            main='"informativeness with respect to MI"')
    colors = ('red', 'orange', 'green', 'blue', 'black')
    plot_indices = (17, 18, 19, 20, 21)
    # each selected header becomes one colored line on the plot
    for c, plot_index in zip(colors, plot_indices):
        header = g_time_stats_headers[plot_index]
        print >> out, RUtil.mk_call_str(
                'lines', 'my.table$t', 'my.table$%s' % header,
                col='"%s"' % c)
    return out.getvalue()
def get_response_content(fs):
    """
    Plot two information measures for two rate matrices over time.
    The time axis has 100 evenly spaced points from 0 to 5 inclusive.
    @param fs: form data; passed to get_input_matrices and read for
    imageformat
    @return: the image data produced by the R plotter
    """
    M, R = get_input_matrices(fs)
    column_names = [
            't',
            'mi.true.mut', 'mi.true.mutsel',
            'mi.analog.mut', 'mi.analog.mutsel',
            ]
    npoints = 100
    t_low = 0.0
    t_high = 5.0
    # spacing chosen so that the last point lands on t_high
    t_incr = (t_high - t_low) / (npoints - 1)
    rows = []
    for step in range(npoints):
        t = t_low + t_incr * step
        # the four values are computed in table column order
        rows.append([
            t,
            ctmcmi.get_mutual_information(M, t),
            ctmcmi.get_mutual_information(R, t),
            ctmcmi.get_ll_ratio_wrong(M, t),
            ctmcmi.get_ll_ratio_wrong(R, t),
            ])
    table_string = RUtil.get_table_string(rows, column_names)
    script = get_ggplot()
    # let R draw the plot using the device for the requested image format
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs):
    """
    Plot Fisher information against the log probability ratio.
    The x axis is sampled at 101 evenly spaced values.
    @param fs: form data; this function reads x_min, x_max, t
    and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if x_max does not exceed x_min
    """
    # reject an empty or reversed x range before doing any work
    if not (fs.x_min < fs.x_max):
        raise ValueError('check the min and max logs')
    fisher_info = divtime.get_fisher_info_known_distn_fast
    column_names = ['log.probability.ratio', 'fisher.information']
    rows = []
    for x in np.linspace(fs.x_min, fs.x_max, 101):
        # a fresh process object is built for every sampled x
        process = evozoo.DistinguishedCornerPairHypercube_d_1(3)
        params = np.array([x])
        distn = process.get_distn(params)
        Q = process.get_rate_matrix(params)
        rows.append([x, fisher_info(Q, distn, fs.t)])
    table_string = RUtil.get_table_string(rows, column_names)
    script = get_ggplot()
    # let R draw the plot using the device for the requested image format
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs):
    """
    Plot two information measures for two rate matrices over time.
    The time axis has 100 evenly spaced points from 0 to 5 inclusive.
    @param fs: form data; passed to get_input_matrices and read for
    imageformat
    @return: the image data produced by the R plotter
    """
    M, R = get_input_matrices(fs)
    column_names = [
            't',
            'mi.true.mut', 'mi.true.mutsel',
            'mi.analog.mut', 'mi.analog.mutsel',
            ]
    npoints = 100
    t_low = 0.0
    t_high = 5.0
    # spacing chosen so that the last point lands on t_high
    t_incr = (t_high - t_low) / (npoints - 1)
    rows = []
    for step in range(npoints):
        t = t_low + t_incr * step
        # the four values are computed in table column order
        rows.append([
            t,
            ctmcmi.get_mutual_information(M, t),
            ctmcmi.get_mutual_information(R, t),
            ctmcmi.get_ll_ratio_wrong(M, t),
            ctmcmi.get_ll_ratio_wrong(R, t),
            ])
    table_string = RUtil.get_table_string(rows, column_names)
    script = get_ggplot()
    # let R draw the plot using the device for the requested image format
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_r_tikz_mi_plot_script(nsels, time_stats):
    """
    Build R code that plots mutual information curves against time.
    @param nsels: the number of mut.sel processes, shown in the plot title
    @param time_stats: a list of stats for each time point
    @return: the R plotting code as a string
    """
    # transpose so that each entry holds one statistic over all time points
    stat_columns = zip(*time_stats)
    mut_column = stat_columns[1]
    # the y range must cover column 6, column 2 and the mut column
    y_low = min(stat_columns[6] + mut_column)
    y_high = max(stat_columns[2] + mut_column)
    script = StringIO()
    # an empty plot (type "n") sets up the axes; the curves are added below
    print >> script, RUtil.mk_call_str(
            'plot', 'my.table$t', 'my.table$mut',
            type='"n"',
            ylim=RUtil.mk_call_str('c', y_low, y_high),
            xlab='"time"',
            ylab='"MI"',
            main='"MI for mut process and %d mut.sel processes"' % nsels)
    line_colors = ('red', 'blue', 'green', 'black', 'green', 'blue')
    # headers 1 through 6 are drawn, one color each
    for header_index, color in enumerate(line_colors, 1):
        column_name = 'my.table$%s' % g_time_stats_headers[header_index]
        print >> script, RUtil.mk_call_str(
                'lines', 'my.table$t', column_name, col='"%s"' % color)
    return script.getvalue()
def get_r_tikz_corr_plot(nsels, time_stats):
    """
    Build R code that plots the correlation statistics against time.
    The y axis is fixed to the interval from -1 to 1.
    The generated code reads its data from the R variable my.table,
    so neither argument influences the output.
    @param nsels: unused; kept for a signature parallel to sibling plots
    @param time_stats: unused; previously transposed and then discarded
    @return: the R plotting code as a string
    """
    out = StringIO()
    ylim = RUtil.mk_call_str('c', -1, 1)
    # an empty plot (type "n") sets up the axes; the curves are added below
    print >> out, RUtil.mk_call_str(
            'plot', 'my.table$t', 'my.table$corr.mi.diag.approx',
            type='"n"',
            ylim=ylim,
            xlab='"time"',
            ylab='"correlation"',
            main='"correlation with mutual information"')
    colors = ('red', 'orange', 'green', 'blue', 'black')
    plot_indices = (7, 8, 9, 10, 11)
    # each selected header becomes one colored line on the plot
    for c, plot_index in zip(colors, plot_indices):
        header = g_time_stats_headers[plot_index]
        print >> out, RUtil.mk_call_str(
                'lines', 'my.table$t', 'my.table$%s' % header,
                col='"%s"' % c)
    return out.getvalue()
def get_r_tikz_prop_plot(nsels, time_stats):
    """
    Build R code that plots the proportion statistics against time.
    The y axis is fixed to the interval from 0 to 1.
    The generated code reads its data from the R variable my.table,
    so neither argument influences the output.
    @param nsels: unused; kept for a signature parallel to sibling plots
    @param time_stats: unused; previously transposed and then discarded
    @return: the R plotting code as a string
    """
    out = StringIO()
    ylim = RUtil.mk_call_str('c', 0, 1)
    # an empty plot (type "n") sets up the axes; the curves are added below
    print >> out, RUtil.mk_call_str(
            'plot', 'my.table$t', 'my.table$prop.mi.diag.approx',
            type='"n"',
            ylim=ylim,
            xlab='"time"',
            ylab='"proportion"',
            main='"proportion of same sign difference as MI"')
    colors = ('red', 'orange', 'green', 'blue', 'black')
    plot_indices = (12, 13, 14, 15, 16)
    # each selected header becomes one colored line on the plot
    for c, plot_index in zip(colors, plot_indices):
        header = g_time_stats_headers[plot_index]
        print >> out, RUtil.mk_call_str(
                'lines', 'my.table$t', 'my.table$%s' % header,
                col='"%s"' % c)
    return out.getvalue()
def get_r_tikz_prop_plot(nsels, time_stats):
    """
    Build R code that plots the proportion statistics against time.
    The y axis is fixed to the interval from 0 to 1.
    The generated code reads its data from the R variable my.table,
    so neither argument influences the output.
    @param nsels: unused; kept for a signature parallel to sibling plots
    @param time_stats: unused; previously transposed and then discarded
    @return: the R plotting code as a string
    """
    out = StringIO()
    ylim = RUtil.mk_call_str("c", 0, 1)
    # an empty plot (type "n") sets up the axes; the curves are added below
    print >> out, RUtil.mk_call_str(
            "plot", "my.table$t", "my.table$prop.mi.diag.approx",
            type='"n"',
            ylim=ylim,
            xlab='"time"',
            ylab='"proportion"',
            main='"proportion of same sign difference as MI"',
            )
    colors = ("red", "orange", "green", "blue", "black")
    plot_indices = (12, 13, 14, 15, 16)
    # each selected header becomes one colored line on the plot
    for c, plot_index in zip(colors, plot_indices):
        header = g_time_stats_headers[plot_index]
        print >> out, RUtil.mk_call_str(
                "lines", "my.table$t", "my.table$%s" % header,
                col='"%s"' % c)
    return out.getvalue()
def get_response_content(fs):
    """
    Plot the user supplied R table with the module level script body.
    @param fs: form data; this function reads table and imageformat
    @return: the image data produced by the R plotter
    """
    table_text = fs.table
    # The parsed table is discarded; it is constructed only so that
    # any error raised while reading the table surfaces here.
    RUtil.RTable(table_text.splitlines())
    r_device = Form.g_imageformat_to_r_function[fs.imageformat]
    return RUtil.run_plotter_concise(table_text, g_script_body, r_device)
def main(args):
    """
    Run BEAST on a column permuted alignment and write a pdf of plots.
    @param args: parsed command line arguments; this function reads
    verbose, nsamples and outfile
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    # progress messages go to a stream handler with a trailing timestamp
    logger = logging.getLogger('toplevel.logger')
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(message)s %(asctime)s'))
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG if args.verbose else logging.WARNING)
    logger.info('(local) permute columns of the alignment')
    header_seq_pairs = beasttut.get_456_col_permuted_header_seq_pairs()
    logger.info('(local) run BEAST serially locally and build the R stuff')
    table_string, scripts = get_table_string_and_scripts(
            g_start_stop_pairs, args.nsamples, header_seq_pairs)
    logger.info('(local) create the composite R script')
    # the composite script is a fixed preamble followed by each plot script
    combo = StringIO()
    preamble = ['library(ggplot2)', 'par(mfrow=c(3,1))']
    for piece in preamble + list(scripts):
        print >> combo, piece
    comboscript = combo.getvalue()
    logger.info('(local) run R to create the pdf')
    device_name = Form.g_imageformat_to_r_function['pdf']
    plot_result = RUtil.run_plotter(
            table_string, comboscript, device_name,
            keep_intermediate=True)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    logger.info('(local) write the .pdf file')
    with open(args.outfile, 'wb') as fout:
        fout.write(image_data)
    logger.info('(local) return from toplevel')
def get_r_tikz_stub():
    """
    Run the stub R script from RUtil through the tikz device.
    @return: the tikz code produced by the R plotter
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    plot_result = RUtil.run_plotter_no_table(RUtil.g_stub, 'tikz')
    retcode, r_out, r_err, tikz_code = plot_result
    # a nonzero return code is reported together with the R stderr text
    if retcode:
        raise RUtil.RError(r_err)
    return tikz_code
def get_response_content(fs):
    """
    Plot an optimized information value over time for requested processes.
    For each time point and each requested process, the process
    parameters are optimized with scipy.optimize.fmin starting from a
    random point, and the negated objective value is recorded.
    @param fs: form data; this function reads one flag per entry of
    g_process_triples plus start_time, stop_time, d and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if no process was requested
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    f_info = divtime.get_fisher_info_known_distn_fast
    # keep only the processes whose flag is set on the form data
    requested_triples = [
            (name, desc, zoo_class)
            for name, desc, zoo_class in g_process_triples
            if getattr(fs, name)]
    if not requested_triples:
        raise ValueError('nothing to plot')
    # define the R table headers
    r_names = [a.replace('_', '.') for a, b, c in requested_triples]
    headers = ['t'] + r_names
    # Spend a lot of time doing the optimizations
    # to construct the points for the R table.
    arr = []
    for t in cbreaker.throttled(
            progrid.gen_binary(fs.start_time, fs.stop_time),
            nseconds=5, ncount=200):
        row = [t]
        for python_name, desc, zoo_class in requested_triples:
            zoo_obj = zoo_class(fs.d)
            df = zoo_obj.get_df()
            opt_dep = OptDep(zoo_obj, t, f_info)
            if df:
                X0 = np.random.randn(df)
                # I would like to use scipy.optimize.minimize
                # except that this requires a newer version of
                # scipy than is packaged for ubuntu right now.
                # fmin_bfgs seems to have problems sometimes
                # either hanging or maxiter=10K is too big.
                xopt = scipy.optimize.fmin(
                        opt_dep, X0, maxiter=10000, maxfun=10000)
            else:
                # nothing to optimize when there are no free parameters
                xopt = np.array([])
            # the objective is negated to recover the information value
            info_value = -opt_dep(xopt)
            row.append(info_value)
        arr.append(row)
    # each row starts with t, so this orders the rows by time
    arr.sort()
    # get the R table
    table_string = RUtil.get_table_string(arr, headers)
    # get the R script
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs):
    """
    Plot endpoint conditioned state probabilities for each generation.
    @param fs: form data; this function reads npop, selection_ratio,
    mutation_ab, mutation_ba, nmutants_initial, nmutants_final,
    ngenerations and imageformat
    @return: the image data produced by the R plotter
    """
    # build the two component transition matrices, checking each one
    drift_sel = pgmsinglesite.create_drift_selection_transition_matrix(
            fs.npop, fs.selection_ratio)
    MatrixUtil.assert_transition_matrix(drift_sel)
    mutation = pgmsinglesite.create_mutation_transition_matrix(
            fs.npop, fs.mutation_ab, fs.mutation_ba)
    MatrixUtil.assert_transition_matrix(mutation)
    column_names = [
            'generation', 'number.of.mutants', 'probability', 'log.prob']
    P = np.dot(drift_sel, mutation)
    nstates = fs.npop + 1
    first_state = fs.nmutants_initial
    final_state = fs.nmutants_final
    last = fs.ngenerations - 1
    # M[k, g] is the probability of state k at generation g;
    # the two endpoint generations are known with certainty.
    M = np.zeros((nstates, fs.ngenerations))
    M[first_state, 0] = 1.0
    M[final_state, last] = 1.0
    for g in range(1, last):
        # g steps from the initial state, then the remaining steps
        # onward to the final state
        A = np.linalg.matrix_power(P, g)
        B = np.linalg.matrix_power(P, last - g)
        weights = np.array(
                [A[first_state, k] * B[k, final_state]
                    for k in range(nstates)],
                dtype=float)
        # normalize so that the column sums to one
        M[:, g] = weights / np.sum(weights)
    rows = []
    for g in range(fs.ngenerations):
        for k in range(nstates):
            p = M[k, g]
            # a zero probability has no finite logarithm
            logp = math.log(p) if p else float('-inf')
            rows.append([g, k, p, logp])
    table_string = RUtil.get_table_string(rows, column_names)
    script = get_ggplot()
    # let R draw the plot using the device for the requested image format
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs):
    """
    Plot an optimized information value over time for requested processes.
    For each time point and each requested process, the process
    parameters are optimized with scipy.optimize.fmin starting from a
    random point, and the negated objective value is recorded.
    @param fs: form data; this function reads one flag per entry of
    g_process_triples plus start_time, stop_time, d and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if no process was requested
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    f_info = divtime.get_fisher_info_known_distn_fast
    # keep only the processes whose flag is set on the form data
    requested_triples = [
            (name, desc, zoo_class)
            for name, desc, zoo_class in g_process_triples
            if getattr(fs, name)]
    if not requested_triples:
        raise ValueError('nothing to plot')
    # define the R table headers
    r_names = [a.replace('_', '.') for a, b, c in requested_triples]
    headers = ['t'] + r_names
    # Spend a lot of time doing the optimizations
    # to construct the points for the R table.
    arr = []
    for t in cbreaker.throttled(
            progrid.gen_binary(fs.start_time, fs.stop_time),
            nseconds=5, ncount=200):
        row = [t]
        for python_name, desc, zoo_class in requested_triples:
            zoo_obj = zoo_class(fs.d)
            df = zoo_obj.get_df()
            opt_dep = OptDep(zoo_obj, t, f_info)
            if df:
                X0 = np.random.randn(df)
                # I would like to use scipy.optimize.minimize
                # except that this requires a newer version of
                # scipy than is packaged for ubuntu right now.
                # fmin_bfgs seems to have problems sometimes
                # either hanging or maxiter=10K is too big.
                xopt = scipy.optimize.fmin(
                        opt_dep, X0, maxiter=10000, maxfun=10000)
            else:
                # nothing to optimize when there are no free parameters
                xopt = np.array([])
            # the objective is negated to recover the information value
            info_value = -opt_dep(xopt)
            row.append(info_value)
        arr.append(row)
    # each row starts with t, so this orders the rows by time
    arr.sort()
    # get the R table
    table_string = RUtil.get_table_string(arr, headers)
    # get the R script
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs):
    """
    Check the table and the two variable names, then run R on the table.
    @param fs: form data; this function reads table, var_a and var_b
    @return: whatever RUtil.run_with_table returns
    @raise ValueError: if either variable name is not a column header
    """
    table_text = fs.table
    rtable = RUtil.RTable(table_text.splitlines())
    known_headers = rtable.headers
    data_rows = rtable.data  # read as in the original; not used below
    Carbone.validate_headers(known_headers)
    # both requested variables must name existing columns
    if fs.var_a not in known_headers:
        raise ValueError('the first variable name is not column header')
    if fs.var_b not in known_headers:
        raise ValueError('the second variable name is not column header')
    return RUtil.run_with_table(table_text, fs, get_script_content)
def get_response_content(fs):
    """
    Draw two side by side plots of mean hitting time, one per Nr value.
    @param fs: form data; this function reads scale_to_2N_200,
    ylogscale and imageformat
    @return: the image data produced by the R plotter
    """
    # fixed settings for this plot
    N_diploid = 10
    N_hap = 2 * N_diploid
    plot_density = 2
    theta_values = [0.001, 0.01, 0.1, 1.0]
    Ns_low, Ns_high = 0.0, 3.0
    Ns_values = np.linspace(Ns_low, Ns_high, 3 * plot_density + 1)
    # one array of plotted values per Nr value
    Nr_left, Nr_right = Nr_values = (0, 5)
    arr_0 = get_plot_array(N_diploid, Nr_left, theta_values, Ns_values)
    arr_1 = get_plot_array(N_diploid, Nr_right, theta_values, Ns_values)
    ylab = '"generations * theta"'
    if fs.scale_to_2N_200:
        # rescale the values by the ratio of 200 to the haploid size
        arr_0 = (200 / float(N_hap)) * np.array(arr_0)
        arr_1 = (200 / float(N_hap)) * np.array(arr_1)
        ylab = '"generations * theta * (200 / 2N)"'
    # both panels share the same axis limits
    xlim = (Ns_low, Ns_high)
    ylim = (np.min((arr_0, arr_1)), np.max((arr_0, arr_1)))
    ylogstr = '"y"' if fs.ylogscale else '""'
    # http://sphaerula.com/legacy/R/multiplePlotFigure.html
    pieces = StringIO()
    print >> pieces, mk_call_str(
            'par', mfrow='c(1,2)', oma='c(0,0,2,0)')
    print >> pieces, get_plot(
            'left', Nr_left, arr_0, theta_values, Ns_values,
            xlim, ylim, ylogstr, ylab)
    # the right panel gets an empty y label
    print >> pieces, get_plot(
            'right', Nr_right, arr_1, theta_values, Ns_values,
            xlim, ylim, ylogstr, '""')
    print >> pieces, mk_call_str(
            'title', '"mean hitting time, 2N=%s"' % N_hap, outer='TRUE')
    script = pieces.getvalue().rstrip()
    # let R draw the plot using the device for the requested image format
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter_no_table(script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def main(args):
    """
    Run BEAST over hierarchically nested intervals and write a pdf.
    @param args: parsed command line arguments; this function reads
    ntiles, verbose, infile, mcmc_id, nsamples, log_id, log_filename,
    log_logevery, tile_width, offset, alignment_id, full_table_out
    and outfile
    @raise ValueError: if the number of tiles is not a power of two
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    # exactly one set bit means a power of two
    if gmpy.popcount(args.ntiles) != 1:
        raise ValueError('the number of tiles should be a power of two')
    # progress messages go to a stream handler with a trailing timestamp
    logger = logging.getLogger('toplevel.logger')
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(message)s %(asctime)s'))
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG if args.verbose else logging.WARNING)
    logger.info('(local) read the xml contents')
    # fall back to stdin when no input file was named
    if args.infile is not None:
        with open(args.infile) as fin:
            xmldata = fin.read()
    else:
        xmldata = sys.stdin.read()
    logger.info(
            '(local) modify the log filename and chain length xml contents')
    xmldata = beast.set_nsamples(xmldata, args.mcmc_id, args.nsamples)
    xmldata = beast.set_log_filename(
            xmldata, args.log_id, args.log_filename)
    xmldata = beast.set_log_logevery(
            xmldata, args.log_id, args.log_logevery)
    logger.info('(local) define the hierarchically nested intervals')
    slices = beasttiling.gen_hierarchical_slices(
            args.tile_width, args.offset, args.tile_width * args.ntiles)
    # each slice start is shifted up by one to form the start position
    start_stop_pairs = tuple((start + 1, stop) for start, stop in slices)
    logger.info('(local) run BEAST serially locally and build the R stuff')
    build_result = get_table_strings_and_scripts(
            xmldata, args.alignment_id, start_stop_pairs, args.nsamples)
    table_string, full_table_string, scripts = build_result
    if args.full_table_out:
        logger.info('(local) create the verbose R table')
        with open(args.full_table_out, 'w') as fout:
            fout.write(full_table_string)
    logger.info('(local) create the composite R script')
    # the composite script is a fixed preamble followed by each plot script
    combo = StringIO()
    preamble = ['library(ggplot2)', 'par(mfrow=c(3,1))']
    for piece in preamble + list(scripts):
        print >> combo, piece
    comboscript = combo.getvalue()
    logger.info('(local) run R to create the pdf')
    device_name = Form.g_imageformat_to_r_function['pdf']
    plot_result = RUtil.run_plotter(
            table_string, comboscript, device_name,
            keep_intermediate=True)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    logger.info('(local) write the .pdf file')
    with open(args.outfile, 'wb') as fout:
        fout.write(image_data)
    logger.info('(local) return from toplevel')
def hard_coded_analysis():
    """
    Print an R table of distance estimates for 1000 sampled sequences.
    Each row holds one estimate per level of informativeness.
    The branch length and sequence length are fixed in the code.
    """
    branch_length = 5.0
    sequence_length = 1000
    nsequences = 1000
    column_headers = ('most.info', 'less.info', 'least.info')
    # For each replicate, sample the sequence changes first and then
    # turn them into one distance estimate per column.
    estimate_triples = [
            sample_distance(
                *sample_sequence_changes(branch_length, sequence_length))
            for _ in range(nsequences)]
    print(RUtil.get_table_string(estimate_triples, column_headers))
def get_latex_documentbody(fs):
    """
    Produce tikz code by running each R script through the tikz device.
    The original author marked this function as obsolete.
    @param fs: form data passed through to get_table_string_and_scripts
    @return: the tikz codes, one per line of output, as a single string
    @raise RUtil.RError: if any R run reports a nonzero return code
    """
    table_string, scripts = get_table_string_and_scripts(fs)
    body = StringIO()
    for r_script in scripts:
        # every script is plotted separately against the same table
        plot_result = RUtil.run_plotter(
                table_string, r_script, 'tikz', width=5, height=5)
        retcode, r_out, r_err, tikz_code = plot_result
        if retcode:
            raise RUtil.RError(r_err)
        print >> body, tikz_code
    return body.getvalue()
def get_table_string_and_scripts_from_logs(start_stop_pairs, log_paths, nsamples):
    """
    Build the R table and plot scripts from existing log files.
    This is for analysis of remote execution.
    @param start_stop_pairs: inclusive (start, stop) position pairs
    @param log_paths: one log path per position pair
    @param nsamples: passed to read_log and to get_ggplot2_scripts
    @return: a (table string, scripts) pair
    """
    table_rows = []
    lengths = []
    centers = []
    for (start_pos, stop_pos), log_path in zip(start_stop_pairs, log_paths):
        # both endpoints are counted in the length
        seq_length = stop_pos - start_pos + 1
        means, variations, covs = read_log(log_path, nsamples)
        center = (start_pos + stop_pos) / 2.0
        table_row = [seq_length, center]
        # each value list contributes a low bound, a mean and a high bound
        for sampled_values in (means, variations, covs):
            summary = mcmc.Correlation()
            summary.analyze(sampled_values)
            low, high = mcmc.get_hpd_interval(0.95, sampled_values)
            table_row += [low, summary.mean, high]
        table_rows.append(table_row)
        lengths.append(seq_length)
        centers.append(center)
    table_string = RUtil.get_table_string(table_rows, g_headers)
    scripts = get_ggplot2_scripts(nsamples, lengths, centers)
    return table_string, scripts
def get_table_string_and_scripts_par(start_stop_pairs, nsamples):
    """
    Build the R table and plot scripts using a pool of worker processes.
    Local command-line multi-process only.
    @param start_stop_pairs: inclusive (start, stop) position pairs
    @param nsamples: passed to each worker and to get_ggplot2_scripts
    @return: a (table string, scripts) pair
    """
    # define the pool of processes; the size is fixed at four workers
    mypool = Pool(processes=4)
    try:
        # do the multiprocessing
        start_stop_n_triples = [(a, b, nsamples) for a, b in start_stop_pairs]
        post_pairs_list = mypool.map(forked_function, start_stop_n_triples)
    finally:
        # map() has returned or raised, so no results are pending;
        # shut the workers down instead of leaking them.
        mypool.terminate()
        mypool.join()
    # build the array for the R table
    data_arr = []
    sequence_lengths = []
    midpoints = []
    for start_stop_pair, post_pairs in zip(start_stop_pairs, post_pairs_list):
        start_pos, stop_pos = start_stop_pair
        # both endpoints are counted in the length
        sequence_length = stop_pos - start_pos + 1
        midpoint = (start_pos + stop_pos) / 2.0
        row = [sequence_length, midpoint]
        # each worker result contributes a low bound, a mean and a high bound
        for corr_info, hpd_interval in post_pairs:
            hpd_low, hpd_high = hpd_interval
            row.extend([hpd_low, corr_info.mean, hpd_high])
        data_arr.append(row)
        sequence_lengths.append(sequence_length)
        midpoints.append(midpoint)
    # build the table string
    table_string = RUtil.get_table_string(data_arr, g_headers)
    # get the scripts
    scripts = get_ggplot2_scripts(nsamples, sequence_lengths, midpoints)
    # return the table string and scripts
    return table_string, scripts
def get_table_string_and_scripts(start_stop_pairs, nsamples):
    """
    Build the R table and plot scripts for a list of position intervals.
    Command-line only.
    @param start_stop_pairs: inclusive (start, stop) position pairs
    @param nsamples: passed to get_value_lists and get_ggplot2_scripts
    @return: a (table string, scripts) pair
    """
    table_rows = []
    lengths = []
    centers = []
    for start_pos, stop_pos in start_stop_pairs:
        # both endpoints are counted in the length
        seq_length = stop_pos - start_pos + 1
        means, variations, covs = get_value_lists(
                start_pos, stop_pos, nsamples)
        center = (start_pos + stop_pos) / 2.0
        table_row = [seq_length, center]
        # each value list contributes a low bound, a mean and a high bound
        for sampled_values in (means, variations, covs):
            summary = mcmc.Correlation()
            summary.analyze(sampled_values)
            low, high = mcmc.get_hpd_interval(0.95, sampled_values)
            table_row += [low, summary.mean, high]
        table_rows.append(table_row)
        lengths.append(seq_length)
        centers.append(center)
    table_string = RUtil.get_table_string(table_rows, g_headers)
    scripts = get_ggplot2_scripts(nsamples, lengths, centers)
    return table_string, scripts
def get_table_string_and_scripts(stop_positions, nsamples):
    """
    Build the R table and plot scripts for prefixes of the alignment.
    Every interval starts at position 1.
    Command-line only.
    @param stop_positions: the inclusive stop position of each interval
    @param nsamples: passed to get_value_lists
    @return: a (table string, scripts) pair
    """
    first_position = 1
    table_rows = []
    for last_position in stop_positions:
        # both endpoints are counted in the length
        table_row = [last_position - first_position + 1]
        means, variations, covs = get_value_lists(
                first_position, last_position, nsamples)
        # each value list contributes a low bound, a mean and a high bound
        for sampled_values in (means, variations, covs):
            summary = mcmc.Correlation()
            summary.analyze(sampled_values)
            low, high = mcmc.get_hpd_interval(0.95, sampled_values)
            table_row += [low, summary.mean, high]
        table_rows.append(table_row)
    table_string = RUtil.get_table_string(table_rows, g_headers)
    # the scripts need the lengths again, recomputed from the stops
    lengths = [stop - first_position + 1 for stop in stop_positions]
    scripts = get_ggplot2_scripts(lengths)
    return table_string, scripts
def test_accumulate():
    """
    Check that RUtil.accumulate folds a sequence with an adding function.
    """
    def add(item, running_total):
        # the accumulated value is None until something has been added
        if running_total is None:
            return item + 0
        return item + running_total
    values = (5, 10, 15, 20, 25, 30, 2, -1)  # 106
    total = RUtil.accumulate(values, add)
    assert 106 == total
def get_table_string_and_scripts(stop_positions, nsamples):
    """
    Build the R table and plot scripts for prefixes of the alignment.
    Every interval starts at position 1.
    Command-line only.
    @param stop_positions: the inclusive stop position of each interval
    @param nsamples: passed to get_value_lists
    @return: a (table string, scripts) pair
    """
    first_position = 1
    table_rows = []
    for last_position in stop_positions:
        # both endpoints are counted in the length
        table_row = [last_position - first_position + 1]
        means, variations, covs = get_value_lists(
                first_position, last_position, nsamples)
        # each value list contributes a low bound, a mean and a high bound
        for sampled_values in (means, variations, covs):
            summary = mcmc.Correlation()
            summary.analyze(sampled_values)
            low, high = mcmc.get_hpd_interval(0.95, sampled_values)
            table_row += [low, summary.mean, high]
        table_rows.append(table_row)
    table_string = RUtil.get_table_string(table_rows, g_headers)
    # the scripts need the lengths again, recomputed from the stops
    lengths = [stop - first_position + 1 for stop in stop_positions]
    scripts = get_ggplot2_scripts(lengths)
    return table_string, scripts
def get_response_content(fs):
    """
    Stack the plot scripts into one three row figure and render it.
    @param fs: form data; passed to get_table_string_and_scripts and
    read for imageformat
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    table_string, scripts = get_table_string_and_scripts(fs)
    # the layout command comes first, then each plot script in order
    combo = StringIO()
    for piece in ['par(mfrow=c(3,1))'] + list(scripts):
        print >> combo, piece
    comboscript = combo.getvalue()
    # let R draw the plot using the device for the requested image format
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter(table_string, comboscript, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs):
    """
    Draw two side by side plots of expected returns, one per Nr value.
    @param fs: form data; this function reads imageformat
    @return: the image data produced by the R plotter
    """
    # fixed settings for this plot
    N_diploid = 6
    N_hap = 2 * N_diploid
    plot_density = 8
    theta_values = [0.001, 0.01, 0.1, 1.0]
    Ns_low, Ns_high = 0.0, 3.0
    Ns_values = np.linspace(Ns_low, Ns_high, 3 * plot_density + 1)
    # one array of plotted values per Nr value
    Nr_left, Nr_right = Nr_values = (0, 5)
    arr_0 = get_plot_array(N_diploid, Nr_left, theta_values, Ns_values)
    arr_1 = get_plot_array(N_diploid, Nr_right, theta_values, Ns_values)
    ylab = '"expected returns to AB"'
    # both panels share the same axis limits
    xlim = (Ns_low, Ns_high)
    ylim = (np.min((arr_0, arr_1)), np.max((arr_0, arr_1)))
    ylogstr = '""'
    # http://sphaerula.com/legacy/R/multiplePlotFigure.html
    pieces = StringIO()
    print >> pieces, mk_call_str(
            'par', mfrow='c(1,2)', oma='c(0,0,2,0)')
    print >> pieces, get_plot(
            'left', Nr_left, arr_0, theta_values, Ns_values,
            xlim, ylim, ylogstr, ylab)
    # the right panel gets an empty y label
    print >> pieces, get_plot(
            'right', Nr_right, arr_1, theta_values, Ns_values,
            xlim, ylim, ylogstr, '""')
    print >> pieces, mk_call_str(
            'title',
            '"expected number of returns to AB, 2N=%s"' % N_hap,
            outer='TRUE')
    script = pieces.getvalue().rstrip()
    # let R draw the plot using the device for the requested image format
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter_no_table(script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_table_string_and_scripts(start_stop_pairs, nsamples):
    """
    Build the R table and plot scripts for a list of position intervals.
    Command-line only.
    @param start_stop_pairs: inclusive (start, stop) position pairs
    @param nsamples: passed to get_value_lists and get_ggplot2_scripts
    @return: a (table string, scripts) pair
    """
    table_rows = []
    lengths = []
    centers = []
    for start_pos, stop_pos in start_stop_pairs:
        # both endpoints are counted in the length
        seq_length = stop_pos - start_pos + 1
        means, variations, covs = get_value_lists(
                start_pos, stop_pos, nsamples)
        center = (start_pos + stop_pos) / 2.0
        table_row = [seq_length, center]
        # each value list contributes a low bound, a mean and a high bound
        for sampled_values in (means, variations, covs):
            summary = mcmc.Correlation()
            summary.analyze(sampled_values)
            low, high = mcmc.get_hpd_interval(0.95, sampled_values)
            table_row += [low, summary.mean, high]
        table_rows.append(table_row)
        lengths.append(seq_length)
        centers.append(center)
    table_string = RUtil.get_table_string(table_rows, g_headers)
    scripts = get_ggplot2_scripts(nsamples, lengths, centers)
    return table_string, scripts
def get_response_content(fs):
    """
    Remove rows of an R table whose points duplicate other points.

    The indices of duplicate points are computed by get_dup_indices
    from the selected axis columns and fs.radius.
    @param fs: form data; reads fs.table, fs.axes and fs.radius
    @return: the tab separated table text with the duplicate rows removed
    @raise ValueError: if no axes are given, if an axis is not a header,
    or if an axis column is not numeric
    """
    # read and validate the table
    rtable = RUtil.RTable(fs.table.splitlines())
    header_row = rtable.headers
    data_rows = rtable.data
    Carbone.validate_headers(header_row)
    # check the requested axes against the headers
    axis_headers = fs.axes
    if not axis_headers:
        raise ValueError('no Euclidean axes were provided')
    bad_axes = set(axis_headers) - set(header_row)
    if bad_axes:
        raise ValueError('invalid axes: ' + ', '.join(bad_axes))
    # Data column indices are offset by one relative to header positions.
    # NOTE(review): presumably each data row begins with a row label.
    h_to_i = {}
    for position, header in enumerate(header_row):
        h_to_i[header] = position + 1
    # extract one numeric list per axis
    axis_lists = []
    for h in axis_headers:
        try:
            column = Carbone.get_numeric_column(data_rows, h_to_i[h])
        except Carbone.NumericError:
            raise ValueError(
                    'expected the axis column %s to be numeric' % h)
        axis_lists.append(column)
    # each point takes one coordinate from each axis list
    points = np.array(zip(*axis_lists))
    # drop the rows flagged as duplicates
    dup_indices = get_dup_indices(points, fs.radius)
    kept_rows = []
    for i, row in enumerate(data_rows):
        if i not in dup_indices:
            kept_rows.append(row)
    # construct the new table
    header_line = '\t'.join(header_row)
    body = '\n'.join('\t'.join(row) for row in kept_rows)
    return header_line + '\n' + body + '\n'
def get_response_content(fs):
    """
    Append a cluster label column to an R table.

    Clusters are merged with the agglom module until at most fs.k remain,
    and the surviving cluster indices are renumbered from zero
    in increasing order.
    @param fs: form data; reads fs.table, fs.annotation, fs.axes and fs.k
    @return: the tab separated table text with the extra label column
    """
    rtable = RUtil.RTable(fs.table.splitlines())
    header_row = rtable.headers
    data_rows = rtable.data
    points = get_rtable_info(rtable, fs.annotation, fs.axes)
    # set up the clustering state
    cluster_map = agglom.get_initial_cluster_map(points)
    w_ssd_map = agglom.get_initial_w_ssd_map(points)
    b_ssd_map = agglom.get_initial_b_ssd_map(points)
    q = agglom.get_initial_queue(b_ssd_map)
    # merge pairs of clusters until few enough remain
    while len(cluster_map) > fs.k:
        pair = agglom.get_pair_fast(cluster_map, q)
        agglom.merge_fast(cluster_map, w_ssd_map, b_ssd_map, q, pair)
    # invert the cluster map to go from point index to cluster index
    point_to_cluster = {}
    for cluster_index, point_indices in cluster_map.items():
        for point_index in point_indices:
            point_to_cluster[point_index] = cluster_index
    # the raw cluster indices may be big numbers
    raw_labels = [point_to_cluster[i] for i in range(len(points))]
    # rename the labels with small numbers
    raw_to_label = {}
    for small, raw in enumerate(sorted(set(raw_labels))):
        raw_to_label[raw] = small
    # write the header line followed by one labeled line per data row
    lines = ['\t'.join(header_row + [fs.annotation])]
    for raw, data_row in zip(raw_labels, data_rows):
        lines.append('\t'.join(data_row + [str(raw_to_label[raw])]))
    return '\n'.join(lines) + '\n'
def get_table_string_and_scripts_from_logs(
        start_stop_pairs, log_paths, nsamples):
    """
    This is for analysis of remote execution.

    Each (start, stop) pair is matched with one log path, and contributes
    one table row holding the sequence length, the midpoint,
    and a (hpd low, mean, hpd high) triple for each of the three
    value lists read from the log.
    @param start_stop_pairs: a sequence of (start_pos, stop_pos) pairs
    @param log_paths: log paths in the same order as the pairs
    @param nsamples: passed to read_log and get_ggplot2_scripts
    @return: a (table_string, scripts) pair
    """
    table_rows = []
    sequence_lengths = []
    midpoints = []
    for (start_pos, stop_pos), log_path in zip(start_stop_pairs, log_paths):
        # the length counts both endpoints
        length = stop_pos - start_pos + 1
        means, variations, covs = read_log(log_path, nsamples)
        center = (start_pos + stop_pos) / 2.0
        row = [length, center]
        for values in (means, variations, covs):
            summary = mcmc.Correlation()
            summary.analyze(values)
            hpd_low, hpd_high = mcmc.get_hpd_interval(0.95, values)
            row += [hpd_low, summary.mean, hpd_high]
        table_rows.append(row)
        sequence_lengths.append(length)
        midpoints.append(center)
    # build the table string and the scripts that plot it
    table_string = RUtil.get_table_string(table_rows, g_headers)
    scripts = get_ggplot2_scripts(nsamples, sequence_lengths, midpoints)
    return table_string, scripts
def get_r_tikz_stub():
    """
    Run the RUtil stub script through the tikz device.

    @return: the tikz code produced by the plotter
    @raise RUtil.RError: if the plotter reports a nonzero return code
    """
    retcode, r_out, r_err, tikz_code = RUtil.run_plotter_no_table(
            RUtil.g_stub, "tikz")
    if retcode:
        raise RUtil.RError(r_err)
    return tikz_code
def get_response_content(fs):
    """
    Plot an information value over time for each requested process.

    The value is computed by ctmcmi.get_mutual_info_known_distn at
    101 evenly spaced times, with optional constant log(4) and log(3)
    reference columns.
    @param fs: form data; reads one flag per name in g_process_triples
    plus log4, log3, start_time, stop_time and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if no process was requested
    @raise RUtil.RError: if the plotter reports a nonzero return code
    """
    f_info = ctmcmi.get_mutual_info_known_distn
    # keep only the processes whose flag is set on the form
    requested_triples = [
            triple for triple in g_process_triples
            if getattr(fs, triple[0])]
    if not requested_triples:
        raise ValueError('nothing to plot')
    # optional constant reference columns, in table order
    reference_columns = []
    if fs.log4:
        reference_columns.append(('log.4', math.log(4)))
    if fs.log3:
        reference_columns.append(('log.3', math.log(3)))
    # define the R table headers
    headers = ['t']
    headers.extend(name for name, value in reference_columns)
    headers.extend(
            name.replace('_', '.') for name, desc, zoo_obj in requested_triples)
    # construct one table row per time point
    arr = []
    for t in np.linspace(fs.start_time, fs.stop_time, 101):
        row = [t]
        row.extend(value for name, value in reference_columns)
        for python_name, desc, zoo_obj in requested_triples:
            X = np.array([])
            row.append(f_info(
                zoo_obj.get_rate_matrix(X), zoo_obj.get_distn(X), t))
        arr.append(row)
    # get the R table and the R script
    table_string = RUtil.get_table_string(arr, headers)
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection rate matrices and summarize them over time.

    @param fs: form data; reads nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew), t_low, t_high and ntimes
    @return: a (table_string, scripts) pair, where scripts is a list
    of three R scripts
    @raise ValueError: if there are more than 256 states,
    if fewer than two time points are requested,
    or if no variance flag or no skew flag is set
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # the time increment below divides by (ntimes - 1)
    if fs.ntimes < 2:
        raise ValueError('at least two time points are required')
    # The variance choice does not depend on the sample, so resolve it once
    # and fail clearly instead of leaving the variable unbound.
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError('no selection variance was specified')
    s = math.sqrt(v)
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        # sample the selection parameters
        if fs.neg_skew:
            sels = [-random.expovariate(1 / s) for i in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, s) for i in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1 / s) for i in range(nstates)]
        else:
            raise ValueError('no selection skew was specified')
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                if i == j:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                if tau == 1:
                    # The ratio below is 0 / 0 here;
                    # its limit as tau approaches 1 is 1.
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1 / tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is still zero here, so each row ends up summing to zero
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # Define the time points.
    # The float conversion keeps integer bounds from being floor-divided
    # under Python 2.
    incr = (fs.t_high - fs.t_low) / float(fs.ntimes - 1)
    times = [fs.t_low + i * incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    pairs = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*pairs)
    # count how often the sign changes over time for each selection sample
    ncrossing_list = []
    for signs in zip(*mi_sign_lists):
        count = 0
        for sign_a, sign_b in iterutils.pairwise(signs):
            if sign_a != sign_b:
                count += 1
        ncrossing_list.append(count)
    # get the R scripts
    scripts = [
            get_r_band_script(nsels, time_stats),
            get_r_prop_script(nsels, time_stats),
            get_r_cross_script(ncrossing_list),
            ]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
def get_response_content(fs):
    """
    Plot ratio statistics for sampled mutation-selection rate matrices.

    The R table has one row per entry of the ratio lists returned by
    get_statistic_ratios, with the three ratios as columns.
    @param fs: form data; passed to get_qmut_qsels, and fs.imageformat
    selects the R device
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the plotter reports a nonzero return code
    """
    Q_mut, Q_sels = get_qmut_qsels(fs)
    # compute the statistics
    ER_ratios, NSR_ratios, ER_NSR_ratios = get_statistic_ratios(Q_mut, Q_sels)
    # build the R table with one column per statistic
    column_headers = ('ER.ratio', 'NSR.ratio', 'ER.times.NSR.ratio')
    table_rows = zip(ER_ratios, NSR_ratios, ER_NSR_ratios)
    table_string = RUtil.get_table_string(table_rows, column_headers)
    # get the R script
    comboscript = get_r_comboscript(len(Q_sels), column_headers)
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, comboscript, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_R_tick_cmd(axis, positions):
    """
    Build the R axis call that draws ticks at the given positions.

    @param axis: 1 for x, 2 for y
    @param positions: a sequence of positions
    @return: a single line R command to draw the ticks
    """
    # the positions are spelled as an R vector literal such as c(1, 2, 3)
    r_vector = 'c(%s)' % ', '.join(map(str, positions))
    return RUtil.mk_call_str('axis', axis, at=r_vector)
def get_response_content(fs):
    """
    Plot a user supplied R table using the fixed script g_script_body.

    @param fs: form data; reads fs.table and fs.imageformat
    @return: the image data returned by RUtil.run_plotter_concise
    """
    # The parsed table is discarded; it is constructed only as a check.
    # NOTE(review): presumably RTable raises on a malformed table.
    RUtil.RTable(fs.table.splitlines())
    # make the plot
    return RUtil.run_plotter_concise(
            fs.table,
            g_script_body,
            Form.g_imageformat_to_r_function[fs.imageformat])
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection rate matrices and summarize them over time.

    @param fs: form data; reads nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew), t_low, t_high and ntimes
    @return: a (table_string, scripts) pair, where scripts is a list
    of three R scripts
    @raise ValueError: if there are more than 256 states,
    if fewer than two time points are requested,
    or if no variance flag or no skew flag is set
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # the time increment below divides by (ntimes - 1)
    if fs.ntimes < 2:
        raise ValueError('at least two time points are required')
    # The variance choice is the same for every sample, so it is resolved
    # once; a missing choice is reported instead of leaving v unbound.
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError('no selection variance was specified')
    s = math.sqrt(v)
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        # sample the selection parameters
        if fs.neg_skew:
            sels = [-random.expovariate(1/s) for i in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, s) for i in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1/s) for i in range(nstates)]
        else:
            raise ValueError('no selection skew was specified')
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                if i == j:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                if tau == 1:
                    # The ratio below is 0 / 0 here;
                    # its limit as tau approaches 1 is 1.
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1/tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is still zero here, so each row ends up summing to zero
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # Define the time points.
    # The float conversion keeps integer bounds from being floor-divided
    # under Python 2.
    incr = (fs.t_high - fs.t_low) / float(fs.ntimes - 1)
    times = [fs.t_low + i*incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    pairs = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*pairs)
    # count how often the sign changes over time for each selection sample
    ncrossing_list = []
    for signs in zip(*mi_sign_lists):
        count = 0
        for sign_a, sign_b in iterutils.pairwise(signs):
            if sign_a != sign_b:
                count += 1
        ncrossing_list.append(count)
    # get the R scripts
    scripts = [
            get_r_band_script(nsels, time_stats),
            get_r_prop_script(nsels, time_stats),
            get_r_cross_script(ncrossing_list)]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
def get_response_content(fs):
    """
    Plot an information value over time for each requested process.

    Each table row holds a time, the optional constant log(4) and log(3)
    columns, and one value per requested process as computed by
    ctmcmi.get_mutual_info_known_distn.
    @param fs: form data; reads one flag per name in g_process_triples
    plus log4, log3, start_time, stop_time and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if no process was requested
    @raise RUtil.RError: if the plotter reports a nonzero return code
    """
    f_info = ctmcmi.get_mutual_info_known_distn
    # keep only the processes whose flag is set on the form
    requested_triples = [
            triple for triple in g_process_triples
            if getattr(fs, triple[0])]
    if not requested_triples:
        raise ValueError('nothing to plot')
    # the constant columns are the same in every row
    constant_headers = []
    constant_values = []
    if fs.log4:
        constant_headers.append('log.4')
        constant_values.append(math.log(4))
    if fs.log3:
        constant_headers.append('log.3')
        constant_values.append(math.log(3))
    # define the R table headers
    r_names = [
            python_name.replace('_', '.')
            for python_name, desc, zoo_obj in requested_triples]
    headers = ['t'] + constant_headers + r_names
    # construct the points for the R table at 101 evenly spaced times
    arr = []
    for t in np.linspace(fs.start_time, fs.stop_time, 101):
        info_values = []
        for python_name, desc, zoo_obj in requested_triples:
            X = np.array([])
            info_values.append(f_info(
                zoo_obj.get_rate_matrix(X), zoo_obj.get_distn(X), t))
        arr.append([t] + constant_values + info_values)
    # get the R table and the R script
    table_string = RUtil.get_table_string(arr, headers)
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_plot_scripts(sequence_lengths):
    """
    Get three R scripts that plot table columns against sequence length.

    The plotted columns are mean.mean, var.mean and cov.mean in that order.
    Each plot suppresses the default x axis and draws ticks
    at the given sequence lengths instead.
    @param sequence_lengths: the x axis tick positions
    @return: a list of three R scripts
    """
    def with_ticks(plot_call):
        # a script is the plot call followed by the custom x axis ticks
        out = StringIO()
        print >> out, plot_call
        print >> out, get_R_tick_cmd(1, sequence_lengths)
        return out.getvalue().rstrip()
    scripts = []
    # the plot for the mean
    scripts.append(with_ticks(RUtil.mk_call_str(
        'plot',
        'my.table$sequence.length',
        'my.table$mean.mean',
        xlab="''",
        ylab="'mean'",
        xaxt="'n'",
        main="'posterior statistics of rates among branches'",
        )))
    # the plot for the coefficient of variation
    scripts.append(with_ticks(RUtil.mk_call_str(
        'plot',
        'my.table$sequence.length',
        'my.table$var.mean',
        xlab="''",
        ylab="'coeff of variation'",
        xaxt="'n'",
        )))
    # the plot for the parent-child correlation
    scripts.append(with_ticks(RUtil.mk_call_str(
        'plot',
        'my.table$sequence.length',
        'my.table$cov.mean',
        xlab="'sequence length'",
        ylab="'parent-child correlation'",
        xaxt="'n'",
        )))
    return scripts
def get_table_string_and_scripts(fs):
    """
    The latex documentbody should have a bunch of tikz pieces in it.
    Each tikz piece should have been generated from R.

    @param fs: form data; reads nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew), t_low, t_high and ntimes
    @return: a (table_string, scripts) pair, where scripts is a list
    of three R scripts
    @raise ValueError: if there are more than 256 states,
    if fewer than two time points are requested,
    or if no variance flag or no skew flag is set
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError("the mutation rate matrix is too big")
    # the time increment below divides by (ntimes - 1)
    if fs.ntimes < 2:
        raise ValueError("at least two time points are required")
    # The variance choice is the same for every sample, so it is resolved
    # once; a missing choice is reported instead of leaving v unbound.
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError("no selection variance was specified")
    s = math.sqrt(v)
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        # sample the selection parameters
        if fs.neg_skew:
            sels = [-random.expovariate(1 / s) for i in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, s) for i in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1 / s) for i in range(nstates)]
        else:
            raise ValueError("no selection skew was specified")
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                if i == j:
                    continue
                tau = math.exp(-(sels[j] - sels[i]))
                if tau == 1:
                    # The ratio below is 0 / 0 here;
                    # its limit as tau approaches 1 is 1.
                    coeff = 1.0
                else:
                    coeff = math.log(tau) / (1 - 1 / tau)
                Q[i, j] = Q_mut[i, j] * coeff
        # the diagonal is still zero here, so each row ends up summing to zero
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # Define the time points.
    # The float conversion keeps integer bounds from being floor-divided
    # under Python 2.
    incr = (fs.t_high - fs.t_low) / float(fs.ntimes - 1)
    times = [fs.t_low + i * incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    time_stats = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    # get the R scripts
    scripts = [
        # get_r_tikz_mi_plot(nsels, time_stats),
        get_r_tikz_corr_plot(nsels, time_stats),
        get_r_tikz_prop_plot(nsels, time_stats),
        get_r_tikz_info_plot(nsels, time_stats),
    ]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
def get_response_content(fs):
    """
    Scatter plot of entropy against an analog for random distributions.

    Each of 5000 distributions over 4 states is sampled by normalizing
    exponential weights.
    The analog is the log of the ratio of the sum of squared probabilities
    to the sum of cubed probabilities.
    The plot title lists the sampled distribution whose
    (entropy, analog) pair is closest to (1.0, 0.4).
    @param fs: form data; only fs.imageformat is read here
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the plotter reports a nonzero return code
    """
    headers = ['entropy', 'analog']
    nstates = 4
    npoints = 5000
    arr = []
    # track the distribution nearest to the target point
    best_dist = None
    best_distn = None
    for i in range(npoints):
        # sample a random distribution
        weights = [random.expovariate(1) for j in range(nstates)]
        total = sum(weights)
        distn = [x / total for x in weights]
        # compute the two plotted statistics
        entropy = -sum(p * math.log(p) for p in distn)
        sum_squares = sum(p*p for p in distn)
        sum_cubes = sum(p*p*p for p in distn)
        analog = math.log(sum_squares / sum_cubes)
        arr.append([entropy, analog])
        # squared distance to the target point
        dist = (entropy - 1.0)**2 + (analog - 0.4)**2
        if (best_dist is None) or (dist < best_dist):
            best_dist = dist
            best_distn = distn
    # get the R table
    table_string = RUtil.get_table_string(arr, headers)
    # get the R script, which is a single plot call on its own line
    title = ', '.join(str(x) for x in best_distn)
    script = RUtil.mk_call_str(
            'plot',
            'my.table$entropy',
            'my.table$analog',
            pch='20',
            main='"%s"' % title) + '\n'
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_r_prop_script(nsels, time_stats):
    """
    Get R code that plots the prop.sel.vs.mut table column against time.

    The generated code refers only to columns of my.table and uses a fixed
    y range from 0 to 1, so neither argument affects the result.
    Both parameters are kept so that existing calls keep working.
    @param nsels: unused
    @param time_stats: a list of stats for each time point (unused)
    @return: R code consisting of a single plot call followed by a newline
    """
    ylim = RUtil.mk_call_str('c', 0, 1)
    plot_call = RUtil.mk_call_str(
            'plot',
            'my.table$t',
            'my.table$prop.sel.vs.mut',
            type='"l"',
            ylim=ylim,
            xlab='"time"',
            ylab='"proportion"',
            main='"proportion of mut-sel MI greater than mutation MI"')
    return plot_call + '\n'
def get_response_content(fs):
    """
    Plot an information criterion over time for two processes.

    The table comes from make_table(fs); every column after the first
    is drawn as a line, with dotted horizontal guide lines every 0.2
    and a legend naming the two processes.
    @param fs: form data; reads nstates_a, nstates_b, mu_a, mu_b,
    legend_placement and imageformat, and is passed to make_table
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the plotter reports a nonzero return code
    """
    # legend labels
    legend_names = (
            'N=%d mu=%f' % (fs.nstates_a, fs.mu_a),
            'N=%d mu=%f' % (fs.nstates_b, fs.mu_b))
    arr, headers = make_table(fs)
    # the y axis extends to the first multiple of 0.2 above the max value
    ymax = math.log(max(fs.nstates_a, fs.nstates_b))
    nfifths = int(math.floor(ymax * 5.0)) + 1
    ylim = RUtil.mk_call_str('c', 0, 0.2 * nfifths)
    # write the R script body, starting with an empty plot
    out = StringIO()
    print >> out, RUtil.mk_call_str(
            'plot',
            'my.table$t',
            'my.table$alpha',
            type='"n"',
            ylim=ylim,
            xlab='"time"',
            ylab='"information"',
            main='"comparison of an information criterion for two processes"',
            )
    # draw some horizontal lines
    for i in range(nfifths+1):
        print >> out, RUtil.mk_call_str(
                'abline',
                h=0.2*i,
                col='"lightgray"',
                lty='"dotted"')
    # draw one colored line per data column
    colors = ('darkblue', 'darkred')
    for c, header in zip(colors, headers[1:]):
        print >> out, RUtil.mk_call_str(
                'lines',
                'my.table$t',
                'my.table$%s' % header,
                col='"%s"' % c,
                )
    # the legend arguments are spelled as R vector literals
    legend_name_str = 'c(' + ', '.join('"%s"' % s for s in legend_names) + ')'
    legend_col_str = 'c(' + ', '.join('"%s"' % s for s in colors) + ')'
    legend_lty_str = 'c(' + ', '.join(['1'] * len(colors)) + ')'
    print >> out, RUtil.mk_call_str(
            'legend',
            '"%s"' % fs.legend_placement,
            legend_name_str,
            col=legend_col_str,
            lty=legend_lty_str,
            )
    script_body = out.getvalue()
    # create the R plot image
    table_string = RUtil.get_table_string(arr, headers)
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script_body, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data