def main():
    """Build the group-statistics report for the training pseudoisomers.

    Loads the group definitions and the pseudoisomer table, decomposes every
    pseudoisomer whose Train() flag is set, and feeds the decompositions into
    a DecompositionStats collector.  The collected statistics (groups listed
    by example count, most common groups, most frequent co-occurring pairs
    and linearly dependent pairs) are rendered through the
    'analyze_training_groups.html' template, and a histogram image is written
    next to the report.

    All input and output locations are hard-coded relative paths, so the
    result depends on the current working directory.
    """
    # NOTE(review): closing the file right after the factory call assumes
    # FromGroupsFile() consumes it completely before returning -- confirm.
    with open('../data/thermodynamics/groups_species.csv', 'r') as groups_file:
        gdata = groups_data.GroupsData.FromGroupsFile(groups_file)
    decomposer = group_decomposition.GroupDecomposer(gdata)
    pdata = pseudoisomers_data.PseudoisomersData.FromFile(
        '../data/thermodynamics/dG0.csv')

    dstats = DecompositionStats(gdata)
    for pisomer in pdata:
        if not pisomer.Train():
            continue

        mol = pisomer.Mol()
        if not mol:
            logging.warning('Cannot get a Mol for pseudoisomer %s', pisomer)
            continue

        # Reuse the molecule fetched above instead of calling Mol() again.
        dstats.AddDecomposition(pisomer, decomposer.Decompose(mol))

    # Called group_map so that the builtin map() is not shadowed.
    group_map = dstats.GetGroupMap()

    # One entry per example count from 0 to 20 inclusive.
    rare_groups = []
    for count in range(21):
        group_dicts = [{'group': g, 'pseudoisomers': ps}
                       for g, ps in group_map.GetGroupsByNumExamples(count)]
        rare_groups.append({'count': count, 'groups': group_dicts})

    # The image path is kept relative so the template can reference it.
    image_name = 'images/groups_histo.png'
    group_map.PlotHistogram('../res/' + image_name)

    most_common_groups = group_map.GetMostCommonGroups()
    mcg_dicts = [{'count': c, 'group': g} for g, c in most_common_groups]

    pairwise_map = dstats.GetPairwiseGroupMap()
    frequent_pairs = pairwise_map.GetMostFrequentCooccurences()
    freq_pairs_dicts = [{'count': c, 'groups': [ga, gb]}
                        for ga, gb, c in frequent_pairs]

    ld_pairs = pairwise_map.GetLinearlyDependentPairs()
    ld_pairs_dicts = [{'ratio': r,
                       'pisomers': p,
                       'count': len(p),
                       'groups': [ga, gb]}
                      for ga, gb, r, p in ld_pairs]

    template_data = {
        'rare_groups_by_count': rare_groups,
        'most_common_groups': mcg_dicts,
        'histo_image_name': image_name,
        'frequent_pairs': freq_pairs_dicts,
        'ld_pairs': ld_pairs_dicts,
        'groups_data': gdata,
        'group_map': group_map,
    }
    templates.render_to_file('analyze_training_groups.html',
                             template_data,
                             '../res/analyze_training_groups.html')
def Main():
    """Run the MTDF analysis on each pathway of the input file.

    Reads the command-line options produced by MakeOpts(), picks the
    thermodynamic estimator named by options.thermodynamics_source, and
    iterates over the pathways defined in options.input_filename.  For every
    pathway that is not marked as skipped, the MTDF optimizer is run, its
    graphs are written below '<output_dir>/pathway_graphs/', and the result
    is collected.  Finally all results are rendered to
    '<output_dir>/results.html'.

    Exits with status 1 when the input file does not exist.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()

    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        # logging.fatal() only emits a CRITICAL record, it does not stop the
        # program, so abort explicitly instead of analysing a missing file.
        logging.fatal('Input filename %s doesn\'t exist', input_filename)
        sys.exit(1)
    # Single-argument parenthesised prints behave the same as the Python 2
    # print statement and also parse under Python 3.
    print('Will read pathway definitions from %s' % input_filename)

    thermo = estimators[options.thermodynamics_source]
    print("Using the thermodynamic estimations of: " + thermo.name)
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)

    # Fetch the shared KEGG instance used to build the stoichiometric models.
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories.
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)

    print('Executing MTDF analysis')
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    results = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print('Skipping pathway %s' % (pathway_data.name,))
            continue

        print('Analyzing pathway %s' % (pathway_data.name,))

        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()

        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)
        result = mtdf_opt.FindMTDF(model_bounds)

        print('Optimization status %s' % (result.status,))

        result.WriteAllGraphs(pathgraph_dir)
        results.append(result)

        mtdf = result.opt_val
        print('\tMTDF for %s = %.2g' % (pathway_data.name, mtdf))

    output_filename = path.join(out_dir, 'results.html')
    print('Writing output to %s' % (output_filename,))
    template_data = {'analysis_type': 'MTDF', 'results': results}
    templates.render_to_file('pathway_optimization_results.html',
                             template_data,
                             output_filename)
def Main():
    """Run the MTDF analysis on each pathway of the input file.

    Reads the command-line options produced by MakeOpts(), picks the
    thermodynamic estimator named by options.thermodynamics_source, and
    iterates over the pathways defined in options.input_filename.  For every
    pathway that is not marked as skipped, the MTDF optimizer is run, its
    graphs are written below '<output_dir>/pathway_graphs/', and the result
    is collected.  Finally all results are rendered to
    '<output_dir>/results.html'.

    Exits with status 1 when the input file does not exist.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()

    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        # logging.fatal() only emits a CRITICAL record, it does not stop the
        # program, so abort explicitly instead of analysing a missing file.
        logging.fatal('Input filename %s doesn\'t exist', input_filename)
        sys.exit(1)
    # Single-argument parenthesised prints behave the same as the Python 2
    # print statement and also parse under Python 3.
    print('Will read pathway definitions from %s' % input_filename)

    thermo = estimators[options.thermodynamics_source]
    print("Using the thermodynamic estimations of: " + thermo.name)
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)

    # Fetch the shared KEGG instance used to build the stoichiometric models.
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories.
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)

    print('Executing MTDF analysis')
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    results = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print('Skipping pathway %s' % (pathway_data.name,))
            continue

        print('Analyzing pathway %s' % (pathway_data.name,))

        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()

        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)
        result = mtdf_opt.FindMTDF(model_bounds)

        print('Optimization status %s' % (result.status,))

        result.WriteAllGraphs(pathgraph_dir)
        results.append(result)

        mtdf = result.opt_val
        print('\tMTDF for %s = %.2g' % (pathway_data.name, mtdf))

    output_filename = path.join(out_dir, 'results.html')
    print('Writing output to %s' % (output_filename,))
    template_data = {'analysis_type': 'MTDF', 'results': results}
    templates.render_to_file('pathway_optimization_results.html',
                             template_data,
                             output_filename)
def main():
    """Build the group-statistics report for the training pseudoisomers.

    Loads the group definitions and the pseudoisomer table, decomposes every
    pseudoisomer whose Train() flag is set, and feeds the decompositions into
    a DecompositionStats collector.  The collected statistics (groups listed
    by example count, most common groups, most frequent co-occurring pairs
    and linearly dependent pairs) are rendered through the
    'analyze_training_groups.html' template, and a histogram image is written
    next to the report.

    All input and output locations are hard-coded relative paths, so the
    result depends on the current working directory.
    """
    # NOTE(review): closing the file right after the factory call assumes
    # FromGroupsFile() consumes it completely before returning -- confirm.
    with open('../data/thermodynamics/groups_species.csv', 'r') as groups_file:
        gdata = groups_data.GroupsData.FromGroupsFile(groups_file)
    decomposer = group_decomposition.GroupDecomposer(gdata)
    pdata = pseudoisomers_data.PseudoisomersData.FromFile(
        '../data/thermodynamics/dG0.csv')

    dstats = DecompositionStats(gdata)
    for pisomer in pdata:
        if not pisomer.Train():
            continue

        mol = pisomer.Mol()
        if not mol:
            logging.warning('Cannot get a Mol for pseudoisomer %s', pisomer)
            continue

        # Reuse the molecule fetched above instead of calling Mol() again.
        dstats.AddDecomposition(pisomer, decomposer.Decompose(mol))

    # Called group_map so that the builtin map() is not shadowed.
    group_map = dstats.GetGroupMap()

    # One entry per example count from 0 to 20 inclusive.
    rare_groups = []
    for count in range(21):
        group_dicts = [{'group': g, 'pseudoisomers': ps}
                       for g, ps in group_map.GetGroupsByNumExamples(count)]
        rare_groups.append({'count': count, 'groups': group_dicts})

    # The image path is kept relative so the template can reference it.
    image_name = 'images/groups_histo.png'
    group_map.PlotHistogram('../res/' + image_name)

    most_common_groups = group_map.GetMostCommonGroups()
    mcg_dicts = [{'count': c, 'group': g} for g, c in most_common_groups]

    pairwise_map = dstats.GetPairwiseGroupMap()
    frequent_pairs = pairwise_map.GetMostFrequentCooccurences()
    freq_pairs_dicts = [{'count': c, 'groups': [ga, gb]}
                        for ga, gb, c in frequent_pairs]

    ld_pairs = pairwise_map.GetLinearlyDependentPairs()
    ld_pairs_dicts = [{'ratio': r,
                       'pisomers': p,
                       'count': len(p),
                       'groups': [ga, gb]}
                      for ga, gb, r, p in ld_pairs]

    template_data = {
        'rare_groups_by_count': rare_groups,
        'most_common_groups': mcg_dicts,
        'histo_image_name': image_name,
        'frequent_pairs': freq_pairs_dicts,
        'ld_pairs': ld_pairs_dicts,
        'groups_data': gdata,
        'group_map': group_map,
    }
    templates.render_to_file('analyze_training_groups.html',
                             template_data,
                             '../res/analyze_training_groups.html')
def main():
    """Render the pseudoisomer ground-truth report.

    Loads the pseudoisomer table and a group decomposer, decomposes every
    pseudoisomer for which a molecule is available, and sorts the
    pseudoisomers into train / test / skip collections according to their
    Train(), Test() and Skip() flags (checked in that order).  The three
    collections are rendered through the 'pseudoisomers_ground_truth.html'
    template.

    Pseudoisomers without a molecule are still listed, with a decomposition
    of None.  All input and output locations are hard-coded relative paths,
    so the result depends on the current working directory.
    """
    pdata = pseudoisomers_data.PseudoisomersData.FromFile(
        '../data/thermodynamics/dG0.csv')
    # NOTE(review): closing the file right after the factory call assumes
    # FromGroupsFile() consumes it completely before returning -- confirm.
    with open('../data/thermodynamics/groups_species.csv', 'r') as groups_file:
        decomposer = group_decomposition.GroupDecomposer.FromGroupsFile(
            groups_file)

    train_pseudoisomers = []
    test_pseudoisomers = []
    skip_pseudoisomers = []

    for pisomer in pdata:
        mol = pisomer.Mol()
        decomposition = None
        if mol:
            # Reuse the molecule fetched above instead of calling Mol() again.
            decomposition = decomposer.Decompose(mol)
            if decomposition.unassigned_nodes:
                logging.warning('%s didn\'t decompose', pisomer)

        pisomer_dict = {'data': pisomer, 'decomposition': decomposition}
        if pisomer.Train():
            train_pseudoisomers.append(pisomer_dict)
        elif pisomer.Test():
            test_pseudoisomers.append(pisomer_dict)
        elif pisomer.Skip():
            skip_pseudoisomers.append(pisomer_dict)
        else:
            # Unclassified entries are reported and left out of the output.
            logging.warning('Found a pseudoisomer that is not marked as'
                            ' test, train, or skip.')

    template_data = {
        'pseudoisomer_collections': [
            {'name': 'Train', 'pseudoisomers': train_pseudoisomers},
            {'name': 'Test', 'pseudoisomers': test_pseudoisomers},
            {'name': 'Skip', 'pseudoisomers': skip_pseudoisomers},
        ],
    }
    templates.render_to_file('pseudoisomers_ground_truth.html',
                             template_data,
                             '../res/pseudoisomers_ground_truth.html')
def main():
    """Render the pseudoisomer ground-truth report.

    Loads the pseudoisomer table and a group decomposer, decomposes every
    pseudoisomer for which a molecule is available, and sorts the
    pseudoisomers into train / test / skip collections according to their
    Train(), Test() and Skip() flags (checked in that order).  The three
    collections are rendered through the 'pseudoisomers_ground_truth.html'
    template.

    Pseudoisomers without a molecule are still listed, with a decomposition
    of None.  All input and output locations are hard-coded relative paths,
    so the result depends on the current working directory.
    """
    pdata = pseudoisomers_data.PseudoisomersData.FromFile(
        '../data/thermodynamics/dG0.csv')
    # NOTE(review): closing the file right after the factory call assumes
    # FromGroupsFile() consumes it completely before returning -- confirm.
    with open('../data/thermodynamics/groups_species.csv', 'r') as groups_file:
        decomposer = group_decomposition.GroupDecomposer.FromGroupsFile(
            groups_file)

    train_pseudoisomers = []
    test_pseudoisomers = []
    skip_pseudoisomers = []

    for pisomer in pdata:
        mol = pisomer.Mol()
        decomposition = None
        if mol:
            # Reuse the molecule fetched above instead of calling Mol() again.
            decomposition = decomposer.Decompose(mol)
            if decomposition.unassigned_nodes:
                logging.warning('%s didn\'t decompose', pisomer)

        pisomer_dict = {'data': pisomer, 'decomposition': decomposition}
        if pisomer.Train():
            train_pseudoisomers.append(pisomer_dict)
        elif pisomer.Test():
            test_pseudoisomers.append(pisomer_dict)
        elif pisomer.Skip():
            skip_pseudoisomers.append(pisomer_dict)
        else:
            # Unclassified entries are reported and left out of the output.
            logging.warning('Found a pseudoisomer that is not marked as'
                            ' test, train, or skip.')

    template_data = {
        'pseudoisomer_collections': [
            {'name': 'Train', 'pseudoisomers': train_pseudoisomers},
            {'name': 'Test', 'pseudoisomers': test_pseudoisomers},
            {'name': 'Skip', 'pseudoisomers': skip_pseudoisomers},
        ],
    }
    templates.render_to_file('pseudoisomers_ground_truth.html',
                             template_data,
                             '../res/pseudoisomers_ground_truth.html')
if cost is not None: print '\t*Protein Cost for', pathway_data.name, '= %.2g' % cost if optima: print 'Found', len(optima), 'near-optima for', pathway_data.name optima = np.array(optima) mean_opt = np.mean(optima) mean_diff = np.mean(np.abs(optima - mean_opt)) print 'Mean optimum', mean_opt print 'Mean diff from mean', mean_diff print 'Percent diff %s%%' % (100*mean_diff / mean_opt) print 'StdDev opt', np.std(optima) else: # Use default conditions to show the failure res = opt.FindOptimum(model_bounds) result_dict['result'] = res results.append(result_dict) output_filename = path.join(out_dir, 'results.html') print 'Writing output to', output_filename template_data = {'analysis_type': 'Protein Cost', 'kinetic_data': kin_data, 'results':results} templates.render_to_file('protein_optimization_results.html', template_data, output_filename) if __name__ == "__main__": Main()