Exemplo n.º 1
0
def main():
    """Analyze how the training pseudoisomers decompose into groups.

    Reads the group definitions and the pseudoisomer table from
    ``../data/thermodynamics``, decomposes every pseudoisomer marked for
    training, gathers per-group and pairwise-group statistics, and renders
    them with the ``analyze_training_groups.html`` template into
    ``../res/analyze_training_groups.html``.
    """
    # Close the groups file once it has been handed to the parser instead of
    # leaking the handle for the rest of the run.
    # NOTE(review): assumes FromGroupsFile consumes the file eagerly - confirm.
    with open('../data/thermodynamics/groups_species.csv', 'r') as groups_file:
        gdata = groups_data.GroupsData.FromGroupsFile(groups_file)
    decomposer = group_decomposition.GroupDecomposer(gdata)
    pdata = pseudoisomers_data.PseudoisomersData.FromFile(
        '../data/thermodynamics/dG0.csv')

    dstats = DecompositionStats(gdata)

    for pisomer in pdata:
        # Only training examples contribute to the statistics.
        if not pisomer.Train():
            continue

        # Build the molecule once and reuse it for the decomposition.
        mol = pisomer.Mol()
        if not mol:
            logging.warning('Cannot get a Mol for pseudoisomer %s', pisomer)
            continue

        dstats.AddDecomposition(pisomer, decomposer.Decompose(mol))

    # Named group_map (not "map") so the builtin is not shadowed.
    group_map = dstats.GetGroupMap()

    # One entry per example count from 0 to 20 inclusive.
    rare_groups = []
    for count in range(21):
        groups = group_map.GetGroupsByNumExamples(count)
        group_dicts = [{'group': g, 'pseudoisomers': ps} for g, ps in groups]
        rare_groups.append({'count': count, 'groups': group_dicts})

    # The image is written under ../res/ while the template receives the
    # path without that prefix.
    image_name = 'images/groups_histo.png'
    group_map.PlotHistogram('../res/' + image_name)

    most_common_groups = group_map.GetMostCommonGroups()
    mcg_dicts = [{'count': c, 'group': g} for g, c in most_common_groups]

    pairwise_map = dstats.GetPairwiseGroupMap()
    frequent_pairs = pairwise_map.GetMostFrequentCooccurences()
    freq_pairs_dicts = [{'count': c, 'groups': [ga, gb]}
                        for ga, gb, c in frequent_pairs]
    ld_pairs = pairwise_map.GetLinearlyDependentPairs()
    ld_pairs_dicts = [{'ratio': r,
                       'pisomers': p,
                       'count': len(p),
                       'groups': [ga, gb]}
                      for ga, gb, r, p in ld_pairs]

    template_data = {
        'rare_groups_by_count': rare_groups,
        'most_common_groups': mcg_dicts,
        'histo_image_name': image_name,
        'frequent_pairs': freq_pairs_dicts,
        'ld_pairs': ld_pairs_dicts,
        'groups_data': gdata,
        'group_map': group_map
    }
    templates.render_to_file('analyze_training_groups.html', template_data,
                             '../res/analyze_training_groups.html')
Exemplo n.º 2
0
def Main():
    """Run the MTDF analysis for every pathway in the input file.

    Parses command-line options, selects the requested thermodynamic
    estimator, optimizes each non-skipped pathway, writes the per-pathway
    graphs under ``<output_dir>/pathway_graphs/`` and renders a summary to
    ``<output_dir>/results.html``.

    Exits with status 1 when the input file does not exist.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()

    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        # logging.fatal only emits a CRITICAL record; it does not stop the
        # program, so exit explicitly instead of running on without input.
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)
        sys.exit(1)

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print('Will read pathway definitions from %s' % input_filename)

    thermo = estimators[options.thermodynamics_source]
    print("Using the thermodynamic estimations of: " + thermo.name)
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)

    # Kegg instance handed to each pathway to build its stoichiometric model.
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)

    print('Executing MTDF analysis')
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    results = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print('Skipping pathway %s' % (pathway_data.name,))
            continue

        print('Analyzing pathway %s' % (pathway_data.name,))

        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()

        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)
        result = mtdf_opt.FindMTDF(model_bounds)

        print('Optimization status %s' % (result.status,))

        result.WriteAllGraphs(pathgraph_dir)
        results.append(result)

        mtdf = result.opt_val
        print('\tMTDF for %s = %.2g' % (pathway_data.name, mtdf))

    output_filename = path.join(out_dir, 'results.html')
    print('Writing output to %s' % (output_filename,))
    template_data = {'analysis_type': 'MTDF',
                     'results': results}
    templates.render_to_file('pathway_optimization_results.html',
                             template_data,
                             output_filename)
Exemplo n.º 3
0
def Main():
    """Optimize the MTDF of each pathway definition and report the results.

    Steps: parse the command line, pick the thermodynamic estimator named by
    ``options.thermodynamics_source``, run the MTDF optimizer on every
    pathway that is not flagged ``skip``, write each result's graphs into
    ``<output_dir>/pathway_graphs/`` and render all results into
    ``<output_dir>/results.html``.

    Exits with status 1 when the input file does not exist.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    estimators = thermodynamic_estimators.LoadAllEstimators()

    input_filename = path.abspath(options.input_filename)
    if not path.exists(input_filename):
        # logging.fatal is just CRITICAL-level logging and returns normally;
        # terminate here so a missing input is not processed further.
        logging.fatal('Input filename %s doesn\'t exist' % input_filename)
        sys.exit(1)

    # Each message is formatted into one string so the print(...) form
    # behaves identically on Python 2 and Python 3.
    print('Will read pathway definitions from %s' % input_filename)

    thermo = estimators[options.thermodynamics_source]
    print("Using the thermodynamic estimations of: " + thermo.name)
    thermo_data = thermodynamic_data.WrapperThermoData(thermo)

    # Kegg instance passed to GetStoichiometricModel for every pathway.
    kegg_instance = kegg.Kegg.getInstance()

    # Create output directories
    out_dir = options.output_dir
    if not path.exists(out_dir):
        util._mkdir(out_dir)
    pathgraph_dir = path.join(out_dir, 'pathway_graphs/')
    util._mkdir(pathgraph_dir)

    print('Executing MTDF analysis')
    pathway_iterator = pathway.KeggPathwayIterator.FromFilename(input_filename)
    results = []
    for pathway_data in pathway_iterator:
        if pathway_data.skip:
            print('Skipping pathway %s' % (pathway_data.name,))
            continue

        print('Analyzing pathway %s' % (pathway_data.name,))

        model = pathway_data.GetStoichiometricModel(kegg_instance)
        model_bounds = pathway_data.GetBounds()

        mtdf_opt = mtdf_optimizer.MTDFOptimizer(model, thermo_data)
        result = mtdf_opt.FindMTDF(model_bounds)

        print('Optimization status %s' % (result.status,))

        result.WriteAllGraphs(pathgraph_dir)
        results.append(result)

        mtdf = result.opt_val
        print('\tMTDF for %s = %.2g' % (pathway_data.name, mtdf))

    output_filename = path.join(out_dir, 'results.html')
    print('Writing output to %s' % (output_filename,))
    template_data = {'analysis_type': 'MTDF', 'results': results}
    templates.render_to_file('pathway_optimization_results.html',
                             template_data, output_filename)
Exemplo n.º 4
0
def main():
    """Collect group statistics over the training pseudoisomers.

    Loads group definitions and pseudoisomers from ``../data/thermodynamics``,
    decomposes each pseudoisomer marked for training, and renders the
    resulting group counts, co-occurrence pairs and linearly dependent pairs
    into ``../res/analyze_training_groups.html``.
    """
    # Use a context manager so the groups file is closed after parsing; the
    # original left the handle open.
    # NOTE(review): assumes FromGroupsFile consumes the file eagerly - confirm.
    with open('../data/thermodynamics/groups_species.csv', 'r') as groups_file:
        gdata = groups_data.GroupsData.FromGroupsFile(groups_file)
    decomposer = group_decomposition.GroupDecomposer(gdata)
    pdata = pseudoisomers_data.PseudoisomersData.FromFile(
        '../data/thermodynamics/dG0.csv')

    dstats = DecompositionStats(gdata)

    for pisomer in pdata:
        # Pseudoisomers not marked for training are ignored.
        if not pisomer.Train():
            continue

        # Mol() is called once; its result feeds the decomposer directly.
        mol = pisomer.Mol()
        if not mol:
            logging.warning('Cannot get a Mol for pseudoisomer %s', pisomer)
            continue

        dstats.AddDecomposition(pisomer, decomposer.Decompose(mol))

    # Avoid the name "map", which would shadow the builtin.
    group_map = dstats.GetGroupMap()

    # Groups bucketed by number of examples, for counts 0 through 20.
    rare_groups = []
    for count in range(21):
        groups = group_map.GetGroupsByNumExamples(count)
        group_dicts = [{'group': g, 'pseudoisomers': ps} for g, ps in groups]
        rare_groups.append({'count': count, 'groups': group_dicts})

    # Plot goes under ../res/; the template gets the path without the prefix.
    image_name = 'images/groups_histo.png'
    group_map.PlotHistogram('../res/' + image_name)

    most_common_groups = group_map.GetMostCommonGroups()
    mcg_dicts = [{'count': c, 'group': g} for g, c in most_common_groups]

    pairwise_map = dstats.GetPairwiseGroupMap()
    frequent_pairs = pairwise_map.GetMostFrequentCooccurences()
    freq_pairs_dicts = [{'count': c, 'groups': [ga, gb]}
                        for ga, gb, c in frequent_pairs]
    ld_pairs = pairwise_map.GetLinearlyDependentPairs()
    ld_pairs_dicts = [{'ratio': r,
                       'pisomers': p,
                       'count': len(p),
                       'groups': [ga, gb]}
                      for ga, gb, r, p in ld_pairs]

    template_data = {'rare_groups_by_count': rare_groups,
                     'most_common_groups': mcg_dicts,
                     'histo_image_name': image_name,
                     'frequent_pairs': freq_pairs_dicts,
                     'ld_pairs': ld_pairs_dicts,
                     'groups_data': gdata,
                     'group_map': group_map}
    templates.render_to_file('analyze_training_groups.html',
                             template_data,
                             '../res/analyze_training_groups.html')
Exemplo n.º 5
0
def main():
    """Render the pseudoisomer ground-truth page, split by data-set role.

    Every pseudoisomer from ``../data/thermodynamics/dG0.csv`` is decomposed
    (when a molecule can be built for it) and placed in the Train, Test or
    Skip collection; the three collections are rendered into
    ``../res/pseudoisomers_ground_truth.html``.
    """
    pdata = pseudoisomers_data.PseudoisomersData.FromFile(
        '../data/thermodynamics/dG0.csv')
    # Close the groups file after the decomposer has been built rather than
    # leaking the handle.
    # NOTE(review): assumes FromGroupsFile consumes the file eagerly - confirm.
    with open('../data/thermodynamics/groups_species.csv', 'r') as groups_file:
        decomposer = group_decomposition.GroupDecomposer.FromGroupsFile(
            groups_file)

    train_pseudoisomers = []
    test_pseudoisomers = []
    skip_pseudoisomers = []
    for pisomer in pdata:
        # Build the molecule once; a falsy result leaves decomposition None.
        mol = pisomer.Mol()
        decomposition = None
        if mol:
            decomposition = decomposer.Decompose(mol)

            if decomposition.unassigned_nodes:
                logging.warning('%s didn\'t decompose', pisomer)

        pisomer_dict = {'data': pisomer, 'decomposition': decomposition}
        # The first matching role wins; unmarked entries are only logged.
        if pisomer.Train():
            train_pseudoisomers.append(pisomer_dict)
        elif pisomer.Test():
            test_pseudoisomers.append(pisomer_dict)
        elif pisomer.Skip():
            skip_pseudoisomers.append(pisomer_dict)
        else:
            logging.warning('Found a pseudoisomer that is not marked as'
                            ' test, train, or skip.')

    template_data = {
        'pseudoisomer_collections': [
            {'name': 'Train', 'pseudoisomers': train_pseudoisomers},
            {'name': 'Test', 'pseudoisomers': test_pseudoisomers},
            {'name': 'Skip', 'pseudoisomers': skip_pseudoisomers},
        ]
    }
    templates.render_to_file('pseudoisomers_ground_truth.html', template_data,
                             '../res/pseudoisomers_ground_truth.html')
Exemplo n.º 6
0
def main():
    """Build the ground-truth report of pseudoisomers by train/test/skip.

    Loads the pseudoisomers from ``../data/thermodynamics/dG0.csv``, attempts
    a group decomposition for each one that yields a molecule, sorts them
    into Train, Test and Skip lists, and renders those lists into
    ``../res/pseudoisomers_ground_truth.html``.
    """
    pdata = pseudoisomers_data.PseudoisomersData.FromFile(
        '../data/thermodynamics/dG0.csv')
    # The context manager closes the groups file; the original never did.
    # NOTE(review): assumes FromGroupsFile consumes the file eagerly - confirm.
    with open('../data/thermodynamics/groups_species.csv', 'r') as groups_file:
        decomposer = group_decomposition.GroupDecomposer.FromGroupsFile(
            groups_file)

    train_pseudoisomers = []
    test_pseudoisomers = []
    skip_pseudoisomers = []
    for pisomer in pdata:
        # Reuse the molecule instead of calling Mol() a second time.
        mol = pisomer.Mol()
        decomposition = None
        if mol:
            decomposition = decomposer.Decompose(mol)

            if decomposition.unassigned_nodes:
                logging.warning('%s didn\'t decompose', pisomer)

        pisomer_dict = {'data': pisomer,
                        'decomposition': decomposition}
        # Checked in Train, Test, Skip order; anything else is only logged.
        if pisomer.Train():
            train_pseudoisomers.append(pisomer_dict)
        elif pisomer.Test():
            test_pseudoisomers.append(pisomer_dict)
        elif pisomer.Skip():
            skip_pseudoisomers.append(pisomer_dict)
        else:
            logging.warning('Found a pseudoisomer that is not marked as'
                            ' test, train, or skip.')

    template_data = {'pseudoisomer_collections':
                     [{'name': 'Train',
                       'pseudoisomers': train_pseudoisomers},
                      {'name': 'Test',
                       'pseudoisomers': test_pseudoisomers},
                      {'name': 'Skip',
                       'pseudoisomers': skip_pseudoisomers}
                     ]}
    templates.render_to_file('pseudoisomers_ground_truth.html',
                             template_data,
                             '../res/pseudoisomers_ground_truth.html')
Exemplo n.º 7
0
            if cost is not None:
                print '\t*Protein Cost for', pathway_data.name, '= %.2g' % cost
            if optima:
                print 'Found', len(optima), 'near-optima for', pathway_data.name 
                optima = np.array(optima)
                mean_opt = np.mean(optima)
                mean_diff = np.mean(np.abs(optima - mean_opt))
                print 'Mean optimum', mean_opt
                print 'Mean diff from mean', mean_diff
                print 'Percent diff %s%%' % (100*mean_diff / mean_opt)
                print 'StdDev opt', np.std(optima)
        else:
            # Use default conditions to show the failure
            res = opt.FindOptimum(model_bounds)
            result_dict['result'] = res            
        results.append(result_dict)
    
    output_filename = path.join(out_dir, 'results.html')
    print 'Writing output to', output_filename
    template_data = {'analysis_type': 'Protein Cost',
                     'kinetic_data': kin_data,
                     'results':results}
    templates.render_to_file('protein_optimization_results.html',
                             template_data,
                             output_filename)
    

# Script entry point: call Main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    Main()