def main():
    """Command line entry point: configure CCMpred, optionally optimize, write results.

    Parses the command line, reads the alignment, and either writes an
    alternative score (OMES or mutual information) and exits, or sets up
    the model, optionally minimizes the chosen objective function, and
    writes the contact matrix. The process always terminates through
    ``sys.exit``; a negative optimizer return code is negated and used as
    the exit status.
    """
    args = parse_args()

    if args.logo:
        ccmpred.logo.logo()

    # Tell OpenMP how many threads it may use.
    os.environ['OMP_NUM_THREADS'] = str(args.num_threads)
    omp_threads = os.environ["OMP_NUM_THREADS"]
    print("Using {0} threads for OMP parallelization.".format(omp_threads))

    model = CCMpred(args.alnfile, args.matfile)

    # ---- Setup ----------------------------------------------------------
    # Alignment -> sequence weights -> amino acid counts and frequencies.
    model.read_alignment(args.aln_format, args.max_gap_ratio)
    model.compute_sequence_weights(
        args.weight,
        args.wt_ignore_gaps,
        args.wt_cutoff,
    )
    model.compute_frequencies(
        args.pseudocounts,
        args.pseudocount_single,
        args.pseudocount_pair,
    )

    # Alternative scores short-circuit the run; OMES takes precedence
    # over mutual information when both are requested.
    if args.omes or args.mi:
        if args.omes:
            model.compute_omes(args.omes_fodoraldrich)
        else:
            model.compute_mutual_info(args.mi_normalized, args.mi_pseudocounts)
        model.write_matrix()
        sys.exit(0)

    # Regularization settings come straight from the command line.
    model.specify_regularization(
        args.lambda_single,
        args.lambda_pair_factor,
        reg_type=args.reg_type,
        scaling=args.scaling,
    )

    # Single and pair potentials start either from the regularization
    # priors or from the given raw file.
    model.intialise_potentials(args.initrawfile)

    # ---- Optimization ---------------------------------------------------
    if not args.optimize:
        print(
            "\nDo not optimize but load couplings from binary raw file {0}\n".format(
                args.initrawfile
            )
        )
    else:
        # The objective function is built first, then the optimizer.
        model.minimize(
            OBJ_FUNC[args.objfun](args, model),
            ALGORITHMS[args.algorithm](args, model),
        )

    # ---- Post processing ------------------------------------------------
    # Contact score (frobenius norm), possibly with recentered potentials.
    # TODO: other scores can be added ...
    model.compute_contact_matrix(
        recenter_potentials=args.centering_potentials,
        frob=args.frob,
    )

    # Bias correction applied to the contact score.
    model.compute_correction(
        apc=args.apc,
        entropy_correction=args.entropy_correction,
    )

    # Write the (corrected) contact matrices.
    model.write_matrix()

    if args.cd_alnfile and hasattr(model.f, 'msa_sampled'):
        model.write_sampled_alignment(args.cd_alnfile)

    if args.out_binary_raw_file:
        model.write_binary_raw(args.out_binary_raw_file)

    # A negative optimizer code becomes a positive process exit status.
    exitcode = 0
    if args.optimize and model.algret['code'] < 0:
        exitcode = -model.algret['code']

    sys.exit(exitcode)
def main():
    """Command line entry point for a (possibly constrained) CCMpred run.

    Parses the command line, registers the file paths on a fresh
    ``CCMpred`` instance, reads the alignment, and either writes an
    alternative score (OMES or mutual information) and exits, or sets up
    the model, optionally minimizes, and writes the requested outputs.
    The process always terminates through ``sys.exit``; a negative
    optimizer return code is negated and used as the exit status.
    """
    args = parse_args()

    # Logo is optional.
    if args.logo:
        ccmpred.logo.logo()

    # Tell OpenMP how many threads it may use.
    os.environ['OMP_NUM_THREADS'] = str(args.num_threads)
    omp_threads = os.environ["OMP_NUM_THREADS"]
    print("Using {0} threads for OMP parallelization.".format(omp_threads))

    model = CCMpred()

    # Register every file path the run may need.
    model.set_alignment_file(args.alnfile)
    model.set_matfile(args.matfile)
    model.set_pdb_file(args.pdbfile)
    model.set_initraw_file(args.initrawfile)

    # Read the alignment, possibly dropping heavily gapped sequences
    # and positions.
    model.read_alignment(args.aln_format, args.max_gap_pos, args.max_gap_seq)

    # Sequence weights reduce sampling bias.
    model.compute_sequence_weights(args.weight, args.wt_cutoff)

    # Amino acid counts and frequencies, with pseudo counts for
    # non-observed amino acids.
    model.compute_frequencies(
        args.pseudocounts,
        args.pseudocount_single,
        args.pseudocount_pair,
    )

    # A pdb file is only read when one was supplied (constrained run).
    if args.pdbfile:
        model.read_pdb(args.contact_threshold)

    # Alternative scores short-circuit the run; OMES takes precedence
    # over mutual information when both are requested.
    if args.omes or args.mi:
        if args.omes:
            model.compute_omes(args.omes_fodoraldrich)
        else:
            model.compute_mutual_info(args.mi_normalized, args.mi_pseudocounts)
        model.write_matrix()
        sys.exit(0)

    # L2 regularization with fixed type and scaling.
    model.specify_regularization(
        args.lambda_single,
        args.lambda_pair_factor,
        reg_type="L2",
        scaling="L",
        single_prior=args.single_prior,
    )

    # Single and pair potentials start either from the regularization
    # priors or from the initrawfile (accounting for removal of many
    # gapped positions, if applicable).
    model.intialise_potentials()

    # Objective (pLL or CD/PCD) minimized with LBFGS, CG, GD or ADAM.
    if not args.optimize:
        print(
            "\nDo not optimize but use model parameters provided by {0}\n".format(
                args.initrawfile
            )
        )
    else:
        # The log object has to exist before minimization starts.
        model.initiate_logging(args.plot_opt_progress)
        model.minimize(args)

    # ---- Post processing ------------------------------------------------
    # Contact matrices are only produced when a matfile was requested.
    if args.matfile:
        # Contact score (frobenius norm) on recentered potentials.
        # TODO: other scores can be added ...
        model.compute_contact_matrix(recenter_potentials=True, frob=True)

        # Corrected contact maps (removing entropy/phylogenetic biases).
        # TODO: other corrections can be added ...
        model.compute_correction(
            apc_file=args.apc_file,
            entropy_correction_file=args.entropy_correction_file,
        )

        model.write_matrix()

    # Model parameters in binary format, if requested.
    if args.out_binary_raw_file:
        model.write_binary_raw(args.out_binary_raw_file)

    # A negative optimizer code becomes a positive process exit status.
    exitcode = 0
    if args.optimize and model.algret['code'] < 0:
        exitcode = -model.algret['code']

    sys.exit(exitcode)
def main(alnfile, outfile, pair_mat, num_threads=1):
    """Run a fixed-configuration CCMpred optimization on one alignment.

    Unlike a command-line driven entry point, this variant takes its
    inputs as arguments and hard-codes the remaining settings.

    Args:
        alnfile: Path of the alignment file handed to
            ``CCMpred.set_alignment_file``.
        outfile: Output path. The raw matrix file is ``outfile + '.ccmraw'``
            and ``outfile`` itself is passed as ``apc_file`` to
            ``CCMpred.compute_correction``.
        pair_mat: Passed through as the third positional argument of
            ``CCMpred.specify_regularization``.
        num_threads: Value written to the ``OMP_NUM_THREADS`` environment
            variable. Previously read from an undefined ``opt`` object.

    This function does not return: it terminates the process through
    ``sys.exit``. A negative optimizer return code is negated and used as
    the exit status; otherwise the status is 0.
    """
    # print logo
    ccmpred.logo.logo()

    # set OMP environment variable for number of threads
    # (the original read ``opt.num_threads`` although no ``opt`` exists here)
    os.environ['OMP_NUM_THREADS'] = str(num_threads)
    print("Using {0} threads for OMP parallelization.".format(
        os.environ["OMP_NUM_THREADS"]))

    # instantiate CCMpred
    ccm = CCMpred()

    # specify possible file paths
    ccm.set_alignment_file(alnfile)
    # was ``oufile`` (typo), which raised NameError
    ccm.set_matfile(outfile + '.ccmraw')

    # read alignment and possibly remove gapped sequences and positions
    ccm.read_alignment()

    # compute sequence weights (in order to reduce sampling bias)
    ccm.compute_sequence_weights("simple", 0.8)

    # compute amino acid counts and frequencies adding pseudo counts
    # for non-observed amino acids
    ccm.compute_frequencies("uniform_pseudocounts")

    # setup L2 regularization
    ccm.specify_regularization(10, 0.2, pair_mat)

    # initialise single and pair potentials either:
    #   - according to regularization priors
    #   - from initrawfile (accounting for removal of many gapped
    #     positions, if applicable)
    ccm.intialise_potentials()

    # optimize objective function (pLL or CD/PCD) with optimization
    # algorithm (LBFGS, CG, GD or ADAM)

    # initialize log object
    ccm.initiate_logging()

    # minimize objective function with corresponding optimization algorithm
    ccm.minimize()

    ### Post Processing

    # Compute contact score (frobenius norm) by recentering potentials
    # TODO: other scores can be added ...
    ccm.compute_contact_matrix(recenter_potentials=True, frob=True)

    # compute corrected contact maps (removing entropy/phylogenetic biases)
    # TODO: other corrections can be added ...
    ccm.compute_correction(
        apc_file=outfile,
        entropy_correction_file=None,
    )

    # write (corrected) contact matrices to files
    ccm.write_matrix()

    # a negative optimizer code becomes a positive process exit status
    exitcode = 0
    if ccm.algret['code'] < 0:
        exitcode = -ccm.algret['code']
    sys.exit(exitcode)