def get_aa_probs(pdb_id, wildtype, mutation, position):
    """Run the CNN on a single residue of a protein and return its output.

    A batch for ``pdb_id`` is built from the feature files under
    ``FLAGS.input_features``; the entry at the 1-based ``position`` is then
    passed to ``CNNModel.predict``.

    Args:
        pdb_id: Identifier handed to ``create_batch`` to select the protein.
        wildtype: Wild-type residue; only used in the log line.
        mutation: Mutant residue; only used in the log line.
        position: 1-based residue position (anything ``int()`` accepts).

    Returns:
        Whatever ``CNNModel.predict`` returns for the single selected entry.
        The original author's note says the model restores trained parameters
        and applies a softmax; that is not verifiable from this block.

    Raises:
        IndexError: If ``position`` is below 1 or beyond the end of the batch.
    """
    # Positions are 1-based.  Without this guard a position of 0 (or a
    # negative one) would wrap around and silently select a residue from the
    # end of the batch instead of failing.
    residue_index = int(position) - 1
    if residue_index < 0:
        raise IndexError(
            "position must be >= 1, got {}".format(position))

    # NOTE(review): this instance is never used below.  It is kept only
    # because the side effects of its constructor are not visible from here.
    getIndividualProteins = GetIndividualProteins()
    file_path = FLAGS.input_features

    # Get protein from protein features and add to batch_factory
    batchFactory = create_batch(file_path, pdb_id)
    data_size = batchFactory.data_size()

    # Print PDB id and the mutation in the form wildtype:position:mutation
    print("PDB: {}, mutation: {}{}{}".format(pdb_id, wildtype, position,
                                             mutation))
    print("size of data: {}".format(data_size))

    # Pull the whole protein as one batch and pick the requested residue.
    batch, _ = batchFactory.next(int(data_size))
    aa_data = batch['data'][residue_index]

    # Build the model in a fresh graph so repeated calls do not pile up ops
    # in the process-wide default graph.
    with tf.Graph().as_default():
        model = CNNModel()
        # The session is scoped with ``with`` so that it is closed even when
        # prediction raises; the original never closed it.
        with tf.Session() as session:
            model.batch_size = 1
            logits = model.predict(session, [aa_data])

    # Locals (batch factory, helper instance) are released on return, so no
    # explicit ``del`` is needed.
    return logits
def main(hparams):
    """Train a GAN, or test a saved one, as selected by ``hparams.train``.

    Args:
        hparams: Parsed arguments.  This function reads ``save_dir``,
            ``cnn_model_dir``, ``batch_size``, ``data_dir``, ``valid``,
            ``train`` and ``load_dir``; the whole object is also forwarded to
            ``pl.Trainer.from_argparse_args`` and to ``GAN``.
    """
    # Keep only the single best checkpoint, ranked by lowest validation loss.
    best_checkpoint = ModelCheckpoint(
        filepath=hparams.save_dir,
        save_top_k=1,
        verbose=True,
        monitor='val_loss',
        mode='min',
    )

    # Load the CNN model and take the two sub-modules used below.
    pretrained_cnn = CNNModel.load_from_checkpoint(hparams.cnn_model_dir)
    feature_extractor = pretrained_cnn.cnn
    classifier = pretrained_cnn.classification

    gan_data = GANDataModule(
        hparams.batch_size,
        feature_extractor,
        hparams.data_dir,
        hparams.valid,
    )

    tb_logger = TensorBoardLogger(save_dir="./lightning_logs", name='gan_logs')
    trainer = pl.Trainer.from_argparse_args(
        hparams,
        checkpoint_callback=best_checkpoint,
        logger=tb_logger,
        progress_bar_refresh_rate=50,
    )

    # The explicit comparison with True is kept on purpose: truthy values
    # that are not equal to True must still take the test path.
    training_requested = hparams.train == True  # noqa: E712
    if not training_requested:
        restored_gan = GAN.load_from_checkpoint(
            checkpoint_path=hparams.load_dir)
        trainer.test(restored_gan, datamodule=gan_data)
        return

    gan = GAN(classifier, hparams)
    trainer.fit(gan, datamodule=gan_data)
def setUpClass(cls):
    """Build the shared fixtures: the six data splits and one ``CNNModel``.

    The splits returned by ``get_data_4d()`` are stored on ``cls`` and wrapped
    in a ``DataHolder``; the model is built from a default ``Config`` and that
    holder and stored as ``cls.model``.
    """
    (train_x, train_y,
     valid_x, valid_y,
     test_x, test_y) = get_data_4d()

    # Expose every split on the class so individual tests can reach them.
    cls.train_dataset = train_x
    cls.train_labels = train_y
    cls.valid_dataset = valid_x
    cls.valid_labels = valid_y
    cls.test_dataset = test_x
    cls.test_labels = test_y

    holder = DataHolder(train_x, train_y,
                        valid_x, valid_y,
                        test_x, test_y)
    cls.model = CNNModel(Config(), holder)
def predict_ddg(input_dir_features, pdb_id, mutations, data_set_name): mutation_dataframe = [] chain_id = None if len(pdb_id) == 5: chain_id = pdb_id[4] pdb_id = pdb_id[:4] batch_factory = prepare_batch(input_dir_features=input_dir_features, pdb_id=pdb_id) print(batch_factory.data_size()) batch, _ = batch_factory.next(batch_factory.data_size()) chain_ids = batch['chain_ids'] print("Chain_id: {}".format(chain_ids)) # Extract index of first residue from PDB - and attempt to use this as # offset into model mmcif_parser = Bio.PDB.MMCIFParser() cif_path = os.path.join(FLAGS.pdb_dir, pdb_id.lower() + ".cif") if not os.path.exists(cif_path): pdbl = PDBList() pdbl.retrieve_pdb_file(pdb_id, pdir="./data/PDB/") structure = mmcif_parser.get_structure(pdb_id, cif_path) # Loop through all rows for _, mutation in mutations.iterrows(): #mutation[['PDBFileID','chain','wildtype', 'mutation', 'position']] wt, res_id, mutant, chain = mutation[[ 'wildtype', 'position', 'mutation', 'chain' ]] print(wt, res_id, mutant) icode = ' ' if res_id.isdigit(): res_index = int(res_id) else: res_index = re.match("\d+", res_id).group(0) icode = res_id.replace(res_index, "") res_index = int(res_index) try: # Extract residue in PDB pdb_res = structure[0][chain][(' ', res_index, icode)] except KeyError: raise MissingResidueError("Missing residue: " + str((' ', res_index, icode)) + ". 
Perhaps a removed HETATM?") # Check that PDB and mutation record agree on wt assert (Bio.PDB.Polypeptide.three_to_one(pdb_res.get_resname()) == wt) chain_res_index = structure[0][chain].get_list().index(pdb_res) try: mutant_index = Bio.PDB.Polypeptide.one_to_index(mutant) wt_index = Bio.PDB.Polypeptide.one_to_index(wt) with tf.Graph().as_default(): model = CNNModel() logits = model.predict(tf.Session(), [batch['data'][res_index - 1]])[0][0] # wildtype and mutant probability: print("Wildtype prob: {} and mutation prob: {}.".format( logits[wt_index], logits[mutant_index])) mutation['w_prob'] = logits[wt_index] mutation['m_prob'] = logits[mutant_index] print(mutation) # Add unfolded chain mutations mutation['m_u_prob'] = unfolded_prob(data_set_name, mutant) mutation['w_u_prob'] = unfolded_prob(data_set_name, wt) print(mutation) mutation_dataframe.append(pd.DataFrame(mutation).transpose()) except Exception as e: print(e) continue ''' Her er det svært uden chain_ids ''' #model_chain_index_offset = np.nonzero(chain_ids==chain_id)[0][0] #model_res_index = model_chain_index_offset + chain_res_index if len(mutation_dataframe) > 0: return pd.concat(mutation_dataframe) return [] # TODO: forklar! ''' model_sequence = "" for index in np.argmax(batch["model_output"], axis=1): if index < 20: model_sequence += Bio.PDB.Polypeptide.index_to_one(index) else: model_sequence += 'X' #assert(model_sequence[model_res_index] == wt) wt_aa_index = Bio.PDB.Polypeptide.one_to_index(wt) mutant_aa_index = Bio.PDB.Polypeptide.one_to_index(mutant) ''' ''' wt_aa_index = Bio.PDB.Polypeptide.one_to_index(wt)
number_of_exp = 10 DECAY = np.random.random_sample([number_of_exp]) DECAY = np.append(DECAY, 0.96) number_of_exp += 1 DECAY.sort() results = [] duration = [] info = [] for i, de in enumerate(DECAY): print("\n ({0} of {1})".format(i + 1, number_of_exp)) my_config = Config(tunning=True, decay_rate=de) attrs = vars(my_config) config_info = ["%s: %s" % item for item in attrs.items()] info.append(config_info) my_model = CNNModel(my_config, my_dataholder) train_model(my_model, my_dataholder, 10001, 1000, False) current_dur = get_time(train_model, 10001) score = check_valid(my_model) results.append(score) duration.append(current_dur) DECAY = list(DECAY) best_result = max(list(zip(results, DECAY, duration, info))) result_string = """In an experiment with {0} decay rate values the best one is {1} with valid accuracy = {2}. \nThe training takes {3:.2f} seconds using the following params: \n{4}""".format(number_of_exp, best_result[1], best_result[0], best_result[2],
import flask import werkzeug import time import os from flask import Flask, flash, request, redirect, url_for from werkzeug.utils import secure_filename from Face_dectection import faceDetection from CNN import CNNModel from ImageProcess import ImageProcess from SVM import SVM import cv2 cnn = CNNModel() svm = SVM() app = flask.Flask(__name__) app.secret_key="key" @app.route('/', methods = ['GET', 'POST']) def requestCheck(): if request.method == 'POST': # check if the post request has the file part if 'image' not in request.files: return "No file part" file = request.files['image'] # if user does not select file, browser also # submit an empty part without filename if file.filename == '': return 'No selected file' if file and file.filename: filename = secure_filename(file.filename)