Exemplo n.º 1
0
def main(fragment_file, lead_file):
    """Build fragment encodings from scratch and train on the lead molecules.

    The lead molecules are appended to the fragment-library molecules, the
    combined list is fragmented, encodings/decodings are derived from the
    fragments (the decodings are saved), and the lead molecules selected by
    ``used_mols`` are encoded, filtered with ``clean_good`` and used for
    training. The training history is written to ``History/history.npy``.

    Args:
        fragment_file: Path handed to ``read_file`` for the fragment library.
        lead_file: Path handed to ``read_file`` for the lead molecules.

    Raises:
        AssertionError: If fragmentation yields no fragments or no used
            molecules.
    """
    import os  # local import: the file's import block is not visible here

    fragment_mols = read_file(fragment_file)
    lead_mols = read_file(lead_file)
    n_lead = len(lead_mols)
    # The leads go at the END of the combined list; the slicing further down
    # relies on that ordering to recover them.
    fragment_mols += lead_mols

    logging.info("Read %s molecules for fragmentation library",
                 len(fragment_mols))
    logging.info("Read %s lead molecules", n_lead)

    fragments, used_mols = get_fragments(fragment_mols)
    logging.info("Num fragments: %s", len(fragments))
    logging.info("Total molecules used: %s", len(used_mols))
    assert len(fragments)
    assert len(used_mols)
    encodings, decodings = get_encodings(fragments)
    save_decodings(decodings)
    logging.info("Saved decodings")

    # Take the trailing ``n_lead`` entries. ``seq[-0:]`` is the WHOLE
    # sequence, so an empty lead set must be handled explicitly or the entire
    # fragment library would be treated as leads.
    if n_lead:
        lead_candidates = fragment_mols[-n_lead:]
        lead_selector = used_mols[-n_lead:]
    else:
        lead_candidates = fragment_mols[:0]
        lead_selector = used_mols[:0]
    # NOTE(review): used_mols is presumably a boolean mask (or index array)
    # aligned with fragment_mols -- confirm against get_fragments.
    lead_mols = np.asarray(lead_candidates)[lead_selector]

    X = encode_list(lead_mols, encodings)

    logging.info("Building models")
    # The models are sized from the per-molecule encoding shape.
    actor, critic = build_models(X.shape[1:])

    X = clean_good(X, decodings)

    logging.info("Training")
    history = train(X, actor, critic, decodings)
    logging.info("Saving")
    # np.save does not create missing directories; make sure a finished
    # training run is not lost to a FileNotFoundError.
    os.makedirs("History", exist_ok=True)
    np.save("History/history.npy", history)
Exemplo n.º 2
0
def main(fragment_file, lead_file):
    """Train on the lead molecules using previously stored encodings.

    The lead molecules are appended to the fragment-library molecules and the
    combined list is fragmented. Encodings and decodings are loaded with
    ``read_encodings``/``read_decodings`` instead of being rebuilt from the
    fragments. The lead molecules selected by ``used_mols`` are encoded and
    used for training; afterwards the history is written to
    ``History/history.npy`` and both models are saved under
    ``./saved_models``.

    Args:
        fragment_file: Path handed to ``read_file`` for the fragment library.
        lead_file: Path handed to ``read_file`` for the lead molecules.

    Returns:
        ``-1`` if no lead molecule was encoded (nothing to train on),
        otherwise ``None``.

    Raises:
        AssertionError: If fragmentation yields no fragments or no used
            molecules.
    """
    import os  # local import: the file's import block is not visible here

    fragment_mols = read_file(fragment_file)
    lead_mols = read_file(lead_file)
    n_lead = len(lead_mols)
    # The leads go at the END of the combined list; the slicing further down
    # relies on that ordering to recover them.
    fragment_mols += lead_mols

    logging.info("Read %s molecules for fragmentation library",
                 len(fragment_mols))
    logging.info("Read %s lead molecules", n_lead)

    fragments, used_mols = get_fragments(fragment_mols)
    logging.info("Num fragments: %s", len(fragments))
    logging.info("Total molecules used: %s", len(used_mols))
    assert len(fragments)
    assert len(used_mols)

    # Take the trailing ``n_lead`` entries. ``seq[-0:]`` is the WHOLE
    # sequence, so an empty lead set must be handled explicitly or the entire
    # fragment library would be treated as leads.
    if n_lead:
        lead_candidates = fragment_mols[-n_lead:]
        lead_selector = used_mols[-n_lead:]
    else:
        lead_candidates = fragment_mols[:0]
        lead_selector = used_mols[:0]
    # NOTE(review): used_mols is presumably a boolean mask (or index array)
    # aligned with fragment_mols -- confirm against get_fragments.
    lead_mols = np.asarray(lead_candidates)[lead_selector]

    # Encodings/decodings come from storage here; they are not regenerated
    # from ``fragments`` in this variant.
    decodings = read_decodings()
    encodings = read_encodings()
    logging.info("Loaded encodings and decodings")

    X = encode_list(lead_mols, encodings)
    # X is not modified after this point (clean_good() filtering is disabled
    # in this variant), so a single emptiness check is sufficient.
    if X.shape[0] == 0:
        return -1

    logging.info("Building models")
    # The models are sized from the per-molecule encoding shape.
    actor, critic = build_models(X.shape[1:])

    logging.info("Training")
    history = train(X, actor, critic, decodings)
    logging.info("Saving")
    # np.save does not create missing directories; make sure a finished
    # training run is not lost to a FileNotFoundError.
    os.makedirs("History", exist_ok=True)
    np.save("History/history.npy", history)
    actor.save('./saved_models/generation')
    critic.save('./saved_models/critic')
Exemplo n.º 3
0
def main(fragment_file, lead_file):
    """Build fragment encodings and train on the lead molecules (no logging).

    The lead molecules are appended to the fragment-library molecules, the
    combined list is fragmented, encodings/decodings are derived from the
    fragments (the decodings are saved), and the lead molecules selected by
    ``used_mols`` are encoded, filtered with ``clean_good`` and used for
    training. The training history is written to ``History/history.npy``.

    Args:
        fragment_file: Path handed to ``read_file`` for the fragment library.
        lead_file: Path handed to ``read_file`` for the lead molecules.
    """
    import os  # local import: the file's import block is not visible here

    fragment_mols = read_file(fragment_file)
    lead_mols = read_file(lead_file)
    n_lead = len(lead_mols)
    # The leads go at the END of the combined list; the slicing further down
    # relies on that ordering to recover them.
    fragment_mols += lead_mols

    fragments, used_mols = get_fragments(fragment_mols)
    encodings, decodings = get_encodings(fragments)
    save_decodings(decodings)

    # Take the trailing ``n_lead`` entries. ``seq[-0:]`` is the WHOLE
    # sequence, so an empty lead set must be handled explicitly or the entire
    # fragment library would be treated as leads.
    if n_lead:
        lead_candidates = fragment_mols[-n_lead:]
        lead_selector = used_mols[-n_lead:]
    else:
        lead_candidates = fragment_mols[:0]
        lead_selector = used_mols[:0]
    # NOTE(review): used_mols is presumably a boolean mask (or index array)
    # aligned with fragment_mols -- confirm against get_fragments.
    lead_mols = np.asarray(lead_candidates)[lead_selector]

    X = encode_list(lead_mols, encodings)

    # The models are sized from the per-molecule encoding shape.
    actor, critic = build_models(X.shape[1:])

    X = clean_good(X, decodings)

    history = train(X, actor, critic, decodings)

    # np.save does not create missing directories; make sure a finished
    # training run is not lost to a FileNotFoundError.
    os.makedirs("History", exist_ok=True)
    np.save("History/history.npy", history)
def main(epoch,gen):
    """Compare predicted pIC50 of initial vs. modified molecules for an epoch.

    When ``gen == 1`` the lead molecules from ``Data/trial.csv`` are first
    encoded with the stored encodings and passed to ``modify_mols``; the
    epoch is then forced to 1000. Afterwards ``Show_Epoch.main`` is called
    for ``./past outputs/out<epoch>.csv``, that file is read back, and pIC50
    values are predicted for every row whose modified SMILES parses.

    Two CSV files are written to ``./past outputs``: ``out_pIC<epoch>.csv``
    with all rows sorted by ``Delta`` (modified minus initial pIC50), and
    ``act_pIC<epoch>.csv`` with the rows that moved from below to above a
    pIC50 of 7. The function also prints summaries, shows a grid image of the
    (up to) five largest improvements and shows a histogram of both pIC50
    distributions.

    Args:
        epoch: Epoch identifier used to build the input/output file names.
            Ignored (replaced by 1000) when ``gen == 1``.
        gen: When equal to 1, molecules are (re)generated via
            ``modify_mols`` before the analysis.

    Returns:
        Always ``0``.

    Raises:
        AssertionError: In the ``gen == 1`` path, if fragmentation yields no
            fragments or no used molecules.
    """
    if gen == 1:
        lead_file = "Data/trial.csv"
        fragment_file = "Data/molecules.smi"
        fragment_mols = read_file(fragment_file)
        lead_mols = read_file(lead_file)
        n_lead = len(lead_mols)
        # The leads go at the END of the combined list; the slicing further
        # down relies on that ordering to recover them.
        fragment_mols += lead_mols

        logging.info("Read %s molecules for fragmentation library", len(fragment_mols))
        logging.info("Read %s lead molecules", n_lead)

        fragments, used_mols = get_fragments(fragment_mols)
        logging.info("Num fragments: %s", len(fragments))
        logging.info("Total molecules used: %s", len(used_mols))
        assert len(fragments)
        assert len(used_mols)

        # ``seq[-0:]`` is the WHOLE sequence, so an empty lead set must be
        # handled explicitly or the entire library would be treated as leads.
        if n_lead:
            lead_candidates = fragment_mols[-n_lead:]
            lead_selector = used_mols[-n_lead:]
        else:
            lead_candidates = fragment_mols[:0]
            lead_selector = used_mols[:0]
        # NOTE(review): used_mols is presumably a boolean mask (or index
        # array) aligned with fragment_mols -- confirm against get_fragments.
        lead_mols = np.asarray(lead_candidates)[lead_selector]

        decodings = read_decodings()
        encodings = read_encodings()
        logging.info("Loaded encodings and decodings")

        X = encode_list(lead_mols, encodings)
        modify_mols(X,decodings)
        # NOTE(review): the caller-supplied epoch is overridden here;
        # presumably modify_mols stores its results under epoch 1000 --
        # confirm.
        epoch=1000

    file_name = './past outputs/out'+str(epoch)+'.csv'
    logging.info("Collecting and storing all molecules in %s", file_name)
    # Show_Epoch.main is expected to produce ``file_name``, which is read
    # back right below.
    Show_Epoch.main(epoch,file_name)

    df = pd.read_csv(file_name,sep=";")

    # Column 0 holds the initial SMILES, column 1 the modified SMILES. Rows
    # whose modified SMILES cannot be parsed are dropped; their positions are
    # remembered so the SMILES columns can be re-attached to the right rows.
    kept_rows = []
    moli = []
    molm = []
    for row, (initial_smiles, modified_smiles) in enumerate(
            zip(df.iloc[:, 0], df.iloc[:, 1])):
        modified = Chem.MolFromSmiles(modified_smiles)
        if modified is not None:
            kept_rows.append(row)
            moli.append(Chem.MolFromSmiles(initial_smiles))
            molm.append(modified)

    logging.info("Predicting pIC50 values of the initial molecules")
    ini = np.asarray(get_pIC50(moli))
    logging.info("Predicting pIC50 values of the predicted molecules")
    mod = np.asarray(get_pIC50(molm))

    changes =  pd.DataFrame(np.transpose(np.asarray([ini,mod])),columns=['Initial_pIC','Modified_pIC'])
    # Assign by position, not by index: ``changes`` has a fresh 0..k-1 index,
    # so assigning the raw ``df`` columns would pair pIC50 values with the
    # wrong SMILES as soon as any row was dropped above.
    changes['Initial_mol'] = df.iloc[kept_rows, 0].to_numpy()
    changes['Modified_mol'] = df.iloc[kept_rows, 1].to_numpy()
    changes['Delta'] = changes['Modified_pIC'] - changes['Initial_pIC']
    changes.sort_values(by='Delta',ascending=False,inplace=True)

    # Molecules that crossed the pIC50 = 7 threshold from below to above.
    inact_to_act = changes.loc[(changes['Modified_pIC']>7) & (changes['Initial_pIC']<7),['Modified_pIC','Initial_pIC','Delta']].sort_values(by='Delta',ascending=False)

    changes.to_csv('./past outputs/out_pIC'+str(epoch)+'.csv',index=False)
    inact_to_act.to_csv('./past outputs/act_pIC'+str(epoch)+'.csv',index=False)

    print(inact_to_act.head())
    print(changes.head())

    from rdkit.Chem import Draw
    # Draw (initial, modified) pairs for the top rows; ``head(5)`` copes with
    # result sets smaller than five rows.
    top = changes.head(5)
    top_pairs = []
    for initial_smiles, modified_smiles in zip(top['Initial_mol'], top['Modified_mol']):
        top_pairs.append(Chem.MolFromSmiles(initial_smiles))
        top_pairs.append(Chem.MolFromSmiles(modified_smiles))
    if top_pairs:
        plot = Draw.MolsToGridImage(top_pairs, molsPerRow=2)
        plot.show()

    bins = np.linspace(4,10,14)
    plt.hist(changes['Initial_pIC'], bins, alpha=0.5, label='initial',color='blue')
    plt.hist(changes['Modified_pIC'], bins, alpha=0.5, label='modified',color='green')
    plt.legend(loc='upper right')
    plt.show()

    # Aggregate only the Delta column; summing the whole frame would also
    # "sum" (concatenate) the SMILES string columns.
    delta = changes['Delta']
    gains = delta[delta > 0]
    losses = delta[delta < 0]
    sp = gains.sum()
    sn = losses.sum()
    cp = gains.count()
    cn = losses.count()
    print('Sum of positive changes = {}\tNo. of +ves = {}\nSum of negative changes = {}\tNo. of -ves = {}'.format(sp,cp,sn,cn))
    return 0