Пример #1
0
def main():
    """Train a 10x10 SOM on two synthetic Gaussian clouds and save its plots.

    Two groups of 500 samples with 36 features each are drawn from normal
    distributions centred at -0.25 and +0.25 (scale 0.5), stacked into one
    data matrix and used to fit the SOM.  The error history, the labelled
    point map, the density map of class 0 and the distance map are written
    below ``images/``.
    """
    samples_per_class = 500
    cloud_shape = (samples_per_class, 36)

    # Draw the negative-mean cloud first, then the positive-mean one, and
    # stack them row-wise into a single (1000, 36) matrix.
    low_cloud = np.random.normal(loc=-.25, scale=0.5, size=cloud_shape)
    high_cloud = np.random.normal(loc=.25, scale=0.5, size=cloud_shape)
    samples = np.concatenate((low_cloud, high_cloud), axis=0)

    # Dummy class labels: 0 for the first cloud, 1 for the second.
    labels = np.repeat([0, 1], samples_per_class)

    # Initialize the SOM, fit it for 10000 epochs and save the error every
    # 100 steps so the training error history can be plotted afterwards.
    som = SOM(10, 10)
    som.fit(samples, 10000, save_e=True, interval=100)
    som.plot_error_history(filename='images/som_error.png')

    # Visualize the learned representation together with the class labels.
    som.plot_point_map(
        samples,
        labels,
        ['Class 0', 'Class 1'],
        filename='images/som.png',
    )
    som.plot_class_density(
        samples,
        labels,
        t=0,
        name='Class 0',
        filename='images/class_0.png',
    )

    # Distance map of the trained SOM.
    som.plot_distance_map(filename='images/distance_map.png')
Пример #2
0
def main():
    """Train a SOM on virtual peptide libraries and map reference peptides on it.

    Three virtual sequence libraries (helices, random, AMP n-grams) are
    generated, described with 'pepcats' cross-correlation descriptors and
    used to fit a 12x12 SOM.  The sequences returned by ``load_AMPvsTM`` are
    described the same way and projected onto the trained map.  All plots are
    written to the current working directory.
    """
    # generate some virtual peptide sequences
    libnum = 1000  # 1000 sequences per sublibrary
    h = Helices(seqnum=libnum)
    r = Random(seqnum=libnum)
    n = AMPngrams(seqnum=libnum, n_min=4)
    h.generate_sequences()
    r.generate_sequences(proba='AMP')
    n.generate_sequences()

    # calculate molecular descriptors for the peptides
    d = PeptideDescriptor(seqs=np.hstack(
        (h.sequences, r.sequences, n.sequences)),
                          scalename='pepcats')
    d.calculate_crosscorr(window=7)

    # train a som on the descriptors and print / plot the training error
    som = SOM(x=12, y=12)
    som.fit(data=d.descriptor, epochs=100000, decay='hill')
    print("Fit error: %.4f" % som.error)
    som.plot_error_history(filename="som_error.png")

    # load known antimicrobial peptides (AMPs) and transmembrane sequences
    dataset = load_AMPvsTM()
    d2 = PeptideDescriptor(dataset.sequences, 'pepcats')
    d2.calculate_crosscorr(7)

    # Only the dataset rows from index 206 onwards receive a class label
    # ('AMP').  The label count is derived from that slice so the label
    # vector always has exactly one entry per row of ``labelled_data``.
    amp_descriptors = d2.descriptor[206:]
    labelled_data = np.vstack((d.descriptor, amp_descriptors))
    targets = np.array(libnum * [0] + libnum * [1] + libnum * [2] +
                       len(amp_descriptors) * [3])
    names = ['Helices', 'Random', 'nGrams', 'AMP']

    # plot som maps with location of AMPs
    som.plot_point_map(labelled_data,
                       targets,
                       names,
                       filename="peptidesom.png")
    # The density map takes no labels, so it uses every dataset row.
    som.plot_density_map(np.vstack((d.descriptor, d2.descriptor)),
                         filename="density.png")
    som.plot_distance_map(colormap='Reds', filename="distances.png")

    # One density plot per class.  The data passed here must be the matrix
    # that ``targets`` was built for; stacking the complete ``d2.descriptor``
    # instead would shift the rows under the class-3 labels.
    # ``np.unique`` yields the classes in ascending order, which keeps the
    # colormap assignment deterministic (a ``set`` has no guaranteed order).
    # NOTE(review): the whole ``names`` list is passed as the fourth
    # positional argument -- confirm whether plot_class_density expects the
    # list or a single class label there.
    colormaps = ['Oranges', 'Purples', 'Greens', 'Reds']
    for i, c in enumerate(np.unique(targets)):
        som.plot_class_density(labelled_data,
                               targets,
                               c,
                               names,
                               colormap=colormaps[i],
                               filename='class%i.png' % c)

    # get neighboring peptides (AMPs / TMs) for a sequence of interest
    # NOTE(review): the value returned by get_neighbors is discarded here.
    my_d = PeptideDescriptor(seqs='GLFDIVKKVVGALLAG', scalename='pepcats')
    my_d.calculate_crosscorr(window=7)
    som.get_neighbors(datapoint=my_d.descriptor,
                      data=d2.descriptor,
                      labels=dataset.sequences,
                      d=0)
Пример #3
0
def run(data, musics):
    """Cluster the cells of a SOM trained on ``data`` and tag each music.

    A 30x30 SOM is fitted on ``data``.  Every cell of the map returned by
    ``winner_map`` is turned into a point ``[row, col, value]``, the points
    are grouped into 6 clusters with Ward-linkage agglomerative clustering,
    and the map is overwritten with the resulting cluster labels.  Finally
    each music receives the label of its winning cell.

    :param data: training samples, passed to ``SOM.fit`` and
        ``SOM.winner_map``.
    :param musics: iterable of objects providing ``toObject()`` (whose result
        is indexed with ``'valor'``) and ``setGroupKon(group)``.
    :return: None; the ``musics`` objects are updated through
        ``setGroupKon``.
    """
    som = SOM(30, 30)  # initialize the SOM
    som.fit(data, 20000)  # fit the SOM; 20000 is the epoch argument

    # Prepare the clustering input: one [row, col, value] point per map cell.
    winners = som.winner_map(data)
    points = [[i, j, value]
              for i, row in enumerate(winners)
              for j, value in enumerate(row)]

    # Create the clusters.  Ward linkage only works with the Euclidean
    # metric, which is the default, so the metric is not passed explicitly:
    # the old ``affinity`` keyword is deprecated/removed in recent
    # scikit-learn releases.
    hc = AgglomerativeClustering(n_clusters=6, linkage='ward')
    y_hc = hc.fit_predict(points)

    # Replace each cell value of the map with the cluster label of the cell.
    for point, label in zip(points, y_hc):
        winners[point[0], point[1]] = label

    # Assign every music the cluster of the cell it is mapped to.
    for music in musics:
        winner = som.winner(music.toObject()['valor'])
        music.setGroupKon(winners[winner[0]][winner[1]])
Пример #4
0
def main(in_file, out_file, x, y, epochs, ref=None, test=False, verbose=0):
    """Embed the rows of a numeric table onto a SOM grid.

    Rows containing missing values and non-numeric columns are dropped (with
    a warning), then every remaining row is mapped to its winner neuron.

    :param in_file: when ``test`` is true, a 2-D array-like with a ``shape``
        attribute; otherwise a tab-separated file readable by
        ``pandas.read_table`` whose first column is the index.
    :param out_file: destination of the tab-separated result; only used when
        ``test`` is false.
    :param x: first argument of the ``SOM`` constructor.
    :param y: second argument of the ``SOM`` constructor.
    :param epochs: passed to ``SOM.fit``; unused when ``ref == 'IRCI'``.
    :param ref: ``'IRCI'`` loads the stored model ``SOM.pkl`` located next to
        this file instead of fitting; ``'Create'`` fits and then stores the
        model there; any other value fits without storing.
    :param test: if true, read ``in_file`` as in-memory data and return the
        result instead of writing it.
    :param verbose: forwarded to ``SOM.fit``.
    :return: DataFrame with columns ``ID``, ``X`` and ``Y`` when ``test`` is
        true, otherwise None.
    """
    if test:
        df = pd.DataFrame(in_file, columns=range(in_file.shape[1]))
    else:
        df = pd.read_table(in_file, sep='\t', low_memory=True, index_col=0)

    rows_before = df.shape[0]
    df.dropna(axis=0, how='any', inplace=True)
    dropped_rows = rows_before - df.shape[0]
    if dropped_rows:
        logger.warning('%d rows dropped due to missing values', dropped_rows)

    cols_before = df.shape[1]
    df = df.select_dtypes(include=[np.number])
    dropped_cols = cols_before - df.shape[1]
    if dropped_cols:
        logger.warning('%d columns dropped due to non-numeric data type',
                       dropped_cols)

    # Use one path for both loading and saving, so a model stored with
    # ref='Create' is the one found with ref='IRCI' (loading used to look in
    # the filesystem root instead of next to this file).
    basedir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(basedir, 'SOM.pkl')

    som = SOM(x, y)
    if ref == 'IRCI':
        # NOTE(review): the '.pkl' suffix suggests a pickle file; if so, only
        # load model files from a trusted location.
        loaded = som.load(model_path)
        # Keep the current instance if ``load`` restores the state in place
        # and returns nothing -- TODO confirm the return value of SOM.load.
        if loaded is not None:
            som = loaded
    else:
        som.fit(df.values, epochs, verbose=verbose)

    embedding = som.winner_neurons(df.values)
    if ref == 'Create':
        som.save(model_path)

    # Column 1 of the embedding is reported as X, column 0 as Y.
    emb_df = pd.DataFrame({'ID': df.index})
    emb_df['X'] = embedding[:, 1]
    emb_df['Y'] = embedding[:, 0]
    if test:
        return emb_df
    emb_df.to_csv(out_file, index=False, sep='\t')
Пример #5
0
from data import gen_kura_data
from som import SOM
import params
from visualizer import visualize_history

if __name__ == '__main__':
    # Build the input data from the settings held in the ``params`` module.
    X = gen_kura_data(params)

    # Gather the SOM constructor arguments from ``params`` in one place.
    som_settings = {
        'resolution': params.resolution,
        'latent_dim': params.latent_dim,
        'sigma_max': params.sigma_max,
        'sigma_min': params.sigma_min,
        'tau': params.tau,
        'seed': params.seed,
    }
    som = SOM(**som_settings)

    # Fit the model and keep what ``fit`` returns for the visualization.
    history = som.fit(X, num_epoch=params.num_epoch)
    visualize_history(X, history, params)
Пример #6
0
import numpy as np
from som import SOM

# Draw two clouds of 500 samples with 36 features each (normal distributions
# centred at -0.25 and +0.25) and stack them row-wise into one data matrix.
cloud_shape = (500, 36)
data1 = np.random.normal(loc=-.25, scale=0.5, size=cloud_shape)
data2 = np.random.normal(loc=.25, scale=0.5, size=cloud_shape)
data = np.concatenate((data1, data2), axis=0)

# Initialize the SOM and fit it for 2000 epochs.
som = SOM(10, 10)
som.fit(data, 2000)

# Dummy target values: a plain list with 500 zeros followed by 500 ones.
targets = [0] * 500 + [1] * 500

# Visualize the learned representation together with the class labels.
class_names = ['class 1', 'class 2']
som.plot_point_map(data, targets, class_names, filename='som.png')
som.plot_class_density(
    data,
    targets,
    1,
    class_names,
    filename='class_0.png',
)
Пример #7
0
import numpy as np
from som import SOM

# Draw two clouds of 500 samples with 36 features each (normal distributions
# centred at -0.25 and +0.25) and stack them row-wise into one data matrix.
cloud_shape = (500, 36)
data1 = np.random.normal(loc=-.25, scale=0.5, size=cloud_shape)
data2 = np.random.normal(loc=.25, scale=0.5, size=cloud_shape)
data = np.concatenate((data1, data2), axis=0)

# Initialize the SOM, fit it for 10000 epochs while saving the error every
# 100 steps, then plot the training error history.
som = SOM(10, 10)
som.fit(data, 10000, save_e=True, interval=100)
som.plot_error_history(filename='images/som_error.png')

# Dummy target values: 500 zeros followed by 500 ones.
targets = np.repeat([0, 1], 500)

# Show the shapes of the label vector and of the data matrix.
print(targets.shape)
print(data.shape)

# The visualization calls below are currently disabled:
# som.plot_point_map(data, targets, ['Class 0', 'Class 1'], filename='images/som.png')
# som.plot_class_density(data, targets, t=0, name='Class 0', filename='images/class_0.png')
# som.plot_distance_map(filename='images/distance_map.png')  # distance map after training
Пример #8
0
    for col in range(1, num_cols):
        col_values.append(input_sheet.col_values(col)[1:])
    x = np.array(col_values, dtype='|S4')
    y = x.astype(np.float)
    maxs = [max(y[col]) for col in range(0, num_cols - 1)]
    mins = [min(y[col]) for col in range(0, num_cols - 1)]
    data_points = []
    for row in range(1, num_rows):
        values = []
        for col in range(1, num_cols):
            values.append(
                (float(input_sheet.cell(row, col).value) - mins[col - 1]) /
                (maxs[col - 1] - mins[col - 1]))
        d = DataPoint(values, int(output_sheet.cell(row, 0).value))
        data_points.append(d)
    print(num_rows - 1, " points with dimesion=", num_cols - 1, " are added")
    return data_points


# Load the data points from "722.xlsx"; the same list feeds both models below.
data_points = load_data("722.xlsx")
# Build the SOM, hand it the data points and fit it.
# NOTE(review): the meaning of the positional arguments (2, 8, 27) and
# (2, 0.1) is not visible here -- confirm against the SOM class.
s = SOM(2, 8, 27)
s.load_input_data(data_points)
s.fit(2, 0.1)

# Build the LVQ model, hand it the same data points and train it.
# NOTE(review): the meaning of (2, 6, 5) and (5, 0.01, 2) is not visible
# here -- confirm against the LVQ class.
v = LVQ(2, 6, 5)
v.load_data(data_points)
v.train(5, 0.01, 2)

# Run both trained models on the data they were trained with.
s.predict(data_points)
v.predict(data_points)
Пример #9
0
# Build three virtual peptide sub-libraries of equal size.
libnum = 1000  # number of sequences per sub-library
h = Helices(seqnum=libnum)
r = Random(seqnum=libnum)
n = AMPngrams(seqnum=libnum, n_min=4)
h.generate_sequences()
r.generate_sequences(proba='AMP')
n.generate_sequences()

# Pool the three libraries and calculate their 'pepcats' cross-correlation
# descriptors.
library_seqs = np.hstack((h.sequences, r.sequences, n.sequences))
d = PeptideDescriptor(seqs=library_seqs, scalename='pepcats')
d.calculate_crosscorr(window=7)

# Train a 12x12 SOM on the descriptors, then print and plot the training
# error.
som = SOM(x=12, y=12)
som.fit(data=d.descriptor, epochs=100000, decay='hill')
print("Fit error: %.4f" % som.error)
som.plot_error_history(filename="som_error.png")

# Load the known antimicrobial peptides (AMPs) and transmembrane sequences
# and describe them the same way as the virtual libraries.
dataset = load_AMPvsTM()
d2 = PeptideDescriptor(seqs=dataset.sequences, scalename='pepcats')
d2.calculate_crosscorr(window=7)

# One integer label per plotted row: ``libnum`` rows for each of the three
# libraries followed by 206 rows labelled as class 3.
names = ['Helices', 'Random', 'nGrams', 'AMP']
targets = np.repeat([0, 1, 2, 3], [libnum, libnum, libnum, 206])

# Point map: library descriptors plus the dataset rows from index 206 on.
labelled_rows = np.vstack((d.descriptor, d2.descriptor[206:]))
som.plot_point_map(labelled_rows, targets, names, filename="peptidesom.png")

# Density map: library descriptors plus every dataset row.
all_rows = np.vstack((d.descriptor, d2.descriptor))
som.plot_density_map(all_rows, filename="density.png")

# Distance map of the trained SOM.
som.plot_distance_map(colormap='Reds', filename="distances.png")
Пример #10
0
# Build three virtual peptide sub-libraries of equal size.
libnum = 1000  # number of sequences per sub-library
h = Helices(seqnum=libnum)
r = Random(seqnum=libnum)
n = AMPngrams(seqnum=libnum, n_min=4)
h.generate_sequences()
r.generate_sequences(proba='AMP')
n.generate_sequences()

# Pool the three libraries and calculate their 'pepcats' cross-correlation
# descriptors.
library_seqs = np.hstack((h.sequences, r.sequences, n.sequences))
d = PeptideDescriptor(seqs=library_seqs, scalename='pepcats')
d.calculate_crosscorr(window=7)

# Train a 10x10 SOM on the descriptors, then print and plot the training
# error.
som = SOM(x=10, y=10)
som.fit(data=d.descriptor, epochs=10000)
print("Fit error: %.4f" % som.error)
som.plot_error_history(filename="som_error.png")

# Load the known antimicrobial peptides (AMPs) and transmembrane sequences
# and describe them the same way as the virtual libraries.
dataset = load_AMPvsTM()
d2 = PeptideDescriptor(seqs=dataset.sequences, scalename='pepcats')
d2.calculate_crosscorr(window=7)

# One integer label per plotted row: ``libnum`` rows for each of the three
# libraries followed by 206 rows labelled as class 3.
names = ['Helices', 'Random', 'nGrams', 'AMP']
targets = np.repeat([0, 1, 2, 3], [libnum, libnum, libnum, 206])

# Point map: library descriptors plus the dataset rows from index 206 on.
labelled_rows = np.vstack((d.descriptor, d2.descriptor[206:]))
som.plot_point_map(labelled_rows, targets, names, filename="peptidesom.png")

# Density map: library descriptors plus every dataset row.
all_rows = np.vstack((d.descriptor, d2.descriptor))
som.plot_density_map(all_rows, filename="density.png")

# Colormap names, one per entry of ``names``.
colormaps = ['Oranges', 'Purples', 'Greens', 'Reds']