Example No. 1
def createModel():
    maxlen = 150
    index_dict = pickle.load(open('./w2v_index.pkl', 'rb'))
    vec_dict = pickle.load(open('./w2v_vec.pkl', 'rb'))
    n_words = len(index_dict)
    print(n_words)
    vec_matrix = np.zeros((n_words + 1, 100))
    for k, i in index_dict.items():  # map each word index to its word vector
        try:
            vec_matrix[i, :] = vec_dict[k]
        except Exception:
            print(k, i)
            print(vec_dict[k])
            exit(1)
    labels = getLabels()
    sentences = loadData('./sen_cut.txt')
    X_train, X_test, y_train, y_test = train_test_split(sentences,
                                                        labels,
                                                        test_size=0.2)
    X_train = text2index(index_dict, X_train)
    X_test = text2index(index_dict, X_test)
    print("Training set shape:", np.shape(X_train))
    print("Test set shape:", np.shape(X_test))
    y_train = np.array(y_train)
    y_test = np.array(y_test)
    print('Pad sequences (samples x time)')
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)  # pad shorter sequences with zeros
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
    print("Training set shape:", np.shape(X_train))
    print("Test set shape:", np.shape(X_test))
    train_lstm(n_words + 1, vec_matrix, X_train, y_train, X_test, y_test)
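The key step above is turning the two pickled dictionaries into an embedding matrix whose row i holds the vector of the word with index i, with row 0 left as zeros so it can serve as the padding value. A minimal, self-contained sketch of that correspondence, using hypothetical toy dictionaries rather than the pickled files above:

import numpy as np

index_dict = {'cat': 1, 'dog': 2}                # word -> integer index; 0 is reserved for padding
vec_dict = {'cat': np.ones(100),                 # word -> 100-dimensional vector
            'dog': np.full(100, 2.0)}

vec_matrix = np.zeros((len(index_dict) + 1, 100))
for word, idx in index_dict.items():
    vec_matrix[idx, :] = vec_dict[word]          # row idx now holds the vector of `word`

assert vec_matrix[0].sum() == 0.0                # padding row stays all-zero
assert vec_matrix[1, 0] == 1.0 and vec_matrix[2, 0] == 2.0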
Example No. 2
def getSVMModel(verbose=0):

    train_X, test_X, train_Y, test_Y = loadData(
        '/root/Desktop/hack/CHD/New_Model/dataset.csv')

    # use only the first 1% of the data for training (integer division)
    train_X = train_X[:len(train_X) // 100]
    train_Y = train_Y[:len(train_Y) // 100]

    model = svm.SVC(kernel="rbf", C=100, gamma=0.1)

    #train model
    model.fit(train_X, train_Y)
    predicted_labels = model.predict(test_X)

    if verbose == 1:
        print("FINISHED classifying. accuracy score:")
        print(accuracy_score(test_Y, predicted_labels))

    return model
Example No. 3
def load_data(fileobj, batch_size):
    """ Loads the dataset

    :type dataset: FileObj, typically created by `open(filename, 'rb')`
    :param dataset: the path to the dataset
    
    :type batch_size: int
    :param batch_size: Maximum number of examples to load.
    """

    # Load the dataset
    data_set = loadData(fileobj, batch_size)

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every time
        it is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        #print(len(data_xy[1]))
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use the labels as indices, and that does
        # not make sense if they are floats), therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around the issue.
        return shared_x, T.cast(shared_y, 'int32')

    data_set_x, data_set_y = shared_dataset(data_set)

    return (data_set_x, data_set_y)
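As the docstring explains, the point of keeping the data in shared variables is that a compiled Theano function can then slice out one minibatch per call without re-copying the dataset to the GPU. A minimal sketch of that pattern; the symbolic variables, the dummy cost, and batch_size are hypothetical and not part of the snippet above:

import numpy
import theano
import theano.tensor as T

batch_size = 32                                  # hypothetical
index = T.lscalar('index')                       # minibatch index
x = T.matrix('x')
y = T.ivector('y')

# Stand-ins for the shared variables returned by load_data(...).
data_set_x = theano.shared(numpy.random.rand(100, 10).astype(theano.config.floatX))
data_set_y = T.cast(theano.shared(numpy.zeros(100, dtype=theano.config.floatX)), 'int32')

cost = T.mean(x) + T.mean(y)                     # dummy cost; a real model would go here

get_batch_cost = theano.function(
    inputs=[index],
    outputs=cost,
    givens={
        x: data_set_x[index * batch_size:(index + 1) * batch_size],
        y: data_set_y[index * batch_size:(index + 1) * batch_size],
    })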
Example No. 4
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Activation
import numpy as np
import keras
import math
from LoadData import loadData


#Load data
train_X, test_X, train_Y, test_Y = loadData('dataset4.csv')

def KerasModel():
    model = Sequential()
    model.add(Dense(12, input_dim=9, activation='relu'))
    #model.add(Dense(12, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    adamm = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                  epsilon=math.pow(10, -8), decay=0.0)
    model.compile(optimizer=adamm,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(x=np.array(train_X), y=np.array(train_Y),
                        validation_data=(np.array(test_X), np.array(test_Y)),
                        nb_epoch=50, batch_size=32)

    plt.plot(history.history['acc'])
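Because the model is fit with validation_data, the History object also records validation metrics. A small sketch of plotting training and validation accuracy together; the key names are 'acc'/'val_acc' in older Keras releases and 'accuracy'/'val_accuracy' in newer ones, and `history` is assumed to be the object returned by the model.fit call above:

import matplotlib.pyplot as plt

acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key], label='train')
plt.plot(history.history['val_' + acc_key], label='validation')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()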
Example No. 5
    return model

'''Define the necessary parameters'''

fs = 64                 # sampling rate of the processed EEG and audio signals
n_hidden = 2
dropout = 0.25
trainPredWin = 60 * fs  # training prediction window (60 s)
numBlocks = 48          # EEG signal divided into 48 blocks of 60 s each



'''############## MAIN CODE ################'''
for Subject in range(1,2):       #Choose which subject to process
    workingDir = r'E:\dolhopia\DNN\EasyModel'  # provide your own working path here (raw string avoids backslash escapes)
    a, b, chans, eeg = loadData(Subject)   # load dataset; a and b indicate which stream the subject was attending to
    numChans = len(chans)
    numSides = 2
    Audio = io.loadmat(workingDir + '/Data/wav.mat')
      
    LAGS15 = [250]        #Define the Lag
 
    
    eegData = np.zeros((numBlocks, trainPredWin, numChans), dtype=np.float32)
    targetAudio = np.zeros((numBlocks, trainPredWin, numSides), dtype=np.float32)

    '''Split the dataset into 48 blocks'''
    for block in range(numBlocks):
        eegData[block, :, :] = eeg['eegNH' + str(Subject)][block * trainPredWin:(block + 1) * trainPredWin, :]
        targetAudio[block, :, a] = Audio["EnvA"][block * trainPredWin:(block + 1) * trainPredWin, 0]  # load the envelope of the attended stream here
        targetAudio[block, :, b] = Audio["EnvU"][block * trainPredWin:(block + 1) * trainPredWin, 0]  # load the envelope of the unattended stream here
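Because the blocks are contiguous, the per-block copy loop above is equivalent to a single reshape. A minimal sketch with toy sizes, assuming the EEG array holds at least numBlocks * trainPredWin samples:

import numpy as np

numBlocks, trainPredWin, numChans = 4, 6, 3      # toy sizes
eeg_signal = np.arange(numBlocks * trainPredWin * numChans,
                       dtype=np.float32).reshape(numBlocks * trainPredWin, numChans)

# Loop version, as in the snippet above.
eegData_loop = np.zeros((numBlocks, trainPredWin, numChans), dtype=np.float32)
for block in range(numBlocks):
    eegData_loop[block, :, :] = eeg_signal[block * trainPredWin:(block + 1) * trainPredWin, :]

# Single-reshape version.
eegData_reshape = eeg_signal[:numBlocks * trainPredWin].reshape(numBlocks, trainPredWin, numChans)

assert np.array_equal(eegData_loop, eegData_reshape)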
Example No. 6
import pandas as pd
import random as rand
# Module for importing our own database
from LoadData import loadData

# Import the dataset from our own database: ###################################
'''Specify the name of the database folder:'''
DatabaseName0 = "FinalDatabase30"
DatabaseName01 = "FinalDatabase301"
DatabaseName02 = "FinalDatabase302"
DatabaseName03 = "FinalDatabase303"
DatabaseName04 = "FinalDatabase304"
DatabaseName05 = "FinalDatabase305"
''' In this variation we use DTt only '''
''' ================================================================ '''
[data0a, data0b, data0c] = loadData(DatabaseName0)
train_experiments0 = []
train_solutions0 = []
test_experiments0 = []
test_solutions0 = []
validation_experiments0 = []
validation_solutions0 = []
''' ================================================================ '''
[data01a, data01b, data01c] = loadData(DatabaseName01)
train_experiments01 = []
train_solutions01 = []
test_experiments01 = []
test_solutions01 = []
validation_experiments01 = []
validation_solutions01 = []
''' ================================================================ '''
Example No. 7
def cleanUp():
    df = loadData()
    df = df.drop_duplicates(subset=['TimeSt', 'Latitude', 'Longitude'],
                            keep='first')
    return df
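For clarity, a tiny self-contained example of what drop_duplicates with a subset and keep='first' does; the rows are hypothetical toy data with the same column names as above:

import pandas as pd

df = pd.DataFrame({
    'TimeSt':    ['10:00', '10:00', '10:05'],
    'Latitude':  [43.7,    43.7,    43.7],
    'Longitude': [-79.4,   -79.4,   -79.4],
    'ID':        [1,       2,       3],
})

# Rows 0 and 1 share the same (TimeSt, Latitude, Longitude), so only the
# first of the two is kept; row 2 differs in TimeSt and survives.
deduped = df.drop_duplicates(subset=['TimeSt', 'Latitude', 'Longitude'], keep='first')
print(deduped['ID'].tolist())   # [1, 3]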
Example No. 8
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import warnings

warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Activation
import numpy as np
import keras
import math
from LoadData import loadData

#Load data
train_X, test_X, train_Y, test_Y = loadData('diabetes.csv')


def KerasModel():
    model = Sequential()
    model.add(Dense(8, input_dim=8, activation='relu'))
    model.add(Dense(12, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    adamm = keras.optimizers.Adam(lr=0.001,
                                  beta_1=0.9,
                                  beta_2=0.999,
                                  epsilon=math.pow(10, -8),
                                  decay=0.0)
    model.compile(optimizer=adamm,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
Example No. 9
network = makeNetwork(in_layer_size, 2000, out_layer_size)

dataset = SupervisedDataSet(in_layer_size, out_layer_size)
trainer = BackpropTrainer(network, dataset, learningrate=0.1/200)

for epoch in range(3):
    print("Epoch " + str(epoch+1))
    
    training_file = open("training-data.pkl", "rb")
    validation_file = open("validation-data.pkl", "rb")

    batch_iter = 0
    while True:
        print("batch_iter: " + str(batch_iter))
        training_set_x, training_set_y = loadData(training_file, 1)
        print(len(training_set_x))
        if len(training_set_x) == 0:
            break
        
        print("there")
        for i in range(len(training_set_x)):
            dataset.addSample(training_set_x[i],training_set_y[i])
        print("here")
        trainer.train()
        print("now")
        dataset.clear()
        batch_iter += 1
    
    # Clear references to these so the garbage collector can reclaim them
    # whenever it chooses to.
Example No. 10
            print(line, file=output)
            handle.write(line + '\n')
    print("*********************************************\n", file=output)

    # load data
    dataset = args.dataset.lower()
    if dataset == 'synthetic':
        load_path = args.load_path + dataset + '/' + str(args.ncliq) + '/'
    else:
        load_path = args.load_path + dataset + '/'
    features = args.features.lower()
    labels = args.labels.lower()
    transmode = args.transfer_mode
    B_from_A = args.b_from_a
    visualize = args.visualize
    n_nodes, n_features, n_labels, A, Afeatures, Alabels, Atrain, Atest, B, Bfeatures, Blabels = loadData(
        load_path, features, labels, transmode, B_from_A, visualize)

    if labels == 'pagerank':
        n_layers = max(10, args.depth)
    else:
        n_layers = max(1, args.depth)

    n_embedding = args.nembedding
    topology_similarity = args.topology_similarity
    embedding_type = args.embedding_type
    embedding_similarity = args.embedding_similarity
    if embedding_type == 'skipgram':
        embedding_similarity = 'softmax'
    if embedding_similarity == 'softmax':
        n_negative = args.nnegative
        scale_negative = args.scale_negative
Example No. 11
from LoadData import loadData

X, Y = loadData('./Train', spectrogram_step=15)
X_test, Y_test = loadData('./Test', spectrogram_step=15)

import tflearn
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Building the 1-D convolutional network

# | Adam | epoch: 040 | loss: 0.03133 - acc: 0.9901 | val_loss: 0.06419 - val_acc: 0.9819 -- iter: 5206/5206
net = tflearn.input_data(shape=[None, 121, 35])

net = tflearn.max_pool_1d(net, 4)
net = tflearn.conv_1d(net, 128, 6, activation='relu')
net = tflearn.max_pool_1d(net, 2)
net = tflearn.conv_1d(net, 128, 3, activation='relu')
net = tflearn.avg_pool_1d(net, 2)

net = tflearn.fully_connected(net, 128, activation='relu')
net = tflearn.dropout(net, 0.7)

net = tflearn.fully_connected(net, 14, activation='softmax')

net = tflearn.regression(net,
                         optimizer='adam',
                         loss='categorical_crossentropy',
                         learning_rate=0.005)
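The snippet ends after defining the regression layer. A hedged sketch of how such a tflearn graph is typically wrapped and trained; n_epoch, batch_size, and the run name are illustrative choices, not taken from the original code:

import tflearn

# Wrap the graph defined above in a trainable model.
model = tflearn.DNN(net, tensorboard_verbose=0)

# Train on the spectrogram data loaded at the top of the snippet.
model.fit(X, Y,
          n_epoch=40,                       # illustrative value
          validation_set=(X_test, Y_test),
          show_metric=True,
          batch_size=64,                    # illustrative value
          run_id='loadData_conv1d_demo')    # hypothetical run name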
Example No. 12
from LoadData import loadData
import os

X, _, P = loadData("./Train")

# Flag recordings whose summed signal is suspiciously small and delete the weakest ones.
for elem, path in zip(X, P):
    value = abs(sum(elem))
    if value < 1:
        os.remove(path)
        print("Very weak file: " + path)
    elif value < 3:
        print("Weak file: " + path)
Example No. 13
def main():
    id_ens = dict_idEns["maths"]
    X, y = loadData(id_ens) #df_year1, df_year2
    menuOptions(X, y)
    
    print("final")
Example No. 14
    'AD': np.array([0, 0, 1])
}
group = {0: 'CN', 1: 'MCI', 2: 'AD'}
num_labels = 3

# file
data_path = '/home/share/data/ADNI1_Annual_2_Yr_3T'
summary_path = '/home/zhengjiaqi/zhengjiaqi/summary/'

# device
device = '/gpu:1'
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4"

trainNum = 270
Xshape = (111, 111, 111)
X, Y = loadData(data_path, groupMap, Xshape)
X = X.reshape((-1, 111, 111, 111, 1))
Y = Y.reshape((-1, 3))
X_train = X[:trainNum]
X_test = X[trainNum:]
Y_train = Y[:trainNum]
Y_test = Y[trainNum:]


# Accuracy function
def get_accuracy(predictions, labels):
    return 100 * tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(predictions, 1), tf.argmax(labels, 1)),
                tf.float32))
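For reference, the same computation in plain NumPy, as a hedged sanity check; it assumes predictions and labels are arrays of shape (n, num_labels), e.g. class probabilities and one-hot targets:

import numpy as np

def get_accuracy_np(predictions, labels):
    # Percentage of rows where the arg-max class of the prediction matches the label.
    return 100.0 * np.mean(np.argmax(predictions, axis=1) == np.argmax(labels, axis=1))

# Tiny toy check: 2 of 3 predictions match the one-hot labels -> ~66.67.
preds = np.array([[0.9, 0.05, 0.05], [0.1, 0.8, 0.1], [0.3, 0.4, 0.3]])
truth = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
print(get_accuracy_np(preds, truth))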