Example #1
def getContInputOutput(trainData):
    sequence_length = 1440  #entire day of data
    # containers for the network inputs and corresponding outputs
    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(trainData) - sequence_length - PRED_TIME):
        curr, fut_ = trainData[i +
                               sequence_length], trainData[i +
                                                           sequence_length +
                                                           PRED_TIME]
        sequence_in = trainData[i:i + sequence_length]
        sequence_out = (fut_ - curr) / curr
        # print type(sequence_out)
        network_input.append([price for price in sequence_in])
        network_output.append(sequence_out)
    print "cont", network_output[0]
    n_patterns = len(network_input)
    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    network_output = np.asarray(network_output)
    # normalize input
    network_input = np_utils.normalize(network_input)
    network_output = np_utils.normalize(network_output)
    # network_output = np_utils.to_categorical(network_output)
    return network_input, network_output
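
For reference, older Keras exposes normalize in keras.utils.np_utils with an (x, axis=-1, order=2) style signature; below is a minimal NumPy sketch of the assumed behaviour (an illustration, not the library source):

import numpy as np

def lp_normalize(x, axis=-1, order=2):
    # divide each slice along `axis` by its Lp norm; zero norms are left untouched
    norms = np.atleast_1d(np.linalg.norm(x, order, axis))
    norms[norms == 0] = 1
    return x / np.expand_dims(norms, axis)

# Note: applied to the (n_patterns, sequence_length, 1) input above with the default
# axis=-1, every length-1 slice is scaled to unit magnitude, which may not be intended;
# axis=1 would normalize each sequence instead.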
Example #2
def feature_preprocess(feat):
    """ 
    Input feature is extracted according to Section 4.2 in the paper
    """
    # subject classeme + object classeme
    # feat[:, 0: 70]

    # subject TrajectoryShape + HoG + HoF + MBH motion feature
    # (since this feature is Bag-of-Word type, we l1-normalize it so that
    # each element represents the fraction instead of count)
    feat[:, 70: 1070] = np_utils.normalize(feat[:, 70: 1070], -1, 1)
    feat[:, 1070: 2070] = np_utils.normalize(feat[:, 1070: 2070], -1, 1)
    feat[:, 2070: 3070] = np_utils.normalize(feat[:, 2070: 3070], -1, 1)
    feat[:, 3070: 4070] = np_utils.normalize(feat[:, 3070: 4070], -1, 1)
    # object TrajectoryShape + HoG + HoF + MBH motion feature
    feat[:, 4070: 5070] = np_utils.normalize(feat[:, 4070: 5070], -1, 1)
    feat[:, 5070: 6070] = np_utils.normalize(feat[:, 5070: 6070], -1, 1)
    feat[:, 6070: 7070] = np_utils.normalize(feat[:, 6070: 7070], -1, 1)
    feat[:, 7070: 8070] = np_utils.normalize(feat[:, 7070: 8070], -1, 1)

    # relative position + size + motion feature
    # feat[:, 8070: 9070]
    # feat[:, 9070: 10070]
    # feat[:, 10070: 11070]
    return feat
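
The comment above hinges on L1 normalization turning bag-of-words counts into per-row fractions; a small self-contained illustration (assuming the extra (-1, 1) arguments map to axis=-1, order=1):

import numpy as np

counts = np.array([[2., 3., 5.],
                   [0., 4., 4.]])
row_sums = np.abs(counts).sum(axis=-1, keepdims=True)  # L1 norm of each row
fractions = counts / row_sums
print(fractions)  # [[0.2 0.3 0.5]
                  #  [0.  0.5 0.5]]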
Example #3
def getBinInputOutput(trainData):
    sequence_length = 900  #15 hours
    # containers for the network inputs and corresponding outputs
    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(trainData) - sequence_length - PRED_TIME):
        curr, fut_ = trainData[i +
                               sequence_length], trainData[i +
                                                           sequence_length +
                                                           PRED_TIME]
        sequence_in = trainData[i:i + sequence_length]
        # binary direction label: 1 if the price rises over the prediction horizon, else 0
        sequence_out = 1 if (fut_ - curr) > 0 else 0
        network_input.append([price for price in sequence_in])
        network_output.append(sequence_out)
    n_patterns = len(network_input)
    print "###### done with sequencing 1"
    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    network_output = np.asarray(network_output)
    # normalize input
    network_input = np_utils.normalize(network_input)
    # network_output = np_utils.normalize(network_output)
    network_outputCat = np_utils.to_categorical(network_output, 2)
    # for ind in range(len(network_output)):
    # 	print network_outputCat[ind], network_output[ind]
    print "###### done with sequencing 2"
    return network_input, network_outputCat
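
For context, to_categorical with 2 classes turns the 0/1 direction labels above into one-hot rows; a tiny sketch of the assumed mapping:

import numpy as np

labels = np.array([1, 0, 1])
one_hot = np.eye(2)[labels]  # same mapping as np_utils.to_categorical(labels, 2)
print(one_hot)               # [[0. 1.] [1. 0.] [0. 1.]]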
Example #4
	def isGameOver(self,rawPixels):
		signalEncode = self.signalEncoder(rawPixels)
		cut = signalEncode[:,40:80,:]
		flatten = normalize(cut.reshape((1,1200)))
		if self.model.predict_classes(flatten,verbose=0) == 0:
			print('game over!')
			return True
		return False
Example #5
    def sum_word_embeddings(self, text):
        tokens = self.tokenize_text([text])
        X = self.transform_texts(tokens)[0]

        embed = numpy.zeros(self.EMBED_DIM)
        embeddings = self.model.layers[1].get_weights()[0]

        for (i, word) in enumerate(X):
            embed += embeddings[word]
        embed = np_utils.normalize(embed)[0]

        return embed
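
Normalizing the summed embedding gives a unit-length vector, so later dot products between texts behave like cosine similarity; a minimal standalone sketch of that idea (not this class's helpers):

import numpy as np

def unit_vector(v):
    # scale a summed embedding to unit L2 length
    n = np.linalg.norm(v)
    return v if n == 0 else v / n

a = unit_vector(np.array([1.0, 2.0, 2.0]))
b = unit_vector(np.array([2.0, 4.0, 4.0]))
print(np.dot(a, b))  # 1.0 -> same direction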
Example #6
def feature_preprocess(feat):
    # subject classeme + object classeme
    feat[:, 70:1070] = np_utils.normalize(feat[:, 70:1070], -1, 1)
    # subject HoG + HoF + MBH motion feature
    feat[:, 1070:2070] = np_utils.normalize(feat[:, 1070:2070], -1, 1)
    feat[:, 2070:3070] = np_utils.normalize(feat[:, 2070:3070], -1, 1)
    feat[:, 3070:4070] = np_utils.normalize(feat[:, 3070:4070], -1, 1)
    # object HoG + HoF + MBH motion feature
    feat[:, 4070:5070] = np_utils.normalize(feat[:, 4070:5070], -1, 1)
    feat[:, 5070:6070] = np_utils.normalize(feat[:, 5070:6070], -1, 1)
    feat[:, 6070:7070] = np_utils.normalize(feat[:, 6070:7070], -1, 1)
    # relativity feature
    feat[:, 7070:8070] = np_utils.normalize(feat[:, 7070:8070], -1, 1)
    return feat
Example #7
def gen_model(dest_model,
              batch_size,
              epochs,
              nb_classes,
              num_model,
              path_data,
              path_dataset=None):
    img_data, labels = iter_images(batch_size, batch_size, path_data,
                                   path_dataset)

    data = np.asarray(img_data)
    data = data.astype('float32') / 255.0

    labels = np.asarray(labels)

    # Split the data
    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        labels,
                                                        test_size=0.30,
                                                        shuffle=True)

    # normalize returns a new array rather than modifying in place, so reassign
    x_train = np_utils.normalize(x_train)
    x_test = np_utils.normalize(x_test)

    y_train_binary = to_categorical(y_train, num_classes=nb_classes)
    y_test_binary = to_categorical(y_test, num_classes=nb_classes)

    if os.path.exists(dest_model):
        model = load_model(dest_model)
    else:
        model, history = train_model(x_train, y_train_binary, nb_classes,
                                     batch_size, epochs, num_model)
        save_model(model, history, dest_model)
        del history
    save_scores(dest_model, model, x_test, y_test_binary)
    del model
    keras.backend.clear_session()
    gc.collect()
Example #8
def prepareInputData(data):
    dataset = np.array(data)
    # dataset = np.random.shuffle(dataset)

    rows, cols = dataset.shape

    ### Extracting X in shape (:,1:cols) i.e. except 1st column
    X = dataset[:, 1:cols]

    ### Normalizing X
    X = normalize(X, axis=1)
    # X = X/255

    # Extracting Y as the first column
    Y = dataset[:, 0]

    ### Resizing Y in the form (len(Y),1)
    Y = Y.reshape(Y.shape[0], 1)
    return X, Y
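
Note that the commented-out X/255 and normalize(X, axis=1) are different scalings: the former divides by a fixed constant, the latter rescales each row (sample) to unit L2 norm. A brief sketch, assuming normalize is the Keras L2 helper:

import numpy as np

row = np.array([[255.0, 0.0, 255.0]])
print(row / 255)                                          # fixed scaling: [[1. 0. 1.]]
print(row / np.linalg.norm(row, axis=1, keepdims=True))   # unit-norm row: [[0.707 0. 0.707]]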
Example #9
File: method1.py Project: Emieeel/test
def keras_model(cleaned, train_test, test, used_data, save_id):
    model = Sequential()

    X_train = cleaned[used_data].as_matrix()
    Y_train = np_utils.to_categorical(cleaned['is_delayed'].as_matrix())
    input_dim = X_train.shape[1]

    X_test = np_utils.normalize(train_test[used_data].as_matrix(), axis=0)

    model.add(Dense(2 * input_dim, input_dim=input_dim, activation='relu'))
    model.add(Dense(input_dim, activation='relu'))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(X_train, Y_train, epochs=50, batch_size=200)
    predictions = model.predict_proba(X_test)
    predictions0 = predictions[:, 0]
    predictions1 = predictions[:, 1]
    print('keras')
    print(roc_auc_score(train_test['is_delayed'], predictions0))
    print(roc_auc_score(train_test['is_delayed'], predictions1))
Example #10
        for i in range(sample_sent_len):
            for j, index in enumerate(sentence[-step_size:]):
                x[0, j] = index
            preds = model.predict(x)[0][-1]
            next_index = sample(preds)
            sentence.append(next_index)

        print_sentence(sentence)


# sample some random sentences
sample_sentences(num_sentences=20, sample_sent_len=15)

# normalize the embedding weights
norm_weights = np_utils.normalize(model.get_weights()[0])


# function to print the semantically closest words
def print_closest_words(word, nb_closest=10):
    index = word_index[word]
    distances = np.dot(norm_weights, norm_weights[index])
    c_indexes = np.argsort(np.squeeze(distances))[-nb_closest:][::-1]
    for c_index in c_indexes:
        print(index_word[c_index], distances[c_index])
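
Because norm_weights has unit-length rows, np.dot(norm_weights, norm_weights[index]) is the cosine similarity between every word and the query word; a small standalone example of the same pattern:

import numpy as np

W = np.array([[1.0, 0.0],
              [0.8, 0.6],
              [0.0, 1.0]])     # unit-length rows
sims = W @ W[0]                # cosine similarity of each row against row 0
print(np.argsort(sims)[::-1])  # [0 1 2] -> row 0 first, the orthogonal row last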


# words with close meanings
words = [
    "3",
    "two",
Example #11
    Tanh(16, 10),
])

# train
net.train(x_train, y_train, learning_rate=0.2, epochs=500)

# test
print('Accuracy in test set: ', net.get_accuracy(x_test, y_test))

# saving model
net.save_model('model_tanh8.json')

# load the MNIST data
(x_train, y_train), (x_test, y_test) = load_data()

x_train = normalize(x_train)
x_test = normalize(x_test)
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

x_train = x_train.reshape(x_train.shape[0], 28 * 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28 * 28, 1)
y_train = y_train.reshape(y_train.shape[0], 10, 1)
y_test = y_test.reshape(y_test.shape[0], 10, 1)

# neural network build
net = NeuralNetwork([
    Sigmoid(28 * 28, 32),
    Sigmoid(32, 32),
    Sigmoid(32, 10),
])
Example #12
def main():
    MAX_VOCAB = 6000
    WINDOW_SIZE = 4
    LEVEL = 'char'
    EMBED_DIM = 100
    MAX_TOKEN_LEN = 15
    NB_LAYERS = 1
    NB_EPOCHS = 3

    cutoff = 10000000
    words = codecs.open('../data/Austen_Sense.txt', 'r', encoding='utf8') \
                  .read().lower().split()[:cutoff]
    print('Loaded', len(words), 'words')

    cnt = Counter(words)
    most_comm = [k for k, v in cnt.most_common(500)]
    print('Most frequent:', most_comm[:50])

    word_to_int = {'UNK': 0}
    for w, c in cnt.most_common(MAX_VOCAB):
        word_to_int[w] = len(word_to_int)
    int_to_word = [None] * len(word_to_int)
    for k, v in word_to_int.items():
        int_to_word[v] = k

    if LEVEL == 'char':
        char_vector_dict, char_idx = index_characters(int_to_word)
        print(char_vector_dict.keys())
        model = build_model(vocab_size=len(word_to_int),
                            embed_dim=EMBED_DIM,
                            level=LEVEL,
                            token_len=MAX_TOKEN_LEN,
                            token_char_vector_dict=char_vector_dict,
                            nb_recurrent_layers=NB_LAYERS)

        most_comm_X = vectorize_tokens(tokens=most_comm,
                                       char_vector_dict=char_vector_dict,
                                       max_len=MAX_TOKEN_LEN)
        print(most_comm_X.shape, '!!!')

    elif LEVEL == 'word':
        model = build_model(vocab_size=len(word_to_int),
                            embed_dim=50,
                            level=LEVEL,
                            token_len=None,
                            token_char_vector_dict=None,
                            nb_recurrent_layers=None)
    model.summary()

    sampling_table = make_sampling_table(size=len(word_to_int))

    for e in range(NB_EPOCHS):
        idx = 0
        losses = []

        for idx in range(WINDOW_SIZE, len(words)-WINDOW_SIZE):
            seq = []
            for w in words[(idx - WINDOW_SIZE): (idx + WINDOW_SIZE)]:
                try:
                    seq.append(word_to_int[w])
                except KeyError:
                    seq.append(0)

            couples, labels = skipgrams(seq, len(word_to_int),
                                        window_size=4,
                                        negative_samples=1.,
                                        shuffle=True,
                                        categorical=False,
                                        sampling_table=sampling_table)

            if len(couples) > 1:
                couples = np.array(couples, dtype='int32')

                c_inp = couples[:, 1]
                c_inp = c_inp[:, np.newaxis]

                if LEVEL == 'word':
                    p_inp = couples[:, 0]
                    p_inp = p_inp[:, np.newaxis]
                elif LEVEL == 'char':
                    tokens = [int_to_word[i] for i in couples[:, 0]]
                    p_inp = vectorize_tokens(tokens=tokens,
                                             char_vector_dict=char_vector_dict,
                                             max_len=MAX_TOKEN_LEN)
                else:
                    raise ValueError('Wrong level param: word or char')

                labels = np.array(labels, dtype='int32')
                
                loss = model.train_on_batch({'pivot': p_inp, 'context': c_inp},
                                            {'label': labels})
                losses.append(loss)

                if idx % 5000 == 0:
                    print(np.mean(losses))

                if idx % 10000 == 0:
                    print(np.mean(losses))

                    print('Compiling repr func')
                    get_activations = K.function([model.layers[0].input,
                                                  K.learning_phase()],
                                                 [model.layers[6].output, ])
                    activations = get_activations([most_comm_X, 0])[0]
                    activations = np.array(activations, dtype='float32')

                    print(activations.shape, '-----')
                    norm_weights = np_utils.normalize(activations)

                    # dimension reduction:
                    tsne = TSNE(n_components=2)
                    coor = tsne.fit_transform(norm_weights)

                    plt.clf()
                    sns.set_style('dark')
                    sns.plt.rcParams['axes.linewidth'] = 0.4
                    fig, ax1 = sns.plt.subplots()

                    labels = most_comm
                    # first plot slices:
                    x1, x2 = coor[:, 0], coor[:, 1]
                    ax1.scatter(x1, x2, 100,
                                edgecolors='none',
                                facecolors='none')
                    # clustering on top (add some colouring):
                    clustering = AgglomerativeClustering(linkage='ward',
                                                         affinity='euclidean',
                                                         n_clusters=10)
                    clustering.fit(coor)
                    # add names:
                    axes = zip(x1, x2, most_comm, clustering.labels_)
                    for x, y, name, cluster_label in axes:
                        ax1.text(x, y, name, ha='center', va="center",
                                 color=plt.cm.spectral(cluster_label / 10.),
                                 fontdict={'family': 'Arial', 'size': 8})
                    # control aesthetics:
                    ax1.set_xlabel('')
                    ax1.set_ylabel('')
                    ax1.set_xticklabels([])
                    ax1.set_xticks([])
                    ax1.set_yticklabels([])
                    ax1.set_yticks([])
                    sns.plt.savefig('embeddings.pdf', bbox_inches=0)
Example #13
    'DDOS attack-HOIC': 'DDoS-Attack'
}
train_csv = './dataset/idsX_train_clean.csv'

df = pd.read_csv(train_csv)
df = df.dropna()
df = shuffle(df)
df['Label'].replace(mask_label, inplace=True)
y = df.pop('Label')
X = df.drop(columns=dropped_cols, axis=1)
del [df]
X[X < 0] = 0
encoder = LabelEncoder()
y = encoder.fit_transform(y)
data_y = to_categorical(y)
data_x = normalize(X.to_numpy())
del [X, y]
inputDim = len(data_x[0])
outputDim = data_y.shape[1]
print(data_y.shape)

model = get_model(inputDim, outputDim)
model.summary()
model_json = model.to_json()
with open(PATH + "dnn-model.json", "w") as json_file:
    json_file.write(model_json)
plot_model(model,
           to_file=PATH + 'model-dnn.png',
           show_layer_names=True,
           show_shapes=True)
train_x, val_x, train_y, val_y = train_test_split(data_x,
Example #14
def process(args):

    print "Loading graph..."
    if args.format == "adjlist":
        G = graph.load_adjacencylist(args.input, undirected=args.undirected)
    elif args.format == "edgelist":
        G = graph.load_edgelist(args.input, undirected=args.undirected)
    elif args.format == "mat":
        G = graph.load_matfile(args.input,
                               variable_name=args.matfile_variable_name,
                               undirected=args.undirected)
    else:
        raise Exception(
            "Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'"
            % args.format)

    print("Number of nodes: {}".format(len(G.nodes())))

    num_walks = len(G.nodes()) * args.number_walks

    print("Number of walks: {}".format(num_walks))

    data_size = num_walks * args.walk_length

    print("Data size (walks*length): {}".format(data_size))

    if data_size < args.max_memory_data_size:
        #print("Walking...")
        #walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
        #                                    path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))
        print("Training...")
        max_features = len(G.nodes())  # vocabulary size
        dim_proj = args.representation_size  # embedding space dimension
        nb_epoch = 1  # number of training epochs

        # Neural network ( in Keras )
        model = Sequential()
        model.add(
            WordContextProduct(max_features, proj_dim=dim_proj,
                               init="uniform"))
        model.compile(loss='mse', optimizer='rmsprop')
        sampling_table = sequence.make_sampling_table(max_features)

        print("Fitting tokenizer on walks...")
        tokenizer = text.Tokenizer(nb_words=max_features)

        print "Epochs: %d" % nb_epoch
        #tokenizer.fit_on_texts( build_deepwalk_corpus_minibatch_iter(G, args.number_walks, args.walk_length))

        for e in range(nb_epoch):
            print('-' * 40)
            print('Epoch', e)
            print('-' * 40)

            #progbar = generic_utils.Progbar(tokenizer.document_count)
            samples_seen = 0
            losses = []

            #        for i, seq in enumerate(tokenizer.texts_to_sequences_generator( build_deepwalk_corpus_minibatch_iter(G, args.number_walks, args.walk_length) )):

            for i, seq in enumerate(
                    build_deepwalk_corpus_minibatch_iter(
                        G, args.number_walks, args.walk_length)):
                # get skipgram couples for one text in the dataset
                couples, labels = sequence.skipgrams(
                    seq,
                    max_features,
                    window_size=5,
                    negative_samples=1.,
                    sampling_table=sampling_table)
                if couples:
                    # one gradient update per sentence (one sentence = a few 1000s of word couples)
                    X = np.array(couples, dtype="int32")
                    print "Started fitting..."
                    loss = model.fit(X, labels)

                    print "Dumping..."

                    # Dump weights to a temp file
                    weights = model.layers[0].get_weights()[0]

                    norm_weights = np_utils.normalize(weights)

                    # TODO: save weights with indices
                    np.savetxt(args.output, norm_weights)

                    losses.append(loss)
                    if len(losses) % 100 == 0:
                        #                progbar.update(i, values=[("loss", np.mean(losses))])
                        losses = []
                    samples_seen += len(labels)
            print('Samples seen:', samples_seen)
        print("Training completed!")

    else:
        print(
            "Data size {} is larger than limit (max-memory-data-size: {}).  Dumping walks to disk."
            .format(data_size, args.max_memory_data_size))
        print("Walking...")

        #TODO: IMPLEMENT THAT
        print "Not implemented yet..."
        sys.exit(1)

    print "Optimization done. Saving..."
    # recover the embedding weights trained with skipgram:
    weights = model.layers[0].get_weights()[0]

    # we no longer need this
    del model

    norm_weights = np_utils.normalize(weights)

    # TODO: save weights with indices
    np.savetxt(args.output, norm_weights)
    print "Saved!"
Example #15
def read_img(image_path_list):
    x_dataset = np.array(
        [image_process.image_process(x) for x in image_path_list])
    x_dataset = np_utils.normalize(x_dataset)
    return x_dataset
Example #16
n_FC = n_FCls[(param_ind//54)%3]

param_ind2 = 249
batchSizels = [32,64,128,256] 
learningRatels = [1e-6,5e-6,1e-5,5e-5,1e-4,5e-4,1e-3]
DropoutRatels1 = [0.0,0.2,0.3]
DropoutRatels2 = [0.3,0.5,0.6]
batchSize = batchSizels[param_ind2%4] 
learningRate=learningRatels[(param_ind2//4)%7]
DropoutRate1 = DropoutRatels1[(param_ind2//28)%3]
DropoutRate2 = DropoutRatels2[(param_ind2//84)%3]
# Batch Size: 64|Learning Rate: 0.001|DropoutRate1: 0.3|DropoutRate2: 0.6
X_enhancers = np.load(file_path+cell_line+'/K562enhancer_50_10.npy')
X_promoters = np.load(file_path+cell_line+'/K562promoter_50_10.npy')
labels = np.load(file_path+cell_line+'/K562_labels.npy')
X_enhancers = np_utils.normalize(X_enhancers, axis=0)
X_promoters = np_utils.normalize(X_promoters, axis=0)

def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.
        Only computes a batch-wise average of recall.
        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall
    def precision(y_true, y_pred):
        """Precision metric.
print("Fold: " + str(fold));
C = Cs[fold];
gamma = gammas[fold];
BM = Best_model

print "Loading Features"
curSupervecPath = os.path.join(supervecPath, "trainset_" + str(fold), str(nMixtures));

V_feat = utl.readfeatures(curSupervecPath, V)
O_feat = utl.readfeatures(curSupervecPath, O)
T_feat = utl.readfeatures(curSupervecPath, T)
E_feat = utl.readfeatures(curSupervecPath, E)

X_t = np.concatenate((T_feat,E_feat),axis=0)
X_train = normalize(X_t)

input_shape = X_train.shape[1]
dropout_rate = 0.25
opt = Adam(lr=1e-4) #Generator optimizer
dopt = Adam(lr=1e-3) #Discriminator optimizer

# Build Generative model ...

g_input = Input(shape=(input_shape,))
x = g_input
for i in range(len(args.gen_layers_shape)):
    x = Dense(args.gen_layers_shape[i],
              init=args.init,
              activation=args.gen_activation,
              bias=args.bias)(x)
Example #18
def process(args):

  print "Loading graph..."
  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)

  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  if data_size < args.max_memory_data_size:
    #print("Walking...")
    #walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
    #                                    path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))
    print("Training...")
    max_features = len(G.nodes())  # vocabulary size
    dim_proj = args.representation_size  # embedding space dimension
    nb_epoch = 1   # number of training epochs

    # Neural network ( in Keras )
    model = Sequential()
    model.add(WordContextProduct(max_features, proj_dim=dim_proj, init="uniform"))
    model.compile(loss='mse', optimizer='rmsprop')
    sampling_table = sequence.make_sampling_table(max_features)

    print("Fitting tokenizer on walks...")
    tokenizer = text.Tokenizer(nb_words=max_features)

    print "Epochs: %d" % nb_epoch
    #tokenizer.fit_on_texts( build_deepwalk_corpus_minibatch_iter(G, args.number_walks, args.walk_length))

    for e in range(nb_epoch):
        print('-'*40)
        print('Epoch', e)
        print('-'*40)

        #progbar = generic_utils.Progbar(tokenizer.document_count)
        samples_seen = 0
        losses = []

#        for i, seq in enumerate(tokenizer.texts_to_sequences_generator( build_deepwalk_corpus_minibatch_iter(G, args.number_walks, args.walk_length) )):

        for i, seq in enumerate( build_deepwalk_corpus_minibatch_iter(G, args.number_walks, args.walk_length) ):
            # get skipgram couples for one text in the dataset
            couples, labels = sequence.skipgrams(seq, max_features, window_size=5, negative_samples=1., sampling_table=sampling_table)
            if couples:
                # one gradient update per sentence (one sentence = a few 1000s of word couples)
                X = np.array(couples, dtype="int32")
                print "Started fitting..."
                loss = model.fit(X, labels)

                print "Dumping..."

                # Dump weights to a temp file
                weights = model.layers[0].get_weights()[0]

                norm_weights = np_utils.normalize(weights)

                # TODO: save weights with indices
                np.savetxt( args.output, norm_weights )

                losses.append(loss)
                if len(losses) % 100 == 0:
    #                progbar.update(i, values=[("loss", np.mean(losses))])
                    losses = []
                samples_seen += len(labels)
        print('Samples seen:', samples_seen)
    print("Training completed!")

  else:
    print("Data size {} is larger than limit (max-memory-data-size: {}).  Dumping walks to disk.".format(data_size, args.max_memory_data_size))
    print("Walking...")

    #TODO: IMPLEMENT THAT
    print "Not implemented yet..."
    sys.exit(1)

  print "Optimization done. Saving..."
  # recover the embedding weights trained with skipgram:
  weights = model.layers[0].get_weights()[0]

  # we no longer need this
  del model

  norm_weights = np_utils.normalize(weights)

  # TODO: save weights with indices
  np.savetxt( args.output, norm_weights )
  print "Saved!"
Example #19
def experiment(dataFile, optimizer='adam', epochs=10, batch_size=10):

    #Creating data for analysis
    time_gen = int(time.time())
    global model_name
    model_name = f"{dataFile}_{time_gen}"
    #$ tensorboard --logdir=logs/
    tensorboard = TensorBoard(log_dir='logs/{}'.format(model_name))

    seed = 7
    np.random.seed(seed)
    cvscores = []
    print('optimizer: {} epochs: {} batch_size: {}'.format(
        optimizer, epochs, batch_size))

    data = loadData(dataFile)
    data_y = data.pop('Label')

    #transform named labels into numerical values
    encoder = LabelEncoder()
    encoder.fit(data_y)
    data_y = encoder.transform(data_y)
    dummy_y = to_categorical(data_y)
    data_x = normalize(data.values)

    #define 5-fold cross validation test harness
    inputDim = len(data_x[0])
    print('inputdim = ', inputDim)

    #Separate out data
    #X_train, X_test, y_train, y_test = train_test_split(data_x, dummy_y, test_size=0.2)
    num = 0
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=7)
    start = timer()
    for train_index, test_index in sss.split(X=np.zeros(data_x.shape[0]),
                                             y=dummy_y):
        X_train, X_test = data_x[train_index], data_x[test_index]
        y_train, y_test = dummy_y[train_index], dummy_y[test_index]

        #create model
        model = baseline_model(inputDim, y_train.shape)

        #train
        print("Training " + dataFile + " on split " + str(num))
        model.fit(x=X_train,
                  y=y_train,
                  epochs=epochs,
                  batch_size=batch_size,
                  verbose=2,
                  callbacks=[tensorboard],
                  validation_data=(X_test, y_test))

        #save model
        model.save(f"{resultPath}/models/{model_name}.model")

        num += 1

    elapsed = timer() - start

    scores = model.evaluate(X_test, y_test, verbose=1)
    print(model.metrics_names)
    acc, loss = scores[1] * 100, scores[0] * 100
    print('Baseline: accuracy: {:.2f}%: loss: {:.2f}'.format(acc, loss))

    resultFile = os.path.join(resultPath, dataFile)
    with open('{}.result'.format(resultFile), 'a') as fout:
        fout.write('{} results...'.format(model_name))
        fout.write('\taccuracy: {:.2f} loss: {:.2f}'.format(acc, loss))
        fout.write('\telapsed time: {:.2f} sec\n'.format(elapsed))
Example #20
def train_pixelnet(dataset, batchsize, npix, max_epochs, validation_steps, run_id, bottleneck):

    datadir = 'data'
    datafile = os.path.join(datadir, '{}.h5'.format(dataset))

    validation_set_path = os.path.join(datadir, '{}-validation-sets.json'.format(dataset))
    validation_set = data.load_validation_set(validation_set_path, run_id)

    if dataset == 'uhcs':
        nclasses = 4
        cropbar = 38
    elif dataset == 'spheroidite':
        nclasses = 2
        cropbar = None

    model_dir = os.path.join('models', 'crossval', dataset, 'run{:02d}'.format(run_id))
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    images, labels, names = data.load_dataset(datafile, cropbar=cropbar)

    images = data.preprocess_images(images, equalize=True, tf=False)

    # add a channel axis (of size 1 since these are grayscale inputs)
    images = images[:,:,:,np.newaxis]
    images = np.repeat(images, 3, axis=-1)
    images = applications.vgg16.preprocess_input(images)

    # train/validation split
    train_idx, val_idx = data.validation_split(validation_set, names)
    ntrain = len(train_idx)

    X_train, y_train, names_train = images[train_idx], labels[train_idx], names[train_idx]
    X_val, y_val, names_val = images[val_idx], labels[val_idx], names[val_idx]

    inv_freq = y_train.size / np.bincount(y_train.flat)
    class_weights = np.squeeze(normalize(np.sqrt(inv_freq), order=1))

    # don't use alpha-balanced version of focal loss...
    # class_weights = None
    focus_param = 2.0

    # write the validation set to the model directory as well...
    with open(os.path.join(model_dir, 'validation_set.txt'), 'w') as vf:
        for name in names_val:
            print(name, file=vf)

    N, h, w, _ = images.shape

    steps_per_epoch = int(ntrain / batchsize)
    print('steps_per_epoch: {}'.format(steps_per_epoch))

    max_epochs = 25
    validation_steps = 10

    base_model = vgg.fully_conv_model()

    layernames = [
        'block1_conv2_relu', 'block2_conv2_relu', 'block3_conv3_relu', 'block4_conv3_relu', 'block5_conv3_relu', 'fc2_relu'
    ]

    hc = hypercolumn.build_model(base_model, layernames, batchnorm=True, mode='sparse', relu=False)
    model = pixelnet.build_model(hc, nclasses=nclasses, width=1024, mode='sparse', dropout_rate=0.1, l2_reg=0.0)

    opt = adamw.AdamW(lr=1e-3, weight_decay=5e-4, amsgrad=True)
    for layer in base_model.layers:
        layer.trainable = False


    # model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])
    model.compile(
        loss=losses.focal_crossentropy_loss(focus_param=focus_param, class_weights=class_weights),
        optimizer=opt,
        metrics=['acc']
    )

    csv_logger = callbacks.CSVLogger(os.path.join(model_dir, 'training-1.log'))
    checkpoint = callbacks.ModelCheckpoint(
        os.path.join(
            model_dir,
            'weights.{epoch:03d}-{val_loss:.4f}.hdf5'
        ),
        save_best_only=False,
        save_weights_only=True,
        period=25
    )

    training_data = px_utils.random_pixel_samples(
        X_train, y_train, nclasses=nclasses,
        replace_samples=False, horizontal_flip=True, vertical_flip=True,
        rotation_range=360, zoom_range=0.5, intensity_shift=0.05
    )


    f = model.fit_generator(
        training_data,
        steps_per_epoch,
        epochs=max_epochs,
        callbacks=[csv_logger, checkpoint],
        validation_data=px_utils.random_pixel_samples(X_val, y_val, nclasses=nclasses, replace_samples=False),
        validation_steps=validation_steps,
    )

    for layer in base_model.layers:
        layer.trainable = True

    # fine-tune the whole network
    opt = adamw.AdamW(lr=1e-5, weight_decay=5e-4, amsgrad=True)
    model.compile(
        loss=losses.focal_crossentropy_loss(focus_param=focus_param, class_weights=class_weights),
        optimizer=opt,
        metrics=['acc']
    )

    csv_logger = callbacks.CSVLogger(os.path.join(model_dir, 'finetune-1.log'))
    checkpoint = callbacks.ModelCheckpoint(
        os.path.join(
            model_dir,
            'weights-finetune.{epoch:03d}-{val_loss:.4f}.hdf5'
        ),
        save_best_only=False,
        save_weights_only=True,
        period=25
    )

    f = model.fit_generator(
        training_data,
        steps_per_epoch,
        epochs=max_epochs,
        callbacks=[csv_logger, checkpoint],
        validation_data=px_utils.random_pixel_samples(X_val, y_val, nclasses=nclasses, replace_samples=False),
        validation_steps=validation_steps,
    )
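
The class_weights line near the top of this function converts inverse class frequencies into L1-normalized weights (square-rooted so rare classes are up-weighted without dominating); a standalone sketch of that computation, assuming normalize(..., order=1) divides by the L1 norm:

import numpy as np

counts = np.array([900, 100])      # labelled pixels per class
inv_freq = counts.sum() / counts   # [~1.11, 10.0]
w = np.sqrt(inv_freq)
w = w / np.abs(w).sum()            # L1-normalize so the weights sum to 1
print(w)                           # ~[0.25, 0.75] -> the rare class is weighted up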
Example #21
def evaluate_tf(model, X_enhancers, X_promoters, labels):
    X_enhancers = np_utils.normalize(X_enhancers, axis=0)
    X_promoters = np_utils.normalize(X_promoters, axis=0)
    evaluate(model, (X_enhancers, X_promoters), labels)
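
Here normalize is applied with axis=0, i.e. each feature column is scaled to unit L2 norm across samples rather than each row; a short sketch under that assumption:

import numpy as np

X = np.array([[3.0, 1.0],
              [4.0, 1.0]])
col_norms = np.linalg.norm(X, axis=0, keepdims=True)  # [[5., 1.414...]]
print(X / col_norms)  # every column now has unit L2 norm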
Example #22
        model.save_weights(os.path.join(save_dir, model_save_fname),
                           overwrite=True)

if test_model:
    print("It's test time!")
    print('Load model...')
    model.load_weights(os.path.join(save_dir, model_load_fname))

    # recover the embedding weights trained with skipgram:
    weights = model.layers[0].get_weights()[0]

    # we no longer need this
    del model

    weights[:skip_top] = np.zeros((skip_top, dim_proj))
    norm_weights = np_utils.normalize(weights)

    word_index = tokenizer.word_index
    reverse_word_index = dict([(v, k) for k, v in list(word_index.items())])
    word_index = tokenizer.word_index

    def embed_word(w):
        i = word_index.get(w)
        if (not i) or (i < skip_top) or (i >= max_features):
            return None
        return norm_weights[i]

    def closest_to_point(point, nb_closest=10):
        proximities = np.dot(norm_weights, point)
        tups = list(zip(list(range(len(proximities))), proximities))
        tups.sort(key=lambda x: x[1], reverse=True)
Example #23
printArrayInfo(trainImages, trainLabels)
#showImageAndLabel(trainImages,trainLabels,144)

#Reshaping the array to 4-dims so that it can work with the Keras API (not necessary)
#trainImages = trainImages.reshape(trainImages.shape[0],28,28,1)
#testImages = testImages.reshape(testImages.shape[0],28,28,1)

# Making sure that the values are float
trainImages = trainImages.astype('float32')
testImages = testImages.astype('float32')
''' 
Normalizing the RGB codes by dividing it to the max RGB value.
trainImages /= 255
testImages /= 255
'''
trainImages = normalize(trainImages)
testImages = normalize(testImages)

#Categorize the labels (not required here...)
#trainLabels = np_utils.to_categorical(trainLabels,10)
#testLabels = np_utils.to_categorical(testLabels,10)

#Building the fully-connected Neural Network
model = Sequential([
    #transforms the format of the images from a 2d-array (of 28 by 28 pixels), to a 1d-array of 28 * 28 = 784 pixels
    Flatten(input_shape=(28, 28)),
    #Densely-connected, or fully-connected, neural layers.
    Dense(128, activation=tf.nn.relu),
    Dense(128, activation=tf.nn.relu),
    Dense(10, activation=tf.nn.softmax)
])
Example #24
    print 'Epoch:', e
    progbar = generic_utils.Progbar(tokenizer.document_count)
    samples_seen, losses = 0, []
    for i, seq in enumerate(tokenizer.texts_to_sequences_generator(text_generator())):
        couples, labels = sequence.skipgrams(seq, max_features, window_size=4, negative_samples=1., sampling_table=sampling_table)
        if couples:
            X = np.array(couples, dtype="int32")
            loss = model.train_on_batch(X, labels)
            losses.append(loss)
            if len(losses) % 100 == 0:
                progbar.update(i, values=[("loss", np.mean(losses))])
                losses = []
            samples_seen += len(labels)

weights = model.layers[0].get_weights()[0]
weights[:skip_top] = np.zeros((skip_top, dim_proj))
norm_weights = np_utils.normalize(weights)
del model

word_index = tokenizer.word_index
reverse_word_index = dict([(v, k) for k, v in list(word_index.items())])

# ----- Test -----

words = ["我"]

for w in words:
    print '='*4, w, '='*4
    for r in closest_to_word(w):
        print r[0], r[1]
Example #25
print(keras.__version__)

# 4. Load pre-shuffled MNIST data into train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# 5. Preprocess input data
#X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)
#X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)
#X_train = X_train.astype('float32')
#X_test = X_test.astype('float32')
#X_train /= 255
#X_test /= 255

# 6. Preprocess input data (normalize) and class labels
X_train = np_utils.normalize(X_train, axis=1)
X_test = np_utils.normalize(X_test, axis=1)

Y_train = np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)

# 7. Define model architecture
model = Sequential()

#model.add(Convolution2D(32, 3, 3, activation='relu', input_shape=(1,28,28)))
#model.add(Convolution2D(32, 3, 3, activation='relu'))
#model.add(MaxPooling2D(pool_size=(2,2)))
#model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128, activation='relu'))