示例#1
0
def get_parse():
    """Parse the command-line options and load the two data sets.

    Recognised options:
        -d / --data           training data file (required)
        -r / --reference      references data file (required)
        -lf / --load_file     file name of a trained model to load
        -m / --mode           'train' (default) or 'sample'
        -ep / --epoch         number of epochs, default 100000
        -bs / --batch_size    batch size, default 64
        -ih / --image_height  image height, default 480
        -iw / --image_width   image width, default 720

    Returns:
        A tuple ``(x, y, args)``. ``x`` and ``y`` are whatever
        ``data_reader.read`` returns for the data file and the reference
        file (each call also receives the image height and width), and
        ``args`` is the dict of parsed options.
    """
    parser = argparse.ArgumentParser()

    # Input files and optional model checkpoint.
    parser.add_argument('-d', '--data', required=True,
                        help='Training data file.')
    parser.add_argument('-r', '--reference', required=True,
                        help='References data file.')
    parser.add_argument('-lf', '--load_file', required=False,
                        help='Filename selected for loading trained model.')

    # Run mode.
    parser.add_argument('-m', '--mode', required=False,
                        help='Choose between training mode or sampling mode.',
                        default='train', choices=['train', 'sample'])

    # Integer options as (short flag, long flag, help text, default).
    # The defaults are strings; argparse converts them through ``type=int``.
    integer_options = (
        ('-ep', '--epoch', 'Number of epoch', '100000'),
        ('-bs', '--batch_size', 'Batch size', '64'),
        ('-ih', '--image_height', 'Image height', '480'),
        ('-iw', '--image_width', 'Image width', '720'),
    )
    for short_flag, long_flag, description, fallback in integer_options:
        parser.add_argument(short_flag, long_flag, required=False,
                            help=description, default=fallback, type=int)

    args = vars(parser.parse_args())
    height = args['image_height']
    width = args['image_width']
    x = data_reader.read(args['data'], height, width)
    y = data_reader.read(args['reference'], height, width)
    return x, y, args
#!/usr/bin/env python

from data_reader import read
import matplotlib.pyplot as plt
import sys

# Read the input named by the last command-line argument (``read`` comes
# from the project's data_reader module).
gtr_w = read(sys.argv[-1])
# Presumably draws the normalized Fourier transform on the current
# matplotlib figure -- confirm against the object's implementation.
gtr_w.plot_normalized_ft()
# Label the axes and display the figure.
plt.xlabel('Frequency [Hz]')
plt.ylabel('Amplitude [V]')
plt.show()
示例#3
0
# Custom libs
import prototypes
import distances
from precalcs import get_classes
from knn import knn
from data_reader import read
from args import args
from nn import nn
from random import shuffle
from numpy import mean

# Load the data set and copy the command-line options into module names.
dataset = read(args.d)
k, p = args.k, args.p
classes = get_classes(dataset)
repetitions, window, division, gen_repetitions = (
    args.r, args.window, args.split, args.repetitions)

# Look the distance function up by name in the ``distances`` module.
distance = getattr(distances, args.distance)

# Optional in-place shuffle before the split.
if args.shuffle:
    shuffle(dataset)

# The first ``division`` fraction of the data set goes to ``training``,
# everything after it to ``evaluation``.
training_size_index = int(len(dataset) * division)
training = dataset[:training_size_index]
evaluation = dataset[training_size_index:]

alpha, e = 0.01, 2
import nlp_model as nm
import data_reader

# Create the model through the project's nlp_model helpers ('en', blank=True)
# and let initialize_textcat() configure it.
nlp = nm.initialize_model('en', blank=True)
nm.initialize_textcat(nlp)

# NOTE(review): the value returned by read() is discarded here -- confirm
# whether it was meant to be kept for the training step.
data_reader.read()
optimizer = nlp.begin_training()
print("Training model...")
示例#5
0
from random import shuffle
import numpy

# Custom libs
import distances
import args
from data_reader import read
from precalcs import precalcs
from precalcs import swap_array

# ``time()`` is called below but this script's import block never imports it.
from time import time

# Getting the arguments
arguments = args.args
kfold = arguments.kfold
k = arguments.k
distance = getattr(distances, arguments.distance)
dataset = read(arguments.d)
w = arguments.w
swap = arguments.swap

# Time the pre-processing: the optional shuffle plus the precalculations.
precalcs_time_begin = time()

# Shuffles dataset if it's said so
if arguments.shuffle:
    shuffle(dataset)
# precalcs() is skipped when the plain "euclidean" distance is selected.
if arguments.distance != "euclidean":
    precalcs(dataset)

precalcs_time_endtime = time()
precalcs_time = precalcs_time_endtime - precalcs_time_begin
# The swap runs after the timer stops, so it is not part of the reported time.
if swap:
    swap_array(dataset)
# Single-argument print calls behave identically on Python 2 and Python 3.
print("Pre-processing time: " + str(precalcs_time) + ' seconds')
print("")
示例#6
0
    biases = tf.get_variable(name="biases",
                             shape=[1],
                             initializer=tf.initializers.constant)
    output = tf.matmul(input_data, weights) + biases
    return tf.nn.sigmoid(output)


def train(input_x, input_y, ephocs=10, batch_size=100):
    """Train the ``inference`` model with mini-batch gradient descent.

    Placeholders are sized from the second dimension of ``input_x`` and
    ``input_y``. The output of ``inference`` is used as a probability in a
    summed cross-entropy loss, which a gradient-descent optimizer with step
    size 0.0003 minimises. After every epoch the loss over the full data set
    is printed.

    Args:
        input_x: array exposing ``.shape`` and row slicing; model inputs.
        input_y: array exposing ``.shape`` and row slicing; targets.
        ephocs: number of passes over the data.
        batch_size: maximum number of rows fed per optimisation step.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    sample_count = input_x.shape[0]
    x = tf.placeholder("float", shape=[None, input_x.shape[1]], name="x-input")
    y = tf.placeholder("float", shape=[None, input_y.shape[1]], name="y-input")
    output = inference(x)
    # Define the loss function: the logistic-regression (cross-entropy) loss.
    cost = -tf.reduce_sum(y * tf.log(output) + (1 - y) * tf.log(1 - output))
    entry_cost = tf.train.GradientDescentOptimizer(0.0003).minimize(cost)
    # Number of batches, counting a final partial batch when one exists.
    batches = sample_count // batch_size
    if sample_count % batch_size != 0:
        batches += 1
    with tf.Session() as sess:
        # tf.initialize_all_variables() is deprecated; this is its replacement.
        sess.run(tf.global_variables_initializer())
        for _ in range(ephocs):
            for batch in range(batches):
                # batch < batches keeps start below sample_count, so no
                # wrap-around is needed.
                start = batch * batch_size
                end = min(start + batch_size, sample_count)
                sess.run([entry_cost],
                         feed_dict={x: input_x[start:end],
                                    y: input_y[start:end]})
            c = sess.run([cost], feed_dict={x: input_x, y: input_y})
            print(c)


# Unpack the two values returned by data_reader.read() and pass them to
# train() as input_x / input_y, keeping its default ephocs and batch_size.
data, label = data_reader.read()
train(data, label)
from collections import defaultdict

from data_reader import read
from nlp_model import initialize_model
from similarity import Similarity
from similarity_trainer import SimilarityTrainer

# path = 'data.psv'
# Raw string: the Windows backslashes are kept literally instead of relying
# on unrecognised escape sequences such as "\D". The resulting value is the
# same as before.
path = r'D:\Dev\Java\clinical-trials\trials_combined_text.psv'
maxRecords = 120

# Read the pipe-separated file into two values: labels and text rows.
correctLabels, textRows = read(path, '|')

# Distinct labels, sorted and joined into one space-separated string.
labels = sorted(set(correctLabels))
labelString = ' '.join(labels)

# en_core_web_lg
nlp = initialize_model('en')
similarity = Similarity(nlp, labelString, boost=5, threshold=0.7)
# Never process more than maxRecords records.
size = min(len(correctLabels), maxRecords)
trainer = SimilarityTrainer(correctLabels, textRows, similarity, size)
# trainer.detect_entities()

misinterpretations, correctGuesses = trainer.calculate_similarity()

# Report the count and the percentage; an empty input (size == 0) reports
# 0.0 instead of raising ZeroDivisionError.
print("Correct Guesses", correctGuesses,
      correctGuesses / size * 100 if size else 0.0)

# Group the misinterpretations by their key.
misDict = defaultdict(list)
for m in misinterpretations:
    key = m.get_key()
    misDict[key].append(m)
示例#8
0
def get_cost(theta, x, y):
    """Return the summed cross-entropy cost of ``theta`` on ``(x, y)``.

    The prediction is ``sigmoid(np.dot(x, theta))``; ``sigmoid`` is defined
    elsewhere in this file. The result is the negated sum of
    ``y * log(h) + (1 - y) * log(1 - h)`` over all entries.
    """
    predicted = sigmoid(np.dot(x, theta))
    log_likelihood = y * np.log(predicted) + (1 - y) * np.log(1 - predicted)
    return -1 * np.sum(log_likelihood)


def batch_gradient_descending(theta, x, y, learning_rate, batch_size, ephocs):
    """Fit ``theta`` by mini-batch gradient descent with a decaying step size.

    Every step subtracts ``rate * get_grad(theta, batch_x, batch_y)`` from
    ``theta``. After each step the cost over the full data set is recorded,
    and the recorded costs are passed to ``show.show_cost`` at the end.

    Args:
        theta: initial parameter array.
        x: input array exposing ``.shape`` and row slicing.
        y: target array, sliced with the same row ranges as ``x``.
        learning_rate: initial step size (the rate used in epoch 0).
        batch_size: maximum number of rows per step.
        ephocs: number of passes over the data.

    Returns:
        The parameter array after the final update. Earlier versions
        returned nothing, so the fitted parameters were lost to the caller.
    """
    costs = []
    rows = x.shape[0]
    # Number of batches, counting a final partial batch when one exists.
    batches = rows // batch_size
    if rows % batch_size != 0:
        batches += 1
    for ephoc in range(ephocs):
        # Learning-rate decay: always derive the rate from the initial
        # value, lr0 / (1 + 0.99 * epoch). Dividing the previously decayed
        # rate again would compound the decay and drive the step size
        # towards zero within a few epochs.
        rate = learning_rate / (1 + 0.99 * ephoc)
        for batch in range(batches):
            # batch < batches keeps start below rows, so no wrap-around
            # is needed.
            start = batch * batch_size
            end = min(start + batch_size, rows)
            t_x = x[start:end]
            t_y = y[start:end]
            theta = theta - rate * get_grad(theta, t_x, t_y)
            # Track the cost on the whole data set after every step.
            costs.append(get_cost(theta, x, y))
    show.show_cost(costs)
    return theta


# Unpack the two values returned by data_reader.read(); they are used as the
# inputs (x) and targets (y) of the gradient-descent run below.
x, y = data_reader.read()
# Random initial parameters: one row per column of x, a single column.
theta = np.random.rand(x.shape[1], 1)
learning_rate = 0.1
batch_size = 100
# Run 20 epochs with the settings above.
batch_gradient_descending(theta, x, y, learning_rate, batch_size, 20)
示例#9
0
PLOT = False

if __name__ == '__main__':
    if '-u' in sys.argv:
        UNC = True

    if '-c' in sys.argv:
        CONNECT = True

    if '-e' in sys.argv:
        EXPECTED = True

    if '-p' in sys.argv:
        PLOT = True

    gtr_waves = data_reader.read(sys.argv[-1], strings=['E'], frets=['0'])
    peaks_all = []

    fig = plt.figure()
    ax = fig.gca(projection='3d')

    if PLOT:
        i = 0
        for gtr_wave in gtr_waves:
            x_s, y_s = gtr_wave.normalized_abs_dft()
            peaks = gtr_wave.ft_peaks(min_dist=0.001, thres=0.2)
            peaks_all.append(peaks)

            ax.plot(x_s, np.full(x_s.size, i) + D_MIN, y_s)

            for peak in peaks:
示例#10
0
from operator import itemgetter

from joblib import Parallel, delayed
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import data_reader as dr
import grid_search as gs
from vectorizer import Vectorizer

# Read the two polarity files; the labels list is passed alongside the
# file names (1 for the .pos file, 0 for the .neg file, by position).
df = dr.read(
    file_names=['data/rt-polarity.pos', 'data/rt-polarity.neg'],
    labels=[1, 0],
)

# Task 1: 80/20 split with a fixed seed; column 0 is the input and
# column 1 is the target cast to int.
X_train, X_test, y_train, y_test = train_test_split(
    df.values[:, 0],
    df.values[:, 1].astype('int'),
    test_size=0.2,
    random_state=0,
)

# Task 2: build the vectorizer from the training documents only.
v = Vectorizer(docs=X_train)

# Task 3-4: run the grid search for f_num = 1..5 on four workers, then
# order the results by their first field, highest first.
result = Parallel(n_jobs=4)(
    delayed(gs.grid_search)(X=X_train, y=y_train, f_num=n, v=v)
    for n in range(1, 6))
result.sort(key=itemgetter(0), reverse=True)
# Fields 1 and 2 of the top-ranked result.
best_c, best_penalty = itemgetter(1, 2)(result[0])