Example No. 1
def __init__(self):
    self.response = None
    self.data = None
    self.more = None
    self.loader = Loader(self.SHORTNAME, 'uuid')
Example No. 2
from PyQt5 import QtCore, QtWidgets
from loader import Loader
from plotwindow import PlotWindow
from ui_plottermanagerwindow import PlotterManagerWindow
from pathlib import Path
import sys

app = QtWidgets.QApplication(sys.argv)
keithley_logger_temp_path = Path('C:/', 'Users', 'Justin', 'Desktop', 'Working', 'Code', 'Keithley Logger Work')
work_dir = Path(keithley_logger_temp_path, 'MagField', 'MagField')
log_drive = Path(keithley_logger_temp_path, 'Log Drive', 'Fake Data')
log_drive_2 = Path(keithley_logger_temp_path, 'Log Drive', 'Mag Data Fake')
file_prefix = 'Fake Data'
fake_data_loader = Loader(log_drive, file_prefix, quiet=True)
mag_data_fake_loader = Loader(log_drive_2, 'Mag Data Fake', quiet=True)
mag_data_loader = Loader(work_dir, 'MagField', quiet=True)
plotter1 = PlotWindow(fake_data_loader)
plotter2 = PlotWindow(mag_data_fake_loader)
plotter3 = PlotWindow(mag_data_loader)

ui = PlotterManagerWindow([plotter1, plotter2, plotter3])
ui.show()
sys.exit(app.exec_())
Example No. 3
import os

import tensorflow as tf
from tensorflow import keras

from loader import Loader

# LPRNet, Evaluator and ctc_loss are assumed to come from the surrounding project.
def train(args):
    save_weights_only = args["save_weights_only"]
    loader = Loader(args["label"], args["img_dir"], load_all=args["load_all"])
    net = LPRNet(loader.get_num_chars() + 1)

    if args["pretrained"]:
        net.load_weights(args["pretrained"])
        print("Pretrained model loaded")

    model = net.model
    train_dataset = tf.data.Dataset.from_generator(
        loader,
        output_types=(tf.float32,
                      tf.int32, tf.int32)).batch(args["batch_size"]).shuffle(
                          len(loader)).prefetch(tf.data.experimental.AUTOTUNE)
    print("Training data loaded")

    if args["valid_label"] and args["valid_img_dir"]:
        evaluator = Evaluator(net, args["valid_label"], args["valid_img_dir"],
                              args["valid_batch_size"])
        print("Validation data loaded")
    else:
        evaluator = None

    learning_rate = keras.optimizers.schedules.ExponentialDecay(
        args["learning_rate"],
        decay_steps=args["decay_steps"],
        decay_rate=args["decay_rate"],
        staircase=args["staircase"])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    best_val_loss = float("inf")
    for step, (imgs, labels, label_lengths) in enumerate(train_dataset):
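        # One training step: forward pass under GradientTape, CTC loss, then a
        # gradient update on the model weights.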
        if step == args["num_steps"]:
            break
        with tf.GradientTape() as tape:
            logits = model(imgs, training=True)
            batch_size, times = logits.shape[:2]
            logits_lengths = tf.expand_dims(tf.tile(
                tf.constant([times], tf.int32),
                tf.constant([batch_size], tf.int32)),
                                            axis=1)
            loss_value = ctc_loss(labels, logits, logits_lengths,
                                  label_lengths)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        loss_value = float(loss_value)
        print("[batch {} - Seen: {} samples] "
              "Training loss: {}, "
              "learning_rate: {} ".format(
                  step + 1, (step + 1) * args["batch_size"], float(loss_value),
                  optimizer._decayed_lr(tf.float32).numpy()))

        # Run validation every `valid_interval` batches.
        if step % args["valid_interval"] == 0 and step > 0:
            if evaluator is not None:
                val_loss, _, _ = evaluator.evaluate()
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    if save_weights_only:
                        net.save_weights(
                            os.path.join(args["saved_dir"], "weights_best.pb"))
                    else:
                        net.save(
                            os.path.join(args["saved_dir"], "model_best.pb"))
                    print("save best at batch: {}, loss: {}".format(
                        step + 1, val_loss))

    if save_weights_only:
        net.save_weights(os.path.join(args["saved_dir"], "weights_last.pb"))
    else:
        net.save(os.path.join(args["saved_dir"], "model_last.pb"))
Example No. 4
from database import Patient,Medic,Agenda
from database import Speciality
from database import User
from database import Turno
from pony.orm import commit
from pony.orm import select,delete
from loader import Loader
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import calendar
from datetime import date
import datetime

# Load configurations from YML file.
config = Loader().settings

# HELPER #

def agenda_cargador(data):

    agenda = {}
    meses = ["Enero-1", "Febrero-2", "Marzo-3", "Abril-4", "Mayo-5", "Junio-6",
             "Julio-7", "Agosto-8", "Septiembre-9", "Octubre-10",
             "Noviembre-11", "Diciembre-12"]


    for mes in meses:
        agenda[mes] = {
            'lunes': {
Example No. 5
from os import environ
from loader import Loader
import actions

LOADER = Loader()


def lambda_handler(event, context):
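    # Lambda entry point: look up the Personalize event tracker named in the
    # incoming event and pass its current status to the delete action.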
    # return event
    status = LOADER.personalize_cli.describe_event_tracker(
        eventTrackerArn=event['eventTrackerArn'])['eventTracker']

    actions.take_action_delete(status['status'])
    return status['status']
Example No. 6
    return args, batch1, batch2


args, batch1, batch2 = parse_args()
if not args.restore_folder:
    with open(os.path.join(args.savefolder, 'args.txt'), 'w+') as f:
        for arg in vars(args):
            argstring = "{}: {}\n".format(arg, vars(args)[arg])
            f.write(argstring)
            print(argstring[:-1])

if not os.path.exists("{}/output".format(args.savefolder)):
    os.mkdir("{}/output".format(args.savefolder))

load1 = Loader(batch1, labels=np.arange((batch1.shape[0])), shuffle=True)
load2 = Loader(batch2, labels=np.arange((batch2.shape[0])), shuffle=True)

print("Domain 1 shape: {}".format(batch1.shape))
print("Domain 2 shape: {}".format(batch2.shape))
model = args.model(args, x1=batch1, x2=batch2, name=args.modelname)

plt.ioff()
fig = plt.figure(figsize=(4, 10))
np.set_printoptions(precision=3)
decay = model.args.learning_rate / (args.training_steps -
                                    args.training_steps_decayafter)

for i in range(1, args.training_steps):

    if i % 10 == 0: print("Iter {} ({})".format(i, now()))
Example No. 7
def data_processor(filename, config):
    input_image = tiff.imread('input_images/{0}'.format(filename))
    label_image = tiff.imread('label_images/{0}'.format(filename[:-1]))
    label_image = label_image[:, :, :1] / 255

    if config.augment:
        angle = randint(0, 360)
        input_image = ndimage.rotate(input_image, angle, reshape=False)
        label_image = ndimage.rotate(label_image, angle, reshape=False)

    input_image = (input_image - np.mean(input_image)) / np.std(input_image)
    return (filename, input_image, label_image)


test_label_files = label_files[:5]
test_loader = Loader(test_label_files, 5, processor=data_processor)
test_loader.start()
test_batch = test_loader.get_batch(5)
test_loader.stop()

batch_size = 2
train_label_files = label_files[5:]
train_loader = Loader(train_label_files,
                      batch_size * 4,
                      processor=data_processor,
                      randomize=True,
                      augment=True)
train_loader.start()

shouldLoad = False
modelName = m.modelName + "-x2-msr"
Example No. 8
from handlers.lol import LolHandler
from handlers.start import StartHandler
from handlers.test import TestHandler
from loader import Loader
from store import GLOBAL_STORE


loader = Loader([StartHandler, LolHandler, TestHandler])


# print(GLOBAL_STORE.fetch('start').command_handler())
# print(GLOBAL_STORE.fetch('lol').command_handler())
# print(GLOBAL_STORE.fetch('test').command_handler())

print(loader.get(StartHandler).command_handler())
Example No. 9
def output_batch_correction(rawfiles):
    """Use already trained models to output batch corrected data."""
    try:
        model_dir = os.path.join(args.output_dir, 'models', 'batch_corrected')
        data_dir = os.path.join(args.output_dir, 'batch_corrected')
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.mkdir(data_dir)
        ref = rawfiles[0]
        refx = get_data(ref)
        refname = os.path.split(ref)[-1]

        print("Starting to output {} batch corrected files...".format(len(rawfiles)))
        for counter, nonref in enumerate(rawfiles[1:]):
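            # Pair the reference file with each non-reference file, push both
            # through the SAUCIE model restored for that file, and write out the
            # batch-corrected values.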
            nonrefname = os.path.split(nonref)[-1]
            print("Outputing file {}".format(counter))

            nonrefx = get_data(nonref)
            alldata = np.concatenate([refx.as_matrix(), nonrefx.as_matrix()], axis=0)
            alllabels = np.concatenate([np.zeros(refx.shape[0]), np.ones(nonrefx.shape[0])], axis=0)

            load = Loader(data=alldata, labels=alllabels, shuffle=False)

            tf.reset_default_graph()
            restore_folder = os.path.join(model_dir, nonrefname)
            saucie = SAUCIE(None, restore_folder=restore_folder)

            recon, labels = saucie.get_layer(load, 'output')

            #recon = sinh(recon)

            # write out reference file
            if args.cols:
                inds = args.cols
            else:
                inds = range(recon.shape[1])

            if counter == 0:
                reconref = recon[labels == 0]
                rawdata = get_data(ref, return_rawfile=True)
                for ind, c in enumerate(inds):
                    rawdata.iloc[:, c] = reconref[:, ind]

                outfileref = os.path.join(data_dir, refname)
                write_data(outfileref, rawdata.columns.tolist(), rawdata)
                #fcswrite.write_fcs(outfileref, rawdata.columns.tolist(), rawdata)

            # write out nonreference file
            reconnonref = recon[labels == 1]
            rawdata = get_data(nonref, return_rawfile=True)

            for ind, c in enumerate(inds):
                rawdata.iloc[:, c] = reconnonref[:, ind]

            outfilenonref = os.path.join(data_dir, nonrefname)
            write_data(outfilenonref, rawdata.columns.tolist(), rawdata)
            #fcswrite.write_fcs(outfilenonref, rawdata.columns.tolist(), rawdata)

    except Exception as ex:
        # if it didn't run all the way through, clean everything up and remove it
        shutil.rmtree(data_dir)
        raise(ex)
Example No. 10
from loader import Loader

loader = Loader('faces94/malestaff/voudcx', 'faces94/malestaff/tony')
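# get_all() is assumed to return one array per directory; print their shapes.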
a, b = loader.get_all()
print(a.shape)
print(b.shape)
Example No. 11
def test_valid_loader_init(self):
    loader = Loader(model_weights_path="../config/yolov3.weights",
                    model_config_path="../config/yolov3.cfg",
                    coco_names="../config/coco.names")
    assert isinstance(loader, Loader)
Example No. 12
def _get_default_environment(self):
    return SandboxedEnvironment(
        extensions=['jinja2.ext.loopcontrols', 'jinja2.ext.do'],
        loader=Loader())
Example No. 13
pass  # TESTING ONLY ...

# ------------------------- MAIN PROGRAM --------------------------------------

if __name__ == '__main__':

    from statistics import Statistics
    from loader import Loader
    from view import View

    print("\nEXPERIMENTS\n")

    # SETTING A DATASET LOADER

    # The path to datasets may be optionally changed ...
    loader = Loader(path='../datasets/UCI datasets/')

    # SETTING DATASETS

    #  datasets = ( 'arrhythmia', ('balloons', 1), ('balloons', 2), \
    #               'breast-cancer', 'census', 'chess', 'fars', 'flare', \
    #               'kr-vs-k', ('lymphography', 1), ('lymphography', 2), \
    #               'mushroom', 'SPECT-binary', 'SPECT-full', 'splice' )

    #  datasets = ( 'arrhythmia', ('balloons', 1), ('balloons', 2), \
    #               'breast-cancer', 'chess' )

    #  datasets = ( 'arrhythmia', ('balloons', 1), ('balloons', 2), 'flare' )

    #  datasets = ( 'flare', )
Example No. 14
def run_train(hparams, inputs, loss, train_op):
    # Decode inputs
    mixture, vocals, instrumentals = inputs

    # Prepare data
    loader = Loader(hparams)

    # Set config for GPU
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Setup saver for saving checkpoints
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:

        # Setup train/eval writers and log hparams
        # train_writer, eval_writer = setup(hparams, sess, saver)
        # log_hparams(hparams, train_writer)
        sess.run(tf.global_variables_initializer())
        with open(hparams.log_file_train, 'a+') as f:
            f.write('\n**************** NEW TRAIN SESSION ****************\n')
        with open(hparams.log_file_val, 'a+') as f:
            f.write(
                '\n**************** NEW VALIDATION SESSION ****************\n')

        start = time.time()
        n_evals = 1
        n_saves = 1

        # Iterate through max number of training steps
        for step in range(hparams.max_steps):
            feed_dict = make_feed_dict(loader, inputs)

            # Run session to get loss and gradients
            result = sess.run([loss, train_op], feed_dict=feed_dict)

            raw_loss, _ = result

            # Print loss per time step
            if step % hparams.print_loss_frequency == 0:
                print('%d (%d %d%%) Loss: %.8f' %
                      (time.time() - start, step,
                       float(step) / hparams.max_steps * 100, raw_loss))
            if step % hparams.train_loss_frequency == 0:
                with open(hparams.log_file_train, 'a+') as f:
                    f.write('%d (%d %d%%) Loss: %.8f\n' %
                            (time.time() - start, step,
                             float(step) / hparams.max_steps * 100, raw_loss))

            # Run Eval
            if step % hparams.val_loss_frequency == 0:
                feed_dict_val = make_feed_dict(loader, inputs, val=True)
                result = sess.run(loss, feed_dict=feed_dict_val)
                raw_loss = result
                with open(hparams.log_file_val, 'a+') as f:
                    f.write('%d (%d %d%%) Loss: %.8f\n' %
                            (time.time() - start, step,
                             float(step) / hparams.max_steps * 100, raw_loss))
                n_evals += 1

            if step > 10 and (time.time() -
                              start) // hparams.save_model_interval >= n_saves:
                print("Saving Model")
                makedirs('./' + hparams.save_dir + ('%s/' % str(step)))
                saver.save(sess, './' + hparams.save_dir + ('%s/' % str(step)),
                           step)
                n_saves += 1
Example No. 15
import numpy as np
from loader import Loader
import matplotlib.pyplot as plt

load = Loader("testing")

# Load in the digits from the dataset, each in their own array
zeros, labels = load.load_dataset([0])
ones, labels = load.load_dataset([1])
twos, labels = load.load_dataset([2])
threes, labels = load.load_dataset([3])
fours, labels = load.load_dataset([4])
fives, labels = load.load_dataset([5])
sixes, labels = load.load_dataset([6])
sevens, labels = load.load_dataset([7])
eights, labels = load.load_dataset([8])
nines, labels = load.load_dataset([9])

# Find the minimum number of digits so we don't go outside
# the bounds of the array
lengths = [len(zeros), len(ones), len(twos), len(threes),
           len(fours), len(fives), len(sixes), len(sevens), len(eights),
           len(nines)]

length = min(lengths)

# Set up checking for similarity level between the same digits
sim0 = []
sim1 = []
sim2 = []
sim3 = []
sim4 = []
Example No. 16
def output_cluster(inputfiles):
    """Use already trained model to output clustered data."""
    try:
        model_dir = os.path.join(args.output_dir, 'models', 'clustered')
        data_dir = os.path.join(args.output_dir, 'clustered')
        if os.path.exists(data_dir):
            shutil.rmtree(data_dir)
        os.mkdir(data_dir)

        tf.reset_default_graph()
        saucie = SAUCIE(None, restore_folder=model_dir)

        print("Finding all binary codes")
        all_codes = {}
        for counter, f in enumerate(inputfiles):
            x = get_data(f)
            load = Loader(data=x, shuffle=False)

            acts = saucie.get_layer(load, 'layer_c')
            acts = acts / acts.max()
            binarized = np.where(acts > .000001, 1, 0)

            unique_rows, counts = np.unique(binarized, axis=0, return_counts=True)
            for unique_row in unique_rows:
                unique_row = tuple(unique_row.tolist())
                if unique_row not in all_codes:
                    all_codes[unique_row] = len(all_codes)

        print("Found {} clusters".format(len(all_codes)))

        print("Starting to output {} clustered files...".format(len(inputfiles)))
        for counter, f in enumerate(inputfiles):
            fname = os.path.split(f)[-1]
            print("Outputing file {}".format(counter))
            x = get_data(f)
            load = Loader(data=x, shuffle=False)
            acts = saucie.get_layer(load, 'layer_c')
            acts = acts / acts.max()
            binarized = np.where(acts > .000001, 1, 0)

            clusters = -1 * np.ones(x.shape[0])
            for code in all_codes:
                rows_equal_to_this_code = np.where(np.all(binarized == code, axis=1))[0]
                clusters[rows_equal_to_this_code] = all_codes[code]


            embeddings = saucie.get_layer(load, 'embeddings')

            rawdata = get_data(f, return_rawfile=True)
            outcols = rawdata.columns.tolist() + ['Cluster', 'Embedding_SAUCIE1', 'Embedding_SAUCIE2']
            rawdata = pd.concat([rawdata, pd.DataFrame(clusters), pd.DataFrame(embeddings[:, 0]), pd.DataFrame(embeddings[:, 1])], axis=1)

            outfile = os.path.join(data_dir, fname)

            #fcswrite.write_fcs(outfile, outcols, rawdata, compat_chn_names=False, compat_percent=False, compat_negative=False)
            write_data(outfile, outcols, rawdata)

    except Exception as ex:
        # if it didn't run all the way through, clean everything up and remove it
        shutil.rmtree(data_dir)
        raise(ex)
Example No. 17
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold


def test_model(model, X, Y, folds=5):
    cv = StratifiedKFold(n_splits=folds)
    return cross_val_score(model, X, Y, cv=cv, n_jobs=-1)


def print_score(scores):
    return "Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)


if __name__ == '__main__':

    from sklearn import svm
    from datetime import datetime
    from loader import Loader

    print str(datetime.now()) + " loading data"
    l = Loader()
    X, Y = l.load_data()

    print str(datetime.now()) + " testing"
    scores = test_model(svm.SVC(C=1), X, Y)
    print str(datetime.now()) + " done"
    print print_score(scores)
Example No. 18
def __init__(self):
    # a pair is made up of a driver and a loader
    self.loader = Loader()
    self.driver = Driver()
Example No. 19
        yhat = model.caption_model.predict([photo, sequence], verbose=0)
        yhat = np.argmax(yhat)
        word = loader.idxtoword[yhat]
        in_text += ' ' + word
        print(word)
        if word == loader.STOP:
            break
    final = in_text.split()
    final = final[1:-1]
    final = ' '.join(final)
    return final


if __name__ == '__main__':

    loader = Loader()
    model = Model()

    loader.load(model)

    model.build(loader)
    model.train()

    # generate captions
    for img in loader.train_descriptions.keys():
        image = loader.encoding_train[f'{img}.jpg'].reshape(
            (1, model.OUTPUT_DIM))
        x = plt.imread(
            os.path.join(loader.root_path, 'ImgFlip500K_Dataset', 'templates',
                         'img', f'{img}.jpg'))
        plt.imshow(x)
Example No. 20
@classmethod
def setUpClass(cls):
    cls.loader = Loader()
    cls.sc = cls.create_spark_context()
Example No. 21
def make_loader(self, quiet=None):
    if quiet is None:
        quiet = self.quiet
    return Loader(self.log_drive, self.group_name, quiet=quiet)
Example No. 22
           c_vector=c_vector)

# Use Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Create a supervised trainer
trainer = create_supervised_trainer(model, optimizer, model.loss)

# Use Mean Squared Error as evaluation metric
metrics = {'evaluation': MeanSquaredError()}

# Create a supervised evaluator
evaluator = create_supervised_evaluator(model, metrics=metrics)

# Load the train and test data
train_loader = Loader(train_x, train_y, batchsize=1024)
test_loader = Loader(test_x, test_y, batchsize=1024)


def log_training_loss(engine, log_interval=500):
    """
    Function to log the training loss
    """
    model.itr = engine.state.iteration  # Keep track of iterations
    if model.itr % log_interval == 0:
        fmt = "Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
        # Keep track of epochs and outputs
        msg = fmt.format(engine.state.epoch, engine.state.iteration,
                         len(train_loader), engine.state.output)
        print(msg)
Example No. 23
    def __init__(self):

        self.loader = Loader()
        self.data_process = DataProcess()
Example No. 24
from prefect import Flow, task
from extracter import Extracter
from transformer import Transformer
from loader import Loader

with Flow("ETL") as flow:
    url = 'https://www.marketbeat.com/stocks/NASDAQ/MSFT/price-target/?MostRecent=0'
    e = Extracter(url).extract()
    df = Transformer().transform(text=e)
    l = Loader().load(df)

flow.run()


Example No. 25
    """
    reconstructed_b1 = b1 @ pcs
    reconstructed_b2 = b2 @ pcs
    corr = tfp.stats.correlation(reconstructed_b1, reconstructed_b2, sample_axis=1, event_axis=None)
    loss = tf.reduce_sum(1 - tf.math.abs(corr))
    return loss

# Load the data
rna_dataset, atac_dataset, rna_pca_components, atac_pca_components, rna_cluster_labels, atac_cluster_labels = get_data()
print("rna data shape: {} atac data shape: {}".format(rna_dataset.shape, atac_dataset.shape))
print("rna PCs shape: {} atac PCs shape: {}".format(rna_pca_components.shape, atac_pca_components.shape))
print('rna cluster labels len: {}'.format(rna_cluster_labels.shape))
print('atac cluster labels len: {}'.format(atac_cluster_labels.shape))

# Prepare the loaders
load_rna = Loader(rna_dataset, labels=rna_cluster_labels, shuffle=True)
load_atac = Loader(atac_dataset, labels=atac_cluster_labels, shuffle=True)
batch_size = 100

# Build the tf graph
magan = MAGAN(dim_b1=rna_dataset.shape[1], dim_b2=atac_dataset.shape[1], correspondence_loss=correspondence_loss, xb1_pcs=rna_pca_components, xb2_pcs=atac_pca_components)

# Data save directory
save_dir = './scicar_models/model3'

# Train
for i in range(1, 10000):
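    # Draw one batch from each domain on every training iteration.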
    if i % 100 == 0: print("Iter {} ({})".format(i, now()))
    rna_, rna_labels_ = load_rna.next_batch(batch_size)
    atac_, atac_labels_ = load_atac.next_batch(batch_size)
Example No. 26
    def __init__(self):
        self.loader = Loader()

        self.datasets = {}
Example No. 27
import tensorflow as tf
import numpy as np
from loader import Loader
from sklearn.metrics import f1_score
from nn_model import Network
from config import *

if __name__ == '__main__':
    network = Network([256, 128, 64, 29], tf.nn.relu, 300, 1e-2)
    loader = Loader(LABEL_FILE_PATH, DATA_FILE_PATH, 7, TRAINING_SET_PERCENT,
                    FIRST_TIME_TO_READ_FILE)

    # max= 0
    #
    # for i in range(loader.y_test.shape[0]):
    #     if loader.y_test[i][0] > max:
    #         max = loader.y_train[i][0]
    # print(max)

    train_step = 2000

    for i in range(train_step):
        network.train(loader.x_train, np.squeeze(loader.y_train))

    y_predict = np.squeeze(network.output(loader.x_test))
    print(y_predict)
    print(np.squeeze(loader.y_test))
    print(f1_score(np.squeeze(loader.y_test), y_predict, average='macro'))
Example No. 28
from __future__ import print_function
from loader import Loader
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
import numpy as np
import pandas as pd

ld = Loader()

plt_X = []
plt_Y = []

[X,
 y] = ld.load_data('datasets/Live_Television.train_features_100_percent.csv')

for n_clusters in range(1, 16):
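    # Fit k-means for each cluster count and record the inertia (within-cluster
    # sum of squares) for the elbow curve plotted below.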
    km = KMeans(n_clusters=n_clusters)
    km.fit(X)
    plt_X.append(n_clusters)
    plt_Y.append(km.inertia_)

plt.plot(plt_X, plt_Y)
plt.ylabel('Within groups sum of squares')
plt.xlabel('Number of Clusters')
plt.savefig('figures/' + 'Kmeans_Live_Television_Elbow_Curve.png')
Example No. 29
def train_model(tokenizer, model, n_epochs=4000):
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.train()

    l = Loader()

    l.load_files()

    optim = AdamW(model.parameters(), lr=5e-5)

    max_len = tokenizer.max_model_input_sizes[
        model_name] if model_name != 'lstm' else 512
    print("max_len", max_len)

    for epoch_i in tqdm(list(range(n_epochs))):
        print(epoch_i)
        n_res = 0
        sum_res = 0
        for epoch in l.next_epoch(batch_size=4, simulate=True):
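            # next_epoch is assumed to yield (token batch, label batch) pairs.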
            batch = epoch[0]
            labels = epoch[1]
            optim.zero_grad()
            tokenized = tokenizer(list(batch),
                                  padding=True,
                                  truncation=True,
                                  is_split_into_words=True,
                                  return_length=True,
                                  max_length=max_len)
            input_ids = torch.tensor(tokenized["input_ids"]).to(device)
            attention_mask = torch.tensor(
                tokenized["attention_mask"]).to(device)
            labels_tensor = torch.tensor([
                list(label) + [-100] * (length - len(label)) if
                (max_len is None) or
                (len(label) <= max_len) else list(label)[:max_len]
                for label, length in zip(labels, tokenized["length"])
            ]).to(device)
            outputs = model(input_ids,
                            attention_mask=attention_mask,
                            labels=labels_tensor)
            logits = outputs.logits.detach().max(axis=-1)[1]
            mask = labels_tensor != -100
            n_res += mask.float().sum()
            sum_res += ((logits == labels_tensor) & mask).float().sum()
            loss = outputs[0]
            loss.backward()
            optim.step()

        if epoch_i % 50 == 0:
            del input_ids
            del attention_mask
            del labels_tensor
            del outputs
            del logits
            del mask
            torch.cuda.empty_cache()
            model.save_pretrained("models/multilabel/%s" % model_name)
            model.eval()
            train2_acc, dev_acc = benchmark(tokenizer, model, l)
            model.train()

        train_acc = sum_res / n_res
        print("train_acc", train_acc)

    model.eval()
    return model, train_acc, l
Example No. 30
if __name__ == "__main__":
    start_date = '08-08-2016'
    num_days = 20
    date_list = [
        datetime.strptime(start_date, "%d-%m-%Y") + timedelta(days=x)
        for x in range(num_days)
    ]
    gs_dict = '../data/gensim/wiki_en_wordids.txt.bz2'

    builder = Builder()
    builder.addExtractor(LDAExtractor(dictionary=gs_dict))
    builder.addExtractor(KeywordSentimentExtractor())
    publishers = AgentManager(TopicSentimentAgent)
    relations = CoTopicRelations()
    db_params = {'dbname': 'moreover', 'user': '******', 'password': '******'}
    data = Loader(**db_params)
    query_file = "simple query"
    for date in date_list:
        print(date)
        data.executeQuery(query_file,
                          date)  # obtain the data for the given query
        print('processing articles')
        builder.process(data, publishers, agent_field_name='source'
                        )  # extract the relevant information from the data
        print('updating extractors')
        builder.updateExtractors()
        agents = publishers.agents
        relations.updateRelations(agents)

    agents_filename = '../data/trial1_agents.json'
    relations_filename = '../data/trial1_relations.json'