Example #1
def load(args, client):
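    # load a single file when args["input"] is given, otherwise load everything from args["folder"]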
    if args["input"] is not None:
        load_file(client,
                  args["input"],
                  typecast=args["typecast"],
                  cast=args["cast"],
                  verbose=args["verbose"])
    else:
        load_folder(client,
                    args["folder"],
                    typecast=args["typecast"],
                    cast=args["cast"],
                    verbose=args["verbose"])
Example #2
def import_topic_data():
    es = Elasticsearch()
    df = load_file('topics_desc.pkl')

    df['wagi'] = [[w[1] for w in words] for words in df.words]
    df['words'] = [[w[0] for w in words] for words in df.words]
    df.rename(columns={"words": "słowa", "desc": "opis"}, inplace=True)

    def to_doc(index, row):
        return {"_index": 'tematy', "_id": index, "_source": row.to_dict()}

    docs_gen = (to_doc(index, row) for index, row in df.iterrows())

    tematy_settings = {
        "mappings": {
            "properties": {
                "embedding": {
                    "type": "dense_vector",
                    "dims": 768
                }
            }
        }
    }

    es.indices.create(index='tematy', ignore=400, body=tematy_settings)
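    # ignore=400 suppresses the error if the index already exists; helpers.bulk streams the documents with a tqdm progress bar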
    helpers.bulk(es, tqdm(docs_gen, total=len(df)))
Example #3
def last_two():
    P = data.load_file()
    params = {
        "epochs": 4000,
        "neurons": 1024,
        "learn_method": 'classic'
    }

    # generate weight matrix
    W = np.random.randn(params['neurons'], params['neurons'])
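    # symmetrize the weights and zero the diagonal (Hopfield constraints: w_ij = w_ji, w_ii = 0)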
    W = (W + W.T) / 2
    W = W - np.diag(W.diagonal())
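    # draw a random bipolar (-1/+1) pattern to recall from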
    p = np.random.randint(-1, 1, params['neurons']).reshape(1,-1)
    p = (p*2) + 1
    Hop = HopfieldNet(p)
    Hop.W = W
    recalled_set, energy = Hop.sequential_recall_shuffle(p, epochs=4000)
    plt.imshow(recalled_set.reshape(32,32))
    plt.show()
    plt.plot(range(len(energy[0])), energy[0])
    plt.xlabel('Epoch', fontsize=16)
    plt.ylabel('Energy', fontsize=16)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.savefig('Energy_sym.png')
    plt.show()
Example #4
def first():
    P = data.load_file()
    params = {"epochs": 4000, "neurons": 1024, "learn_method": 'classic'}

    # ====== train ========================================
    p0 = P[0].ravel()
    p1 = P[1].ravel()
    p2 = P[2].ravel()
    train_set = np.vstack((p0, p1))
    train_set = np.vstack((train_set, p2))

    Hop = HopfieldNet(train_set)
    Hop.batch_train()
    recall_set = np.vstack((p0, p1))
    recall_set = np.vstack((recall_set, p2))

    # ====== add one more =========================================
    p3 = P[3].ravel()
    p4 = P[4].ravel()
    p5 = P[5].ravel()
    p6 = P[6].ravel()
    add_p = {0: p3, 1: p4, 2: p5, 3: p6}

    recalled_set = {}

    Hop = HopfieldNet(train_set)
    Hop.batch_train()
    recalled_set[0], energy = Hop.sequential_recall_shuffle(recall_set,
                                                            epochs=2)

    for i in add_p.keys():
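        # add one more pattern, retrain from scratch, and recall the whole (growing) set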
        print(i)
        train_set = np.vstack((train_set, add_p[i]))
        Hop = HopfieldNet(train_set)
        Hop.batch_train()
        recall_set = np.vstack((recall_set, add_p[i]))
        recalled_set[i + 1], energy = Hop.sequential_recall_shuffle(recall_set,
                                                                    epochs=2)

    error_pattern = {}
    error_pattern[0] = []
    error_pattern[1] = []
    error_pattern[2] = []
    for i in recalled_set.keys():
        error_pattern[0] += [abs(np.mean(recalled_set[i][0, :] - p0))]
        error_pattern[1] += [abs(np.mean(recalled_set[i][1, :] - p1))]
        error_pattern[2] += [abs(np.mean(recalled_set[i][2, :] - p2))]

        fig, ax = plt.subplots(1, 3)
        ax[0].imshow(recalled_set[i][0, :].reshape(32, 32), origin="lower")
        ax[1].imshow(recalled_set[i][1, :].reshape(32, 32), origin="lower")
        ax[2].imshow(recalled_set[i][2, :].reshape(32, 32), origin="lower")
        plt.show()

    plt.plot(range(len(error_pattern[0])), np.array(error_pattern[0]))
    plt.plot(range(len(error_pattern[1])), np.array(error_pattern[1]))
    plt.plot(range(len(error_pattern[2])), np.array(error_pattern[2]))
    plt.xlabel('Number of patterns added', fontsize=16)
    plt.ylabel('Error %', fontsize=16)
    plt.show()
Example #5
def test():
    #The below line will tell you where to put .theanoarc for configuration
    #print os.path.expanduser('~/.theanorc.txt')
    params = serial.load('../networks/1457038649_26229_9000_NET0_PARAMS')
    net = net_configs.createNet0(epoch=0)
    net.load_params_from(params)
    img = load_file('../data/diabetic_ret/dataset_256_norm/test/30307_left.jpeg')
    prediction = net.predict(img)
    print(prediction)
Example #6
def load_mtl(name):
    file = data.load_file('models/' + name)
    mtl = {}
    name = None
    for line in file:
        line = line.strip()
        if not line or line[0] == '#': continue
        tokens = line.split()
        cmd = tokens.pop(0)
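        # 'newmtl' starts a new material; 'Kd' is its diffuse colour, scaled here from [0, 1] to 0-255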
        if cmd == 'newmtl':
            name = tokens[0]
        elif cmd == 'Kd':
            mtl[name] = tuple(int(float(x) * 255) for x in tokens)
    return mtl
Example #7
def load_obj_data(name):
    #if name in _obj_cache:
    #    return _obj_cache[name]
    file = data.load_file('models/' + name)
    vertices = []
    faces = []
    mtl = {}
    rot = rx(90) * 1 #reflect_x
    color = (127, 127, 127)
    for line in file:
        line = line.strip()
        if not line or line[0] == '#': continue
        tokens = line.split()
        cmd = tokens.pop(0)
        if cmd == 'v':
            vertices.append(rot(v3(float(x) for x in tokens)))
        elif cmd == 'f':
            faces.append((tuple(int(x.split('/')[0]) for x in tokens), color))
        elif cmd == 'mtllib':
            mtl = load_mtl(tokens[0])
        elif cmd == 'usemtl':
            color = mtl[tokens[0]]
    xs, ys, zs = zip(*vertices)
    bbox = Box(min(xs), max(xs), min(ys), max(ys), min(zs), max(zs))
    vdata = []
    cdata = []
    for face, color in faces:
        vxs = [vertices[n-1] for n in face]
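        # fan-triangulate the polygon around its first vertex; the i+2, i+1 order flips the winding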
        for i in xrange(len(vxs) - 2):
            vdata.extend(vxs[0])
            cdata.extend(color)
            vdata.extend(vxs[i+2])
            cdata.extend(color)
            vdata.extend(vxs[i+1])
            cdata.extend(color)
    #if DEBUG:
    #    print name, len(vdata) // 9
    return vdata, cdata, bbox
Example #8
        if dmg < 0:
            dmg = 0
        print "%s landed a hit! Dealing %i damage." % (dict1['name'], dmg)
        dict2['hp'] -= dmg
    else:
        print "%s missed!" % (dict1['name'])
    if dict2['hp'] <= 0:
        alive = 2
    return alive


ans = raw_input('New player? ')
if ans == 'yes' or ans == 'Yes' or ans == 'y' or ans == '1':
    filename = raw_input('Name your Character: ')
    data.new_file(filename)
    player = data.load_file(filename)
else:
    filename = raw_input('What is your Character Name? ')
    player = data.load_file(filename)

ans = raw_input('Wanna use health potion? ')
if ans == 'yes' or ans == 'Yes' or ans == 'y' or ans == '1':
    player['hp'] += 5
    if player['hp'] > player['life']:
        player['hp'] = player['life']

id = raw_input('Enter beast code: ')
monster = beastiary.beast(id)
print "You will fight a %s" % (monster['name'])
raw_input("\nPress enter to continue.")
Example #9
        'epochs_all': epochs_all,
        'iterations_all': iterations_all,
        'batch_size_all': batch_size_all,
        'learning_rate_all': learning_rate_all,
    }
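    # save this run's hyperparameters under a timestamped model directory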
    save_path = 'models/{}/'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))
    os.mkdir(save_path)
    with open(save_path + 'hyperparameters.json', 'w') as f:
        json.dump(hyperparameters, f)

    print('LOADING DATA...')
    # data_train = load_file('data/LREC/train2012')
    # data_valid = load_file('data/LREC/dev2012')
    # data_test = load_file('data/LREC/test2011')
    # data_test_asr = load_file('data/LREC/test2011asr')
    data_train = load_file('data/NPR-podcasts/train')
    data_valid = load_file('data/NPR-podcasts/valid')
    data_test = load_file('data/NPR-podcasts/test')

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True)

    print('PREPROCESSING DATA...')
    X_train, y_train = preprocess_data(data_train, tokenizer, punctuation_enc,
                                       segment_size)
    X_valid, y_valid = preprocess_data(data_valid, tokenizer, punctuation_enc,
                                       segment_size)

    print('INITIALIZING MODEL...')
    output_size = len(punctuation_enc)
    bert_punc = nn.DataParallel(
Example #10
    noisy_pattern = np.copy(pattern)

    for i in picks:
        noisy_pattern[i] = pattern[i] * -1

    return noisy_pattern


def calc_acc(original_pattern, predicted_pattern):
    """
    Calculate the accuracy of the model as the difference between the patterns.
    The flipped pattern also counts as a correct prediction.
    :param original_pattern: the target pattern
    :param predicted_pattern: the outcome of the model
    :return: accuracy: [0, 100]
    """
    acc = np.sum(original_pattern == predicted_pattern) / float(
        original_pattern.shape[0])
    negative_pattern = original_pattern * -1
    neg_acc = np.sum(negative_pattern == predicted_pattern) / float(
        original_pattern.shape[0])
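    # the bit-flipped pattern is also a stored attractor, so a match to it counts (reported as negative)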

    if neg_acc > acc:
        acc = -neg_acc
    return acc


if __name__ == '__main__':
    p = data.load_file()
    part_3_4()
Example #11
        'learning_rate_all': learning_rate_all,
    }
    train_data_path = "/media/nas/samir-data/punctuation/all_datasets/data_dir_punctuator_v2_wait"
    train_data_path2 = "/media/nas/samir-data/punctuation/all_datasets/data_europarl/training-monolingual-europarl"
    data_path = "/media/nas/samir-data/punctuation/all_datasets/data_dir_punctuator_v3"
    save_path = 'models/{}/'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))
    os.mkdir(save_path)
    with open(save_path + 'hyperparameters.json', 'w') as f:
        json.dump(hyperparameters, f)

    print('LOADING DATA...')
    #data_train = load_file(os.path.join(train_data_path2, 'europarl-v7.fr_cleaned.txt'))
    #data_train = load_file2(os.path.join(train_data_path, 'cleaned_leMonde_with_punct_v2_for_punctuator.train.txt'), segment_word)
    #data_train = load_file(os.path.join(train_data_path2, 'europarl-v7.fr_cleaned.txt'))
    data_train = load_file(
        os.path.join(
            data_path,
            'subset_cleaned_leMonde_with_punct_v2_for_punctuator.train.txt'))
    data_valid = load_file(
        os.path.join(
            data_path,
            'subset_cleaned_leMonde_with_punct_v2_for_punctuator.dev.txt'))

    tokenizer = CamembertTokenizer.from_pretrained('camembert-base')

    print('PREPROCESSING DATA...')
    X_train, y_train = encode_data3(data_train, tokenizer, puncs,
                                    punctuation_enc, segment_size)
    X_valid, y_valid = encode_data3(data_valid, tokenizer, puncs,
                                    punctuation_enc, segment_size)

    print('INITIALIZING MODEL...')
Example #12
        if y[i] == 1:
            actual += 1
            if d(X[i], Z) == 0:
                common += 1

    print('No of actual outliers : ', actual)
    print('Precision : ', common/len(Z))
    print('Recall : ', common/actual)
    print('Cost : ', cost(C, X, Z))


if __name__ == "__main__":

    # Loading the existing data
    if real_data:
        temp_X, temp_Y = load_file(load_data)
        # shuffle samples and labels together so they stay aligned
        combined = list(zip(temp_X, temp_Y))
        random.shuffle(combined)
        temp_X, temp_Y = map(list, zip(*combined))

        U, y = removeDups(temp_X, temp_Y)

    # Synthetic Data
    else:
        U, y, C_, Z_, ids_ = make_data(5, 0, 8, 50)


    # # X_train, X_test, y_train, y_test = train_test_split(np.array(temp_X), np.array(temp_Y), test_size=0.33, random_state=42)
    # # print(X_test.shape)

    # # data is finally in U and labels in y
    # print('u shape ', len(U),',',len(U[0]))
    # data_train = load_file('data/LREC/train2012')
    # data_valid = load_file('data/LREC/dev2012')
    # data_test = load_file('data/LREC/test2011')
    # data_test_asr = load_file('data/LREC/test2011asr')

    # import os
    # wpath = "C:/Users/HP/Google Drive/Colab Notebooks/Punctuation_Restoration/BertPunc-master_GPU/data/"
    # os.chdir(wpath)
    # print(os.getcwd())

    with open("train2012_MAX.txt", 'r', encoding="utf8", errors="ignore"
              ) as f:  # encoding='cp1252', encoding='utf8', errors="replace"
        data_train = f.readlines()

    # data_train = load_file('data/test2011')
    data_valid = load_file('data/dev2012')
    data_test = load_file('data/test2011')

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True)

    print('PREPROCESSING DATA...')
    X_train, y_train = preprocess_data(data_train, tokenizer, punctuation_enc,
                                       segment_size)
    X_valid, y_valid = preprocess_data(data_valid, tokenizer, punctuation_enc,
                                       segment_size)

    print('INITIALIZING MODEL...')
    output_size = len(punctuation_enc)
    bert_punc = nn.DataParallel(
        BertPunc(segment_size, output_size, dropout).cuda())
import data
import algorithm

# $, A, C, G, T, N/X are encoded starting from 0
# forward extension  => W -> Wa
# backward extension => W -> aW
# for the empty string: k = l = 0, s = len(T)-1

# reference = ["ACCTTGA"]  # does not include the '$' terminator

if __name__ == "__main__":
    print('hello, my bwa tools!')

    print('********************** DATA LOAD *************************')

    reference = data.load_file(
        'd:\\short-read-aligment-based-on-the-BWT\\tools\\my_ref.fa')

    # fmd_index = bwt.BWA_FMD_index(reference)

    fmd_index = bwt.BWA_FMD_index_noend(reference)

    print('************************ RESULT **************************')

    # print('B :', fm_index.data['B'])
    # print('S :', fm_index.data['S'])
    # print('C :', fm_index.data['C'])
    # print('O :', fm_index.data['O'])

    # print(fm_index.text)

    # print('B   :', fmd_index.data['B'])
Example #15
def first_three():
    P = data.load_file()
    params = {
        "epochs": 4000,
        "neurons": 1024,
        "learn_method": 'classic'
    }

    # ====== train ========================================
    p0 = P[0].ravel()
    p1 = P[1].ravel()
    p2 = P[2].ravel()
    train_set = np.vstack((p0, p1))
    train_set = np.vstack((train_set, p2))

    Hop = HopfieldNet(train_set)
    Hop.batch_train()

    # get energy per pattern
    energy_p0 = Hop.energy(p0, threshold=0)
    energy_p1 = Hop.energy(p1, threshold=0)
    energy_p2 = Hop.energy(p2, threshold=0)
    print('The energy for p0 is: {}'.format(energy_p0))
    print('The energy for p1 is: {}'.format(energy_p1))
    print('The energy for p2 is: {}'.format(energy_p2))
    print('\n')

    # ====== test =========================================
    p10 = P[9].ravel()
    p11 = P[10].ravel()

    # get energy per pattern
    energy_p10 = Hop.energy(p10, threshold=0)
    energy_p11 = Hop.energy(p11, threshold=0)
    print('The energy for distorted p10 is: {}'.format(energy_p10))
    print('The energy for distorted p11 is: {}'.format(energy_p11))
    print('\n')

    recall_set = np.vstack((p10, p11))
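    # asynchronous recall with a shuffled update order, starting from the distorted patterns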
    recalled_set, energy = Hop.sequential_recall_shuffle(recall_set, epochs=4)

    rs0_real = P[0]
    rs0_org = P[9]
    rs0 = recalled_set[0, :].reshape(32, 32)
    rs1_real = P[2]
    rs1_org = P[10]
    rs1 = recalled_set[1, :].reshape(32, 32)

    fig, ax = plt.subplots(1, 3)
    ax[0].imshow(rs0_real, origin="lower")
    ax[1].imshow(rs0_org, origin="lower")
    ax[2].imshow(rs0, origin="lower")
    plt.show()

    plt.plot(range(len(energy[0])), energy[0])
    plt.xlabel('Epoch', fontsize=16)
    plt.ylabel('Energy', fontsize=16)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    #plt.savefig('Energy1.png')
    plt.show()

    fig, ax = plt.subplots(1, 3)
    ax[0].imshow(rs1_real, origin="lower")
    ax[1].imshow(rs1_org, origin="lower")
    ax[2].imshow(rs1, origin="lower")
    plt.show()

    plt.plot(range(len(energy[1])), energy[1])
    plt.xlabel('Epoch', fontsize=16)
    plt.ylabel('Energy', fontsize=16)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    #plt.savefig('Energy2.png')
    plt.show()
    print("hola")
Example #16
import torch
import torch.nn as nn

from data import load_file
from model import SentimentModel


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
TEXT, LABEL, train, valid, test, train_iter, valid_iter, test_iter = load_file(filepath='data/',
                                                                               device=device)

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = SentimentModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

optimizer = torch.optim.SGD(model.parameters(), lr=3e-3)

criterion = nn.BCEWithLogitsLoss()
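# note: BCEWithLogitsLoss applies the sigmoid internally, so the model outputs raw logits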

model = model.to(device)
criterion = criterion.to(device)

def binary_accuracy(preds, y):
    # round the sigmoid outputs to 0/1 and compare against the labels
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc
Example #17
import numpy as np
from glob import glob
from transformers import CamembertTokenizer
import torch
from torch import nn
#%matplotlib inline
import json
from tqdm import tqdm
from sklearn import metrics

from model import BertPunc, BertPunc_ner
from data import load_file, load_file2, encode_data3, create_data_loader, create_data_loader_without_attentions

segment_word = 12

data_test = load_file("dev.ester.clean")
#data_test = load_file2("test_ilyes_segment_long1.txt", segment_word)
#data_test = load_file("/media/nas/samir-data/punctuation/all_datasets/data_dir_punctuator_v3/subset_cleaned_leMonde_with_punct_v2_for_punctuator.test.txt")

tokenizer = CamembertTokenizer.from_pretrained('camembert-base')

punctuation_enc = {'PAD': 0, 'TOKEN': 1, ',': 2, '.': 3, '▁?': 4}
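# per-token classes: 'TOKEN' presumably marks "no punctuation follows"; the remaining entries are the marks to restore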

#punctuation_enc = {
#    'PAD': 0,
#    'TOKEN': 1,
#    ',': 2,
#    '.': 3,
#    '▁?': 4,
#    '▁:': 5,
#    '▁!': 6,
Example #18
#filter_lengths = [1,2,3,4,5,6,7]
filter_lengths = [4, 5, 6]
print('Filter lengths:', filter_lengths)
hidden_dims = 250
print('Hidden dims:', hidden_dims)
nb_epoch = 20
embedding_droupout = 0.2
print('Embedding dropout:', embedding_droupout)
fc_dropout = 0.5
print('Fully-connected dropout:', fc_dropout)

# cross validation
n_folds = 10

print('Loading data...')
X_train, y_train, num_classes = load_file(full_train_file, alphabet)
print(len(X_train), 'train sequences')

print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
print('X_train shape:', X_train.shape)
y_train = np.array(y_train)

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, num_classes)


def make_model(maxlen, alphabet_size, embedding_dims, embedding_droupout,
               nb_filters, filter_lengths, hidden_dims, fc_dropout,
               num_classes):
    print('Build model...')
Example #19
    os.makedirs(save_path)
    with open(save_path+'hyperparameters.json', 'w') as f:
        json.dump(hyperparameters, f)

    print('LOADING DATA...')
    # LREC dataset
    # data_train = load_file('data/LREC/train2012')
    # data_valid = load_file('data/LREC/dev2012')
    # data_test = load_file('data/LREC/test2011')
    # ASR dataset
    # data_test_asr = load_file('data/LREC/test2011asr')
    # data_train = load_file('data/NPR-podcasts/train')
    # data_valid = load_file('data/NPR-podcasts/valid')
    # data_test = load_file('data/NPR-podcasts/test')
    # Chinese dataset **************************
    data_train = load_file('data/zh_pfdsj/train_proc')
    data_valid = load_file('data/zh_pfdsj/dev_proc')
    data_test = load_file('data/zh_pfdsj/test_proc')
    # location of vocab.txt
    # tokenizer = BertTokenizer.from_pretrained('./models/', do_lower_case=True)
    # tokenizer = AutoTokenizer.from_pretrained('./models/albert_en/', do_lower_case=True)
    # tokenizer for the Chinese dataset
    # tokenizer = BertTokenizer.from_pretrained('./models/albert_chinese_small/', do_lower_case=True)
    # distillbert
    # tokenizer = AutoTokenizer.from_pretrained('./models/bert_distill_chinese', do_lower_case=True)
    # ALbert-small-rnn tokenizer
    # tokenizer = BertTokenizer.from_pretrained('./models/albert_chinese_small/', do_lower_case=True)
    # # ALbert-small-dense-hidden tokenizer
    # tokenizer = BertTokenizer.from_pretrained('./models/albert_chinese_small/', do_lower_case=True)
    # NOTE ALbert-small-dense-Rnn tokenizer
    tokenizer = BertTokenizer.from_pretrained('./models/albert_chinese_small/', do_lower_case=True)