Пример #1
0
"""
This is experiment 5

bert as a service, concat, scikit mlp

"""

import numpy as np
import sklearn.neural_network as nn
from scipy.stats.stats import pearsonr

from utils.resourceManager import getEmbeddedResource

# Load the "train", "dev" and "test" resources of experiment "exp5". Every
# call passes the same pair of BERT model directories (uncased and chinese)
# as MultiServerBert.
# NOTE(review): getEmbeddedResource is project-local; presumably it returns
# one record of embeddings per sentence pair -- confirm against the helper.
print("Getting data...")
data = getEmbeddedResource("exp5", "BertAsService", "zh", "train", subname="en-ch", MultiServerBert=("./bert/uncased_L-12_H-768_A-12","./bert/chinese_L-12_H-768_A-12"))
val_data = getEmbeddedResource("exp5", "BertAsService", "zh", "dev", subname="en-ch", MultiServerBert=("./bert/uncased_L-12_H-768_A-12","./bert/chinese_L-12_H-768_A-12"))
test_data = getEmbeddedResource("exp5", "BertAsService", "zh", "test", subname="en-ch", MultiServerBert=("./bert/uncased_L-12_H-768_A-12","./bert/chinese_L-12_H-768_A-12"))
print("Tokenized data")
# Append the dev records to the training records, so the arrays built below
# cover train + dev.
data.extend(val_data)

# Split every (e, c, y_) record into three parallel lists.
es,cs,y =[],[],[]
for e, c, y_ in data:
  es.append(e)
  cs.append(c)
  y.append(y_)
# Join the two embedding lists along axis 1.
# Assumes each e and c is a 1-D vector, giving one feature row per record --
# TODO confirm.
x = np.concatenate((es, cs), axis=1)
y = np.asarray(y)

es,cs=[],[]
for e, c in test_data:
  es.append(e)
Пример #2
0
"""
This is experiment 5

bert as a service, concat, scikit mlp

"""

import numpy as np
import sklearn.neural_network as nn
from scipy.stats.stats import pearsonr

from utils.resourceManager import getEmbeddedResource

# Load the "train" and "dev" resources of experiment "exp5" for "de".
# NOTE(review): getEmbeddedResource is project-local; presumably it returns
# one record of embeddings per sentence pair -- confirm against the helper.
print("Getting data...")
data = getEmbeddedResource("exp5", "BertAsService", "de", "train")
val_data = getEmbeddedResource("exp5", "BertAsService", "de", "dev")
print("Tokenized data")

# Split every training (e, c, y_) record into three parallel lists.
es, cs, y = [], [], []
for e, c, y_ in data:
    es.append(e)
    cs.append(c)
    y.append(y_)
# Join the two embedding lists along axis 1.
# Assumes each e and c is a 1-D vector, giving one feature row per record --
# TODO confirm.
x = np.concatenate((es, cs), axis=1)
y = np.asarray(y)

# Same split for the dev records. es and cs are reset and reused here; the
# training arrays x and y built above are left untouched.
es, cs, val_y = [], [], []
for e, c, y_ in val_data:
    es.append(e)
    cs.append(c)
    val_y.append(y_)
Пример #3
0
from scipy.stats.stats import pearsonr

from utils.resourceManager import getEmbeddedResource


def getAverage(data):
    """Collapse each sample's two embedding sequences into one feature vector.

    Every item of ``data`` is unpacked as a 3-tuple. Its first two members
    are each averaged along axis 0, and the two averages are joined end to
    end into a single array.

    Returns a list of ``(features, label)`` tuples in input order, where
    ``label`` is the third member of the item, passed through unchanged.
    """

    def _pooled(first, second):
        # Compute both means before joining them, then lay them out back to back.
        first_mean = np.mean(first, axis=0)
        second_mean = np.mean(second, axis=0)
        return np.array([*first_mean, *second_mean])

    return [(_pooled(first, second), label) for first, second, label in data]

# Fetch the "train" and "dev" resources of experiment "exp2".
print("Getting data...")

data = getEmbeddedResource("exp2", "FastText", "zh", "train")
val_data = getEmbeddedResource("exp2", "FastText", "zh", "dev")
print("Tokenized data")

# Training set: one (features, label) pair per sample, as built by getAverage.
train = getAverage(data)
X = [features for features, _ in train]
y = [label for _, label in train]

# Small MLP regressor: a single hidden layer of 5 units, capped at 4 iterations.
model = nn.MLPRegressor(hidden_layer_sizes=(5,), max_iter=4, verbose=True)
model.fit(X, y)

# Score on the dev split by correlating the predictions with the gold labels.
test = getAverage(val_data)
X_test = [features for features, _ in test]
y_test = [label for _, label in test]
my_y = model.predict(X_test)

correlation = pearsonr(y_test, my_y)
print("PEARSON:", correlation)
Пример #4
0
"""
This is experiment 5

bert as a service, concat, scikit mlp

"""

import numpy as np
import sklearn.neural_network as nn
from scipy.stats.stats import pearsonr

from utils.resourceManager import getEmbeddedResource

# Load the "train" and "dev" resources of experiment "exp5" for "de", both
# with subname "uncased" and the uncased BERT model directory.
# NOTE(review): getEmbeddedResource is project-local; presumably it returns
# one record of embeddings per sentence pair -- confirm against the helper.
print("Getting data...")
data = getEmbeddedResource("exp5",
                           "BertAsService",
                           "de",
                           "train",
                           subname="uncased",
                           model_dir="./bert/uncased_L-12_H-768_A-12")
val_data = getEmbeddedResource("exp5",
                               "BertAsService",
                               "de",
                               "dev",
                               subname="uncased",
                               model_dir="./bert/uncased_L-12_H-768_A-12")
print("Tokenized data")

# Split every training (e, c, y_) record into three parallel lists.
es, cs, y = [], [], []
for e, c, y_ in data:
    es.append(e)
    cs.append(c)
    y.append(y_)