""" This is experiment 5 bert as a service, concat, scikit mlp """ import numpy as np import sklearn.neural_network as nn from scipy.stats.stats import pearsonr from utils.resourceManager import getEmbeddedResource print("Getting data...") data = getEmbeddedResource("exp5", "BertAsService", "zh", "train", subname="en-ch", MultiServerBert=("./bert/uncased_L-12_H-768_A-12","./bert/chinese_L-12_H-768_A-12")) val_data = getEmbeddedResource("exp5", "BertAsService", "zh", "dev", subname="en-ch", MultiServerBert=("./bert/uncased_L-12_H-768_A-12","./bert/chinese_L-12_H-768_A-12")) test_data = getEmbeddedResource("exp5", "BertAsService", "zh", "test", subname="en-ch", MultiServerBert=("./bert/uncased_L-12_H-768_A-12","./bert/chinese_L-12_H-768_A-12")) print("Tokenized data") data.extend(val_data) es,cs,y =[],[],[] for e, c, y_ in data: es.append(e) cs.append(c) y.append(y_) x = np.concatenate((es, cs), axis=1) y = np.asarray(y) es,cs=[],[] for e, c in test_data: es.append(e)
""" This is experiment 5 bert as a service, concat, scikit mlp """ import numpy as np import sklearn.neural_network as nn from scipy.stats.stats import pearsonr from utils.resourceManager import getEmbeddedResource print("Getting data...") data = getEmbeddedResource("exp5", "BertAsService", "de", "train") val_data = getEmbeddedResource("exp5", "BertAsService", "de", "dev") print("Tokenized data") es, cs, y = [], [], [] for e, c, y_ in data: es.append(e) cs.append(c) y.append(y_) x = np.concatenate((es, cs), axis=1) y = np.asarray(y) es, cs, val_y = [], [], [] for e, c, y_ in val_data: es.append(e) cs.append(c) val_y.append(y_)
"""Experiment 2: FastText embeddings, per-sentence averaged and concatenated, scikit MLP."""
# FIX: numpy and sklearn.neural_network were never imported, so np.mean and
# nn.MLPRegressor below raised NameError at runtime.
import numpy as np
import sklearn.neural_network as nn
# FIX: scipy.stats.stats is a deprecated private module (removed in modern
# SciPy); pearsonr's public location is scipy.stats.
from scipy.stats import pearsonr

from utils.resourceManager import getEmbeddedResource


def getAverage(data):
    """Collapse each sentence pair into one fixed-size feature vector.

    data: iterable of (e, c, l) triples where e and c are 2-D arrays of
    per-token embeddings (source and target sentence) and l is the label.
    Returns a list of (feature_vector, label) pairs, where the feature
    vector is the token-mean of e concatenated with the token-mean of c.
    """
    data_averaged = []
    for e, c, l in data:
        e_avg = np.mean(e, axis=0)
        c_avg = np.mean(c, axis=0)
        data_averaged.append((np.array(list(e_avg) + list(c_avg)), l))
    return data_averaged


print("Getting data...")
data = getEmbeddedResource("exp2", "FastText", "zh", "train")
val_data = getEmbeddedResource("exp2", "FastText", "zh", "dev")
print("Tokenized data")

train = getAverage(data)
model = nn.MLPRegressor(max_iter=4, hidden_layer_sizes=(5,), verbose=True)
X = [x[0] for x in train]
y = [x[1] for x in train]
model.fit(X, y)

# Evaluate on the dev split via Pearson correlation with the gold labels.
test = getAverage(val_data)
X_test = [x[0] for x in test]
y_test = [x[1] for x in test]
my_y = model.predict(X_test)
print("PEARSON:", pearsonr(y_test, my_y))
""" This is experiment 5 bert as a service, concat, scikit mlp """
# FIX: the module docstring above was missing its opening triple-quote in the
# source, which made the file a syntax error.
import numpy as np
import sklearn.neural_network as nn
# FIX: scipy.stats.stats is a deprecated private module (removed in modern
# SciPy); pearsonr's public location is scipy.stats.
from scipy.stats import pearsonr

from utils.resourceManager import getEmbeddedResource

print("Getting data...")
# Both sides embedded with the single uncased English BERT model.
data = getEmbeddedResource(
    "exp5", "BertAsService", "de", "train",
    subname="uncased", model_dir="./bert/uncased_L-12_H-768_A-12",
)
val_data = getEmbeddedResource(
    "exp5", "BertAsService", "de", "dev",
    subname="uncased", model_dir="./bert/uncased_L-12_H-768_A-12",
)
print("Tokenized data")

# NOTE(review): the source appears truncated here — the training lists are
# built but never concatenated, and no model is fit or evaluated; confirm
# the missing tail against the original script.
es, cs, y = [], [], []
for e, c, y_ in data:
    es.append(e)
    cs.append(c)
    y.append(y_)