Пример #1
0
            X2.append(iclasses[label])
            labels.append(0)
    categoricals = {"匹配":1, "不匹配":0}
    return X1, X2, labels, categoricals

# Convert the raw samples into two parallel text lists plus pair labels.
# `y` and `classes` are rebound to the pair-level labels and label mapping.
X1, X2, y, classes = convert_to_pairs(X, y, classes)
# Everything except the last 1000 pairs is used for training.
# NOTE(review): assumes more than 1000 pairs exist; otherwise these slices are empty.
X1_train = X1[:-1000]
X2_train = X2[:-1000]
y_train = y[:-1000]

# The last 1000 pairs are held out for testing.
X1_test = X1[-1000:]
X2_test = X2[-1000:]
y_test = y[-1000:]

num_classes = len(classes)
tokenizer = SimpleTokenizer()
# One tokenizer is fitted on both sides of every pair.
# NOTE(review): X1 + X2 also contains the held-out pairs — confirm this is intended.
tokenizer.fit(X1 + X2)
# Only the training halves are transformed in this section.
X1_train = tokenizer.transform(X1_train)
X2_train = tokenizer.transform(X2_train)

# Hyperparameters: maxlen is the padded sequence length used below;
# hdims and epochs are presumably the hidden size and number of training epochs — TODO confirm.
maxlen = 48
hdims = 128
epochs = 2

X1_train = sequence.pad_sequences(
    X1_train, 
    maxlen=maxlen,
    dtype="int32",
    padding="post",
    truncating="post",
    value=0
Пример #2
0
from dataset import load_THUCNews_title_label
from dataset import load_weibo_senti_100k
from dataset import load_simplifyweibo_4_moods
from dataset import load_simplifyweibo_3_moods
from dataset import load_hotel_comment

# Activation function taken from the Transformer; gives a slight improvement.
def gelu(x):
    """Return the erf-based GELU of x: 0.5 * x * (1 + erf(x / sqrt(2)))."""
    scaled = x / tf.sqrt(2.0)
    erf_term = 1.0 + tf.math.erf(scaled)
    return 0.5 * x * erf_term

# Load the hotel-comment dataset: samples, labels and the class list.
X, y, classes = load_hotel_comment()
# 80% of the data goes to training; the split uses a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=73672)
# Computed from the raw training labels, before they are one-hot encoded below.
class_weight = balance_class_weight(y_train)

num_classes = len(classes)
# presumably min_freq drops tokens seen fewer than 32 times — confirm in SimpleTokenizer
tokenizer = SimpleTokenizer(min_freq=32)
# The tokenizer is fitted on the training split only.
tokenizer.fit(X_train)
X_train = tokenizer.transform(X_train)

# NOTE(review): this default is overwritten on the next line and has no effect here.
maxlen = 48
# Sequence length is chosen from the transformed training data.
maxlen = find_best_maxlen(X_train)

# Pad or truncate every training sequence at the end to exactly maxlen ids, using 0 as filler.
X_train = sequence.pad_sequences(
    X_train, 
    maxlen=maxlen,
    dtype="int32",
    padding="post",
    truncating="post",
    value=0
)
# One-hot encode the training labels into num_classes columns.
y_train = tf.keras.utils.to_categorical(y_train, num_classes)