-
Notifications
You must be signed in to change notification settings - Fork 0
/
CBOW.py
116 lines (94 loc) · 3.75 KB
/
CBOW.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from blocks.bricks import Linear, Softmax
from blocks.bricks.lookup import LookupTable
from blocks.initialization import IsotropicGaussian, Constant
from blocks.bricks.cost import CategoricalCrossEntropy
from blocks.graph import ComputationGraph
from blocks.algorithms import GradientDescent, Scale
from blocks.extensions.monitoring import TrainingDataMonitoring
from blocks.main_loop import MainLoop
from theano import tensor
from blocks_extras.extensions.plot import Plot
from blocks.filter import VariableFilter
from blocks.roles import WEIGHT
from blocks.extensions import FinishAfter, Printing, ProgressBar
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme
from BrownDataset import BrownDataset
from SaveWeightsExtension import SaveWeights
import pickle
import sys
import os
import logging
logger = logging.getLogger(__name__)
def main():
    """Parse command-line arguments and launch CBOW training.

    Usage: CBOW.py [epochs hidden_dims data_path results_dir]

    All four positional arguments must be given together; otherwise the
    built-in defaults are used.
    """
    # Bug fix: the original tested only `len(sys.argv) > 1` and then read
    # sys.argv[2..4] unconditionally, raising IndexError on a partial set.
    if len(sys.argv) >= 5:
        print(sys.argv)
        epochs = int(sys.argv[1])
        HIDDEN_DIMS = int(sys.argv[2])
        path = sys.argv[3]
        name = "./" + sys.argv[4] + "/"
    else:
        epochs = 10
        HIDDEN_DIMS = 200
        name = "./results/"
        path = "data/"
    # Make sure the results directory exists before training writes to it.
    if not os.path.exists(name):
        os.makedirs(name)
    run(epochs, path, HIDDEN_DIMS, name)
def run(epochs=1, corpus="data/", HIDDEN_DIMS=100, path="./"):
    """Train a CBOW word-embedding model on the Brown corpus.

    Parameters
    ----------
    epochs : int
        Number of passes over the training data before stopping.
    corpus : str
        Directory containing the corpus files read by BrownDataset.
    HIDDEN_DIMS : int
        Dimensionality of the learned word embeddings.
    path : str
        Output prefix for the saved weight files and the pickled
        computation graph.
    """
    brown = BrownDataset(corpus)
    INPUT_DIMS = brown.get_vocabulary_size()
    OUTPUT_DIMS = brown.get_vocabulary_size()

    # Theano symbolic inputs: a matrix of context-word indices and a
    # vector of target-word indices.
    x = tensor.lmatrix('context')
    y = tensor.ivector('output')

    # Embedding lookup table: one HIDDEN_DIMS-sized row per vocabulary word.
    input_to_hidden = LookupTable(name='input_to_hidden', length=INPUT_DIMS,
                                  dim=HIDDEN_DIMS)

    # CBOW: look up the embedding of every word in the context and average
    # them into a single hidden representation.
    h = tensor.mean(input_to_hidden.apply(x), axis=1)

    hidden_to_output = Linear(name='hidden_to_output', input_dim=HIDDEN_DIMS,
                              output_dim=OUTPUT_DIMS)
    y_hat = Softmax().apply(hidden_to_output.apply(h))

    # Initialize weights with small Gaussian noise and set biases to zero.
    weights = IsotropicGaussian(0.01)
    input_to_hidden.weights_init = hidden_to_output.weights_init = weights
    input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
    input_to_hidden.initialize()
    hidden_to_output.initialize()

    # Cross-entropy cost plus L2 regularization on both weight matrices.
    cost = CategoricalCrossEntropy().apply(y, y_hat)
    cg = ComputationGraph(cost)
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + 0.01 * (W1 ** 2).sum() + 0.01 * (W2 ** 2).sum()
    cost.name = 'cost_with_regularization'

    # Mini-batches of 512 instances, drawn sequentially from the dataset.
    mini_batch = SequentialScheme(brown.num_instances(), 512)
    data_stream = DataStream.default_stream(brown, iteration_scheme=mini_batch)

    # Tie up loose ends: plain SGD with a fixed learning rate drives the
    # main loop, and extensions handle progress display and weight saving.
    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        ProgressBar(),
        FinishAfter(after_n_epochs=epochs),
        Printing(),
        SaveWeights(layers=[input_to_hidden, hidden_to_output],
                    prefixes=['%sfirst' % path, '%ssecond' % path]),
    ]

    logger.info("Starting main loop...")
    # Renamed from `main` to avoid shadowing the module-level main().
    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

    # Bug fix: the original leaked the file handle from an inline open();
    # use a context manager so the pickle file is closed deterministically.
    with open('%scg.pickle' % path, 'wb') as f:
        pickle.dump(cg, f)
# Script entry point: run training only when executed directly, not on import.
if __name__ == '__main__':
    main()