Пример #1
0
 def __init__(self, text, batch_size, num_unrollings=1, vocabulary=None, random_batch_initiation=False):
     """Store the text and vocabulary, place the cursors and build the first batch.

     Args:
         text: Sized sequence the generator reads from; it is stored as-is
             and its length is cached.
         batch_size: Number of cursor positions to create.
         num_unrollings: Stored as ``_num_unrollings``; not otherwise used
             in this method.
         vocabulary: Sized collection of symbols. Required in practice even
             though the signature defaults it to None, because its length
             and its position map are computed right away.
         random_batch_initiation: If true, the cursor positions are drawn
             with ``random.sample`` from the text indices; otherwise they
             are spaced ``len(text) // batch_size`` apart starting at 0.

     Raises:
         TypeError: If ``vocabulary`` is None.
         ValueError: Propagated from ``random.sample`` when
             ``random_batch_initiation`` is true and ``batch_size`` exceeds
             ``len(text)``.
         ZeroDivisionError: If ``batch_size`` is 0 and
             ``random_batch_initiation`` is false.
     """
     if vocabulary is None:
         # len(None) further down would fail with an opaque message; report
         # the real problem instead, keeping the same exception type.
         raise TypeError(
             'vocabulary must be provided, got None')
     self._text = text
     self._text_size = len(text)
     self._batch_size = batch_size
     self.vocabulary = vocabulary
     self._vocabulary_size = len(self.vocabulary)
     self.character_positions_in_vocabulary = get_positions_in_vocabulary(self.vocabulary)
     self._num_unrollings = num_unrollings
     if random_batch_initiation:
         # Distinct start positions picked anywhere in the text.
         self._cursor = random.sample(range(self._text_size), batch_size)
     else:
         # Evenly spaced start positions: 0, segment, 2 * segment, ...
         segment = self._text_size // batch_size
         self._cursor = [offset * segment for offset in range(batch_size)]
     # _start_batch is defined elsewhere in the class; its result becomes
     # the initial value of _last_batch.
     self._last_batch = self._start_batch()
# valid_text = text[:valid_size]
# train_text = text[valid_size:]

# Cache file that stores the vocabulary as one string of concatenated symbols.
voc_name = 'razvedopros_voc.txt'
if os.path.isfile(voc_name):
    # Reuse the cached vocabulary: each character of the file is one entry.
    # The encoding is pinned so the file decodes the same way on every
    # platform instead of following the locale default.
    # NOTE(review): a cache written earlier under a non-UTF-8 locale must be
    # deleted and regenerated.
    with open(voc_name, 'r', encoding='utf-8') as f:
        vocabulary = list(f.read())
else:
    # No cache yet: build the vocabulary from `text` and save it for later runs.
    vocabulary = create_vocabulary(text)
    with open(voc_name, 'w', encoding='utf-8') as f:
        f.write(''.join(vocabulary))
vocabulary_size = len(vocabulary)

env = Environment(Lstm, BatchGenerator, vocabulary=vocabulary)

# Position lookup for the vocabulary symbols (cpiv = character positions in
# vocabulary).
cpiv = get_positions_in_vocabulary(vocabulary)

# Additional placeholder feeds. Only the dropout entry is active; the
# scheduled entries are kept disabled for reference.
add_feed = [
    {'placeholder': 'dropout', 'value': 0.9},
    # {'placeholder': 'sampling_prob',
    #  'value': {'type': 'linear', 'start': 0., 'end': 1., 'interval': 3000}},
    # {'placeholder': 'loss_comp_prob',
    #  'value': {'type': 'linear', 'start': 1., 'end': 0., 'interval': 3000}},
]
# Presumably the validation-time counterpart of add_feed (dropout value 1.)
# - confirm against where it is consumed.
valid_add_feed = [
    # {'placeholder': 'sampling_prob', 'value': 1.},
    {'placeholder': 'dropout', 'value': 1.},
]

add_metrics = ['bpc', 'perplexity', 'accuracy']

# tf.set_random_seed(1)
NUM_UNROLLINGS = 30
BATCH_SIZE = 32