def load_text_dataset(path, train_size, valid_size, test_size):
    """Read a text corpus and split it into train/valid/test slices.

    The file is resolved as ``../datasets/<path>`` relative to this module's
    own directory (not the caller's CWD).

    Args:
        path: file name inside the ``datasets`` directory.
        train_size: number of leading characters kept for training, or
            ``None`` to keep everything remaining after the other splits.
        valid_size: number of characters for the validation slice, or ``None``.
        test_size: number of characters for the test slice, or ``None``.

    Returns:
        ``(vocabulary, train_text, valid_text, test_text)`` where the unused
        splits are ``None``.
    """
    file_name = os.path.join('..', 'datasets', path)
    old_dir = os.getcwd()
    # Resolve relative to this source file so the function works regardless
    # of where the process was started from.
    dname = os.path.dirname(os.path.abspath(__file__))
    os.chdir(dname)
    try:
        with open(file_name, 'r') as f:
            text = f.read()
    finally:
        # Restore the CWD even when open() fails; the original leaked the
        # chdir on any read error, corrupting process-global state.
        os.chdir(old_dir)
    vocabulary = create_vocabulary(text)
    # Test and validation slices are carved off the front, in that order;
    # whatever remains (optionally truncated) becomes the training text.
    if test_size is not None:
        test_text = text[:test_size]
        text = text[test_size:]
    else:
        test_text = None
    if valid_size is not None:
        valid_text = text[:valid_size]
        text = text[valid_size:]
    else:
        valid_text = None
    train_text = text[:train_size] if train_size is not None else text
    return vocabulary, train_text, valid_text, test_text
def get_vocab_by_given_path(file_name, text, create=False):
    """Load a cached vocabulary from ``file_name``, or build and cache one.

    If the file exists and ``create`` is False, the vocabulary is loaded with
    ``load_vocabulary_with_unk``. Otherwise a fresh vocabulary (with an UNK
    token) is built from ``text`` and written to ``file_name``, creating any
    missing parent directories.

    Args:
        file_name: path of the vocabulary cache file.
        text: corpus used to build the vocabulary when the cache is absent.
        create: when True, rebuild the vocabulary even if the cache exists.

    Returns:
        ``(vocabulary, vocabulary_size)``.
    """
    if os.path.isfile(file_name) and not create:
        vocabulary = load_vocabulary_with_unk(file_name)
    else:
        vocabulary = create_vocabulary(text, with_unk=True)
        dir_name = os.path.dirname(file_name)
        if dir_name:
            # exist_ok avoids the check-then-create race the original had
            # between os.path.exists() and os.makedirs().
            os.makedirs(dir_name, exist_ok=True)
        with open(file_name, 'w') as f:
            f.write(''.join(vocabulary))
    return vocabulary, len(vocabulary)
os.chdir(dname) # # with open(conf_file, 'r') as f: # lines = f.read().split('\n') dataset_path = os.path.join(*(['..'] * ROOT_HEIGHT + ['datasets', 'text8.txt'])) with open(dataset_path, 'r') as f: text = f.read() valid_size = 500 valid_text = text[:valid_size] train_text = text[valid_size:] vocabulary = create_vocabulary(text) vocabulary_size = len(vocabulary) print(vocabulary_size) tf.set_random_seed(1) env = Environment(pupil_class=Lstm, meta_optimizer_class=ResNet4Lstm, batch_generator_classes=BatchGenerator, vocabulary=vocabulary) add_metrics = ['bpc', 'perplexity', 'accuracy'] NUM_EXERCISES = 10 NUM_UNROLLINGS = 4 OPT_INF_NAME = 'COLD' OPT_INF_RESTORE_PUPIL_PATHS = [(OPT_INF_NAME, None)] env.build_pupil(batch_size=32,
def create_vocabulary(texts):
    """Build a sorted character vocabulary from an iterable of texts.

    The original implementation concatenated ``texts`` into one string and
    then called itself with that string — an unconditional infinite
    recursion that always ends in ``RecursionError``. The intended result
    (consistent with the file's other uses: ``len(vocabulary)`` and
    ``''.join(vocabulary)``) is the sorted list of unique characters, which
    this computes directly.

    Args:
        texts: iterable of strings (a single string also works, since it
            iterates as characters).

    Returns:
        Sorted list of the unique characters across all texts.
    """
    chars = set()
    for t in texts:
        chars.update(t)
    return sorted(chars)
import tensorflow as tf from learning_to_learn.environment import Environment from learning_to_learn.pupils.lstm_for_meta import Lstm, LstmFastBatchGenerator as BatchGenerator from learning_to_learn.useful_functions import create_vocabulary, get_positions_in_vocabulary with open('datasets/text8.txt', 'r') as f: text = f.read() valid_size = 500 valid_text = text[:valid_size] train_text = text[valid_size:] vocabulary = create_vocabulary(train_text + valid_text) vocabulary_size = len(vocabulary) env = Environment(Lstm, BatchGenerator, vocabulary=vocabulary) cpiv = get_positions_in_vocabulary(vocabulary) add_feed = [ { 'placeholder': 'dropout', 'value': 0.9 } #, # {'placeholder': 'sampling_prob', # 'value': {'type': 'linear', 'start': 0., 'end': 1., 'interval': 3000}}, # {'placeholder': 'loss_comp_prob', # 'value': {'type': 'linear', 'start': 1., 'end': 0., 'interval': 3000}} ] valid_add_feed = [ # {'placeholder': 'sampling_prob', 'value': 1.},