def write(self, sess, global_step):
  # Save the TensorFlow checkpoint, then pickle the training progress
  # (metrics history plus the unlabeled-data reader's position) so an
  # interrupted run can resume where it left off.
  self.checkpoint_saver.save(sess, self.config.checkpoint,
                             global_step=global_step)
  utils.write_cpickle(
      (self.history, self.unlabeled_data_reader.current_file,
       self.unlabeled_data_reader.current_line),
      self.config.progress)
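
All of the examples on this page funnel through utils.write_cpickle, whose body is not shown here. Below is a minimal sketch of what such a helper typically looks like, assuming it is a thin wrapper around Python's pickle module; the directory-creation behavior and the read_cpickle counterpart are assumptions, not the library's confirmed API.

import os
import pickle

def write_cpickle(obj, path):
  # Hypothetical sketch: serialize obj to path with pickle, creating the
  # parent directory if needed. The real helper may differ (e.g. it could
  # use cPickle on Python 2 or a gfile-style API for remote filesystems).
  parent = os.path.dirname(path)
  if parent:
    os.makedirs(parent, exist_ok=True)
  with open(path, 'wb') as f:
    pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def read_cpickle(path):
  # Hypothetical counterpart: load a pickled object back from path.
  with open(path, 'rb') as f:
    return pickle.load(f)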
Example #2
def evaluate_all_tasks(self, sess, summary_writer, history, train_set=False):
  # Evaluate every task, tag each result set with the current global step,
  # and pickle the accumulated history so results persist across runs.
  for task in self.tasks:
    results = self._evaluate_task(sess, task, summary_writer, train_set)
    if history is not None:
      results.append(('step', self._model.get_global_step(sess)))
      history.append(results)
  if history is not None:
    utils.write_cpickle(history, self._config.history_file)
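
Because the history file is just a pickled list of per-task result lists, inspecting it later only takes the matching read call. A usage sketch, assuming the read_cpickle helper above and that each entry is a list of (metric_name, value) pairs ending with ('step', global_step):

# Illustrative path; the real location comes from config.history_file.
history = read_cpickle('data/history.pkl')
for results in history[-3:]:  # the three most recent evaluations
  metrics = dict(results)
  step = metrics.pop('step', None)
  print('step', step, metrics)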
Example #3
def main(data_dir='/content/data'):
  random.seed(0)

  utils.log("BUILDING WORD VOCABULARY/EMBEDDINGS")
  for pretrained in ['glove.6B.50d.txt']:
    config = configure.Config(data_dir=data_dir,
                              for_preprocessing=True,
                              pretrained_embeddings=pretrained,
                              word_embedding_size=50)
    embeddings.PretrainedEmbeddingLoader(config).build()

  utils.log("CONSTRUCTING DEV SETS")
  for task_name in ["chunk"]:
    # chunking does not come with a provided dev split, so create one by
    # selecting a random subset of the data
    config = configure.Config(data_dir=data_dir,
                              for_preprocessing=True)
    task_data_dir = os.path.join(config.raw_data_topdir, task_name) + '/'
    train_sentences = word_level_data.TaggedDataLoader(
        config, task_name, False).get_labeled_sentences("train")
    random.shuffle(train_sentences)
    write_sentences(task_data_dir + 'train_subset.txt', train_sentences[1500:])
    write_sentences(task_data_dir + 'dev.txt', train_sentences[:1500])

  utils.log("WRITING LABEL MAPPINGS")
  for task_name in ["chunk"]:
    for i, label_encoding in enumerate(["BIOES"]):
      config = configure.Config(data_dir=data_dir,
                                for_preprocessing=True,
                                label_encoding=label_encoding)
      token_level = task_name in ["ccg", "pos", "depparse"]
      loader = word_level_data.TaggedDataLoader(config, task_name, token_level)
      if token_level:
        if i != 0:
          continue
        utils.log("WRITING LABEL MAPPING FOR", task_name.upper())
      else:
        utils.log("  Writing label mapping for", task_name.upper(),
                  label_encoding)
      utils.log(" ", len(loader.label_mapping), "classes")
      utils.write_cpickle(loader.label_mapping,
                          loader.label_mapping_path)
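
The write_sentences helper used above to split off the chunking dev set is not shown on this page. The following is a hypothetical sketch, assuming each sentence is a list of (word, tag) pairs written one token per line with blank lines between sentences (CoNLL-style); the actual format in the codebase may differ.

def write_sentences(path, sentences):
  # Hypothetical: write tagged sentences one "word tag" pair per line,
  # separating sentences with a blank line (CoNLL-style).
  with open(path, 'w') as f:
    for sentence in sentences:
      for word, tag in sentence:
        f.write(word + ' ' + tag + '\n')
      f.write('\n')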
Example #4
def main(data_dir='./data'):
  random.seed(0)

  utils.log("BUILDING WORD VOCABULARY/EMBEDDINGS")
  for pretrained in ['glove.6B.300d.txt']:
    config = configure.Config(data_dir=data_dir,
                              for_preprocessing=True,
                              pretrained_embeddings=pretrained,
                              word_embedding_size=300)
    embeddings.PretrainedEmbeddingLoader(config).build()

  utils.log("CONSTRUCTING DEV SETS")
  for task_name in ["chunk"]:
    # chunking does not come with a provided dev split, so create one by
    # selecting a random subset of the data
    config = configure.Config(data_dir=data_dir,
                              for_preprocessing=True)
    task_data_dir = os.path.join(config.raw_data_topdir, task_name) + '/'
    train_sentences = word_level_data.TaggedDataLoader(
        config, task_name, False).get_labeled_sentences("train")
    random.shuffle(train_sentences)
    write_sentences(task_data_dir + 'train_subset.txt', train_sentences[1500:])
    write_sentences(task_data_dir + 'dev.txt', train_sentences[:1500])

  utils.log("WRITING LABEL MAPPINGS")
  for task_name in ["chunk"]:
    for i, label_encoding in enumerate(["BIOES"]):
      config = configure.Config(data_dir=data_dir,
                                for_preprocessing=True,
                                label_encoding=label_encoding)
      token_level = task_name in ["ccg", "pos", "depparse"]
      loader = word_level_data.TaggedDataLoader(config, task_name, token_level)
      if token_level:
        if i != 0:
          continue
        utils.log("WRITING LABEL MAPPING FOR", task_name.upper())
      else:
        utils.log("  Writing label mapping for", task_name.upper(),
                  label_encoding)
      utils.log(" ", len(loader.label_mapping), "classes")
      utils.write_cpickle(loader.label_mapping,
                          loader.label_mapping_path)
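
The label_encoding="BIOES" argument names the Begin/Inside/Outside/End/Single tagging scheme for chunk spans: a chunk's final token gets an E- tag and a one-token chunk gets an S- tag, giving a tagger more explicit boundary information than plain BIO. As a standalone illustration of the scheme (not the repo's converter), assuming the input tags are valid BIO:

def bio_to_bioes(tags):
  # Convert BIO chunk tags to BIOES: a chunk's last token becomes E-,
  # and a one-token chunk becomes S-.
  bioes = []
  for i, tag in enumerate(tags):
    if tag == 'O':
      bioes.append(tag)
      continue
    prefix, label = tag.split('-', 1)
    next_inside = i + 1 < len(tags) and tags[i + 1] == 'I-' + label
    if prefix == 'B':
      bioes.append(('B-' if next_inside else 'S-') + label)
    else:  # prefix == 'I'
      bioes.append(('I-' if next_inside else 'E-') + label)
  return bioes

print(bio_to_bioes(['B-NP', 'I-NP', 'O', 'B-VP']))
# ['B-NP', 'E-NP', 'O', 'S-VP']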
Example #5
def main(data_dir='/content/data'):
  random.seed(0)

  utils.log("BUILDING WORD VOCABULARY/EMBEDDINGS")
  for pretrained in ['glove.6B.100d.txt']:
    config = configure.Config(data_dir=data_dir,
                              for_preprocessing=True,
                              pretrained_embeddings=pretrained,
                              word_embedding_size=100)
    embeddings.PretrainedEmbeddingLoader(config).build()

  utils.log("WRITING LABEL MAPPINGS")
  for task_name in ["senclass"]:
    config = configure.Config(data_dir=data_dir,
                              for_preprocessing=True)
    loader = sentence_level_data.SentenceClassificationDataLoader(
        config, task_name)
    utils.log("WRITING LABEL MAPPING FOR", task_name.upper())
    utils.log(" ", len(loader.label_mapping), "classes")
    utils.write_cpickle(loader.label_mapping,
                        loader.label_mapping_path)
Example #6
def _write(self):
  # Pickle the stacked pretrained-embedding matrix and its vocabulary so
  # later runs can load them without re-parsing the raw embedding file.
  utils.write_cpickle(np.vstack(self.vectors), self.config.word_embeddings)
  utils.write_cpickle(self.vocabulary, self.config.word_vocabulary)
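
The two pickles written by _write form the word-embedding lookup used downstream. A read-side sketch, assuming the read_cpickle helper above and that the vocabulary is a word-to-row-index dict (the file paths are illustrative):

embeddings = read_cpickle('data/word_embeddings.pkl')  # shape [vocab_size, dim]
vocab = read_cpickle('data/word_vocabulary.pkl')       # word -> row index
vector = embeddings[vocab['the']]
print(vector.shape)  # e.g. (300,)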