Пример #1
0
def read_yeast():
  """Load the two yeast time-series segments and the reference network.

  Each data file is read with ``data_reader`` and its first row is dropped,
  because that row holds the misread column headers.

  Returns:
    A ``(data_list, true_inc)`` tuple. ``data_list`` contains the "on"
    segment followed by the "off" segment (the two are kept as separate
    list entries rather than stacked into one array). ``true_inc`` is the
    5x5 incidence matrix defined by the literature.
  """
  # Row 0 of each file is the misread header line, so strip it right away.
  off_segment = np.delete(data_reader('./data/datayeastoff.txt'), 0, 0)
  on_segment = np.delete(data_reader('./data/datayeaston.txt'), 0, 0)

  # True incidence matrix defined by the literature. (An earlier, disabled
  # variant of this matrix was its transpose.)
  true_inc = [
    [0, 0, 1, 0, 1],
    [1, 0, 0, 1, 0],
    [0, 1, 0, 0, 0],
    [0, 1, 1, 0, 0],
    [0, 0, 1, 0, 0],
  ]

  # First time-series segment is "on", second is "off".
  return ([on_segment, off_segment], true_inc)
Пример #2
0
 def _init(self):
     """Load the common-word file into ``self.dictionary``.

     The file at ``self._common_dic_path`` is read with ``data_reader`` and
     split on newlines; every non-empty entry is added to the dictionary.
     The number of entries loaded is reported through ``logging.info``.
     """
     common_dic = data_reader(self._common_dic_path)
     # Blank entries (empty file, consecutive newlines) are not words: they
     # are neither added nor counted in the log message below.
     vocab = [word for word in common_dic.strip().split('\n') if word]
     for word in vocab:
         self.dictionary.add(word)
     logging.info("Initialized `{}` common words from file `{}`".format(len(vocab), self._common_dic_path))
def run(conf, only_testmode):
    """Create the training reader and copy its statistics onto ``conf``.

    When ``conf.firstN`` contains ``-1`` the plain ``data_reader`` is used;
    otherwise ``data_reader_firstN`` is used and ``conf.firstN`` is passed
    as ``from_to``. ``only_testmode`` is not used in this part of the
    function.
    """
    read_everything = -1 in conf.firstN
    shared_kwargs = dict(data_dir=conf.data_dir, filename='train', batch_size=conf.batch)

    if not read_everything:
        reader = data_reader_firstN(from_to=conf.firstN, **shared_kwargs)
    else:
        reader = data_reader(**shared_kwargs)

    # Publish the reader's statistics through the shared configuration.
    conf.class_divpnt = reader.class_divpnt
    conf.n_tracks = reader.num_tracks
Пример #4
0
 def add_from_file(self, vocab_file):
     """Add words to common dic by vocab file.

     Every word of the vocab file that is not yet in ``self.dictionary`` is
     added to it and appended, on its own line, to the file at
     ``self._common_dic_path``. Each addition is logged.

     Args:
         vocab_file: user vocab file path.
     """
     user_dic = data_reader(vocab_file)
     vocab = user_dic.strip().split('\n')
     with codecs.open(self._common_dic_path, 'a+') as fo:
         for word in vocab:
             # Skip blank entries (empty file, consecutive newlines) so the
             # empty string never becomes a dictionary word, matching the
             # `if word:` filter applied by `_init`.
             if not word or word in self.dictionary:
                 continue
             self.dictionary.add(word)
             fo.write('\n'+word)
             logging.info("Add word `{}` to file `{}`.".format(word, self._common_dic_path))
Пример #5
0
def main():
    """Run the GA-based TSP solver on the data read from ``args.target``.

    All run options come from the module-level ``args``. When
    ``args.savepath`` is set, a GIF and a plot are generated from it after
    the run finishes.
    """
    print('Starting...')
    data = data_reader(args.target)

    # Alternative solvers that can be swapped in for the GA below:
    # solution = LocalSearch(op_idx=2)
    # solution = VariableLocalSearch(op_idx1=0, op_idx2=2, keep_invariant=1000, keep_invariant_max=2000)
    # solution = SA(op_idx=0, init_coeff=0.9, init_inner_time=200, stop_temp=1e-2, alpha=0.98)
    solution = GA(population_size=200, cross_rate=[0.3, 0.5], mutation_rate=[0.1, 0.5], keep_invariant=50)
    tsp = TSP(solution, data, euclidean_dist)

    # `threshhold` is spelled the way this call site has always passed it.
    run_options = dict(
        threshhold=args.thresh,
        savepath=args.savepath,
        save_freq=args.save_freq,
        print_freq=args.print_freq,
        max_iteration=args.max_itr,
    )
    tsp.run(**run_options)

    # Nothing to visualise unless a save path was given.
    if args.savepath is None:
        return
    generate_gif(args.savepath)
    plot(args.savepath)
Пример #6
0
 def remove_from_file(self, vocab_file):
     """Remove words from common dic by vocab file.

     Every word of the vocab file that is present in ``self.dictionary`` is
     removed from it and the removal is logged. The file at
     ``self._common_dic_path`` is then rewritten from the remaining words,
     one per line.

     Args:
         vocab_file: user vocab file path.
     """
     user_dic = data_reader(vocab_file)
     vocab = user_dic.strip().split('\n')
     for word in vocab:
         if word in self.dictionary:
             self.dictionary.remove(word)
             # The word is removed *from* the file (the message used to say "to").
             logging.info("Remove word `{}` from file `{}`.".format(word, self._common_dic_path))
     # Mode 'w' truncates the file, so it ends up holding exactly the
     # words that are still in the dictionary.
     with codecs.open(self._common_dic_path, 'w') as fo:
         fo.writelines(word + '\n' for word in self.dictionary)
Пример #7
0
 def __init__(self,
              corpus_file,
              common_words_file=None,
              min_candidate_len=2,
              max_candidate_len=5,
              least_cnt_threshold=5,
              solid_rate_threshold=0.018,
              entropy_threshold=1.92,
              all_words=False):
     """Read the corpus and store the extraction settings.

     Args:
         corpus_file: corpus file path; read with
             ``data_reader(corpus_file, cn_only=True)``.
         common_words_file: handed to ``Dictionary`` when ``all_words`` is
             false.
         min_candidate_len: stored as ``self._min_candidate_len``.
         max_candidate_len: stored as ``self._max_candidate_len``.
         least_cnt_threshold: stored as ``self._least_cnt_threshold``.
         solid_rate_threshold: stored as ``self._solid_rate_threshold``.
         entropy_threshold: stored as ``self._entropy_threshold``.
         all_words: when true, no ``Dictionary`` is built and a warning is
             logged instead.

     Raises:
         ValueError: if ``corpus_file`` is empty or ``None``.
     """
     if not corpus_file:
         raise ValueError("Corpus file is empty, please specify corpus file path.")
     self._document = data_reader(corpus_file, cn_only=True)
     self._common_dic = common_words_file
     self._min_candidate_len = min_candidate_len
     self._max_candidate_len = max_candidate_len
     self._least_cnt_threshold = least_cnt_threshold
     self._solid_rate_threshold = solid_rate_threshold
     self._entropy_threshold = entropy_threshold
     self._all_words = all_words
     if not self._all_words:
         self.dictionary = Dictionary(self._common_dic)
     else:
         # NOTE: `self.dictionary` is not created in this mode.
         # The message names the real flag, `all_words` (there is no
         # `new_words` parameter).
         logging.warning("Extract all words mode, if you only want new words, set all_words=False to new words mode.")
Пример #8
0
 def get_dev_examples(self, data_dir):
     """
     Build the reader over the dev split.

     The file is ``dev.tsv`` under ``self.data_dir``; the ``data_dir``
     argument itself is not used.
     """
     dev_path = os.path.join(self.data_dir, "dev.tsv")
     return data_reader(dev_path, self.vocab, self.num_examples, "dev")
Пример #9
0
 def get_train_examples(self, data_dir, epoch=1):
     """
     Build the reader over the training split.

     The file is ``train.tsv`` under ``self.data_dir``; the ``data_dir``
     argument itself is not used. ``epoch`` is forwarded to ``data_reader``.
     """
     train_path = os.path.join(self.data_dir, "train.tsv")
     return data_reader(train_path, self.vocab, self.num_examples, "train", epoch)
Пример #10
0
def _evaluate_readers(conf, sess, model, model_title, readers_test, update_seed=()):
    """Evaluate on every test reader, log each result, and return the
    r-precision summed over the seeds listed in ``update_seed``."""
    total_rprec = 0
    for seed_num, reader_test in readers_test.items():
        log_write(conf, "seed num: " + seed_num)
        # `eval` is called with five arguments, so it is the module's own
        # evaluation function rather than the builtin.
        rprec, ndcg, rsc = eval(reader_test, conf, sess, model, model_title)
        log_write(conf, show_result(rprec, ndcg, rsc))
        if seed_num in update_seed:
            total_rprec += rprec
    return total_rprec


def _train_step(conf, sess, model, model_title, batch, input_kp):
    """Run one optimizer step on ``batch`` and return the batch cost."""
    trk_positions, art_positions, y_positions, titles, trk_val, art_val = batch

    if conf.mode in ['pretrain', 'dae']:
        # Feed either the track view or the artist view, chosen at random.
        if np.random.randint(2) == 0:
            x_positions, x_ones = trk_positions, trk_val
        else:
            x_positions, x_ones = art_positions, art_val
        feed_dict = {model.x_positions: x_positions, model.x_ones: x_ones,
                     model.y_positions: y_positions, model.y_ones: np.ones(len(y_positions)),
                     model.keep_prob: conf.kp, model.input_keep_prob: input_kp}
    else:
        # 'title' mode (run() rejects every other mode): the target
        # positions are used as the input as well.
        feed_dict = {model.x_positions: y_positions, model.x_ones: np.ones(len(y_positions)),
                     model.y_positions: y_positions, model.y_ones: np.ones(len(y_positions)),
                     model_title.titles: titles,
                     model.keep_prob: conf.kp, model_title.keep_prob: conf.title_kp,
                     model.input_keep_prob: input_kp,
                     model.titles_use: [[1]] * conf.batch}

    _, cost = sess.run([model.optimizer, model.cost], feed_dict=feed_dict)
    return cost


def run(conf, only_testmode):
    """Build the model selected by ``conf.mode`` and train or evaluate it.

    Args:
        conf: configuration object. Reader statistics (``n_tracks``,
            ``n_input``, ``n_output``, ``charsize``, ``strmaxlen``) are
            written onto it before the model is built.
        only_testmode: when true, the test readers are evaluated once and
            no training is performed.

    Raises:
        ValueError: if ``conf.mode`` is not 'pretrain', 'dae' or 'title'.
    """
    if -1 in conf.firstN:
        reader = data_reader(data_dir=conf.data_dir, filename='train', batch_size=conf.batch)
    else:
        reader = data_reader_firstN(data_dir=conf.data_dir, filename='train',
                                    batch_size=conf.batch, from_to=conf.firstN)

    conf.n_tracks = reader.num_tracks
    conf.n_input = reader.num_items
    conf.n_output = reader.num_items
    conf.charsize = reader.num_char
    conf.strmaxlen = reader.max_title_len

    kp_range = conf.input_kp
    test_seed = conf.test_seed
    update_seed = conf.update_seed

    readers_test = {}
    for seed in test_seed:
        readers_test[seed] = data_reader_test(data_dir=conf.data_dir, filename=seed,
                                              batch_size=conf.batch, test_num=conf.testsize)

    print(conf.n_input)

    model_title = None
    if conf.mode == 'pretrain':
        info = '[pretrain mode]'
        model = DAE_tied(conf)
    elif conf.mode == 'dae':
        if only_testmode:
            conf.initval = conf.save
        info = '[dae mode]'
        model = DAE(conf)
    elif conf.mode == 'title':
        info = '[title mode]'
        model_title = get_model(conf)
        model = DAE_title(conf, model_title.output)
    else:
        # Fail with a clear message instead of a TypeError on `None += str`.
        raise ValueError("Unknown mode `{}`: expected 'pretrain', 'dae' or 'title'.".format(conf.mode))

    info += ' start at ' + str(datetime.datetime.now())
    log_write(conf, '*'*10)
    log_write(conf, info)

    model.fit()
    sess = tf.Session()
    try:
        sess.run(model.init_op)
        saver = tf.train.Saver()

        # if test mode is specified, just test the result and no training session.
        if only_testmode:
            log_write(conf, '<<only test mode>>')
            if conf.mode == 'title':
                saver.restore(sess, conf.save)
            _evaluate_readers(conf, sess, model, model_title, readers_test)
            return

        epoch = 0
        max_eval = 0.0
        n_batches = 0
        loss = 0.0

        while True:
            start_idx = reader.train_idx
            batch = reader.next_batch()
            end_idx = reader.train_idx

            input_kp = random.uniform(kp_range[0], kp_range[-1])
            loss += _train_step(conf, sess, model, model_title, batch, input_kp)
            n_batches += 1

            # An epoch is treated as finished when the reader's index did
            # not advance past its starting point or sits at 0.
            if not (start_idx > end_idx or end_idx == 0):
                continue

            epoch += 1
            log_write(conf, "epoch "+str(epoch))
            log_write(conf, "training loss: "+str(loss / n_batches))
            cur_eval = _evaluate_readers(conf, sess, model, model_title,
                                         readers_test, update_seed)

            # Ties count as an improvement, so the parameters are re-saved.
            if cur_eval >= max_eval:
                if conf.mode in ['pretrain', 'dae']:
                    model.save_model(sess)
                elif conf.mode == 'title':
                    saver.save(sess, conf.save)
                max_eval = cur_eval
                log_write(conf, "The highest score is updated. Parameters are saved")

            loss = 0
            n_batches = 0
            if epoch == conf.epochs:
                break
    finally:
        # Release the session's resources on every exit path.
        sess.close()
Пример #11
0
 def get_test_examples(self, data_dir, epoch):
     """
     Build the reader over the test split.

     The file is ``test.tsv`` under ``self.data_dir``; the ``data_dir``
     argument itself is not used. ``epoch`` is forwarded to ``data_reader``.
     """
     test_path = self.data_dir + "/test.tsv"
     # NOTE(review): the phase passed for test.tsv is "infer" - confirm intended.
     return data_reader(test_path, self.vocab, self.num_examples, "infer", epoch)
Пример #12
0
 def get_dev_examples(self, data_dir, epoch):
     """
     Build the reader over the dev split.

     The file is ``dev.tsv`` under ``self.data_dir``; the ``data_dir``
     argument itself is not used. ``epoch`` is forwarded to ``data_reader``.
     """
     dev_path = self.data_dir + "/dev.tsv"
     return data_reader(dev_path, self.vocab, self.num_examples, "dev", epoch)
Пример #13
0
 def get_dev_examples(self, data_dir, epoch, shuffle):
     """
     Build the reader over the dev split.

     The file is ``dev.tsv`` under ``self.data_dir``; the ``data_dir``
     argument itself is not used. ``epoch`` and ``shuffle`` are forwarded
     to ``data_reader``.
     """
     dev_path = self.data_dir + "/dev.tsv"
     return data_reader(dev_path, self.vocab, self.num_examples,
                        "dev", epoch, shuffle)
Пример #14
0
 def get_test_examples(self, data_dir):
     """
     Build the reader over the test split.

     The file is ``test.tsv`` under ``self.data_dir``; the ``data_dir``
     argument itself is not used.
     """
     test_path = os.path.join(self.data_dir, "test.tsv")
     return data_reader(test_path, self.vocab, self.num_examples, "test")
Пример #15
0
 def get_train_examples(self, data_dir, epoch):
     """
     Build the reader over the training split.

     The file is ``train.tsv`` under ``self.data_dir``; the ``data_dir``
     argument itself is not used. ``epoch`` is forwarded to ``data_reader``.
     """
     train_path = self.data_dir + "/train.tsv"
     return data_reader(train_path, self.vocab, self.num_examples, "train", epoch)
Пример #16
0
 def get_infer_examples(self, data_dir):
     """
     Build the reader over the inference queries.

     The file is ``infer.tsv`` under ``self.data_dir``; the ``data_dir``
     argument itself is not used.
     """
     infer_path = os.path.join(self.data_dir, "infer.tsv")
     return data_reader(infer_path, self.vocab, self.num_examples, "infer")
Пример #17
0
 def get_train_examples(self, data_dir, epoch, shuffle):
     """
     Build the reader over the training split.

     The file is ``train.tsv`` under ``self.data_dir``; the ``data_dir``
     argument itself is not used. ``epoch`` and ``shuffle`` are forwarded
     to ``data_reader``.
     """
     train_path = self.data_dir + "/train.tsv"
     return data_reader(train_path, self.vocab, self.num_examples,
                        "train", epoch, shuffle)