Example #1
import random

import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score

# dataset, create_model and SEED come from the surrounding project.
def evaluate():
    # Load the token-to-id vocabulary and restore the trained model.
    vocab_map, _ = dataset.read_map('corpus/mapping')
    sess = tf.Session()
    Model = create_model(sess, 'test')
    Model.batch_size = 1  # score one sentence at a time

    # Column 1 of SAD.csv holds the binary label, column 3 the sentence.
    df = pd.read_csv('corpus/SAD.csv', header=None)
    df = df.dropna()
    idx = list(df.index)
    random.seed(SEED)
    random.shuffle(idx)
    df = df.loc[idx]  # .ix is deprecated; .loc does the same reindexing here
    cut_by = int(0.9 * df.shape[0])  # 90/10 train/validation split
    train_df = df.iloc[:cut_by]
    val_df = df.iloc[cut_by:]
    for df in [train_df, val_df]:
        sentences = df[3]
        answers = df[1]
        scores = []
        for i, sentence in enumerate(sentences):
            if i % 1000 == 0:
                print(i)  # coarse progress indicator
            token_ids = dataset.convert_to_token(sentence, vocab_map)
            encoder_input, encoder_length, _ = Model.get_batch(
                [(0, token_ids)], shuffle=False)
            score = Model.step(sess, encoder_input, encoder_length)
            scores.append(score)
        # Each step returns a (1, 1) array; unwrap to one float per sentence.
        scores = [s[0][0] for s in scores]
        auc = roc_auc_score(answers, scores)
        yield auc  # train AUC first, then validation AUC
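
This version of evaluate() is a generator that yields one ROC-AUC per split, train first and validation second, so the caller has to drain it. A minimal usage sketch, assuming the function is defined at module level as above:

if __name__ == '__main__':
    train_auc, val_auc = evaluate()
    print('train AUC: %.4f' % train_auc)
    print('validation AUC: %.4f' % val_auc)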
Example #2

import sys

import tensorflow as tf

# dataset, create_model and sentence_cutter come from the surrounding project.
def evaluate(cut_mode):
    # cut_mode selects word-level (jieba) or character-level segmentation.
    if cut_mode == "word":
        import jieba_fast as jieba
        jieba.load_userdict("dict_fasttext.txt")
    vocab_map, _ = dataset.read_map('corpus/mapping')
    sess = tf.Session()
    Model = create_model(sess, 'test')
    Model.batch_size = 1  # score one sentence at a time

    sys.stdout.write('>')
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    sentence = sentence_cutter(sentence, cut_mode)

    while sentence:
        print('sentence: ', sentence)
        token_ids = dataset.convert_to_token(sentence, vocab_map)
        print('token_ids: ', token_ids)
        encoder_input, encoder_length, _ = Model.get_batch([(0, token_ids)])
        print('encoder_input: ', encoder_input, encoder_input.shape)
        print('encoder_length: ', encoder_length)
        score = Model.step(sess, encoder_input, encoder_length)
        print('Score: ', score[0][0])
        print('>', end='')
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = sentence_cutter(sentence, cut_mode)
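
This variant depends on a sentence_cutter helper that the example does not show. A minimal sketch of what it might look like, assuming cut_mode is either 'word' (jieba segmentation) or a character-level fallback; the project's real helper may differ:

def sentence_cutter(sentence, cut_mode):
    # Sketch only: normalize stdin input and segment it for the model.
    sentence = sentence.strip()
    if not sentence:
        return ''  # an empty line ends the read loop above
    if cut_mode == 'word':
        import jieba_fast as jieba  # mirrors the import in evaluate()
        return ' '.join(jieba.cut(sentence))
    # assumed fallback: character-level segmentation
    return ' '.join(sentence)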
Example #3

import sys

import tensorflow as tf

# dataset and create_model come from the surrounding project.
def evaluate():
    vocab_map, _ = dataset.read_map('corpus/mapping')
    sess = tf.Session()
    Model = create_model(sess, 'test')
    Model.batch_size = 1  # score one sentence at a time

    sys.stdout.write('>')
    sys.stdout.flush()
    sentence = sys.stdin.readline()

    while sentence:
        token_ids = dataset.convert_to_token(sentence, vocab_map)
        encoder_input, encoder_length, _ = Model.get_batch([(0, token_ids)])
        score = Model.step(sess, encoder_input, encoder_length)
        print('Score: ' + str(score[0][0]))
        print('>', end='')
        sys.stdout.flush()
        sentence = sys.stdin.readline()
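
All three variants lean on the project-local dataset.convert_to_token helper, which the examples do not show. A minimal sketch of what such a mapping typically looks like, assuming vocab_map maps token strings to integer ids and an UNK_ID constant exists (both assumptions, not the project's actual code):

UNK_ID = 3  # hypothetical id for out-of-vocabulary tokens

def convert_to_token(sentence, vocab_map):
    # Sketch only: map each whitespace-separated token to its vocabulary id,
    # falling back to UNK_ID for tokens missing from the mapping.
    return [vocab_map.get(tok, UNK_ID) for tok in sentence.split()]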