Example #1
import os
import pickle
from preprocessing.preprocess import answer_span_to_indices

# custom imports
from preprocessing.dataset import Dataset
from network.config import CONFIG
from network.build_model import get_batch
from evaluation_metrics import get_f1_from_tokens, get_exact_match_from_tokens
# Suppress tensorflow verboseness
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

print("Starting testing on dev file...")
D = Dataset(CONFIG.EMBEDDING_FILE)
index2embedding = D.index2embedding
padded_data, (max_length_question,
              max_length_context) = D.load_questions('data/dev.json')
print("Loaded data")

# group the padded data by the first word of each question
split_data_pre = dict()
for qas in padded_data:
    first_word = D.index2word[qas["question"][0]].lower()
    if first_word not in split_data_pre:
        split_data_pre[first_word] = []
    split_data_pre[first_word].append(qas)

# extract the groups that contain more than one batch of examples
split_data = dict()
print("First word frequency:")
for key in split_data_pre.keys():
    if len(split_data_pre[key]) > CONFIG.BATCH_SIZE:
        # the snippet is truncated here; keeping the group and printing its
        # frequency is an assumed minimal completion
        split_data[key] = split_data_pre[key]
        print(key, ":", len(split_data_pre[key]))
Example #2
import sys
import numpy as np
import tensorflow as tf
import pickle
from functools import reduce
import os
import __init__  # assumed: project-local module exposing root_path (used below)
# custom imports
from network.config import CONFIG
from network.build_model import build_model_v2, get_feed_dict, get_batch
from evaluation_metrics import get_f1_from_tokens, get_exact_match_from_tokens
from preprocessing.dataset import Dataset

D = Dataset(CONFIG.EMBEDDING_FILE)
index2embedding = D.index2embedding
padded_data, (max_length_question,
              max_length_context) = D.load_questions(CONFIG.QUESTION_FILE_V2)
print("Loaded data")

tf.reset_default_graph()
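# define the embedding matrix as a placeholder so the (large) pretrained
# matrix can be fed in at session run time instead of baked into the graph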
embedding = tf.placeholder(
    shape=[len(index2embedding), CONFIG.EMBEDDING_DIMENSION],
    dtype=tf.float32,
    name='embedding_ph')
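# build_model_v2 is assumed to return the training op, the loss tensor, and
# the predicted answer-span start (s) and end (e) positions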
train_op, loss, s, e = build_model_v2(embedding)

# blank the per-batch training-loss CSV before logging
root_path = __init__.root_path
results_path = root_path + '/resultsv2'
model_path = root_path + '/modelv2'
open(results_path + '/training_loss_per_batch.csv', 'w').close()
Example #3
import os
import numpy as np

from preprocessing.dataset import Dataset
from network.config import CONFIG
from network.build_model import get_batch
from evaluation_metrics import get_f1_from_tokens, get_exact_match_from_tokens

# Suppress tensorflow verboseness
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

D = Dataset(CONFIG.EMBEDDING_FILE)
index2embedding = D.index2embedding
#padded_data_squad1, (max_length_question, max_length_context) = D.load_questions('data/train.json')
#padded_data_validation = padded_data_squad1[(int) (CONFIG.TRAIN_PERCENTAGE*len(padded_data_squad1)):]
#untrained_contexts = [x["context"] for x in padded_data_validation]
#print("Loaded data from squad one")

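# load SQuAD 2.0 training data; unanswerable questions are marked with
# answer_start == -1 (see the disabled filtering block below)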
padded_data_squad2, (max_length_question_squad2,
                     max_length_context_squad2) = D.load_questions('data/train-v2.0.json')
print("padded_data_squad2.len =", len(padded_data_squad2))
print("Max length from SQuAD 2 q and c:", max_length_question_squad2, max_length_context_squad2)
print("Loaded data from SQuAD 2")
'''
padded_data_untrained = [x for x in padded_data_squad2 if x["context"] in untrained_contexts]
unanswerable_data = [x for x in padded_data_untrained if x["answer_start"]==-1]
answerable_data = [x for x in padded_data_untrained if x["answer_start"]>=0]
print("Number of unanswerable questions: ",len(unanswerable_data))
print("Number of answerable questions: ", len(answerable_data))

padded_data = np.array(padded_data_untrained)
'''
padded_data = np.array(padded_data_squad2)
# keep the held-out tail after the training split point
padded_data = padded_data[int(CONFIG.TRAIN_PERCENTAGE * len(padded_data_squad2)):]
print(padded_data.shape)