示例#1
0
    def load_model(self, data_folder):
        """Load the trained artifacts stored under ``data_folder``.

        Three files are read from the ``train_data`` sub-folder:

        * ``dict_word2idx.pkl`` -- loaded with ``pickle_load``, inverted and
          stored on ``self.dict_idx2word``;
        * ``index.trained`` -- read with ``faiss.read_index`` into
          ``self.index``;
        * ``vals.trained`` -- read with ``np.load`` into ``self.vals``.

        Args:
            data_folder: Root folder that contains ``train_data``.
        """
        # Resolve every artifact path up front, before any file is opened.
        index_file, vals_file, vocab_file = [
            join_path(data_folder, ['train_data', filename])
            for filename in ('index.trained', 'vals.trained',
                             'dict_word2idx.pkl')
        ]

        # Swap keys and values of the loaded mapping; when two keys share a
        # value, the one iterated last wins.
        idx2word = {}
        for word, idx in pickle_load(vocab_file).items():
            idx2word[idx] = word
        self.dict_idx2word = idx2word

        self.index = faiss.read_index(index_file)
        self.vals = np.load(vals_file)

        print('Model loaded')
示例#2
0
    # Options controlling the train/test split. args.extracted_pkl and
    # args.tensor_folder, read below, are expected to be declared on the
    # parser before this point.
    parser.add_argument("--test_ratio", type=float, default=0.01)
    parser.add_argument("--data_folder", default=None)
    parser.add_argument("--random_seed", type=int, default=1)

    args = parser.parse_args()

    if args.data_folder is None:
        # Stand-alone mode: paths come straight from the command line.
        extracted_pkl = args.extracted_pkl
        tensor_folder = args.tensor_folder
    else:
        # Project-folder mode: the input path is derived from data_folder.
        extracted_pkl = join_path(args.data_folder,
                                  ['extracted_txt', 'extracted.pkl'])
        params_path = join_path(args.data_folder, 'params.pkl')

        # Best effort: if params.pkl cannot be loaded for any reason, start
        # from an empty parameter record instead of aborting.
        try:
            params = pickle_load(params_path)
        except Exception:
            params = {}
        # Record the split settings so the run can be reproduced later.
        params['encode_data_test_ratio'] = args.test_ratio
        params['encode_data_random_seed'] = args.random_seed
        pickle_save(params, params_path)

    dataset = pickle_load(extracted_pkl)

    # Shuffle with the seed given on the command line so the split agrees
    # with the value recorded in params.pkl. The default seed (1) reproduces
    # the ordering of the previously hard-coded random.Random(1).
    random.Random(args.random_seed).shuffle(dataset)
    # The first `test_size` shuffled items are the test set, the rest train.
    test_size = int(len(dataset) * args.test_ratio)
    test_set = dataset[:test_size]
    train_set = dataset[test_size:]

    # NOTE(review): args.data_folder is used here even when it is None, and
    # tensor_folder is not read in the visible code -- confirm the intended
    # output location for stand-alone mode.
    train_path = join_path(args.data_folder, 'train_data')
    check_mkdir(train_path)
    # Command-line interface: either give --data_folder (paths are derived
    # from it) or give --extracted_pkl and --savepath explicitly.
    parser = argparse.ArgumentParser()

    parser.add_argument("--extracted_pkl", default=None)
    parser.add_argument("--savepath", default=None)
    parser.add_argument("--data_folder", default=None)

    args = parser.parse_args()

    if args.data_folder is None:
        # Stand-alone mode: the input pickle path is mandatory.
        if args.extracted_pkl is not None:
            extracted_pkl = args.extracted_pkl
        else:
            raise FileNotFoundError('No file path was provided')
        # NOTE(review): savepath stays None when --savepath is omitted and is
        # passed to pickle_save below unchanged -- confirm that is handled.
        savepath = args.savepath
    else:
        # Project-folder mode: input and output paths live under data_folder.
        setup_folder(args.data_folder)
        extracted_pkl = join_path(args.data_folder,
                                  ['extracted_txt', 'extracted.pkl'])
        savepath = join_path(args.data_folder, ['definition', 'def_dict.pkl'])

    dataset = pickle_load(extracted_pkl)

    print(f'Total number of applications: {len(dataset)}')

    # extract_definition's result is reported by its length and then pickled.
    def_example = extract_definition(dataset)

    print(f'Number of unique term: {len(def_example)}')

    print(f'Saving to {savepath}')
    pickle_save(def_example, savepath)
示例#4
0
    else:
        date = datetime.datetime.today().strftime('%Y_%m_%d')
        if args.remark is not None:
            date = date + '_' + args.remark
        model_output = join_path(args.data_folder, ['models', date])
        check_mkdir(model_output)
        tensor_folder = join_path(args.data_folder, 'tensor')

    # tensor_folder and model_output are set by the branch above.
    tensor_list = listdir(tensor_folder)

    # 'es' together with es_patience presumably enables early stopping --
    # confirm against the DNN class.
    model = DNN(model_output, es_patience=args.es_patience, callbacks=['es'])

    if args.data_folder is not None:
        params_path = join_path(args.data_folder, 'params.pkl')
        # Best effort: fall back to an empty record if params.pkl cannot be
        # loaded for any reason.
        try:
            params = pickle_load(params_path)
        except Exception:
            params = {}
        # Override the model's current parameters with the command-line
        # values and push them back into the model.
        model_params = model.get_params()
        model_params['epochs'] = args.epochs
        model_params['es_patience'] = args.es_patience
        model_params['layers_num'] = args.layers_num
        model_params['layers_act'] = args.layers_act
        model_params['pretrain_path'] = args.pretrain_path
        model.set_params(model_params)

        # Read the parameters back from the model and store each one in
        # params.pkl under a 'model_params_' prefixed key.
        model_params = model.get_params()
        for key in model_params.keys():
            key_name = 'model_params_' + key
            params[key_name] = model_params[key]
        pickle_save(params, params_path)
    if args.data_folder is None:
        # Stand-alone mode: at least one of the checkpoint folder or the
        # combined file must be supplied on the command line.
        checkpoint_save = args.checkpoint_folder
        combinefile = args.combinefile
        savepath = args.savepath
        if (checkpoint_save is None) and (combinefile is None):
            raise FileNotFoundError('No file path was provided')

    else:
        # Project-folder mode: paths are derived from data_folder and no
        # combined file is used.
        setup_folder(args.data_folder)
        checkpoint_save = join_path(args.data_folder, 'search_chunks')
        savepath = join_path(args.data_folder, ['extracted_txt',
                                                'extracted.pkl'])
        combinefile = None
    # Try the combined file first; on any failure fall back to combining the
    # checkpoint files. NOTE(review): with combinefile = None this relies on
    # pickle_load(None) raising -- confirm.
    try:
        output_list = pickle_load(combinefile)
    except Exception:
        output_list = combine_checkpoint_file(checkpoint_save)

    app_list = extract_app(output_list)
    print(f'Total number of applications: {len(app_list)}')

    processed_data = []
    for app in tqdm(app_list):
        # Each app is indexed positionally: 0 -> intro, 1 -> claims,
        # 2 -> definitions.
        intro_text = process_intro(app[0])
        claims = process_claim(app[1])
        definitions = process_definition(app[2])
        app_data = [intro_text, claims, definitions]
        # Keep only applications with more than one claim AND more than one
        # definition (the comparison is strict: a single item is dropped).
        if (len(claims) > 1) and (len(definitions) > 1):
            processed_data.append(app_data)
示例#6
0
    args = parser.parse_args()

    if args.data_folder is None:
        # Stand-alone mode: a checkpoint folder is mandatory.
        if args.checkpoint_folder is not None:
            chunk_list = combine_checkpoint_file(args.checkpoint_folder)
        else:
            raise FileNotFoundError('No file path was provided')
        savepath = args.savepath
    else:
        # Project-folder mode: paths are derived from data_folder.
        savepath = join_path(args.data_folder, ['vocab', 'vocab_tensor.pkl'])
        chunk_folder = join_path(args.data_folder, 'search_chunks')
        chunk_list = combine_checkpoint_file(chunk_folder)
        params_path = join_path(args.data_folder, 'params.pkl')
        # Best effort: fall back to an empty record if params.pkl cannot be
        # loaded for any reason.
        try:
            params = pickle_load(params_path)
        except Exception:
            params = {}
        params['extract_vocab_max_length'] = args.max_length
        params['extract_vocab_error_word_list'] = args.error_word_list
        # NOTE(review): this key has no underscore between 'vocab' and 'min',
        # unlike the two keys above -- looks like a typo, but readers of
        # params.pkl may depend on the current spelling; confirm before
        # renaming.
        params['extract_vocabmin_freq'] = args.min_freq
        pickle_save(params, params_path)

    app_list = extract_app(chunk_list)
    # Take a random sample of at most 100 applications. The shuffle uses the
    # global random state, so the sample differs between runs unless the
    # state is seeded elsewhere.
    random.shuffle(app_list)
    subset_size = min(len(app_list), 100)
    sub_list = app_list[:subset_size]

    # NOTE(review): this reports the size of the sample, not of app_list.
    print(f'Total number of applications: {len(sub_list)}')

    unique_word = {}
示例#7
0
from src.model.model import DNN
from src.model.model import predict_word
from src.utils.encode import encode_data
from src.utils.encode import encode_preword_len
from src.utils.general import pickle_load
from src.utils.general import count_word

# Relative paths of the artifacts loaded below when this module is imported.
vocab_dict_path = 'resources/vocab_tensor.pkl'
model_path = 'resources/model.h5'
definition_path = 'resources/def_dict.pkl'

# Build the model object once at import time and load the saved model file.
model = DNN()
model.load_model(model_path)

vocab_dict = pickle_load(vocab_dict_path)
# Add an entry for the literal token 'STOPSTOPSTOP', encoded with
# encode_data. NOTE(review): presumably a sentinel/stop marker -- confirm
# against the code that reads vocab_dict.
vocab_dict['STOPSTOPSTOP'] = encode_data('STOPSTOPSTOP')

def_dict = pickle_load(definition_path)


# BaseModel subclass declaring three string-annotated fields.
# NOTE(review): presumably the request payload of a route defined further
# down in this module -- confirm against the handlers.
class Inputs(BaseModel):
    claim: str
    intro: str
    term: str


# FastAPI application instance; route handlers are registered on it through
# decorators such as @app.get.
app = FastAPI()


@app.get("/")