def init_from_scratch(args, train_exs, dev_exs):
    """Initialize a new model from scratch: new data, new dictionaries.

    Args:
        args: argument namespace controlling feature/dictionary construction
            (reads ``use_char_emb`` and ``embedding_file`` here).
        train_exs: training examples; source of features and dictionary words.
        dev_exs: dev examples; their words are also added to the dictionaries.

    Returns:
        A freshly constructed DocReader model, with pretrained word
        embeddings loaded if ``args.embedding_file`` is set.
    """
    # Create a feature dict out of the annotations in the data
    logger.info('-' * 100)
    logger.info('Generate features')
    feature_dict = utils.build_feature_dict(args, train_exs)
    # Lazy %-args: logging formats only if the record is emitted.
    logger.info('Num features = %d', len(feature_dict))
    logger.info(feature_dict)

    # Build a dictionary from the data questions + words (train/dev splits)
    logger.info('-' * 100)
    logger.info('Build dictionary')
    word_dict = utils.build_word_dict(args, train_exs + dev_exs)
    logger.info('Num words = %d', len(word_dict))

    # Optionally build a character dictionary as well.
    extra_dicts = ()
    if args.use_char_emb:
        logger.info('-' * 100)
        logger.info('Build character dictionary')
        character_dict = utils.build_character_dict(args, train_exs + dev_exs)
        logger.info('Num character = %d', len(character_dict))
        extra_dicts = (character_dict,)

    # Initialize model — single construction site instead of two duplicated
    # branches; the character dict is appended positionally only when built.
    model = DocReader(config.get_model_args(args), word_dict, feature_dict,
                      *extra_dicts)

    # Load pretrained embeddings for words in dictionary
    if args.embedding_file:
        model.load_embeddings(word_dict.tokens(), args.embedding_file)

    return model
def init_from_scratch(args, train_exs, dev_exs):
    """Initialize a new model from scratch: new data, new dictionary.

    Args:
        args: argument namespace (reads ``embedding_file`` here).
        train_exs: training examples; source of features and dictionary words.
        dev_exs: dev examples; their words are also added to the dictionary.

    Returns:
        A freshly constructed DocReader model, with pretrained word
        embeddings loaded if ``args.embedding_file`` is set.
    """
    # Create a feature dict out of the annotations in the data
    logger.info('-' * 100)
    logger.info('Generate features')
    feature_dict = utils.build_feature_dict(args, train_exs)
    # Lazy %-args: logging formats only if the record is emitted.
    logger.info('Num features = %d', len(feature_dict))
    logger.info(feature_dict)

    # Build a dictionary from the data questions + words (train/dev splits)
    logger.info('-' * 100)
    logger.info('Build dictionary')
    word_dict = utils.build_word_dict(args, train_exs + dev_exs)
    logger.info('Num words = %d', len(word_dict))

    # Initialize model
    model = DocReader(config.get_model_args(args), word_dict, feature_dict)

    # Load pretrained embeddings for words in dictionary
    if args.embedding_file:
        model.load_embeddings(word_dict.tokens(), args.embedding_file)

    # NOTE: a commented-out quantization/TT-embedding path
    # (args.use_quant_embed / quant_embed.quantize_embed) was removed here;
    # recover it from version control if that feature is revived.
    return model
def init_from_scratch(args, train_exs, dev_exs):
    """Initialize a new model from scratch: new data, new dictionaries.

    Word embeddings come from one of two mutually exclusive sources:
    a previously saved model checkpoint (``args.embedding_from_model``) or
    a pretrained embedding file (``args.embedding_file``). Character
    embeddings may additionally be loaded from ``args.char_embedding_file``.

    Args:
        args: argument namespace (reads ``embedding_from_model``,
            ``embedding_file`` and ``char_embedding_file`` here).
        train_exs: training examples; source of features and dictionary words.
        dev_exs: dev examples; their words are also added to the dictionaries.

    Returns:
        A freshly constructed DocReader model with embeddings loaded.
    """
    # Create a feature dict out of the annotations in the data
    logger.info('-' * 100)
    logger.info('Generate features')
    feature_dict = utils.build_feature_dict(args, train_exs)
    # Lazy %-args: logging formats only if the record is emitted.
    logger.info('Num features = %d', len(feature_dict))
    logger.info(feature_dict)

    # Build a dictionary from the data questions + documents (train/dev splits)
    logger.info('-' * 100)
    logger.info('Build word dictionary')
    if args.embedding_from_model:
        # Reuse the word dict and embedding matrix of a saved checkpoint.
        # map_location='cpu' is the documented equivalent of the
        # lambda-that-returns-storage idiom: load everything onto CPU.
        sp = torch.load(args.embedding_from_model, map_location='cpu')
        word_dict = sp['word_dict']
        embedding_weights = sp['state_dict']['embedding.weight']
    else:
        embedding_weights = None
        word_dict = utils.build_word_dict(args, train_exs + dev_exs)
    logger.info('Num words = %d', len(word_dict))

    # Build a char dictionary from the data questions + documents (train/dev splits)
    logger.info('-' * 100)
    logger.info('Build char dictionary')
    char_dict = utils.build_char_dict(args, train_exs + dev_exs)
    logger.info('Num chars = %d', len(char_dict))

    # Initialize model
    model = DocReader(config.get_model_args(args), word_dict, char_dict,
                      feature_dict)

    # Load pretrained embeddings: checkpoint weights take precedence over
    # an embedding file.
    if args.embedding_from_model:
        model.load_emb_weights(embedding_weights)
    else:
        if args.embedding_file:
            model.load_embeddings(word_dict.tokens(), args.embedding_file)
    if args.char_embedding_file:
        model.load_char_embeddings(char_dict.tokens(), args.char_embedding_file)

    return model
def init_from_scratch(args, train_exs, dev_exs):
    """Initialize a new model from scratch: new data, new dictionary.

    Args:
        args: argument namespace (reads ``embedding_file`` here).
        train_exs: training examples; source of features and dictionary words.
        dev_exs: dev examples; their words are also added to the dictionary.

    Returns:
        A freshly constructed DocReader model, with pretrained word
        embeddings loaded if ``args.embedding_file`` is set.
    """
    # Create a feature dict out of the annotations in the data
    logger.info('-' * 100)
    logger.info('Generate features')
    feature_dict = utils.build_feature_dict(args, train_exs)
    # Lazy %-args: logging formats only if the record is emitted.
    logger.info('Num features = %d', len(feature_dict))
    logger.info(feature_dict)

    # Build a dictionary from the data questions + words (train/dev splits)
    logger.info('-' * 100)
    logger.info('Build dictionary')
    word_dict = utils.build_word_dict(args, train_exs + dev_exs)
    logger.info('Num words = %d', len(word_dict))

    # Initialize model
    model = DocReader(config.get_model_args(args), word_dict, feature_dict)

    # Load pretrained embeddings for words in dictionary
    if args.embedding_file:
        model.load_embeddings(word_dict.tokens(), args.embedding_file)

    return model
def init_from_scratch(args, train_exs, dev_exs):
    """Build a brand-new model: fresh feature dict, fresh word dictionary.

    Constructs the feature dictionary from the training annotations, the
    word dictionary from the combined train/dev examples, instantiates a
    DocReader, and (when ``args.embedding_file`` is given) loads pretrained
    embeddings for every word in the dictionary.
    """
    log = logger.info
    all_exs = train_exs + dev_exs

    # Feature dictionary from the training annotations.
    log("-" * 100)
    log("Generate features")
    feature_dict = utils.build_feature_dict(args, train_exs)
    log("Num features = %d" % len(feature_dict))
    log(feature_dict)

    # Word dictionary over both splits' questions and words.
    log("-" * 100)
    log("Build dictionary")
    word_dict = utils.build_word_dict(args, all_exs)
    log("Num words = %d" % len(word_dict))

    # Fresh model; optionally seed it with pretrained word embeddings.
    model = DocReader(config.get_model_args(args), word_dict, feature_dict)
    if args.embedding_file:
        model.load_embeddings(word_dict.tokens(), args.embedding_file)

    return model