    # NOTE(review): this chunk starts mid-function — the enclosing `def` (a
    # baker CLI command whose parameters include model_file, lr_entity_file
    # and centroid_entity_file) lies above the visible region; indentation is
    # reconstructed accordingly.
    # Load the shared word2vec model and both entity models memory-mapped
    # read-only ('r'), so the large arrays are not copied into RAM per load.
    model = load_word2vec_model(model_file, mmap='r')
    lr_entity_model = EntityModel.load(lr_entity_file, mmap='r')
    centroid_entity_model = EntityModel.load(centroid_entity_file, mmap='r')
    # Pair each entity with its lowercased form so queries can be matched
    # case-insensitively while keeping the original surface form for display.
    norm_entities = [(entity.lower(), entity) for entity in lr_entity_model.entities]
    # Interactive query loop: read one query per line until EOF (Ctrl-D).
    while True:
        try:
            line = raw_input('> ').strip()  # Python 2 input; EOF raises EOFError
        except EOFError:
            break
        words, entities = parse_query(norm_entities, line)
        # Rank entities under both models so results can be compared side by side.
        lr_top = top_entities(model, lr_entity_model, entities, words)
        centroid_top = top_entities(model, centroid_entity_model, entities, words)
        for (lr_score, lr_ent), (centroid_score, centroid_ent) in zip(lr_top, centroid_top):
            # Two fixed-width columns: LR-model result | centroid-model result.
            print '%-50s%10.3f | %-50s%10.3f' % (lr_ent, lr_score, centroid_ent, centroid_score)


if __name__ == '__main__':
    # Fixed seed for reproducible runs; baker dispatches CLI sub-commands.
    np.random.seed(1729)
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    baker.run()
        # NOTE(review): the lines below are the tail of a function whose start
        # is above this chunk — they close an OrderedDict([...]) of metric
        # key/value pairs being appended/emitted by that function.
        ('q', q),
        ('pred_bps', float(pred_bps)),
        ('avg_zeros', float(avg_zeros)),
        ('avg_err', float(avg_err)),
    ]))


def load_quant_data(json_filename):
    """Load newline-delimited JSON quantization results into a DataFrame.

    Each line of ``json_filename`` is decoded as one row (key order is
    preserved via OrderedDict).  The row's 'accuracy' list is collapsed to a
    single fraction-correct value taken from its last entry, which is
    expected to be the 'total' section.
    """
    import pandas as pd  # deferred import: pandas is only needed here
    with open(json_filename) as fin:
        data = []
        # object_pairs_hook keeps the JSON key order in each decoded row.
        decoder = json.JSONDecoder(object_pairs_hook=OrderedDict)
        for line in fin:
            row = decoder.decode(line)
            # The last accuracy record is assumed to be the overall total.
            accuracy = row['accuracy'][-1]
            assert accuracy['section'] == 'total'  # XXX
            # Replace the raw record list with the fraction of correct answers.
            acc_percentage = float(accuracy['correct']) / (accuracy['correct'] + accuracy['incorrect'])
            row['accuracy'] = acc_percentage
            data.append(row)
    return pd.DataFrame(data)


if __name__ == '__main__':
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    baker.run()