def predict_list(path_list, maxlen=None):
    """Run NER prediction over a list of document paths and print reports.

    For each path in `path_list`, calls the module-level `predict(nn, path)`
    helper, collects `(text, entities, times)` per document, then prints a
    per-file timing/entity report followed by an aggregate report.

    Args:
        path_list: paths of the documents to process.
        maxlen: unused; kept for backward compatibility with existing callers.
    """
    os.makedirs(dataset_text_folder, exist_ok=True)
    files = list(glob(os.path.join(dataset_text_folder, '*')))
    print('files=%d %s' % (len(files), files))
    # assert files
    nn = NeuroNER(parameters_filepath=parameters_filepath)
    results = {}
    try:
        for i, path in enumerate(path_list):
            print('~' * 80)
            print('Processing %d of %d' % (i, len(path_list)), end=': ')
            text, entities, times = predict(nn, path)
            results[path] = (text, entities, times)
    except Exception as e:
        print('^' * 80)
        print('Failed to process %s' % path)
        print(type(e))
        print(e)
        raise
    finally:
        # Bug fix: previously nn.close() ran only on success, so the NeuroNER
        # session leaked whenever an exception propagated out of the loop.
        nn.close()
    print('=' * 80)
    print('Completed %d of %d' % (len(results), len(path_list)))
    print('!' * 80)
    print('files=%d %s' % (len(path_list), path_list[:5]))
    # Per-file report. Only the prefix of path_list that was successfully
    # processed is reported (results are filled in path_list order).
    for i, path in enumerate(path_list[:len(results)]):
        text, entities, (t0, t1, t2) = results[path]
        print('*' * 80)
        print('%3d: %5d %8d %s' % (i, len(text), os.path.getsize(path), path))
        if not text:
            continue
        print('pdftotext=%4.1f sec' % (t1 - t0))
        print(' predict=%4.1f sec' % (t2 - t1))
        print(' total=%4.1f sec %4.0f chars/sec ' % ((t2 - t0), len(text) / (t2 - t0)))
        summarize(entities, max_texts=10)
    # Aggregate report across all processed files.
    all_entities = []
    all_text_len = 0
    all_t = 0.0
    for path in path_list[:len(results)]:
        text, entities, (t0, t1, t2) = results[path]
        all_text_len += len(text)
        all_entities.extend(entities)
        all_t += t2 - t0
    print('#' * 80)
    print('All files: %d files length=%d' % (len(results), all_text_len))
    if all_text_len:
        print(' total=%4.1f sec %4.0f chars/sec ' % (all_t, all_text_len / all_t))
    summarize(all_entities, max_texts=100)
def main(argv=sys.argv):
    """NeuroNER main method: run NER over a hard-coded sample news article.

    Args:
        parameters_filepath the path to the parameters file
        output_folder the path to the output folder
    """
    # Parse arguments
    arguments = parse_arguments(argv[1:])
    # NOTE(review): `arguments` is parsed but never passed to NeuroNER() —
    # the model is constructed with defaults. Confirm this is intentional.
    nn = NeuroNER()
    # Predict entities over a long sample article (literal kept verbatim).
    entities = nn.predict( 'www fresnobee com news local crime article179830941 html Sex offender Snyder could live in Fresno motel | The Fresno Bee Mobile & Apps Jeffrey Snyder 62 was allowed supervised release in June after serving prison time for molesting children. He may be moved to a motel just south of downtown Fresno. FRESNO COUNTY DISTRICT ATTORNEY S OFFICE Jeffrey Snyder 62 was allowed supervised release in June after serving prison time for molesting children. He may be moved to a motel just south of downtown Fresno. FRESNO COUNTY DISTRICT ATTORNEY S OFFICE Sexually violent predator Jeffrey Snyder could soon have a new place to live By Rory Appleton Order Reprint of this Story October 19 2017 3:11 PM The California Department of State Hospitals suggested a new option Thursday for housing 62-year-old Jeffrey Snyder who was conditionally released in June after serving out a sentence for molesting children. The El Muir Motel at 2339 S. G St. is the proposed location. It is a one-story 6 400-square-foot building with 20 guest rooms located just south of downtown Fresno and west of Calwa near Golden State Boulevard. The Fresno County District Attorney s office is accepting public comment to include in its formal response in court. Anyone who wishes to submit a comment is asked to email [email protected] or mail to the office at 2220 Tulare St. Suite 1000 Fresno CA 93721. Mailed comments should have Attn: Sexual Assault Unit written on the envelope. Snyder s placement has been a fierce topic of discussion for both law enforcement and the general public for almost a year . A proposed placement at a home in Squaw Valley was met with fierce opposition by neighbors and the property eventually burned down. Of the 1 749 houses the state looked at none met the required criteria for placement of a sexually violent offender. A group of Cal Fire firefighters confer after putting out a mobile home fire- where sex offender Jeffrey Snyder was to be housed- at Dunlap on Sage Lane in January. JOHN WALKER [email protected]' )
    print(entities)
    nn.close()
def main(argv=sys.argv):
    """Entry point: parse command-line options, train a NeuroNER model, clean up.

    Args:
        argv: argument vector; argv[1:] is parsed for options such as the
            parameters file path and the output folder.
    """
    params = parse_arguments(argv[1:])
    model = NeuroNER(**params)
    model.fit()
    model.close()
def predict(path):
    """Convert one document to text, then run NeuroNER over the text folder.

    Args:
        path: path of the source document converted via pdftotext.
    """
    # NOTE(review): `path_txt` is not defined in this scope — presumably a
    # module-level constant naming the conversion target; confirm it exists.
    pdftotext(path, path_txt)
    found = list(glob(os.path.join(dataset_text_folder, '*')))
    print('files=%d %s' % (len(found), found))
    assert found
    engine = NeuroNER(parameters_filepath=parameters_filepath)
    engine.fit()
    engine.close()
def main(argv=sys.argv):
    """NeuroNER main method: annotate ./whole.json with NER spans.

    Reads `whole.json` (a list of records with a nested `context` field of
    tokenized sentences), runs NeuroNER over the flattened token text of each
    record, maps the character-offset entities back to
    (sentence, start-token, end-token) triples per statement, and writes the
    augmented records to `whole_ner.json`.

    Args:
        parameters_filepath the path to the parameters file
        output_folder the path to the output folder
    """
    # Parse arguments
    arguments = parse_arguments(argv[1:])
    nn = NeuroNER(**arguments)
    entire_time = time.time()
    #nn.fit()
    #e = nn.predict("Butch has the opportunity to just walk out of Maynard and Zed's shop.")
    #print(e)
    with open('./whole.json') as f:
        data = json.load(f)
    num_data = len(data)
    # One `context` per record: a list of statements, each a list of
    # tokenized sentences — assumed shape; TODO confirm against whole.json.
    contexts = [data[i]['context'] for i in range(num_data)]
    print('num_data : %d' % num_data)
    for i in range(num_data):
        #for i in range(106, num_data):
        print('Process %d data.....' % (i + 1))
        if len(contexts[i]) == 0:
            print('no data in context %d' % (i + 1))
            continue
        t = time.time()
        context = contexts[i]
        # Flatten each statement's sentences into one token array.
        # NOTE(review): the comprehension variable `i` shadows the outer loop
        # index; harmless in Python 3 (comprehensions have their own scope),
        # but confusing to read.
        statement = [ np.concatenate((context[i]), axis=None) for i in range(len(context)) ]
        # All tokens of the record in order.
        sentence = np.concatenate((statement), axis=None)
        # Cumulative token counts per statement: statement_index[j] is the
        # flat-token offset where statement j begins.
        statement_len = [len(statement[j]) for j in range(len(statement))]
        statement_index = list()
        statement_index.append(0)
        for j in range(len(statement_len)):
            statement_index.append(statement_index[j] + statement_len[j])
        # Per-statement cumulative token counts per sentence:
        # sentence_index[j][k] is the offset of sentence k within statement j.
        sentence_index = list()
        for j in range(len(context)):
            s = context[j]
            s_len = [len(s[k]) for k in range(len(s))]
            s_index = list()
            s_index.append(0)
            for k in range(len(s_len)):
                s_index.append(s_index[k] + s_len[k])
            sentence_index.append(s_index)
        # Join tokens with single spaces and run NER on the resulting text.
        text = ' '.join(list(sentence))
        entities = nn.predict(text)
        # end_list[j] = character offset (in `text`) of the end of token j;
        # the extra += 1 accounts for the joining space.
        end_index = 0
        end_list = list()
        for j in range(len(sentence)):
            end_index += len(sentence[j])
            end_list.append(end_index)
            end_index += 1
        # Walk entities (assumed sorted by start offset — TODO confirm) and
        # convert character spans to token spans, then to per-statement,
        # per-sentence local token indices.
        statement_iter = 0
        sentence_iter = 0
        ner = list()
        ner_statement = list()
        for entity in entities:
            e_type = entity['type']
            e_start = entity['start']
            e_end = entity['end']
            # First token whose end passes the entity start.
            start = -1
            end = -1
            for j in range(len(sentence)):
                if end_list[j] > e_start:
                    start = j
                    break
            # Last token fully inside the entity span (end is exclusive).
            for j in range(len(sentence) - 1, -1, -1):
                if end_list[j] <= e_end:
                    end = j + 1
                    break
            # Advance to the statement containing `start`, flushing the
            # accumulated per-statement entity list at each boundary.
            while start >= statement_index[statement_iter + 1]:
                statement_iter += 1
                sentence_iter = 0
                ner.append(ner_statement)
                ner_statement = list()
            # Advance to the sentence (within the statement) containing `start`.
            while start >= statement_index[statement_iter] + sentence_index[ statement_iter][sentence_iter + 1]:
                sentence_iter += 1
            # Rebase token indices to be sentence-local.
            start -= (statement_index[statement_iter] + sentence_index[statement_iter][sentence_iter])
            end -= (statement_index[statement_iter] + sentence_index[statement_iter][sentence_iter])
            ner_statement.append([sentence_iter, start, end, e_type])
        # Flush the final statement's entities.
        # NOTE(review): trailing statements with no entities get no empty
        # list appended, so len(ner) can be < len(context) — confirm callers
        # tolerate this.
        ner.append(ner_statement)
        data[i]['ner'] = ner
        print('Done(%.2fs)' % (time.time() - t))
    with open('whole_ner.json', 'w') as f:
        json.dump(data, f)
    print('Elapsed Time : %.2fs' % (time.time() - entire_time))
    nn.close()
import spacy
from neuroner import NeuroNER
import load_parameters

# Locations of the trained model and the unannotated input data.
model_folder = '../trained_models/conll_2003_en'
init_dataset_folder = '../data/example_unannotated_texts'

# Load the model and the spaCy pipeline once at import time so repeated
# predict() calls reuse them.
arguments = load_parameters.parse_arguments(
    pretrained_model_folder=model_folder, init_dataset=init_dataset_folder)
nn = NeuroNER(**arguments)
spacy_nlp = spacy.load('en')


# this function can be repeated several times to deploy as a service without
# loading the model and core_nlp again
def predict(text):
    """Return NeuroNER predictions for `text` using the preloaded model."""
    return nn.new_predict(text, spacy_nlp)