def parse():
    """Handle the parse form: run the parser on submitted text and rules.

    On a POST request, reads ``text`` and the three rule sets ``rules0``,
    ``rules1`` and ``rules2`` from ``request.form``, passes them through
    ``ParamHelper`` into a ``Document`` proto and an ``EdgRules`` proto, and
    calls ``parse_using_bllip`` on the pair.

    Returns:
        The rendered ``index_edg.html`` template. For POST it is rendered
        with the submitted text and rules plus the JSON-serialized results
        of ``get_brat_data`` and ``get_brat_data_added``; for any other
        method it is rendered without arguments.
    """
    if request.method != 'POST':
        return render_template('index_edg.html')

    text = request.form['text']
    # Fixed placeholder id; the form does not supply a document id.
    doc_id = '99999999'
    # Debug echo of the submitted text. Written as a call so the statement
    # is valid on both Python 2 and Python 3.
    print(text)

    # Each rule set arrives as one string; it is split on "\n" into lines.
    rules0 = request.form['rules0']
    rule0_lines = rules0.split("\n")
    rules1 = request.form['rules1']
    rule1_lines = rules1.split("\n")
    rules2 = request.form['rules2']
    rule2_lines = rules2.split("\n")

    # ParamHelper copies the text/doc id and the rule lines into the protos.
    param_helper = ParamHelper(text, doc_id, rule0_lines, rule1_lines,
                               rule2_lines)
    raw_doc = document_pb2.Document()
    edg_rules = edgRules_pb2.EdgRules()
    param_helper.setDocProtoAttributes(raw_doc)
    param_helper.setRuleProtoAttributes(edg_rules)

    parse_bllip = parse_using_bllip(raw_doc, edg_rules)
    brat_bllip = json.dumps(get_brat_data(parse_bllip))
    brat_bllip_added = json.dumps(get_brat_data_added(parse_bllip))

    return render_template('index_edg.html',
                           text=text,
                           rules0=rules0,
                           rules1=rules1,
                           rules2=rules2,
                           brat_string_bllip=brat_bllip,
                           brat_string_bllip_added=brat_bllip_added)
def run():
    """Parse one text file with three rule files and print the results.

    Command-line arguments, read from ``sys.argv``:
        1: path of the text file to parse
        2: path of the phase 0 rule file
        3: path of the phase 1 rule file
        4: path of the phase 2 rule file

    For every sentence of the parsed document, prints the sentence text,
    then one line per entry in ``sentence.dependency_extra``, then a
    separator line.

    Raises:
        IndexError: if fewer than four command-line arguments are given.
        IOError/OSError: if any of the files cannot be opened or read.
    """
    # ``with`` guarantees the handle is closed even when reading fails.
    with open(sys.argv[1], "r") as text_fh:
        text = text_fh.read()

    # Fixed placeholder id; the script has no real document id.
    doc_id = '99999999'

    rule_phase0_filename = sys.argv[2]
    rule_phase1_filename = sys.argv[3]
    rule_phase2_filename = sys.argv[4]

    # readlines() keeps the trailing newline on every rule line.
    with open(rule_phase0_filename, "r") as fh0:
        rule0_lines = fh0.readlines()
    with open(rule_phase1_filename, "r") as fh1:
        rule1_lines = fh1.readlines()
    with open(rule_phase2_filename, "r") as fh2:
        rule2_lines = fh2.readlines()

    # ParamHelper copies the text/doc id and the rule lines into the protos.
    param_helper = ParamHelper(text, doc_id, rule0_lines, rule1_lines,
                               rule2_lines)
    raw_doc = document_pb2.Document()
    edg_rules = edgRules_pb2.EdgRules()
    param_helper.setDocProtoAttributes(raw_doc)
    param_helper.setRuleProtoAttributes(edg_rules)

    # Parse using Bllip parser.
    result = parse_using_bllip(raw_doc, edg_rules)
    helper = DocHelper(result)

    for sentence in result.sentence:
        print(helper.text(sentence))
        for dep_extra in sentence.dependency_extra:
            print(helper.printExtraDependency(sentence, dep_extra))
        print("===============================")
def run(rule_phase0_filename='/home/leebird/Projects/nlputils/visual/uploads/rules_phase0.txt',
        rule_phase1_filename='/home/leebird/Projects/nlputils/visual/uploads/rules_phase1.txt',
        rule_phase2_filename='/home/leebird/Projects/nlputils/visual/uploads/rules_phase2.txt',
        json_doc_filename='/home/leebird/Projects/nlputils/utils/typing/test.json'):
    """Parse a JSON-encoded document and apply the arg0 GENE constraint.

    Loads the JSON document, upper-cases the ``entityType`` of every entry
    under its ``entity`` key, converts it to a ``Document`` proto, parses it
    with ``parse_using_bllip`` using the three rule files, then calls
    ``constraint_args`` and ``propagate`` with the constraint
    ``{'arg0': {Entity.GENE}}``. The value returned by ``constraint_args``
    is printed.

    Args:
        rule_phase0_filename: path of the phase 0 rule file.
        rule_phase1_filename: path of the phase 1 rule file.
        rule_phase2_filename: path of the phase 2 rule file.
        json_doc_filename: path of the JSON document to parse.

    All arguments default to the paths that were previously hard-coded, so
    calling ``run()`` without arguments behaves as before.

    Raises:
        IOError/OSError: if any of the files cannot be opened or read.
        KeyError: if the JSON document has no ``entity`` key or an entity
            has no ``entityType`` key.
    """
    # Fixed placeholder id handed to ParamHelper.
    doc_id = '99999999'

    # ``with`` guarantees each handle is closed even when reading fails.
    # readlines() keeps the trailing newline on every rule line.
    with open(rule_phase0_filename, "r") as fh0:
        rule0_lines = fh0.readlines()
    with open(rule_phase1_filename, "r") as fh1:
        rule1_lines = fh1.readlines()
    with open(rule_phase2_filename, "r") as fh2:
        rule2_lines = fh2.readlines()

    with open(json_doc_filename) as f:
        json_doc = json.load(f)

    # Upper-case the entity type names before converting to a proto.
    # NOTE(review): presumably required to match the Entity enum value
    # names (e.g. GENE) -- confirm against document.proto.
    for entity in json_doc['entity'].values():
        entity['entityType'] = entity['entityType'].upper()

    text = json.dumps(json_doc)
    # Third positional argument of json_format.Parse is ignore_unknown_fields.
    raw_doc = json_format.Parse(text, document_pb2.Document(), True)

    # Only the rule attributes are taken from ParamHelper here; the document
    # proto was already built from JSON above.
    param_helper = ParamHelper(text, doc_id, rule0_lines, rule1_lines,
                               rule2_lines)
    edg_rules = edgRules_pb2.EdgRules()
    param_helper.setRuleProtoAttributes(edg_rules)

    # Parse using Bllip parser.
    doc = parse_using_bllip(raw_doc, edg_rules)
    helper = DocHelper(doc)

    # A fresh constraint mapping is built for each call, as in the original,
    # so neither callee can observe changes made by the other.
    invalid_deps = constraint_args(helper,
                                   {'arg0': {document_pb2.Entity.GENE}})
    print(invalid_deps)
    propagate(helper, {'arg0': {document_pb2.Entity.GENE}}, invalid_deps)
def upload():
    """Handle the upload form: parse text with uploaded or typed-in rules.

    On a POST request, the three rule sets are taken from the uploaded files
    ``ruleFile0``..``ruleFile2`` via ``save_read_uploaded_file``. Whenever
    that helper returns an empty string for a phase, the corresponding form
    field ``rules0``..``rules2`` is used instead. The text and rules are then
    parsed with ``parse_using_bllip``.

    Returns:
        The rendered ``index_edg.html`` template. For POST it is rendered
        with the text, the rules and the JSON-serialized results of
        ``get_brat_data`` and ``get_brat_data_added``; for any other method
        it is rendered without arguments.
    """
    if request.method != 'POST':
        return render_template('index_edg.html')

    # Look up all three uploads first, then read them, so a missing upload
    # field fails before any file is handed to save_read_uploaded_file.
    upload0 = request.files['ruleFile0']
    upload1 = request.files['ruleFile1']
    upload2 = request.files['ruleFile2']
    rule_texts = [save_read_uploaded_file(item)
                  for item in (upload0, upload1, upload2)]

    text = request.form['text']

    # Fall back to the typed-in rules for every phase whose upload came back
    # as an empty string.
    for index, form_key in enumerate(('rules0', 'rules1', 'rules2')):
        if rule_texts[index] == "":
            rule_texts[index] = request.form[form_key]
    rules0, rules1, rules2 = rule_texts

    rule0_lines = rules0.split("\n")
    rule1_lines = rules1.split("\n")
    rule2_lines = rules2.split("\n")

    # Fixed placeholder document id.
    doc_id = "9999999"
    param_helper = ParamHelper(text, doc_id, rule0_lines, rule1_lines,
                               rule2_lines)

    # ParamHelper copies the text/doc id and the rule lines into the protos.
    raw_doc = document_pb2.Document()
    edg_rules = edgRules_pb2.EdgRules()
    param_helper.setDocProtoAttributes(raw_doc)
    param_helper.setRuleProtoAttributes(edg_rules)

    parse_bllip = parse_using_bllip(raw_doc, edg_rules)

    template_args = {
        'text': text,
        'rules0': rules0,
        'rules1': rules1,
        'rules2': rules2,
    }
    template_args['brat_string_bllip'] = json.dumps(
        get_brat_data(parse_bllip))
    template_args['brat_string_bllip_added'] = json.dumps(
        get_brat_data_added(parse_bllip))
    return render_template('index_edg.html', **template_args)
def run():
    """Parse every file in a directory remotely and print selected relations.

    Command-line arguments, read from ``sys.argv``:
        1: input directory; every entry matching ``<dir>/*`` is read as one
           document whose id is the file name without its extension
        2: path of the phase 0 rule file

    The documents are sent to the parsing server in batches. For every
    sentence of every returned document, the relations built by
    ``EdgRelations`` whose name is one of ``inv, reg, ass, exp, cmp, isa,
    fnd`` are printed, one group of lines per argument tuple.

    Raises:
        IndexError: if fewer than two command-line arguments are given.
        IOError/OSError: if an input file or the rule file cannot be read.
    """
    # Build one Document proto per file in the input directory.
    input_dir_path = sys.argv[1]
    document_list = []
    for input_file in glob.glob(input_dir_path + "/*"):
        # ``with`` guarantees the handle is closed even when reading fails.
        with open(input_file, "r") as text_fh:
            text = text_fh.read()
        raw_doc = document_pb2.Document()
        raw_doc.text = text
        raw_doc.doc_id = os.path.splitext(os.path.basename(input_file))[0]
        document_list.append(raw_doc)

    rule_phase0_filename = sys.argv[2]
    with open(rule_phase0_filename, "r") as fh0:
        rule0_lines = fh0.readlines()

    # TODO: update ParamHelper. Text and doc id are passed as "NA"
    # placeholders because only the rule attributes are taken from it here.
    param_helper = ParamHelper("NA", "NA", rule0_lines, [], [])
    edg_rules = edgRules_pb2.EdgRules()
    param_helper.setRuleProtoAttributes(edg_rules)

    # This is a simple function to make requests out of a list of documents.
    # We put 5 documents in each request.
    requests = edg_request_iter_docs(
        document_list,
        edg_rules,
        request_size=5,
        request_type=rpc_pb2.EdgRequest.PARSE_BLLIP)

    # Given a request iterator, send requests in parallel and get responses.
    # NOTE(review): server address and port are hard-coded.
    responses_queue = grpcapi.get_queue(server='128.4.20.169',
                                        port=8902,
                                        request_thread_num=10,
                                        iterable_request=requests,
                                        edg_request_processor=True)

    # Relation names that are reported; hoisted because it never changes.
    relations_to_print = ("inv", "reg", "ass", "exp", "cmp", "isa", "fnd")

    for response in responses_queue:
        for doc in response.document:
            helper = DocHelper(doc)
            doc_id = doc.doc_id
            for sent_num, sentence in enumerate(doc.sentence):
                sent_text = helper.text(sentence)
                edg_relations = EdgRelations(doc_id, sent_num)
                edg_relations.setRelations(helper, sentence,
                                           sentence.dependency_extra)
                for edg_relation in edg_relations.relations:
                    # Attributes are read in the original order, before the
                    # name filter is applied.
                    numb_args_list = edg_relation.getEdgRelationNumArgs()
                    relation_name = edg_relation.name
                    trigger_head = edg_relation.trigger_head
                    trigger_phrase = edg_relation.trigger_phrase
                    if relation_name not in relations_to_print:
                        continue
                    for numb_args in numb_args_list:
                        print("Sentence: " + doc_id + "\t" + str(sent_num)
                              + "\t" + sent_text)
                        print("Relation: " + relation_name + "\t"
                              + trigger_head + "\t" + trigger_phrase)
                        print("Arg0: " + numb_args[0])
                        print("Arg1: " + numb_args[1])
                        print("Arg2: " + numb_args[2])
                        print("\n")
def run():
    """Parse every file in a directory remotely and print extra dependencies.

    Command-line arguments, read from ``sys.argv``:
        1: input directory; every entry matching ``<dir>/*`` is read as one
           document whose id is the file name without its extension
        2: path of the phase 0 rule file

    The documents are sent to the parsing server in batches. For every
    sentence of every returned document, one tab-separated line is printed
    per entry in ``sentence.dependency_extra``: document id, sentence
    number, the analysis string and the sentence text. A sentence without
    any such entry gets a single line built with
    ``printEmptyExtraDependencyAnalysis`` instead.

    Raises:
        IndexError: if fewer than two command-line arguments are given.
        IOError/OSError: if an input file or the rule file cannot be read.
    """
    # Build one Document proto per file in the input directory.
    input_dir_path = sys.argv[1]
    document_list = []
    for input_file in glob.glob(input_dir_path + "/*"):
        # ``with`` guarantees the handle is closed even when reading fails.
        with open(input_file, "r") as text_fh:
            text = text_fh.read()
        raw_doc = document_pb2.Document()
        raw_doc.text = text
        raw_doc.doc_id = os.path.splitext(os.path.basename(input_file))[0]
        document_list.append(raw_doc)

    rule_phase0_filename = sys.argv[2]
    with open(rule_phase0_filename, "r") as fh0:
        rule0_lines = fh0.readlines()

    # TODO: update ParamHelper. Text and doc id are passed as "NA"
    # placeholders because only the rule attributes are taken from it here.
    param_helper = ParamHelper("NA", "NA", rule0_lines, [], [])
    edg_rules = edgRules_pb2.EdgRules()
    param_helper.setRuleProtoAttributes(edg_rules)

    # This is a simple function to make requests out of a list of documents.
    # We put 5 documents in each request.
    requests = edg_request_iter_docs(
        document_list,
        edg_rules,
        request_size=5,
        request_type=rpc_pb2.EdgRequest.PARSE_BLLIP)

    # Given a request iterator, send requests in parallel and get responses.
    # NOTE(review): server address and port are hard-coded.
    responses_queue = grpcapi.get_queue(server='128.4.20.169',
                                        port=8902,
                                        request_thread_num=10,
                                        iterable_request=requests,
                                        edg_request_processor=True)

    for response in responses_queue:
        for doc in response.document:
            helper = DocHelper(doc)
            doc_id = doc.doc_id
            for sent_num, sentence in enumerate(doc.sentence):
                sent_text = helper.text(sentence)
                line_prefix = doc_id + "\t" + str(sent_num) + "\t"
                extra_deps = sentence.dependency_extra
                for dep_extra in extra_deps:
                    print(line_prefix
                          + helper.printExtraDependencyAnalysis(sentence,
                                                                dep_extra)
                          + "\t" + sent_text)
                # Sentences without extra dependencies still get one line.
                if not extra_deps:
                    print(line_prefix
                          + helper.printEmptyExtraDependencyAnalysis(sentence)
                          + "\t" + sent_text)