Exemplo n.º 1
0
def parse():
    """Handle the rule-editor form and render the parse result.

    For any non-POST request the bare ``index_edg.html`` template is
    rendered.  For a POST request the ``text``, ``rules0``, ``rules1`` and
    ``rules2`` form fields are read, handed to ``ParamHelper`` to fill a
    ``Document`` and an ``EdgRules`` proto, and the pair is passed to
    ``parse_using_bllip``.  The result is converted with ``get_brat_data``
    and ``get_brat_data_added``, serialized to JSON and rendered back into
    ``index_edg.html`` together with the submitted text and rules.

    Returns:
        Whatever ``render_template`` returns for ``index_edg.html``.
    """
    if request.method != 'POST':
        return render_template('index_edg.html')

    text = request.form['text']
    doc_id = '99999999'
    # Debug echo of the submitted text.  Written as a function call so the
    # module also compiles on Python 3 (the statement form does not).
    print(text)

    # Each rule set arrives as one newline-separated string.
    rules0 = request.form['rules0']
    rule0_lines = rules0.split("\n")
    rules1 = request.form['rules1']
    rule1_lines = rules1.split("\n")
    rules2 = request.form['rules2']
    rule2_lines = rules2.split("\n")

    param_helper = ParamHelper(text, doc_id, rule0_lines, rule1_lines,
                               rule2_lines)
    raw_doc = document_pb2.Document()
    edg_rules = edgRules_pb2.EdgRules()
    param_helper.setDocProtoAttributes(raw_doc)
    param_helper.setRuleProtoAttributes(edg_rules)

    parse_bllip = parse_using_bllip(raw_doc, edg_rules)
    brat_bllip = json.dumps(get_brat_data(parse_bllip))
    brat_bllip_added = json.dumps(get_brat_data_added(parse_bllip))

    return render_template('index_edg.html', text=text, rules0=rules0,
                           rules1=rules1, rules2=rules2,
                           brat_string_bllip=brat_bllip,
                           brat_string_bllip_added=brat_bllip_added)
Exemplo n.º 2
0
def run():
    """Parse one text file with three rule files and print extra dependencies.

    Command-line arguments (read from ``sys.argv``):
        1: path of the text file to parse.
        2, 3, 4: paths of the phase-0, phase-1 and phase-2 rule files.

    The text and rule lines are handed to ``ParamHelper`` to fill a
    ``Document`` and an ``EdgRules`` proto, which are passed to
    ``parse_using_bllip``.  For every sentence of the result, the sentence
    text and each entry of ``sentence.dependency_extra`` are printed,
    followed by a separator line.
    """
    # Context managers guarantee the handles are closed even if a read fails.
    with open(sys.argv[1], "r") as text_fh:
        text = text_fh.read()

    doc_id = '99999999'
    rule_phase0_filename = sys.argv[2]
    rule_phase1_filename = sys.argv[3]
    rule_phase2_filename = sys.argv[4]
    with open(rule_phase0_filename, "r") as fh0:
        rule0_lines = fh0.readlines()
    with open(rule_phase1_filename, "r") as fh1:
        rule1_lines = fh1.readlines()
    with open(rule_phase2_filename, "r") as fh2:
        rule2_lines = fh2.readlines()

    param_helper = ParamHelper(text, doc_id, rule0_lines, rule1_lines,
                               rule2_lines)

    raw_doc = document_pb2.Document()
    edg_rules = edgRules_pb2.EdgRules()

    param_helper.setDocProtoAttributes(raw_doc)
    param_helper.setRuleProtoAttributes(edg_rules)

    # Parse using Bllip parser.
    result = parse_using_bllip(raw_doc, edg_rules)
    helper = DocHelper(result)
    for sentence in result.sentence:
        print(helper.text(sentence))
        for dep_extra in sentence.dependency_extra:
            print(helper.printExtraDependency(sentence, dep_extra))
        print("===============================")
Exemplo n.º 3
0
def run():
    """Parse a JSON test document and run argument constraints on the result.

    Reads the three rule files and the JSON document from hard-coded paths,
    upper-cases every entity's ``entityType`` in the JSON, converts it to a
    ``Document`` proto with ``json_format.Parse`` and passes it, together
    with the ``EdgRules`` proto filled by ``ParamHelper``, to
    ``parse_using_bllip``.  ``constraint_args`` is then called on the parsed
    document, its result is printed and forwarded to ``propagate``.
    """
    doc_id = '99999999'
    rule_phase0_filename = '/home/leebird/Projects/nlputils/visual/uploads/rules_phase0.txt'
    rule_phase1_filename = '/home/leebird/Projects/nlputils/visual/uploads/rules_phase1.txt'
    rule_phase2_filename = '/home/leebird/Projects/nlputils/visual/uploads/rules_phase2.txt'

    # Context managers guarantee the handles are closed even if a read fails.
    with open(rule_phase0_filename, "r") as fh0:
        rule0_lines = fh0.readlines()
    with open(rule_phase1_filename, "r") as fh1:
        rule1_lines = fh1.readlines()
    with open(rule_phase2_filename, "r") as fh2:
        rule2_lines = fh2.readlines()

    # Only the load needs the file open; the rest works on the parsed data.
    with open('/home/leebird/Projects/nlputils/utils/typing/test.json') as f:
        json_doc = json.load(f)
    for t in json_doc['entity'].values():
        t['entityType'] = t['entityType'].upper()
    text = json.dumps(json_doc)
    # NOTE(review): if json_format is google.protobuf.json_format, the third
    # positional argument is ignore_unknown_fields -- confirm.
    raw_doc = json_format.Parse(text, document_pb2.Document(), True)

    # Here ``text`` is the serialized JSON document, not plain text.
    param_helper = ParamHelper(text, doc_id, rule0_lines, rule1_lines,
                               rule2_lines)

    edg_rules = edgRules_pb2.EdgRules()

    # raw_doc already comes from the JSON file, so only the rules proto is
    # filled through the helper.
    param_helper.setRuleProtoAttributes(edg_rules)

    # Parse using Bllip parser.
    doc = parse_using_bllip(raw_doc, edg_rules)
    helper = DocHelper(doc)
    invalid_deps = constraint_args(helper, {'arg0': {document_pb2.Entity.GENE}})
    print(invalid_deps)
    propagate(helper, {'arg0': {document_pb2.Entity.GENE}}, invalid_deps)
Exemplo n.º 4
0
def upload():
    """Handle the rule-upload form and render the parse result.

    Any non-POST request just renders ``index_edg.html``.  On POST, the three
    uploaded files (``ruleFile0`` .. ``ruleFile2``) are passed through
    ``save_read_uploaded_file``; whenever that yields an empty string the
    matching ``rules0`` .. ``rules2`` form field is used instead.  The text
    and rules are then parsed with ``parse_using_bllip`` and the brat data is
    rendered into ``index_edg.html``.
    """
    if request.method != 'POST':
        return render_template('index_edg.html')

    # Fetch the uploaded rule files first, then read their contents.
    uploaded0 = request.files['ruleFile0']
    uploaded1 = request.files['ruleFile1']
    uploaded2 = request.files['ruleFile2']
    phase0 = save_read_uploaded_file(uploaded0)
    phase1 = save_read_uploaded_file(uploaded1)
    phase2 = save_read_uploaded_file(uploaded2)

    text = request.form['text']

    # An empty upload result means: fall back to the rules typed in the form.
    phase0 = request.form['rules0'] if phase0 == "" else phase0
    phase1 = request.form['rules1'] if phase1 == "" else phase1
    phase2 = request.form['rules2'] if phase2 == "" else phase2

    helper = ParamHelper(
        text,
        "9999999",
        phase0.split("\n"),
        phase1.split("\n"),
        phase2.split("\n"),
    )
    document = document_pb2.Document()
    rules_proto = edgRules_pb2.EdgRules()
    helper.setDocProtoAttributes(document)
    helper.setRuleProtoAttributes(rules_proto)

    parsed = parse_using_bllip(document, rules_proto)
    brat_json = json.dumps(get_brat_data(parsed))
    brat_added_json = json.dumps(get_brat_data_added(parsed))

    return render_template(
        'index_edg.html',
        text=text,
        rules0=phase0,
        rules1=phase1,
        rules2=phase2,
        brat_string_bllip=brat_json,
        brat_string_bllip_added=brat_added_json,
    )
def run():
    """Parse every file of a directory remotely and print selected relations.

    Command-line arguments (read from ``sys.argv``):
        1: input directory; every entry matching ``<dir>/*`` is read as one
           document whose ``doc_id`` is the file name without extension.
        2: path of the phase-0 rule file.

    The documents are batched into requests with ``edg_request_iter_docs``
    and sent through ``grpcapi.get_queue``.  For each sentence of each
    returned document an ``EdgRelations`` object is built, and every relation
    whose name is in the printed set is written to stdout, once per entry of
    ``getEdgRelationNumArgs()``.
    """
    # Iterate through all files in the input directory and build the doc list.
    input_dir_path = sys.argv[1]
    input_files = glob.glob(input_dir_path + "/*")
    document_list = []
    for input_file in input_files:
        # Context manager closes the handle even if the read fails.
        with open(input_file, "r") as text_fh:
            text = text_fh.read()
        raw_doc = document_pb2.Document()
        raw_doc.text = text
        raw_doc.doc_id = os.path.splitext(os.path.basename(input_file))[0]
        document_list.append(raw_doc)

    rule_phase0_filename = sys.argv[2]
    with open(rule_phase0_filename, "r") as fh0:
        rule0_lines = fh0.readlines()

    # TODO: ParamHelper needs updating; only the rule side is used here, so
    # placeholder text/doc_id values and empty phase-1/phase-2 rules are
    # passed.
    param_helper = ParamHelper("NA", "NA", rule0_lines, [], [])
    edg_rules = edgRules_pb2.EdgRules()
    param_helper.setRuleProtoAttributes(edg_rules)

    # This is a simple function to make requests out of a list of documents.
    # We put 5 documents in each request.
    requests = edg_request_iter_docs(
        document_list,
        edg_rules,
        request_size=5,
        request_type=rpc_pb2.EdgRequest.PARSE_BLLIP)

    # Given a request iterator, send requests in parallel and get responses.
    responses_queue = grpcapi.get_queue(server='128.4.20.169',
                                        port=8902,
                                        request_thread_num=10,
                                        iterable_request=requests,
                                        edg_request_processor=True)

    # Relation names that get printed; built once instead of per sentence.
    to_print_rel = ("inv", "reg", "ass", "exp", "cmp", "isa", "fnd")

    for response in responses_queue:
        for doc in response.document:
            helper = DocHelper(doc)
            doc_id = doc.doc_id
            for sent_num, sentence in enumerate(doc.sentence):
                sent_text = helper.text(sentence)
                edg_relations = EdgRelations(doc_id, sent_num)
                edg_relations.setRelations(helper, sentence,
                                           sentence.dependency_extra)

                for edg_relation in edg_relations.relations:
                    numb_args_list = edg_relation.getEdgRelationNumArgs()
                    relation_name = edg_relation.name
                    trigger_head = edg_relation.trigger_head
                    trigger_phrase = edg_relation.trigger_phrase
                    if relation_name not in to_print_rel:
                        continue
                    for numb_args in numb_args_list:
                        print("Sentence: " + doc_id + "\t" + str(sent_num) +
                              "\t" + sent_text)
                        print("Relation: " + relation_name + "\t" +
                              trigger_head + "\t" + trigger_phrase)
                        print("Arg0: " + numb_args[0])
                        print("Arg1: " + numb_args[1])
                        print("Arg2: " + numb_args[2])
                        print("\n")
def run():
    """Parse every file of a directory remotely and print dependency analysis.

    Command-line arguments (read from ``sys.argv``):
        1: input directory; every entry matching ``<dir>/*`` is read as one
           document whose ``doc_id`` is the file name without extension.
        2: path of the phase-0 rule file.

    The documents are batched into requests with ``edg_request_iter_docs``
    and sent through ``grpcapi.get_queue``.  For each sentence of each
    returned document one tab-separated line is printed per entry of
    ``sentence.dependency_extra``; a sentence without any such entry gets a
    single line built from ``printEmptyExtraDependencyAnalysis``.
    """
    # Iterate through all files in the input directory and build the doc list.
    input_dir_path = sys.argv[1]
    input_files = glob.glob(input_dir_path + "/*")
    document_list = []
    for input_file in input_files:
        # Context manager closes the handle even if the read fails.
        with open(input_file, "r") as text_fh:
            text = text_fh.read()
        raw_doc = document_pb2.Document()
        raw_doc.text = text
        raw_doc.doc_id = os.path.splitext(os.path.basename(input_file))[0]
        document_list.append(raw_doc)

    rule_phase0_filename = sys.argv[2]
    with open(rule_phase0_filename, "r") as fh0:
        rule0_lines = fh0.readlines()

    # TODO: ParamHelper needs updating; only the rule side is used here, so
    # placeholder text/doc_id values and empty phase-1/phase-2 rules are
    # passed.
    param_helper = ParamHelper("NA", "NA", rule0_lines, [], [])
    edg_rules = edgRules_pb2.EdgRules()
    param_helper.setRuleProtoAttributes(edg_rules)

    # This is a simple function to make requests out of a list of documents.
    # We put 5 documents in each request.
    requests = edg_request_iter_docs(document_list, edg_rules,
                                     request_size=5,
                                     request_type=rpc_pb2.EdgRequest.PARSE_BLLIP)

    # Given a request iterator, send requests in parallel and get responses.
    responses_queue = grpcapi.get_queue(server='128.4.20.169',
                                        port=8902,
                                        request_thread_num=10,
                                        iterable_request=requests,
                                        edg_request_processor=True)

    for response in responses_queue:
        for doc in response.document:
            helper = DocHelper(doc)
            doc_id = doc.doc_id
            for sent_num, sentence in enumerate(doc.sentence):
                sent_text = helper.text(sentence)
                # Tracks whether the sentence produced at least one line.
                printed_any = False
                for dep_extra in sentence.dependency_extra:
                    printed_any = True
                    print(doc_id + "\t" + str(sent_num) + "\t" +
                          helper.printExtraDependencyAnalysis(sentence, dep_extra) +
                          "\t" + sent_text)
                if not printed_any:
                    # Still emit one line so every sentence shows up in the
                    # output.
                    print(doc_id + "\t" + str(sent_num) + "\t" +
                          helper.printEmptyExtraDependencyAnalysis(sentence) +
                          "\t" + sent_text)