def rankDocument(doc, q):
    titles = doc.find_all("content", attrs={"name": ["title", "altTitle"]})
    hl = 0   # count of terms highlighted by the search engine
    sim = 0  # query-defined similarity with the titles
    # We keep the maximum score over all the title entries
    for t in titles:
        c = BeautifulSoup(t.get_text(), "html.parser")
        hl = max(hl, len(c.find_all("span")))
        sim = max(sim, q.similarity(Analysis(c.get_text(), superficial=True)))
    return hl + sim
def rankResult(result, q):
    title_similarity = q.similarity(Analysis(result['title'], superficial=True))
    snippet = BeautifulSoup(result['snippet'], 'html.parser')
    matches = snippet.find_all('span', attrs={"class": "searchmatch"})
    match_score = len(set(m.get_text() for m in matches))
    #print('{}: {}+{}'.format(result['title'], title_similarity, match_score))
    title_weight = 1
    match_weight = 0.1
    return title_weight * title_similarity + match_weight * match_score
def response(data_file):
    with open(data_file, 'r') as file:
        data = json.load(file)
    #print(json.dumps(data, indent=4, sort_keys=True))
    ground = Ground()
    for i in data['questions']:
        question = Analysis(i['body'])
        question_type = question.graph.question_type
        doc = Analysis(retrieveDocument(question))
        ground.teardown()
        ground.add_text(doc)
        answers = ground.ask_question(question)
        print('------------------------------------')
        print('Question: ' + i['body'])
        print('Question type: ' + question_type)
        print('Ideal answer: ', i['ideal_answer'])
        if len(answers) == 0:
            print("Answer: Sorry, I don't know the answer.")
        else:
            print('Answer: ', '\n'.join(compose_answer(question, a) for a in answers))
    results = json.loads(response.text)['query']['search']
    best_result = max(results, key=lambda r: rankResult(r, q))
    page = wikiRetrieve(best_result['pageid'])
    content = BeautifulSoup(
        json.loads(page.text)['parse']['text']['*'], 'html.parser')
    return ' '.join(p.get_text() for p in content.find_all('p'))


def retrieveDocument(q):
    r = wikiSearch(q.content_words())
    doc = getMostRelevantDocument(r, q)
    return ' '.join(doc.split())


if __name__ == "__main__":
    q = Analysis("What is the internet of things?")
    print(q.content_words())
    r = wikiSearch(q.content_words())
    print("{} -> {}".format(r.url, r.status_code))
    results = json.loads(r.text)['query']['search']
    print([(r['title'], r['snippet']) for r in results])
    page = wikiRetrieve(results[0]['pageid'])
    print(json.loads(page.text)['parse']['text']['*'])
    doc = getMostRelevantDocument(r, q)
    print(doc)
    print(retrieveDocument(Analysis("What is a heart attack?")))
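The listing above relies on wikiSearch and wikiRetrieve, which are not defined in this fragment. The sketch below shows one plausible implementation on top of the standard MediaWiki API; the endpoint and query parameters are the documented ones, but the helper names and exact behaviour are assumptions made for illustration.

import requests

WIKI_API = 'https://en.wikipedia.org/w/api.php'

def wikiSearch(words):
    # Full-text search; returns the raw requests.Response whose JSON body is
    # read as json.loads(r.text)['query']['search'] above.
    return requests.get(WIKI_API, params={
        'action': 'query',
        'list': 'search',
        'srsearch': ' '.join(words),
        'format': 'json'
    })

def wikiRetrieve(pageid):
    # Fetches the rendered HTML of a page by its numeric id, exposed under
    # ['parse']['text']['*'] in the JSON response.
    return requests.get(WIKI_API, params={
        'action': 'parse',
        'pageid': pageid,
        'prop': 'text',
        'format': 'json'
    })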
    for result in cur.fetchall():
        tmp = {}
        tmp['content'] = str(list(result)[0])
        data.append(tmp)
        index = index + 1
    disconnect(conn)
    client = MongoClient('')
    client.tfgchat.test.delete_many({})
    client.tfgchat.test.insert_many(data)


def getMostRelevantDocument(q):
    client = MongoClient('')
    query = ' '.join(q.content_words())
    client.tfgchat.test.create_index([('content', "text")])
    cursor = client.tfgchat.test.find_one(
        {"$text": {"$search": query}},
        {"score": {"$meta": "textScore"}})
    return BeautifulSoup(cursor.get('content'), 'html.parser').get_text()


def retrieveDocument(q):
    #createDatabase(2000)
    return getMostRelevantDocument(q)


if __name__ == "__main__":
    q = Analysis("What is a heart attack?")
    print(retrieveDocument(q))
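Note that find_one combined with a $text filter returns an arbitrary matching document rather than the highest-scoring one. If relevance ranking is desired, the query can sort on the projected text score; a minimal alternative sketch reusing the same collection is shown below (an illustration, not the project's actual code).

def getMostRelevantDocumentSorted(q):
    # Alternative sketch: sort explicitly by the $text relevance score so the
    # top-ranked document is returned.
    client = MongoClient('')
    query = ' '.join(q.content_words())
    client.tfgchat.test.create_index([('content', "text")])
    cursor = (client.tfgchat.test
              .find({"$text": {"$search": query}},
                    {"score": {"$meta": "textScore"}})
              .sort([("score", {"$meta": "textScore"})])
              .limit(1))
    best = next(cursor, None)
    return BeautifulSoup(best['content'], 'html.parser').get_text() if best else ''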
    docs = parsed.find_all('document')
    doc = max(docs, key=lambda d: rankDocument(d, q))
    html = doc.find("content", attrs={"name": "FullSummary"}).get_text()
    return BeautifulSoup(html, "html.parser").get_text(" ")


'''
Retrieves the documents that may contain the answer to the question
introduced by the user. To do so, it uses the result of the analysis
of that question (parameter q).
'''
def retrieveDocument(q):
    r = medlineSearch(q.content_words())
    doc = getMostRelevantDocument(r, q)
    return ' '.join(doc.split())


if __name__ == "__main__":
    q = Analysis("What are the causes of blood infection?")
    print(q.content_words())
    r = medlineSearch(q.content_words())
    print("{} -> {}".format(r.url, r.status_code))
    doc = getMostRelevantDocument(r, q)
    print(doc)
    print(retrieveDocument(Analysis("What are the causes of a heart attack?")))
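As with the Wikipedia module, medlineSearch is defined outside this fragment. A minimal sketch against the public MedlinePlus Web Service follows; the endpoint is the documented one, while the helper name and its parameters are assumptions mirroring how it is called above.

import requests

MEDLINE_API = 'https://wsearch.nlm.nih.gov/ws/query'

def medlineSearch(words):
    # Searches MedlinePlus health topics; the XML response carries the
    # <document> / <content name="..."> elements parsed by rankDocument and
    # getMostRelevantDocument above.
    return requests.get(MEDLINE_API, params={
        'db': 'healthTopics',
        'term': ' '.join(words)
    })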
def test_init():
    global Analysis, nlg
    import nbimporter
    from natural_language import Analysis
    from grafeno import linearizers
    nlg = linearizers.get_pipeline(['node_edges'])


# Test case showing the behavior of the Ground class
if __name__ == "__main__":
    test_init()
    ground = Ground()
    ground.teardown()
    ground.add_text(
        Analysis(
            "John loves Mary. John loves very cute dogs. Peter hates Susan. "
            "Susan loves John. Paul loves Joana. Joana loves Paul."
        ))
    answers = ground.ask_question(Analysis("Who loves John"))
    for answer in answers:
        print(answer.linearize(linearizer=nlg))
    print('##########################################################')
    answers = ground.ask_question(Analysis("John loves who"))
    for answer in answers:
        print(answer.linearize(linearizer=nlg))
def do_tests(original_path, destination_path, type_filter='none'):
    json_file = json.load(open(original_path))
    all_tests = []
    errors = {}
    # We create a single Ground
    ground = Ground()
    for question in json_file['questions']:
        # Error handling
        snipped_errors = ""
        question_errors = ""
        compose_answer_errors = False
        question_text = question['body']
        ideal_answer = question['ideal_answer']
        # Clears the Ground
        ground.teardown()
        # Adds all the snippets to the current knowledge base
        for snippet in question['snippets']:
            snippet_text = snippet['text']
            try:
                ground.add_text(Analysis(snippet_text))
            except KeyError:
                snipped_errors = snippet_text
        try:
            # Analyzes the query
            q = Analysis(question['body'])
            question_type = q.graph.question_type
            # If the filter excludes this question type, skip to the next iteration
            if type_filter != 'none' and type_filter != question_type:
                continue
            print('------------------------------------')
            print('Question: ' + question_text)
            print('Question type: ' + question_type)
            print('Ideal answer: ' + ideal_answer[0])
            # We ask a question to the current knowledge base
            answers = ground.ask_question(q)
        except:
            question_errors = question['body']
            print(' Question errors: ' + question_errors)
        try:
            # Translates the answer into a natural language sentence
            answers = response(q, answers, question_type)
        except:
            print(' Compose answer errors')
            compose_answer_errors = True
        print('Answer: ' + answers)
        # We collect all the relevant information
        all_tests.append({
            'question': question_text,
            'answer': answers,
            'ideal_answer': ideal_answer,
            'snipped_errors': snipped_errors,
            'question_errors': question_errors,
            'compose_answer_errors': compose_answer_errors
        })
    # We collect all the questions
    full = {"questions": all_tests}
    # We save all the questions to the specified file
    dump = json.dumps(full)
    f = open(destination_path, "w")
    f.write(dump)
    f.close()
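A typical invocation, with hypothetical file names, would look like the following; type_filter can be set to one of the question types reported by Analysis(...).graph.question_type to restrict the run.

# Hypothetical paths; the actual dataset and output files depend on the setup.
do_tests('BioASQ-training.json', 'test_results.json')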