Python Extractor示例，commons.data.entity.Extractor Python示例

示例#1

0

显示文件

def test():
    """Pass a sample WSO2 article URL to ``get_data`` and print what comes back.

    Two things are printed: the value returned by ``get_data`` and the list
    of entities that ``Extractor.extract_entities`` yields for that value.
    """
    from commons.data.entity import Extractor

    # The address is written in two pieces only to keep the source line short;
    # the resulting string (including its trailing space) is unchanged.
    article_url = ("http://wso2.com/library/articles/2015/10/article-wso2-developer-studio-development-and-deployment-best"
                   "-practices/ ")
    page_data = get_data(article_url)
    print(page_data)

    entity_extractor = Extractor()
    found_entities = list(entity_extractor.extract_entities(page_data))
    print(found_entities)

示例#2

0

显示文件

文件： doc.py 项目： DulanjanaYasara/chatbot

def test():
    """Run the entity extractor over the data obtained for a sample article URL.

    Prints the value returned by ``get_data`` first, then the materialised
    list of entities produced by ``Extractor.extract_entities``.
    """
    from commons.data.entity import Extractor

    # Same literal as before, just grouped with parentheses instead of a
    # backslash continuation (the trailing space is part of the string).
    target = (
        "http://wso2.com/library/articles/2015/10/article-wso2-developer-studio-development-and-deployment-best"
        "-practices/ "
    )
    content = get_data(target)
    print(content)

    ex = Extractor()
    print(list(ex.extract_entities(content)))

示例#3

0

显示文件

class EntityExtractPipeline(object):
    """Pipeline stage that swaps selected item fields for their extracted entities."""

    def __init__(self):
        super(EntityExtractPipeline, self).__init__()
        self.extractor = Extractor()

    def process_item(self, item, spider):
        """Replace the text fields of ``item`` with lists of extracted entities.

        The ``title``, ``imp1``, ``imp2`` and ``plain`` entries are each run
        through ``self.extractor.extract_entities`` (in that order) and
        overwritten in place with the materialised result.

        :param item: mapping holding the four fields listed above.
        :param spider: not used by this method.
        :return: the same ``item`` object, after modification.
        """
        for field in ('title', 'imp1', 'imp2', 'plain'):
            entities = self.extractor.extract_entities(item[field])
            item[field] = list(entities)

        return item

示例#4

0

显示文件

文件： breaknplay.py 项目： DulanjanaYasara/chatbot

def breaknplay_bot(question):
    """Break a question into entity phrases and ask the bot about each phrase.

    :param question: text passed to ``Extractor.extract_entities``.
    :return: a ``(question_phrases, tot_ans)`` tuple of two newline-joined
        strings: the extracted phrases, and the distinct first answers
        returned by ``ask_suzy`` in the order they were first seen.
    """
    extractor = Extractor()
    # The phrases are walked twice (the loop below, then the join). Every
    # other call site in this file wraps extract_entities() in list(), which
    # suggests it returns a one-shot iterator; materialise it so the second
    # pass does not see an already exhausted iterator and produce ''.
    question_phrases = list(extractor.extract_entities(question))

    # Keep an answer only if the bot has not already given it for an earlier phrase
    tot_ans = []
    for q in question_phrases:
        ans = ask_suzy(q)[0]
        if ans not in tot_ans:
            tot_ans.append(ans)

    return '\n'.join(question_phrases), '\n'.join(tot_ans)

示例#5

0

显示文件

文件： ordinary_tech_entity_extraction.py 项目： DulanjanaYasara/chatbot

def tech_entity_generation():
    """Used for the tech content entity extraction and storing them in the MongoDB"""
    mypath = './data/tech_content'

    # Getting all the file names in the mypath directory
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

    # Creating an instance of Extractor
    extractor = Extractor()

    for filename in onlyfiles:
        # Using the newer with construct to close the file automatically.
        with open(mypath + '/' + filename) as f:
            data = f.readlines()
            entity_generator = []
            for line in data:
                entities_line = extractor.extract_entities(line)
                if entities_line:
                    entity_generator.append(list(entities_line))
                    print '.',
            print 'Entity generation done successfully!!!'
            write_db(collection_name='Tech_entities', db_name='wso2_entities',
                     entity_generator=entity_generator)
            print 'Tech entities database insertion done in \'' + filename + '\' file!!!'

示例#6

0

显示文件

def test():
    """Print the entities that ``get_entities`` returns for a fixed sample bot answer.

    The sample text is a hard-coded multi-line string; its continuation lines
    start at column 0 because they are part of the string's content.
    """
    extractor = Extractor()

    bot_ans = """A fault sequence is a collection of mediators just like any other sequence, and it can be associated
with another sequence or a proxy service. When the sequence or the proxy service encounters an error during
mediation or while forwarding a message, the message that triggered the error is delegated to the specified fault
sequence. API development is usually done by someone who understands the technical aspects of the API,
interfaces, documentation, versions etc., while API management is typically carried out by someone who
understands the business aspects of the APIs. In most business environments, API development is a responsibility
that is distinct from API publication and management. WSO2 API Manager provides a simple Web interface names WSO2
API Publisher for API development and management, which is a structured GUI designed for API creators to develop,
document, scale, and version APIs, while also facilitating more API management-related tasks such as publishing
APIs, monetization, analyzing statistics, and promoting. You may require to map backend URLs to the pattern that
you want in the API Publisher. You may have dynamic backends which need to be resolved according to the request.
In such cases you need to implement URL mapping within API Manager. The URL pattern of the APIs in the Publisher
can be http://<hostname>:8280/<context>/<version>/<API resource>. You can define variables as part of the URI
template of your API's resources. For example, in the URI template /business/businessId/address/, businessId is a
variable. You can implement custom mediation flow and resolve these variables in mediation level. """

    print(list(get_entities(extractor, bot_ans)))

示例#7

0

显示文件

import time

from commons.data.entity import Extractor, reducer
from commons.spreadsheet.basic import SpreadsheetConnector
from commons.suzy.ask import ask_suzy
"""Used to export better question entities from the SO question and the relevant suzy answers"""
# NOTE(review): the string above follows the imports, so it is a plain
# expression statement rather than the module docstring.
# NOTE(review): the original comment here said this code only executes when
# the file is run rather than imported, but no `if __name__ == '__main__':`
# guard is visible in this excerpt -- confirm against the full file.
# Spreadsheet row read by import_row below.
row = 74
connector = SpreadsheetConnector('./SO_compare_entities/AnswerEvaluation.json')
extractor = Extractor()

# NOTE(review): `row` is not advanced anywhere in the visible lines;
# presumably that happens in the part of the script that is cut off.
while True:

    # Question is in the 2nd column
    q = connector.import_row(row, "Answer Evaluation", columns=[2])[0]
    print q

    # An empty question cell ends the outer loop.
    if q == '':
        break

    # Extracting the question entities from the question: the extractor output
    # is materialised, passed through reducer(), and materialised again.
    q_entities = list(reducer(list(extractor.extract_entities(q))))

    # First answer returned by ask_suzy for each entity, in entity order.
    suzy_ans = []

    # NOTE(review): no exit from this inner loop is visible in the excerpt.
    while True:
        for entity in q_entities:
            try:
                print entity
                ans = ask_suzy(entity)[0]
                suzy_ans.append(ans)
                # NOTE(review): the excerpt ends here; the handler that
                # belongs to this try is not shown.

示例#8

0

显示文件

 def __init__(self):
     """Run the parent initialiser, then attach a new ``Extractor`` as ``self.extractor``."""
     super(EntityExtractPipeline, self).__init__()
     entity_extractor = Extractor()
     self.extractor = entity_extractor

示例#9

0

显示文件

from time import sleep

from commons.data.entity import Extractor, comparator, reducer
from commons.spreadsheet.basic import SpreadsheetConnector
from commons.suzy.ans import get_entities

# This only executes when file is executed rather than imported
row = 34
connector = SpreadsheetConnector('./SO_compare_entities/AnswerEvaluation.json')
extractor = Extractor()

while True:
    q_en = []

    print '\033[1m' + 'Row No :' + '\033[0m',
    print row

    while True:
        try:
            # Entities is in the 3rd column
            for en in connector.import_row(row,
                                           "Answer Evaluation",
                                           sheet_no=1,
                                           columns=[3])[0].split('\n'):
                q_en.append(en)

            # Bot ans is in the 4th column
            bot_ans = connector.import_row(row,
                                           "Answer Evaluation",
                                           sheet_no=1,
                                           columns=[4])[0]