示例#1
0
def test():
    """Manual check: pass a WSO2 article URL to ``get_data`` and print the result,
    followed by the list of entities the extractor produces for it.
    """
    from commons.data.entity import Extractor

    # Adjacent literals are joined at compile time. The trailing space is part
    # of the original URL string and is deliberately left untouched.
    article_url = ("http://wso2.com/library/articles/2015/10/"
                   "article-wso2-developer-studio-development-and-deployment-best"
                   "-practices/ ")
    page = get_data(article_url)
    print(page)

    entity_source = Extractor()
    entities = list(entity_source.extract_entities(page))
    print(entities)
示例#2
0
def test():
    """Manual check: hand a WSO2 article URL to ``get_data``, print what comes
    back, then print the entities extracted from it as a list.
    """
    from commons.data.entity import Extractor

    # The URL is assembled from adjacent literals; its trailing space belongs
    # to the original string and is preserved.
    target = ("http://wso2.com/library/articles/2015/10/"
              "article-wso2-developer-studio-development-and-deployment-best"
              "-practices/ ")
    fetched = get_data(target)
    print(fetched)

    found = Extractor().extract_entities(fetched)
    print(list(found))
示例#3
0
class EntityExtractPipeline(object):
    """Pipeline stage that replaces selected text fields of an item with the
    list of entities extracted from them.
    """

    def __init__(self):
        super(EntityExtractPipeline, self).__init__()
        self.extractor = Extractor()

    def process_item(self, item, spider):
        """Replace ``title``, ``imp1``, ``imp2`` and ``plain`` with entity lists.

        :param item: mapping holding the four text fields; modified in place.
        :param spider: not used by this stage.
        :return: the same ``item`` object.
        """
        # Fields are processed in this fixed order, one extraction per field.
        for field in ('title', 'imp1', 'imp2', 'plain'):
            item[field] = list(self.extractor.extract_entities(item[field]))
        return item
示例#4
0
def breaknplay_bot(question):
    """Break a question into entities and ask the bot about each of them.

    :param question: text passed to ``Extractor.extract_entities``.
    :return: tuple ``(question_phrases, tot_ans)`` where the first element is
        the extracted phrases joined with newlines and the second is the
        distinct first answers from ``ask_suzy``, in first-seen order, joined
        with newlines.
    """
    extractor = Extractor()
    # extract_entities may hand back a one-shot iterator. Materialise it once
    # so the phrases are still available for the join after the loop below
    # has walked them; for a list result this is just a copy.
    question_phrases = list(extractor.extract_entities(question))

    # Keep an answer only if the bot has not already given it for an earlier
    # phrase; order of first appearance is preserved.
    tot_ans = []
    for phrase in question_phrases:
        ans = ask_suzy(phrase)[0]
        if ans not in tot_ans:
            tot_ans.append(ans)

    return '\n'.join(question_phrases), '\n'.join(tot_ans)
def tech_entity_generation():
    """Used for the tech content entity extraction and storing them in the MongoDB"""
    mypath = './data/tech_content'

    # Getting all the file names in the mypath directory
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

    # Creating an instance of Extractor
    extractor = Extractor()

    for filename in onlyfiles:
        # Using the newer with construct to close the file automatically.
        with open(mypath + '/' + filename) as f:
            data = f.readlines()
            entity_generator = []
            for line in data:
                entities_line = extractor.extract_entities(line)
                if entities_line:
                    entity_generator.append(list(entities_line))
                    print '.',
            print 'Entity generation done successfully!!!'
            write_db(collection_name='Tech_entities', db_name='wso2_entities',
                     entity_generator=entity_generator)
            print 'Tech entities database insertion done in \'' + filename + '\' file!!!'
示例#6
0
def test():
    """Manual check: run ``get_entities`` over a fixed sample answer and print
    the resulting entities as a list.
    """
    # Fixed sample text; line breaks and spacing inside the literal are part
    # of the input and are kept exactly.
    sample_answer = """A fault sequence is a collection of mediators just like any other sequence, and it can be associated 
    with another sequence or a proxy service. When the sequence or the proxy service encounters an error during 
    mediation or while forwarding a message, the message that triggered the error is delegated to the specified fault 
    sequence. API development is usually done by someone who understands the technical aspects of the API, 
    interfaces, documentation, versions etc., while API management is typically carried out by someone who 
    understands the business aspects of the APIs. In most business environments, API development is a responsibility 
    that is distinct from API publication and management. WSO2 API Manager provides a simple Web interface names WSO2 
    API Publisher for API development and management, which is a structured GUI designed for API creators to develop, 
    document, scale, and version APIs, while also facilitating more API management-related tasks such as publishing 
    APIs, monetization, analyzing statistics, and promoting. You may require to map backend URLs to the pattern that 
    you want in the API Publisher. You may have dynamic backends which need to be resolved according to the request. 
    In such cases you need to implement URL mapping within API Manager. The URL pattern of the APIs in the Publisher 
    can be http://<hostname>:8280/<context>/<version>/<API resource>. You can define variables as part of the URI 
    template of your API's resources. For example, in the URI template /business/businessId/address/, businessId is a 
    variable. You can implement custom mediation flow and resolve these variables in mediation level. """

    entity_source = Extractor()
    entities = list(get_entities(entity_source, sample_answer))
    print(entities)
示例#7
0
import time

from commons.data.entity import Extractor, reducer
from commons.spreadsheet.basic import SpreadsheetConnector
from commons.suzy.ask import ask_suzy
"""Used to export better question entities from the SO question and the relevant suzy answers"""
# NOTE: no `if __name__ == '__main__'` guard is visible in this snippet, so the
# statements below run whenever the module is loaded, imported or executed.
# First spreadsheet row to read.
row = 74
connector = SpreadsheetConnector('./SO_compare_entities/AnswerEvaluation.json')
extractor = Extractor()

# Process one spreadsheet row per pass; an empty question cell ends the loop.
# NOTE(review): `row` is not advanced in the visible part of this snippet;
# presumably that happens further down -- confirm.
while True:

    # Question is in the 2nd column
    q = connector.import_row(row, "Answer Evaluation", columns=[2])[0]
    print q

    # An empty cell is treated as the end of the questions.
    if q == '':
        break

    # Extract the entities from the question, pass them through `reducer`,
    # and keep the result as a list.
    q_entities = list(reducer(list(extractor.extract_entities(q))))

    # First answer returned by ask_suzy for each entity of this question.
    suzy_ans = []

    # NOTE(review): the snippet ends inside this `try`; the matching `except`
    # and the exit from this inner loop are not visible here.
    while True:
        for entity in q_entities:
            try:
                print entity
                ans = ask_suzy(entity)[0]
                suzy_ans.append(ans)
示例#8
0
 def __init__(self):
     """Run the base-class initialiser, then store a new Extractor on ``self.extractor``."""
     super(EntityExtractPipeline, self).__init__()
     self.extractor = Extractor()
示例#9
0
from time import sleep

from commons.data.entity import Extractor, comparator, reducer
from commons.spreadsheet.basic import SpreadsheetConnector
from commons.suzy.ans import get_entities

# NOTE: no `if __name__ == '__main__'` guard is visible here, so the statements
# below run whenever this module is loaded, whether imported or executed.
row = 34
connector = SpreadsheetConnector('./SO_compare_entities/AnswerEvaluation.json')
extractor = Extractor()

while True:
    q_en = []

    print '\033[1m' + 'Row No :' + '\033[0m',
    print row

    while True:
        try:
            # Entities is in the 3rd column
            for en in connector.import_row(row,
                                           "Answer Evaluation",
                                           sheet_no=1,
                                           columns=[3])[0].split('\n'):
                q_en.append(en)

            # Bot ans is in the 4th column
            bot_ans = connector.import_row(row,
                                           "Answer Evaluation",
                                           sheet_no=1,
                                           columns=[4])[0]