def test():
    """Run ``get_data`` on a WSO2 article URL and print the extracted entities.

    Prints the raw value returned by ``get_data`` first, then the list of
    entities that ``Extractor.extract_entities`` produces for it.
    """
    from commons.data.entity import Extractor

    # NOTE(review): the URL literal ends with a space -- confirm that is intended.
    article_url = (
        "http://wso2.com/library/articles/2015/10/"
        "article-wso2-developer-studio-development-and-deployment-best"
        "-practices/ "
    )

    page_data = get_data(article_url)
    print(page_data)

    entity_extractor = Extractor()
    found_entities = entity_extractor.extract_entities(page_data)
    print(list(found_entities))
class EntityExtractPipeline(object):
    """Pipeline stage that replaces an item's text fields with entity lists.

    NOTE(review): the ``process_item(item, spider)`` signature looks like a
    Scrapy item pipeline -- confirm against the project settings.
    """

    def __init__(self):
        """Create the pipeline together with its own ``Extractor``."""
        super(EntityExtractPipeline, self).__init__()
        self.extractor = Extractor()

    def process_item(self, item, spider):
        """Entity extraction of title, imp1, imp2 and plain text.

        Each of the four fields is overwritten in place with the list of
        entities extracted from its previous value; the same ``item`` object
        is returned. ``spider`` is accepted but not used.
        """
        extract = self.extractor.extract_entities
        # Same fields, processed in the same order, as the field list above.
        for field in ('title', 'imp1', 'imp2', 'plain'):
            item[field] = list(extract(item[field]))
        return item
def breaknplay_bot(question):
    """Split a question into entity phrases and ask the bot about each one.

    :param question: text handed to ``Extractor.extract_entities``.
    :returns: a ``(question_phrases, tot_ans)`` tuple of two strings: the
        extracted phrases joined with newlines, and the distinct answers
        (first element of each ``ask_suzy`` result, in first-seen order)
        joined with newlines.
    """
    extractor = Extractor()
    # Materialise the phrases once: they are iterated below AND joined
    # afterwards. If extract_entities yields lazily (every other caller wraps
    # it in list()), the later join would otherwise see an exhausted iterator
    # and silently produce an empty string.
    question_phrases = list(extractor.extract_entities(question))

    # Keep an answer only if the bot has not already given it for an
    # earlier phrase.
    tot_ans = []
    for phrase in question_phrases:
        ans = ask_suzy(phrase)[0]
        if ans not in tot_ans:
            tot_ans.append(ans)

    question_phrases = '\n'.join(question_phrases)
    tot_ans = '\n'.join(tot_ans)
    return question_phrases, tot_ans
def tech_entity_generation():
    """Extract entities from each tech-content file and hand them to ``write_db``.

    Every regular file directly inside ``./data/tech_content`` is read line by
    line. The non-empty entity lists of one file are collected and passed to
    ``write_db`` (collection ``Tech_entities``, database ``wso2_entities``;
    per the original description this is a MongoDB insert) before the next
    file is processed. Progress is reported on stdout.
    """
    mypath = './data/tech_content'
    # Only regular files directly inside mypath; anything else is skipped.
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

    extractor = Extractor()
    for filename in onlyfiles:
        entity_generator = []
        # join() keeps path building consistent with the isfile() check above;
        # the with-block closes the file automatically.
        with open(join(mypath, filename)) as f:
            for line in f:
                # Materialise before testing: a lazy iterator is always
                # truthy, which would let empty entity lists be stored.
                entities_line = list(extractor.extract_entities(line))
                if entities_line:
                    entity_generator.append(entities_line)
                    # Python 2 print statement: the trailing comma keeps the
                    # progress dots on one line.
                    print('.'),
        print('Entity generation done successfully!!!')
        write_db(collection_name='Tech_entities', db_name='wso2_entities',
                 entity_generator=entity_generator)
        print('Tech entities database insertion done in \'' + filename + '\' file!!!')
def test():
    """Print the entities ``get_entities`` returns for a canned bot answer."""
    entity_extractor = Extractor()
    # Adjacent literals are concatenated at compile time into one string.
    sample_answer = (
        "A fault sequence is a collection of mediators just like any other sequence, "
        "and it can be associated with another sequence or a proxy service. "
        "When the sequence or the proxy service encounters an error during mediation "
        "or while forwarding a message, the message that triggered the error is "
        "delegated to the specified fault sequence. "
        "API development is usually done by someone who understands the technical "
        "aspects of the API, interfaces, documentation, versions etc., while API "
        "management is typically carried out by someone who understands the business "
        "aspects of the APIs. "
        "In most business environments, API development is a responsibility that is "
        "distinct from API publication and management. "
        "WSO2 API Manager provides a simple Web interface names WSO2 API Publisher "
        "for API development and management, which is a structured GUI designed for "
        "API creators to develop, document, scale, and version APIs, while also "
        "facilitating more API management-related tasks such as publishing APIs, "
        "monetization, analyzing statistics, and promoting. "
        "You may require to map backend URLs to the pattern that you want in the "
        "API Publisher. "
        "You may have dynamic backends which need to be resolved according to the "
        "request. "
        "In such cases you need to implement URL mapping within API Manager. "
        "The URL pattern of the APIs in the Publisher can be "
        "http://<hostname>:8280/<context>/<version>/<API resource>. "
        "You can define variables as part of the URI template of your API's "
        "resources. "
        "For example, in the URI template /business/businessId/address/, businessId "
        "is a variable. "
        "You can implement custom mediation flow and resolve these variables in "
        "mediation level. "
    )
    found_entities = get_entities(entity_extractor, sample_answer)
    print(list(found_entities))
import time from commons.data.entity import Extractor, reducer from commons.spreadsheet.basic import SpreadsheetConnector from commons.suzy.ask import ask_suzy """Used to export better question entities from the SO question and the relevant suzy answers""" # This only executes when file is executed rather than imported row = 74 connector = SpreadsheetConnector('./SO_compare_entities/AnswerEvaluation.json') extractor = Extractor() while True: # Question is in the 2nd column q = connector.import_row(row, "Answer Evaluation", columns=[2])[0] print q if q == '': break # Extracting the question entities from the question q_entities = list(reducer(list(extractor.extract_entities(q)))) suzy_ans = [] while True: for entity in q_entities: try: print entity ans = ask_suzy(entity)[0] suzy_ans.append(ans)
def __init__(self):
    """Initialise the pipeline and give it its own ``Extractor`` instance."""
    # Run the parent initialiser first, then attach the extractor.
    super(EntityExtractPipeline, self).__init__()
    # Kept on the instance as ``self.extractor``.
    self.extractor = Extractor()
from time import sleep from commons.data.entity import Extractor, comparator, reducer from commons.spreadsheet.basic import SpreadsheetConnector from commons.suzy.ans import get_entities # This only executes when file is executed rather than imported row = 34 connector = SpreadsheetConnector('./SO_compare_entities/AnswerEvaluation.json') extractor = Extractor() while True: q_en = [] print '\033[1m' + 'Row No :' + '\033[0m', print row while True: try: # Entities is in the 3rd column for en in connector.import_row(row, "Answer Evaluation", sheet_no=1, columns=[3])[0].split('\n'): q_en.append(en) # Bot ans is in the 4th column bot_ans = connector.import_row(row, "Answer Evaluation", sheet_no=1, columns=[4])[0]