def render_article(request):
    """Render an article page.

    If the article already has cached content, render it directly.
    Otherwise extract text/title via AlchemyAPI, persist them on the
    Article row, and render the freshly extracted data.
    """
    article_id = request.POST['articleData']
    # NOTE(review): filter()[0] raises IndexError for a missing id; kept for
    # compatibility with existing callers, but get_object_or_404 would be
    # the idiomatic Django choice here.
    article = Article.objects.filter(id=article_id)[0]

    # Debug prints removed: printing article.content.encode('utf-8') before
    # the emptiness check would raise AttributeError on a NULL content field.

    if article.content:
        # Cached content is available -- render as is.
        return render_to_response('article.html',
                                  {'id': article.id,
                                   'data': article.content,
                                   'titleText': article.title})

    # No cached content: extract it from the source URL via AlchemyAPI.
    alchemyapi = AlchemyAPI()
    response = alchemyapi.text('url', article.url)
    titleData = alchemyapi.title('url', article.url)
    # Fetched but never consumed in the original; the call is kept so the
    # request pattern against the API stays unchanged.
    authorData = alchemyapi.author('url', article.url)

    article.content = response['text'].encode('utf-8')
    article.title = titleData['title'].encode('utf-8')
    article.save()

    # Reuse the freshly saved fields instead of re-encoding the raw response.
    return render_to_response('article.html',
                              {'id': article.id,
                               'data': article.content,
                               'titleText': article.title})
class Alchemy(object):
    """Thin wrapper around AlchemyAPI for reading Atom/RSS feed content."""

    def __init__(self):
        # One AlchemyAPI client, reused by every call on this instance.
        self.alchemy_api = AlchemyAPI()

    def processa_html(self, link):
        """Return the cleaned article text extracted from the given URL."""
        result = self.alchemy_api.text('url', link)
        return result['text']

    def obtem_titulo(self, link):
        """Return the page title extracted from the given URL."""
        result = self.alchemy_api.title('url', link)
        return result['title']

    def obtem_entidades(self, texto):
        """Return the entities found in *texto*, with sentiment analysis enabled."""
        return self.alchemy_api.entities('text', texto, {'sentiment': 1})
# --- Title extraction demo -------------------------------------------------
# Print a banner, ask AlchemyAPI for the title of demo_url, then show either
# the parsed response or the error reported by the service.
for _ in range(3):
    print('')
print('############################################')
print('# Title Extraction Example #')
print('############################################')
print('')
print('')
print('Processing url: ', demo_url)
print('')

response = alchemyapi.title('url', demo_url)

if response['status'] != 'OK':
    print('Error in title extraction call: ', response['statusInfo'])
else:
    print('## Response Object ##')
    print(json.dumps(response, indent=4))
    print('')
    print('## Title ##')
    print('title: ', response['title'].encode('utf-8'))
    print('')
class Extraction:
    """Fetches an article's text, author and title via AlchemyAPI and splits
    the text into quote-free sentences for downstream sentiment analysis."""

    def __init__(self, url):
        self.alchemyAPI = AlchemyAPI()
        self.alchemyAPI.outputMode = 'json'
        self.url = url
        # Caller must invoke processText() after construction.

    def processText(self):
        """Run all URL processing routines for the constructor-specified URL.

        Populates self.sentences, self.author and self.title.
        """
        text = self.__extractText(self.url)
        self.sentences = self.__sbdText(text)
        self.author = self.__extractAuthor(self.url)
        self.title = self.__extractTitle(self.url)

    def __callAlchemy(self, method, url, field):
        """Validate *url*, call the given AlchemyAPI endpoint, warn on a
        non-OK status, and return the UTF-8 encoded value of *field*.

        Shared helper replacing three copy-pasted method bodies.
        """
        if url is None or url == "":
            raise InputException("Invalid URL")
        response = method('url', url)
        if response['status'] != 'OK':
            warn(response['statusInfo'])
        return response[field].encode('utf-8')

    def __extractText(self, url):
        """Extract the article body text from *url*."""
        return self.__callAlchemy(self.alchemyAPI.text, url, 'text')

    def __extractAuthor(self, url):
        """Extract the article author from *url*."""
        return self.__callAlchemy(self.alchemyAPI.author, url, 'author')

    def __extractTitle(self, url):
        """Extract the article title from *url*."""
        return self.__callAlchemy(self.alchemyAPI.title, url, 'title')

    def __sbdText(self, extractedText):
        """Apply sentence boundary disambiguation to the extracted text.

        Sentences that begin with a double quote are dropped, so sentiment
        analysis runs on the writer's own words only; quotes *within*
        sentences are kept because the author's choice to include them can
        still indicate bias.
        """
        import re
        sentenceEnders = re.compile(r"""
            # Split sentences on whitespace between them.
            (?:                  # Group for two positive lookbehinds.
              (?<=[.!?])         # Either an end of sentence punct,
            | (?<=[.!?]['"])     # or end of sentence punct and quote.
            )                    # End group of two positive lookbehinds.
            (?<!  Mr\. )         # Don't end sentence on "Mr."
            (?<! Mrs\. )         # Don't end sentence on "Mrs."
            (?<!  Jr\. )         # Don't end sentence on "Jr."
            (?<!  Dr\. )         # Don't end sentence on "Dr."
            (?<! Prof\. )        # Don't end sentence on "Prof."
            (?<!  Sr\. )         # Don't end sentence on "Sr."
            \s+                  # Split on whitespace between sentences.
            """, re.IGNORECASE | re.VERBOSE)
        sentenceList = sentenceEnders.split(extractedText)
        # BUG FIX: the old check compared sentence[:3] against the single
        # character "“" -- a Python 2 byte-string assumption (3-byte UTF-8)
        # that never matched on Python 3.  startswith with a tuple handles
        # both the ASCII and the slanted Unicode double quote on either
        # Python version.
        return [s for s in sentenceList if not s.startswith(("“", '"'))]
# Language: every real flavor should succeed; an unknown flavor must error.
for flavor, payload, expected in (
        ('text', test_text, 'OK'),
        ('html', test_html, 'OK'),
        ('url', test_url, 'OK'),
        ('random', test_url, 'ERROR')):  # invalid flavor
    response = alchemyapi.language(flavor, payload)
    assert(response['status'] == expected)
print('Language tests complete!')
print('')

# Title: only html and url content are supported.
print('Checking title . . . ')
for flavor, payload, expected in (
        ('text', test_text, 'ERROR'),  # only works for html and url content
        ('html', test_html, 'OK'),
        ('url', test_url, 'OK')):
    response = alchemyapi.title(flavor, payload)
    assert(response['status'] == expected)
print('Title tests complete!')
print('')

# Relations
print('Checking relations . . . ')
response = alchemyapi.relations('text', test_text)
assert(response['status'] == 'OK')
# NOTE: the assert for this last call lives beyond this chunk; `response`
# is left bound for it.
response = alchemyapi.relations('html', test_html)
wait = raw_input('press enter to continue')

# --- Title extraction demo -------------------------------------------------
# Banner, then fetch and display the title of demo_url (or the API error).
for _ in range(3):
    print('')
print('############################################')
print('# Title Extraction Example #')
print('############################################')
print('')
print('')
print('Processing url: ', demo_url)
print('')

response = alchemyapi.title('url', demo_url)

if response['status'] != 'OK':
    print('Error in title extraction call: ', response['statusInfo'])
else:
    print('## Response Object ##')
    print(json.dumps(response, indent=4))
    print('')
    print('## Title ##')
    print('title: ', response['title'].encode('utf-8'))
    print('')

wait = raw_input('press enter to continue')
print('')
# Language: every real flavor should succeed; an unknown flavor must error.
print('Checking language . . . ')
for flavor, payload, expected in (
        ('text', test_text, 'OK'),
        ('html', test_html, 'OK'),
        ('url', test_url, 'OK'),
        ('random', test_url, 'ERROR')):  # invalid flavor
    response = alchemyapi.language(flavor, payload)
    assert (response['status'] == expected)
print('Language tests complete!')
print('')

# Title: only html and url content are supported.
print('Checking title . . . ')
for flavor, payload, expected in (
        ('text', test_text, 'ERROR'),  # only works for html and url content
        ('html', test_html, 'OK'),
        ('url', test_url, 'OK')):
    response = alchemyapi.title(flavor, payload)
    assert (response['status'] == expected)
print('Title tests complete!')
print('')

# Relations
print('Checking relations . . . ')
response = alchemyapi.relations('text', test_text)
assert (response['status'] == 'OK')
response = alchemyapi.relations('html', test_html)
assert (response['status'] == 'OK')
# NOTE: the assert for this last call lives beyond this chunk; `response`
# is left bound for it.
response = alchemyapi.relations('url', test_url)
print("Error in language detection call: ", response["statusInfo"]) print("") print("") print("") print("############################################") print("# Title Extraction Example #") print("############################################") print("") print("") print("Processing url: ", demo_url) print("") response = alchemyapi.title("url", demo_url) if response["status"] == "OK": print("## Response Object ##") print(json.dumps(response, indent=4)) print("") print("## Title ##") print("title: ", response["title"].encode("utf-8")) print("") else: print("Error in title extraction call: ", response["statusInfo"]) print("") print("")