Пример #1
0
def render_article(request):
    """Render the article whose id was POSTed as ``articleData``.

    If the stored article already has content, it is rendered as is.
    Otherwise the article text and title are fetched from AlchemyAPI using
    the article's URL, saved on the article, and then rendered.

    The POST lookup and the ``[0]`` index are not guarded: a missing
    ``articleData`` key or an id that matches no article raises.
    """
    article_id = request.POST['articleData']
    article = Article.objects.filter(id=article_id)[0]

    # NOTE: the former debug prints called ``article.content.encode(...)``
    # before checking whether content existed, which raised AttributeError
    # for a content value of None -- the very case handled below.
    if not article.content:
        # Nothing cached yet: extract text and title from the article URL
        # and persist them so the next request can skip the API calls.
        # (The author lookup was dropped: its result was never used.)
        alchemyapi = AlchemyAPI()
        response = alchemyapi.text('url', article.url)
        titleData = alchemyapi.title('url', article.url)
        article.content = response['text'].encode('utf-8')
        article.title = titleData['title'].encode('utf-8')
        article.save()

    return render_to_response(
        'article.html',
        {'id': article.id, 'data': article.content, 'titleText': article.title},
    )
Пример #2
0


# Author extraction demo: print a banner, request the author of demo_url
# from AlchemyAPI, then dump either the response or the error info.
print('\n\n')  # three blank lines
print('\n'.join([
    '############################################',
    '#   Author Extraction Example              #',
    '############################################',
]))
print('\n')  # two blank lines

print('Processing url: ', demo_url)
print('')

response = alchemyapi.author('url', demo_url)

if response['status'] != 'OK':
    # The API reports the failure reason under 'statusInfo'.
    print('Error in author extraction call: ', response['statusInfo'])
else:
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')
    print('## Author ##')
    print('author: ', response['author'].encode('utf-8'))
    print('')

print('')
Пример #3
0
class Extraction:
    """Extract the text, author and title of an article URL via AlchemyAPI.

    Construction only stores the URL and configures the API client; call
    ``processText()`` afterwards to populate ``sentences``, ``author`` and
    ``title``.
    """

    def __init__(self, url):
        self.alchemyAPI = AlchemyAPI()
        self.alchemyAPI.outputMode = 'json'
        self.url = url
        # must call extraction after initialization

    def processText(self):
        """Run all URL processing routines for the constructor-specified URL.

        Sets ``self.sentences`` (quote-free sentence list), ``self.author``
        and ``self.title``.
        """
        text = self.__extractText(self.url)
        self.sentences = self.__sbdText(text)
        self.author = self.__extractAuthor(self.url)
        self.title = self.__extractTitle(self.url)

    def __fetchField(self, apiCall, url, field):
        """Call one AlchemyAPI URL endpoint and return ``field`` as UTF-8 bytes.

        Raises ``InputException`` for a ``None`` or empty URL. A non-OK
        response is reported through ``warn`` and yields an empty value
        instead of a ``KeyError``: error responses do not carry the
        requested field, and a failed lookup (for example an article with
        no detectable author) should not abort the remaining extraction.
        """
        if url is None or url == "":
            raise InputException("Invalid URL")

        response = apiCall('url', url)
        if response['status'] != 'OK':
            warn(response['statusInfo'])

        return response.get(field, u'').encode('utf-8')

    def __extractText(self, url):
        """Call AlchemyAPI to extract the text from the given article."""
        return self.__fetchField(self.alchemyAPI.text, url, 'text')

    def __extractAuthor(self, url):
        """Call AlchemyAPI to extract the author of the article."""
        return self.__fetchField(self.alchemyAPI.author, url, 'author')

    def __extractTitle(self, url):
        """Call AlchemyAPI to extract the title of the article."""
        return self.__fetchField(self.alchemyAPI.title, url, 'title')

    def __sbdText(self, extractedText):
        """Split the extracted article text into sentences and drop quotes.

        Applies a sentence boundary disambiguation regex to the text, then
        removes every sentence that starts with a double quote (ASCII or the
        Unicode left curly quote), so that sentiment analysis is performed on
        the writer's additions only. Quotes *within* sentences are left,
        because this paraphrasing/choice is still somewhat indicative of
        possible bias.

        Accepts either text or UTF-8 encoded bytes and returns a list of the
        same kind. Bytes are decoded for processing so that the regex and the
        quote check operate on characters rather than on raw UTF-8 bytes.
        """
        import re

        wasBytes = isinstance(extractedText, bytes)
        if wasBytes:
            extractedText = extractedText.decode('utf-8')

        sentenceEnders = re.compile(r"""
            # Split sentences on whitespace between them.
            (?:               # Group for two positive lookbehinds.
              (?<=[.!?])      # Either an end of sentence punct,
            | (?<=[.!?]['"])  # or end of sentence punct and quote.
            )                 # End group of two positive lookbehinds.
            (?<!  Mr\.   )    # Don't end sentence on "Mr."
            (?<!  Mrs\.  )    # Don't end sentence on "Mrs."
            (?<!  Jr\.   )    # Don't end sentence on "Jr."
            (?<!  Dr\.   )    # Don't end sentence on "Dr."
            (?<!  Prof\. )    # Don't end sentence on "Prof."
            (?<!  Sr\.   )    # Don't end sentence on "Sr."
            \s+               # Split on whitespace between sentences.
            """,
            re.IGNORECASE | re.VERBOSE)

        # U+201C is the left (opening) curly double quote. The previous
        # check compared a three-character slice against it, which can only
        # ever match on raw UTF-8 bytes, never on decoded text.
        openingQuotes = (u'\u201c', u'"')
        sentenceList = [
            sentence
            for sentence in sentenceEnders.split(extractedText)
            if not sentence.startswith(openingQuotes)
        ]

        if wasBytes:
            sentenceList = [sentence.encode('utf-8') for sentence in sentenceList]
        return sentenceList
Пример #4
0
# Raw text extraction: rejected for plain text, accepted for HTML and URLs.
print('Checking raw text . . . ')
response = alchemyapi.text_raw('text', test_text)
assert response['status'] == 'ERROR'  # only works for html and url content
response = alchemyapi.text_raw('html', test_html)
assert response['status'] == 'OK'
response = alchemyapi.text_raw('url', test_url)
assert response['status'] == 'OK'
print('Raw text tests complete!')
print('')


# Author extraction: rejected for plain text, and for the test HTML too.
print('Checking author . . . ')
response = alchemyapi.author('text', test_text)
assert response['status'] == 'ERROR'  # only works for html and url content
response = alchemyapi.author('html', test_html)
assert response['status'] == 'ERROR'  # there's no author in the test HTML
response = alchemyapi.author('url', test_url)
assert response['status'] == 'OK'
print('Author tests complete!')
print('')


# Language detection.
print('Checking language . . . ')
response = alchemyapi.language('text', test_text)
assert response['status'] == 'OK'
response = alchemyapi.language('html', test_html)
Пример #5
0
else:
    print('Error in language detection call: ', response['statusInfo'])

# Author extraction demo: banner, API call on demo_url, then the result.
print('\n' * 2)  # three blank lines
print('############################################')
print('#   Author Extraction Example              #')
print('############################################')
print('\n')  # two blank lines

print('Processing url: ', demo_url)
print('')

response = alchemyapi.author('url', demo_url)

if response['status'] != 'OK':
    # Failure details are reported under 'statusInfo'.
    print('Error in author extraction call: ', response['statusInfo'])
else:
    print('## Response Object ##')
    print(json.dumps(response, indent=4))

    print('')
    print('## Author ##')
    print('author: ', response['author'])
    print('')

print('\n' * 2)  # three blank lines
Пример #6
0
print('')

# Raw text extraction: rejected for plain text, accepted for HTML and URLs.
print('Checking raw text . . . ')
response = alchemyapi.text_raw('text', test_text)
assert response['status'] == 'ERROR'  # only works for html and url content
response = alchemyapi.text_raw('html', test_html)
assert response['status'] == 'OK'
response = alchemyapi.text_raw('url', test_url)
assert response['status'] == 'OK'
print('Raw text tests complete!')
print('')

# Author extraction: rejected for plain text, and for the test HTML too.
print('Checking author . . . ')
response = alchemyapi.author('text', test_text)
assert response['status'] == 'ERROR'  # only works for html and url content
response = alchemyapi.author('html', test_html)
assert response['status'] == 'ERROR'  # there's no author in the test HTML
response = alchemyapi.author('url', test_url)
assert response['status'] == 'OK'
print('Author tests complete!')
print('')

# Language detection.
print('Checking language . . . ')
response = alchemyapi.language('text', test_text)
assert response['status'] == 'OK'
response = alchemyapi.language('html', test_html)
assert response['status'] == 'OK'
response = alchemyapi.language('url', test_url)
Пример #7
0
    print("Error in text extraction call: ", response["statusInfo"])


# Author extraction demo: banner, API call on demo_url, then the result.
print("\n\n")  # three blank lines
print("\n".join([
    "############################################",
    "#   Author Extraction Example              #",
    "############################################",
]))
print("\n")  # two blank lines

print("Processing url: ", demo_url)
print("")

response = alchemyapi.author("url", demo_url)

if response["status"] != "OK":
    # Failure details are reported under "statusInfo".
    print("Error in author extraction call: ", response["statusInfo"])
else:
    print("## Response Object ##")
    print(json.dumps(response, indent=4))

    print("")
    print("## Author ##")
    print("author: ", response["author"].encode("utf-8"))
    print("")


print("\n")  # two blank lines