# Exemplo n.º 1 (Example no. 1)
 def parse_add(self, url, response_body):
     a=newspaper.Article("")
     a.is_downloaded = True
     a.html = response_body
     a.parse()
     a.nlp()
     img_link = a.top_img
     named, persons, orgs= self.get_named_entities(a.text)
     print "alma base search started"
     persons = [person for person in persons if alma.search(person) > 0]
     orgs = [org for org in orgs if alma.search(org) > 0]
     print "alma base search ended"
     author="default"
     try:
         author=a.author[0]
     except:
         print "Not found"
     art=self.add_article(a.title,a.summary,url,author,img_link, named, persons, orgs)
     # test_keywords(art,newsClassifier)
     return art
# Exemplo n.º 2 (Example no. 2)
def train(url,keywords):
	# for url in urls:
    a=newspaper.Article(url)
    a.download()
    a.parse()
    a.nlp()
    img_link = a.top_img
    named, persons, orgs= get_named_entities(a.text)
    print "alma base search started"
    persons = [person for person in persons if alma.search(person) > 0]
    orgs = [org for org in orgs if alma.search(org) > 0]
    print "alma base search stoped"
    author="default"
    try:
        author=a.author[0]
    except:
        print "Not found"
    art=add_article(a.title,a.summary,url,author,img_link, named, persons, orgs)
        # test_keywords(art,newsClassifier)
    return keywords
# Exemplo n.º 3 (Example no. 3)
def train(url,keywords):
    # for url in urls:
    a=newspaper.Article(url)
    a.download()
    a.parse()
    a.nlp()
    img_link = a.top_img
    named, persons, orgs= get_named_entities(a.text)
    print "alma base search started"
    persons = [person for person in persons if alma.search(person) > 0]
    orgs = [org for org in orgs if alma.search(org) > 0]
    print "alma base search ended"
    author="default"
    try:
        author=a.author[0]
    except:
        print "Not found"
    art=add_article(a.title,a.summary,url,author,img_link, named, persons, orgs)
        # test_keywords(art,newsClassifier)
    return keywords