Exemplo n.º 1
0
 def make_dataset(self):
     """Fill ``self.data`` and ``self.target`` from the labelled article IDs.

     Each ID in ``relevant`` and then in ``irrelevant`` is looked up with
     ``getArticleByID``; lookups that return a falsy value are skipped.
     For every article found, its ``'HD'``, ``'LP'`` and ``'TD'`` fields
     are joined with newlines and appended to ``self.data``, and the class
     label (1 for relevant, 0 for irrelevant) is appended to ``self.target``.
     """

     def collect(article_ids, label):
         # Append one text entry and one label per article the lookup finds.
         for article_id in article_ids:
             record = getArticleByID(article_id)
             if not record:
                 continue
             self.data.append(
                 '\n'.join((record['HD'], record['LP'], record['TD'])))
             self.target.append(label)

     collect(relevant, 1)
     collect(irrelevant, 0)
Exemplo n.º 2
0
from pipeline import getArticleByID
from Dataset import relevant, irrelevant, dataset, stopwords
# Smoke test: count how many of the labelled article IDs resolve in the database.
# Manual single-ID check, kept for reference:
# print(getArticleByID('CANBTZ0020141022eaan00011'))
relevant_articles = []
irrelevant_articles = []

# Relevant IDs: echo every lookup result (including misses) and keep the hits.
for article_id in relevant:
    fetched = getArticleByID(article_id)
    print(fetched)
    if not fetched:
        continue
    relevant_articles.append(fetched)
print('{} relevant articles in given set, found {} of them in database'.format(
    len(relevant), len(relevant_articles)))

# Irrelevant IDs: same lookup, but without echoing each result.
irrelevant_articles.extend(
    hit for hit in map(getArticleByID, irrelevant) if hit)
print(
    '{} irrelevant articles in given set, found {} of them in database'.format(
        len(irrelevant), len(irrelevant_articles)))

import warnings

# Silence every warning raised while sklearn and numpy are imported; the
# filter change is undone when the catch_warnings() block exits.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    import sklearn as sk
    import numpy as np

# Alias the `dataset` object imported from the Dataset module as `news`.
news = dataset
Exemplo n.º 3
0
# encoding=utf-8
from pipeline import checkItemExist, loadSettings, updateProgress, getTaskID, getDatabase
from pipeline import getArticleByID
from log import logger
# Manual smoke test: call each pipeline helper once and print what it returns.
# NOTE(review): the result of this first lookup is overwritten by the next
# line before it is ever printed or inspected -- confirm whether both results
# were meant to be shown.
res = checkItemExist('WC50042020180914ee9c0000g')
res = checkItemExist('xyz')
print(res)
print(loadSettings())
# The return value of updateProgress, if any, is ignored here.
updateProgress(0.1)
print('Task:', getTaskID())
print(getArticleByID('WSJO000020140701ea71002gx'))
# Disabled example call; process_item is not imported in this snippet.
#process_item('u6274652','Test','YangLu','content','1993-02-14','1993-02-14','http://','Website')