示例#1
0
 def __init__(self):
     """Initialise the date window, database handle, summarizer and article list.

     ``earliest_date`` is ``today`` minus the number of days read from the
     ``'ainews.period'`` config entry (converted with ``int``).
     """
     self.today = date.today()
     period_days = int(config['ainews.period'])
     lookback = timedelta(days=period_days)
     self.earliest_date = self.today - lookback
     # Collaborators are created in the same order as before, in case their
     # constructors have side effects.
     self.db = AINewsDB()
     self.summarizer = AINewsSummarizer()
     # Starts empty; nothing in this method populates it.
     self.articles = []
示例#2
0
# Load the previously stored duplicate list, falling back to an empty list
# when it cannot be loaded (best-effort).
# NOTE(review): loadpickle presumably unpickles the file at the given path;
# unpickling is only safe for trusted files -- confirm the source of these.
duplist_stored = []
try:
    duplist_stored = loadpickle(paths['corpus.duplist'])
except Exception:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit still propagate.
    pass

# Same best-effort load for the stored "not duplicate" collection; it
# defaults to an empty set.
notduplist_stored = set()
try:
    notduplist_stored = loadpickle(paths['corpus.notduplist'])
except Exception:
    pass
# Extend the existing duplists (defined outside this section) with the
# stored entries.
duplists += duplist_stored

corpus = AINewsCorpus()
summarizer = AINewsSummarizer()

# NOTE(review): looks like an article id range used further down -- confirm.
id_begin = 315
id_end = 1500
####################################
# idset records all the news ids
####################################
idset = set()     # idset records all human-selected news ids
checklist = set() # checklist records all human-selected dup pairs
for dupset in duplists:
    for id in dupset[0]:
        idset.add(id)
    n = len(dupset[0])
    sortedlist = sorted(dupset[0])
    for i in range(n-1):
        for j in range(i+1, n):