def __init__(self):
    """Set up the date window and collaborators for a new instance.

    Records today's date, derives the earliest date of interest by
    subtracting the number of days configured under ``'ainews.period'``,
    creates the database and summarizer helpers, and starts with an
    empty list of articles.
    """
    current_day = date.today()
    self.today = current_day

    # The config value is converted with int() before being used as a
    # day count for the look-back window.
    lookback = timedelta(days=int(config['ainews.period']))
    self.earliest_date = current_day - lookback

    self.db = AINewsDB()
    self.summarizer = AINewsSummarizer()
    self.articles = []
duplist_stored = [] try: duplist_stored = loadpickle(paths['corpus.duplist']) except: pass notduplist_stored = set() try: notduplist_stored = loadpickle(paths['corpus.notduplist']) except: pass duplists += duplist_stored corpus = AINewsCorpus() summarizer = AINewsSummarizer() id_begin = 315 id_end = 1500 #################################### # idset records all the news id #################################### idset = set() # idset records all human selected news id checklist = set() # checklist records all human selected dup pairs for dupset in duplists: for id in dupset[0]: idset.add(id) n = len(dupset[0]) sortedlist = sorted(dupset[0]) for i in range(n-1): for j in range(i+1, n):