class Collector:
    """Collect page information from the HTML store and save it as records."""

    def __init__(self):
        # Handle on the HTML store; run() performs every page lookup through it.
        self.htmldb = HtmlDB()
        # Page count reported by the store; stays None until run() is called.
        self.htmlnum = None

    def run(self):
        """Main routine: rebuild all records from the HTML store.

        Existing records are cleared first. Then, for every index in
        ``range(self.htmlnum)``, one ``Record`` is built from the selected
        page and saved.
        """
        self.clearRecords()
        self.htmlnum = self.htmldb.getHtmlNum()
        db = self.htmldb
        for index in range(self.htmlnum):
            # The handle is set before getTitle()/getContent() are called;
            # presumably those read the currently selected page -- confirm
            # against HtmlDB. The evaluation order below is kept on purpose.
            info = db.setRecordHandle(index)
            fields = {
                'dectitle': info.title,
                'title': db.getTitle(),
                'decsource': self.transPageDec(db.getContent()),
                'url': info.url,
                'date': info.date,
            }
            Record(**fields).save()

    def transPageDec(self, source):
        """Return ``source`` truncated for use as a page description.

        The cut-off is the integer ``page_dec_length`` option of the
        ``indexer`` config section; the result is ``source[:length]``.
        """
        return source[:config.getint('indexer', 'page_dec_length')]

    def clearRecords(self):
        """Delete every existing record; called at the start of each run."""
        stale_records = Record.objects.all()
        stale_records.delete()