def insertCrawlerRecord(mysql, kwargs):
    """Persist one crawl result as a CrawlerRecord row.

    :param mysql: project MySQL wrapper exposing a SQLAlchemy ``session``.
    :param kwargs: dict with keys 'title', 'abstract', 'crawler_url',
        'gmt_crawler', 'gmt_created', 'sch_id', 'alumni_id'.

    Duplicate rows (unique-key IntegrityError) are rolled back and logged
    instead of being raised to the caller.
    """
    mysqlSession = mysql.session

    # Normalize text once: encode to UTF-8 bytes (Python 2 str) then strip,
    # so the stored columns and the MD5 keys are computed over the SAME bytes.
    title = kwargs['title'].encode('utf8').strip()
    crawler_url = kwargs['crawler_url'].encode('utf8').strip()

    crawlerRecord = CrawlerRecord()
    crawlerRecord.title = title
    crawlerRecord.abstract = kwargs['abstract'].encode('utf8').strip()
    crawlerRecord.crawler_url = crawler_url
    crawlerRecord.gmt_crawler = kwargs['gmt_crawler']
    crawlerRecord.gmt_created = kwargs['gmt_created']
    # BUGFIX: url_key was previously hashed from the raw, un-encoded URL
    # (str.strip(kwargs['crawler_url'])), which (a) disagreed with the
    # stripped-UTF-8 value actually stored in crawler_url, and (b) raises
    # TypeError in Python 2 when the URL is a unicode object. Hash the same
    # bytes that are stored, matching how title_key is computed.
    crawlerRecord.url_key = BasicTool.md5(crawler_url)
    crawlerRecord.title_key = BasicTool.md5(title)
    crawlerRecord.sch_id = kwargs['sch_id']
    crawlerRecord.alumni_id = kwargs['alumni_id']

    try:
        mysqlSession.add(crawlerRecord)
        mysqlSession.commit()
        # Bump the shared crawl counter only after a successful commit.
        spider.crawlerTask.crawlerNum.add()
    except sqlalchemy.exc.IntegrityError as e:
        # Unique-constraint hit (already-crawled URL/title): roll back the
        # failed transaction and log, rather than crashing the crawler.
        mysqlSession.rollback()
        CrawlerTool.logger.error(e)
# NOTE(review): this chunk starts MID-STATEMENT — the `if ...:` / loop header
# that pairs with the leading `abstract += sibling else:` is outside this view,
# so the code is left byte-identical rather than reformatted.
# What the visible tail does: finishes accumulating `abstract` from HTML
# siblings (BeautifulSoup-style `.get_text()` — presumably; confirm against the
# missing loop header), trims the last 5 characters, prints Python 2 debug
# output, stamps gmt_crawler with the current local time, fills a CrawlerRecord
# (sch_id/alumni_id hard-coded to 1 — TODO confirm these placeholders), and
# commits it through a freshly constructed MySQL() session, which is then
# closed. Unlike insertCrawlerRecord above, no title_key is set and no
# IntegrityError handling is done here — duplicates would raise. The trailing
# commented-out lines are an old strftime/strptime round-trip experiment.
abstract += sibling else: abstract += sibling.get_text()#re.search(r'<.*>(.*)</.*>',sibling.get_text()) #print type(sibling) #print sibling.name abstract=abstract[:-5] print gmt_created print title print crawler_url print abstract gmt_crawler = datetime.datetime.now()#BasicTool.getCurrentTime() crawlerRecord=CrawlerRecord() crawlerRecord.title=title crawlerRecord.abstract=abstract crawlerRecord.crawler_url=crawler_url crawlerRecord.gmt_crawler=gmt_crawler crawlerRecord.gmt_created=gmt_created crawlerRecord.url_key=BasicTool.md5(crawler_url) crawlerRecord.sch_id=1 crawlerRecord.alumni_id=1 mysql=MySQL() mysql.session.add(crawlerRecord) mysql.session.commit() mysql.session.close() # tt=(nt.strftime('%Y年%m月%d日 %H时%M分%S秒')) # print tt # timeArray = time.strptime(tt, "%Y年%m月%d日 %H时%M分%S秒") # print timeArray