sql = 'insert into vendor (vendor_code, cage_code, cage_name, address, dummy) values ' + ','.join( vendorList) self.dbProxy.execute(sql) self.dbProxy.commit() class Vendor(object): def __init__(self, vendorCode, vendorName): self.vendorCode = vendorCode self.vendorName = vendorName self.address = '' self.isDummy = 0 if __name__ == '__main__': import platform if 'window' in platform.system().lower(): Logging.initLogger(os.path.join('conf', 'crawler.logging.win.cfg')) else: Logging.initLogger(os.path.join('conf', 'crawler.logging.cfg')) conf = { CrawlerConstants.CONFIG_FILE_DBHOST: 'localhost', CrawlerConstants.CONFIG_FILE_DBPORT: 3306, CrawlerConstants.CONFIG_FILE_DBUSER: '******', CrawlerConstants.CONFIG_FILE_DBPASS: '******', CrawlerConstants.CONFIG_FILE_DBNAME: 'airbus' } parser = AirBusVendorParser(conf, Logging.getLogger(LOGGER_NAME)) fileName = 'F:\\tmp\\vendor.txt' parser.parse(fileName)
cageName = tds[2].text.strip() self.logger.debug('CageNum:%s, CageName:%s', cageNum, cageName) self.totalNum += 1 nextPageDisabled = soup.findAll('li', {'class': 'next disabled'}) if len(nextPageDisabled) > 0: return CrawlerConstants.VAL_STATUS_FINISH else: nextPage = soup.findAll('li', {'class': 'next'}) if len(nextPage) > 0: return CrawlerConstants.VAL_STATUS_MORE else: return CrawlerConstants.VAL_STATUS_FINISH if __name__ == '__main__': ''' if PIDUtils.isPidFileExist(LOGGER_NAME_CRAWL): print 'Previous process is on-going, please stop it firstly' sys.exit(1) ''' pid = os.getpid() PIDUtils.writePid(LOGGER_NAME_CRAWL, pid) Logging.initLogger('conf/crawler.logging.cfg') #Logging.initLogger('F:\\program\\crm\\crawler\\src\\python\\conf\\crawler.logging.cfg') ins = NSNCageCatalogCrawlerManager('conf/' + LOGGER_NAME_CRAWL + '.cfg') #ins = NSNCageCrawlerManager('F:\\program\\crm\\crawler\\src\\python\\conf\\nsn.cfg') ins.start() pidutils = PIDUtils(LOGGER_NAME_CRAWL, ins.shutDown, 5, ins.logger) pidutils.start() sys.exit(0)
if len(ref) == 13: previousFullWithoutHyphen = ref else: ref = previousFullWithoutHyphen[:13 - len(ref)] + ref ref = ref[:4] + '-' + ref[4:6] + '-' + ref[6:9] + '-' + ref[9:] if ref == nsnNum: continue referenceList.append(ref) return referenceList if __name__ == '__main__': ''' if PIDUtils.isPidFileExist(LOGGER_NAME_CRAWL): print 'Previous process is on-going, please stop it firstly' sys.exit(1) ''' pid = os.getpid() PIDUtils.writePid(LOGGER_NAME_CRAWL, pid) #Logging.initLogger('conf/crawler.logging.cfg') Logging.initLogger( 'F:\\program\\crm\\crawler\\src\\python\\conf\\crawler.logging.win.cfg' ) #ins = NSNCageFileCrawlerManager('conf/'+LOGGER_NAME_CRAWL+'.cfg') ins = NSNCageFileCrawlerManager( 'F:\\program\\crm\\crawler\\src\\python\\conf\\nfc.cfg') ins.start() pidutils = PIDUtils(LOGGER_NAME_CRAWL, ins.shutDown, 5, ins.logger) pidutils.start() sys.exit(0)