def __init__(self, confFile):
    '''
    Create the aggregator on top of its base class.

    confFile is forwarded unchanged to the base-class constructor; after
    that the prefix index starts at 0 and the cleaner logger is attached.
    '''
    super(StandardPartAgg, self).__init__(confFile)
    # Position counter, starts from the first entry.
    self.prefixIndex = 0
    self.logger = Logging.getLogger(LOGGER_NAME_CLENER)
def __init__(self, json_config_file):
    '''
    Create the manager with an empty cage-prefix list.

    json_config_file is handed to the base-class constructor together with
    the fixed arguments 0.1 and None (their meaning is defined by the base
    class, which is not visible here).
    '''
    # NOTE(review): assigned before the base constructor runs, as in the
    # original; presumably the base class may touch it - confirm before
    # reordering.
    self.__cagePrefixList = list()
    super(NSNCageCatalogCrawlerManager, self).__init__(
        json_config_file, 0.1, None)
    crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    self.logger = crawl_logger
def __init__(self, json_config_file):
    '''
    Create the manager with no page range selected yet.

    json_config_file is handed to the base-class constructor together with
    the fixed arguments 0.001 and None (semantics defined by the base
    class, not visible here).
    '''
    # Both page bounds start out unset; kept ahead of the base-class call
    # exactly as the original ordering had them.
    self.__startPageNum = self.__endPageNum = None
    super(SatAirCrawlerManager, self).__init__(
        json_config_file, 0.001, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, controller, dbProxy, request):
    '''
    Create a crawler for one enterprise-list request.

    controller, dbProxy and request are forwarded to the base class. The
    continent code, country code and organisation id are then read from
    request; a missing key propagates whatever error request[...] raises.
    '''
    super(EnterpriseListCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)

    # Short alias for the class that owns the request-key constants.
    keys = EnterpriseListCrawler
    self.__continentCode = request[keys.PARA_CONTINENT_CODE]
    self.__countryCode = request[keys.PARA_COUNTRY_CODE]
    self.__orgId = request[keys.PARA_ORG_ID]
def __init__(self, json_config_file):
    '''
    Create the manager with image-only mode off and an empty save path.

    json_config_file is handed to the base-class constructor together with
    the fixed arguments 0.001 and None (semantics defined by the base
    class, not visible here).
    '''
    # Defaults are in place before the base constructor runs, matching the
    # original statement order.
    self.__imgOnly, self.__imgSavePath = False, ''
    super(AviAllCrawlerManager, self).__init__(
        json_config_file, 0.001, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, controller, dbProxy, request):
    '''
    Create a crawler for one part request.

    controller, dbProxy and request are forwarded to the base class. The
    enterprise id, licence id and enterprise-only flag are mandatory keys
    of request; the specific start number is optional and defaults to None.
    '''
    super(PartCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)

    keys = PartCrawler
    self.__enterpriseId = request[keys.PARA_ENTERPRISE_ID]
    self.__licenceId = request[keys.PARA_LICENCE_ID]
    self.__enterpriseOnly = request[keys.PARA_ENTERPRISE_ONLY]
    # Optional key: stays None unless the request carries a start number.
    self.__startNo = (request[keys.PARA_SPECIFIC_STARTNO]
                      if keys.PARA_SPECIFIC_STARTNO in request else None)
def __init__(self, json_config_file):
    '''
    Create the manager with its cage-number bookkeeping reset.

    json_config_file is handed to the base-class constructor together with
    the fixed arguments 0.1 and None (semantics defined by the base class,
    not visible here).
    '''
    # Five-slot index: first slot -1, the remaining four 0.
    self.__cageNumIndex = [-1] + [0] * 4
    self.__noMore = False
    self.__cageNumList = list()
    super(NSNCageCrawlerManager, self).__init__(json_config_file, 0.1, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, controller, dbProxy, request):
    '''
    Create a crawler for one AviAll request.

    controller, dbProxy and request are forwarded to the base class. The
    URL and page type are mandatory keys of request. The image-only flag
    and the image save path are optional: when absent they default to
    False and '' respectively.
    '''
    super(AviAllCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    self.__url = request[AviAllCrawler.PARA_URL]
    self.__pageType = request[AviAllCrawler.PARA_PAGE_TYPE]

    # Both optional attributes always get a value. Previously the save
    # path was either left undefined or raised on a missing key, depending
    # on how the image-only branch was taken.
    self.__imgOnly = False
    self.__imgSavePath = ''
    if AviAllCrawler.PARA_IMG_ONLY in request:
        self.__imgOnly = request[AviAllCrawler.PARA_IMG_ONLY]
    if AviAllCrawler.PARA_IMG_SAVE_PATH in request:
        self.__imgSavePath = request[AviAllCrawler.PARA_IMG_SAVE_PATH]
def __init__(self, json_config_file):
    '''
    Create the manager with its cage-number bookkeeping reset.

    json_config_file is handed to the base-class constructor together with
    the fixed arguments 0.1 and None (semantics defined by the base class,
    not visible here).
    '''
    # Five-slot index, every slot starting at 0.
    self.__cageNumIndex = [0] * 5
    self.__noMore = False
    # Range bounds and parent path are all unset until configured.
    self.__startCageNum = self.__endCageNum = self.__parentPath = None
    super(NSNCageFileCrawlerManager, self).__init__(
        json_config_file, 0.1, None)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
def __init__(self, json_config_file):
    '''
    Create the manager that crawls for enterprises.

    json_config_file is handed to the base-class constructor together with
    the fixed arguments 0.1 and None (semantics defined by the base class,
    not visible here).

    Table definition noted by the original author (presumably the table
    the crawled rows end up in - confirm against the DB layer):

        CREATE TABLE `enterprise` (
          `ENTERPRISE_ID` varchar(63) NOT NULL DEFAULT '',
          `ENTERPRISE_NAME` varchar(255) DEFAULT NULL,
          `licence_id` varchar(63) DEFAULT NULL,
          `COUNTRY_CODE` varchar(10) DEFAULT NULL,
          `ORGID` varchar(63) DEFAULT NULL,
          `certificate_no` varchar(63) DEFAULT NULL,
          `EXPIRED_DATE` date DEFAULT NULL,
          `address` varchar(1023) DEFAULT NULL,
          `scan_copy_link` varchar(1023) DEFAULT NULL,
          PRIMARY KEY (`ENTERPRISE_ID`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8
    '''
    super(EnterpriseListCrawlerManager, self).__init__(
        json_config_file, 0.1, None)
    crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    self.logger = crawl_logger
def __init__(self, json_config_file):
    '''
    Create the part crawler manager with enterprise-only mode off.

    json_config_file is handed to the base-class constructor together with
    the fixed arguments 0.1 and None (semantics defined by the base class,
    not visible here).

    Table definition noted by the original author (presumably the table
    the crawled rows end up in - confirm against the DB layer):

        CREATE TABLE `part` (
          `enterprise_id` varchar(63) DEFAULT NULL,
          `licence_id` varchar(63) DEFAULT NULL,
          `aircraft_part_id` varchar(63) DEFAULT NULL,
          `ata_chapter_section` varchar(63) DEFAULT NULL,
          `category_no` varchar(63) DEFAULT NULL,
          `parts_number` varchar(63) DEFAULT NULL,
          `parts_name` varchar(255) DEFAULT NULL,
          `manufacturers` varchar(63) DEFAULT NULL,
          `inspection` char(1) DEFAULT '0',
          `repair` char(1) DEFAULT '0',
          `modification` char(1) DEFAULT '0',
          `overhaul` char(1) DEFAULT '0',
          `file_to_accord` varchar(255) DEFAULT NULL,
          `main_devices` varchar(255) DEFAULT NULL,
          `remark` text
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8
    '''
    # Default is in place before the base constructor runs, matching the
    # original statement order.
    self.__enterpriseOnly = False
    super(PartCrawlerManager, self).__init__(json_config_file, 0.1, None)
    crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
    self.logger = crawl_logger
def __init__(self, confFile):
    '''
    Create the cleaner on top of its base class.

    confFile is forwarded unchanged to the base-class constructor, after
    which the cleaner logger is attached to the instance.
    '''
    base = super(NSNCleaner, self)
    base.__init__(confFile)
    self.logger = Logging.getLogger(LOGGER_NAME_CLENER)
def __init__(self, confFile):
    '''
    Create the AirBus part processor on top of its base class.

    confFile is forwarded unchanged to the base-class constructor, after
    which the cleaner logger is attached to the instance.
    '''
    base = super(AirBusPart, self)
    base.__init__(confFile)
    self.logger = Logging.getLogger(LOGGER_NAME_CLENER)
if __name__ == '__main__':
    import platform

    # Select the logging configuration by platform: the ".win" variant is
    # used when the reported system name contains "window".
    on_windows = 'window' in platform.system().lower()
    logging_cfg = 'crawler.logging.win.cfg' if on_windows else 'crawler.logging.cfg'
    Logging.initLogger(os.path.join('conf', logging_cfg))

    # Database connection settings handed to every parser instance.
    conf = {
        CrawlerConstants.CONFIG_FILE_DBHOST: 'localhost',
        CrawlerConstants.CONFIG_FILE_DBPORT: 3306,
        CrawlerConstants.CONFIG_FILE_DBUSER: '******',
        CrawlerConstants.CONFIG_FILE_DBPASS: '******',
        CrawlerConstants.CONFIG_FILE_DBNAME: 'airbus'
    }

    if DEBUG is not False:
        # Debug run: parse one fixed sample file.
        fileName = 'F:\\tmp\\part\\0010\\27-54-49.txt'
        # Alternative sample: 'F:\\tmp\\part\\test.txt'
        parser = AirBusPartParser(conf, Logging.getLogger(LOGGER_NAME))
        parser.parse(fileName)
        parser.clean()
    else:
        # Normal run: parse every .txt file below the folder, walking
        # bottom-up, with a fresh parser per file.
        folder = 'F:\\tmp\\part'
        for dirpath, _dirnames, filenames in os.walk(folder, topdown=False):
            for name in filenames:
                if not name.endswith('.txt'):
                    continue
                txt_path = os.path.join(dirpath, name)
                parser = AirBusPartParser(conf, Logging.getLogger(LOGGER_NAME))
                parser.parse(txt_path)
                parser.clean()
def __init__(self, controller, dbProxy, request):
    '''
    Create a crawler for one SatAir request.

    controller, dbProxy and request are forwarded to the base class. The
    URL and page type are then read from request; a missing key propagates
    whatever error request[...] raises.
    '''
    super(SatAirCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)

    # Short alias for the class that owns the request-key constants.
    keys = SatAirCrawler
    self.__url = request[keys.PARA_URL]
    self.__pageType = request[keys.PARA_PAGE_TYPE]
# NOTE(review): the next three statements are the tail of a definition that
# begins outside this excerpt, so their original indentation is unknown.
# They build one multi-row INSERT from the entries of vendorList, execute
# it through the DB proxy and commit.
sql = 'insert into vendor (vendor_code, cage_code, cage_name, address, dummy) values ' + ','.join( vendorList)
self.dbProxy.execute(sql)
self.dbProxy.commit()


class Vendor(object):
    '''
    Plain record for one vendor: code, name, address and a dummy flag.
    '''

    def __init__(self, vendorCode, vendorName):
        '''
        Store the given code and name; address and dummy flag get defaults.
        '''
        self.vendorCode = vendorCode
        self.vendorName = vendorName
        # Address starts out empty.
        self.address = ''
        # Dummy flag starts at 0.
        self.isDummy = 0


if __name__ == '__main__':
    import platform
    # Pick the logging configuration by platform: the ".win" file is used
    # when the reported system name contains "window".
    if 'window' in platform.system().lower():
        Logging.initLogger(os.path.join('conf', 'crawler.logging.win.cfg'))
    else:
        Logging.initLogger(os.path.join('conf', 'crawler.logging.cfg'))
    # Database connection settings handed to the parser.
    conf = {
        CrawlerConstants.CONFIG_FILE_DBHOST: 'localhost',
        CrawlerConstants.CONFIG_FILE_DBPORT: 3306,
        CrawlerConstants.CONFIG_FILE_DBUSER: '******',
        CrawlerConstants.CONFIG_FILE_DBPASS: '******',
        CrawlerConstants.CONFIG_FILE_DBNAME: 'airbus'
    }
    parser = AirBusVendorParser(conf, Logging.getLogger(LOGGER_NAME))
    # Hard-coded input file for this manual run.
    fileName = 'F:\\tmp\\vendor.txt'
    parser.parse(fileName)
def __init__(self, confFile):
    '''
    Create the duplicate aggregator on top of its base class.

    confFile is forwarded unchanged to the base-class constructor, after
    which the cleaner logger is attached to the instance.
    '''
    base = super(DuplicateAgg, self)
    base.__init__(confFile)
    self.logger = Logging.getLogger(LOGGER_NAME_CLENER)
def __init__(self, controller, dbProxy, request):
    '''
    Create a crawler for one cage-catalog request.

    controller, dbProxy and request are forwarded to the base class. The
    cage prefix is then read from request; a missing key propagates
    whatever error request[...] raises.
    '''
    super(NSNCageCatalogCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)

    prefix_key = NSNCageCatalogCrawler.PARA_CAGE_PREFIX
    self.__cagePrefix = request[prefix_key]
def __init__(self, controller, dbProxy, request):
    '''
    Create a crawler for one cage-file request.

    controller, dbProxy and request are forwarded to the base class. The
    cage file path and cage number are then read from request; a missing
    key propagates whatever error request[...] raises.
    '''
    super(NSNCageFileCrawler, self).__init__(controller, dbProxy, request)
    self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)

    # Short alias for the class that owns the request-key constants.
    keys = NSNCageFileCrawler
    self.__cageNumFilePath = request[keys.PARA_CAGE_FILE_PATH]
    self.__cageNum = request[keys.PARA_CAGE_NUM]