示例#1
0
 def __init__(self, confFile):
     '''
     Initialise the aggregator.

     confFile is handed unchanged to the base-class constructor; its
     interpretation is up to the base class (not visible here).
     '''
     base = super(StandardPartAgg, self)
     base.__init__(confFile)
     # Logger is looked up under the cleaner logger name.
     cleaner_logger = Logging.getLogger(LOGGER_NAME_CLENER)
     self.logger = cleaner_logger
     # The prefix index starts at zero.
     self.prefixIndex = 0
示例#2
0
 def __init__(self, json_config_file):
     '''
     Initialise the crawler manager from the given configuration file.

     json_config_file is forwarded to the base constructor together with
     the fixed arguments 0.1 and None; what those two values mean is
     defined by the base class, which is not visible here.
     '''
     # Set before the base constructor runs, preserving the original
     # ordering. NOTE(review): presumably the base set-up can call back
     # into code that reads this list -- confirm.
     self.__cagePrefixList = []
     base_init = super(NSNCageCatalogCrawlerManager, self).__init__
     base_init(json_config_file, 0.1, None)
     crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     self.logger = crawl_logger
示例#3
0
 def __init__(self, json_config_file):
     '''
     Initialise the crawler manager from the given configuration file.

     json_config_file is forwarded to the base constructor together with
     the fixed arguments 0.001 and None; their meaning is defined by the
     base class, which is not visible here.
     '''
     # Page range bounds start out unset; both are assigned before the
     # base constructor runs, as in the original ordering.
     self.__startPageNum, self.__endPageNum = None, None
     base_init = super(SatAirCrawlerManager, self).__init__
     base_init(json_config_file, 0.001, None)
     crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     self.logger = crawl_logger
示例#4
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler and cache the request values it needs.

     controller, dbProxy and request are passed straight through to the
     base constructor. request must support item lookup for the
     PARA_CONTINENT_CODE, PARA_COUNTRY_CODE and PARA_ORG_ID keys; a
     missing key raises whatever request's lookup raises.
     '''
     klass = EnterpriseListCrawler
     super(klass, self).__init__(controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     # Copy the three request parameters into private attributes.
     continent = request[klass.PARA_CONTINENT_CODE]
     self.__continentCode = continent
     country = request[klass.PARA_COUNTRY_CODE]
     self.__countryCode = country
     org = request[klass.PARA_ORG_ID]
     self.__orgId = org
示例#5
0
 def __init__(self, json_config_file):
     '''
     Initialise the crawler manager from the given configuration file.

     json_config_file is forwarded to the base constructor together with
     the fixed arguments 0.001 and None; their meaning is defined by the
     base class, which is not visible here.
     '''
     # Defaults assigned before the base constructor runs, as in the
     # original ordering: image-only mode off, empty image save path.
     self.__imgOnly, self.__imgSavePath = False, ''
     base_init = super(AviAllCrawlerManager, self).__init__
     base_init(json_config_file, 0.001, None)
     crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     self.logger = crawl_logger
示例#6
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler and cache the request values it needs.

     controller, dbProxy and request are passed straight through to the
     base constructor. request must support item lookup for
     PARA_ENTERPRISE_ID, PARA_LICENCE_ID and PARA_ENTERPRISE_ONLY, and
     membership tests for the optional PARA_SPECIFIC_STARTNO key.
     '''
     klass = PartCrawler
     super(klass, self).__init__(controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     self.__enterpriseId = request[klass.PARA_ENTERPRISE_ID]
     self.__licenceId = request[klass.PARA_LICENCE_ID]
     self.__enterpriseOnly = request[klass.PARA_ENTERPRISE_ONLY]
     # The start number is optional: None unless the request carries it.
     start_key = klass.PARA_SPECIFIC_STARTNO
     self.__startNo = request[start_key] if start_key in request else None
示例#7
0
 def __init__(self, json_config_file):
     '''
     Initialise the crawler manager from the given configuration file.

     json_config_file is forwarded to the base constructor together with
     the fixed arguments 0.1 and None; their meaning is defined by the
     base class, which is not visible here.
     '''
     # State assigned before the base constructor runs, as in the
     # original ordering. The index holds five slots: -1 followed by
     # four zeros.
     self.__cageNumIndex = [-1] + [0] * 4
     self.__noMore = False
     self.__cageNumList = []
     base_init = super(NSNCageCrawlerManager, self).__init__
     base_init(json_config_file, 0.1, None)
     crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     self.logger = crawl_logger
示例#8
0
    def __init__(self, controller, dbProxy, request):
        '''
        Initialise the crawler and cache the request values it needs.

        controller, dbProxy and request are passed straight through to
        the base constructor. request must support item lookup for
        PARA_URL, PARA_PAGE_TYPE and PARA_IMG_SAVE_PATH, and membership
        tests for the optional PARA_IMG_ONLY key.
        '''
        klass = AviAllCrawler
        super(klass, self).__init__(controller, dbProxy, request)
        self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
        self.__url = request[klass.PARA_URL]
        self.__pageType = request[klass.PARA_PAGE_TYPE]
        # Image-only mode is optional and falls back to False.
        img_only_key = klass.PARA_IMG_ONLY
        self.__imgOnly = (request[img_only_key]
                          if img_only_key in request else False)
        self.__imgSavePath = request[klass.PARA_IMG_SAVE_PATH]
示例#9
0
 def __init__(self, json_config_file):
     '''
     Initialise the crawler manager from the given configuration file.

     json_config_file is forwarded to the base constructor together with
     the fixed arguments 0.1 and None; their meaning is defined by the
     base class, which is not visible here.
     '''
     # State assigned before the base constructor runs, as in the
     # original ordering. The index holds five zero slots.
     self.__cageNumIndex = [0] * 5
     self.__noMore = False
     # Range bounds and parent path start out unset.
     self.__startCageNum, self.__endCageNum = None, None
     self.__parentPath = None
     base_init = super(NSNCageFileCrawlerManager, self).__init__
     base_init(json_config_file, 0.1, None)
     crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     self.logger = crawl_logger
示例#10
0
    def __init__(self, json_config_file):
        '''
        Initialise the manager for the enterprise crawl.

        json_config_file is forwarded to the base constructor together
        with the fixed arguments 0.1 and None; their meaning is defined
        by the base class, which is not visible here.

        Schema of the `enterprise` table as recorded by the original
        author (NOTE(review): presumably where crawled rows are stored
        -- confirm):

            CREATE TABLE `enterprise` (
              `ENTERPRISE_ID` varchar(63) NOT NULL DEFAULT '',
              `ENTERPRISE_NAME` varchar(255) DEFAULT NULL,
              `licence_id` varchar(63) DEFAULT NULL,
              `COUNTRY_CODE` varchar(10) DEFAULT NULL,
              `ORGID` varchar(63) DEFAULT NULL,
              `certificate_no` varchar(63) DEFAULT NULL,
              `EXPIRED_DATE` date DEFAULT NULL,
              `address` varchar(1023) DEFAULT NULL,
              `scan_copy_link` varchar(1023) DEFAULT NULL,
              PRIMARY KEY (`ENTERPRISE_ID`)
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8
        '''
        base_init = super(EnterpriseListCrawlerManager, self).__init__
        base_init(json_config_file, 0.1, None)
        crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
        self.logger = crawl_logger
示例#11
0
    def __init__(self, json_config_file):
        '''
        Initialise the manager for the part crawl.

        json_config_file is forwarded to the base constructor together
        with the fixed arguments 0.1 and None; their meaning is defined
        by the base class, which is not visible here.

        Schema of the `part` table as recorded by the original author
        (NOTE(review): presumably where crawled rows are stored --
        confirm):

            CREATE TABLE `part` (
              `enterprise_id` varchar(63) DEFAULT NULL,
              `licence_id` varchar(63) DEFAULT NULL,
              `aircraft_part_id` varchar(63) DEFAULT NULL,
              `ata_chapter_section` varchar(63) DEFAULT NULL,
              `category_no` varchar(63) DEFAULT NULL,
              `parts_number` varchar(63) DEFAULT NULL,
              `parts_name` varchar(255) DEFAULT NULL,
              `manufacturers` varchar(63) DEFAULT NULL,
              `inspection` char(1) DEFAULT '0',
              `repair` char(1) DEFAULT '0',
              `modification` char(1) DEFAULT '0',
              `overhaul` char(1) DEFAULT '0',
              `file_to_accord` varchar(255) DEFAULT NULL,
              `main_devices` varchar(255) DEFAULT NULL,
              `remark` text
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8
        '''
        # Assigned before the base constructor runs, as in the original
        # ordering; enterprise-only mode is off by default.
        self.__enterpriseOnly = False
        base_init = super(PartCrawlerManager, self).__init__
        base_init(json_config_file, 0.1, None)
        crawl_logger = Logging.getLogger(LOGGER_NAME_CRAWL)
        self.logger = crawl_logger
示例#12
0
 def __init__(self, confFile):
     '''
     Initialise the cleaner.

     confFile is handed unchanged to the base-class constructor; its
     interpretation is up to the base class (not visible here).
     '''
     base = super(NSNCleaner, self)
     base.__init__(confFile)
     # Logger is looked up under the cleaner logger name.
     cleaner_logger = Logging.getLogger(LOGGER_NAME_CLENER)
     self.logger = cleaner_logger
示例#13
0
 def __init__(self, confFile):
     '''
     Initialise the instance.

     confFile is handed unchanged to the base-class constructor; its
     interpretation is up to the base class (not visible here).
     '''
     base = super(AirBusPart, self)
     base.__init__(confFile)
     # Logger is looked up under the cleaner logger name.
     cleaner_logger = Logging.getLogger(LOGGER_NAME_CLENER)
     self.logger = cleaner_logger
示例#14
0
if __name__ == '__main__':
    # Script entry point: configure logging, then run AirBusPartParser
    # either over every .txt file under a folder or over one fixed file,
    # depending on DEBUG.
    import platform

    # Use the Windows logging configuration when the platform name
    # contains 'window'; otherwise use the default configuration.
    on_windows = 'window' in platform.system().lower()
    logging_cfg = ('crawler.logging.win.cfg' if on_windows
                   else 'crawler.logging.cfg')
    Logging.initLogger(os.path.join('conf', logging_cfg))

    # Settings handed to every parser created below.
    conf = {
        CrawlerConstants.CONFIG_FILE_DBHOST: 'localhost',
        CrawlerConstants.CONFIG_FILE_DBPORT: 3306,
        CrawlerConstants.CONFIG_FILE_DBUSER: '******',
        CrawlerConstants.CONFIG_FILE_DBPASS: '******',
        CrawlerConstants.CONFIG_FILE_DBNAME: 'airbus',
    }

    if DEBUG is not False:
        # Any DEBUG value other than the literal False: parse one file.
        # Alternative sample input kept from the original:
        # F:\tmp\part\test.txt
        fileName = 'F:\\tmp\\part\\0010\\27-54-49.txt'
        parser = AirBusPartParser(conf, Logging.getLogger(LOGGER_NAME))
        parser.parse(fileName)
        parser.clean()
    else:
        # Walk the folder bottom-up; a fresh parser is built per file.
        folder = 'F:\\tmp\\part'
        for root, dirs, files in os.walk(folder, topdown=False):
            for filename in files:
                if not filename.endswith('.txt'):
                    continue
                fullpath = os.path.join(root, filename)
                parser = AirBusPartParser(conf,
                                          Logging.getLogger(LOGGER_NAME))
                parser.parse(fullpath)
                parser.clean()
示例#15
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler and cache the request values it needs.

     controller, dbProxy and request are passed straight through to the
     base constructor. request must support item lookup for the PARA_URL
     and PARA_PAGE_TYPE keys.
     '''
     klass = SatAirCrawler
     super(klass, self).__init__(controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     # Copy the target URL and page type into private attributes.
     target_url = request[klass.PARA_URL]
     self.__url = target_url
     page_type = request[klass.PARA_PAGE_TYPE]
     self.__pageType = page_type
示例#16
0
        sql = 'insert into vendor (vendor_code, cage_code, cage_name, address, dummy) values ' + ','.join(
            vendorList)

        self.dbProxy.execute(sql)
        self.dbProxy.commit()


class Vendor(object):
    '''
    Plain record describing one vendor.

    Attributes:
        vendorCode: code supplied by the caller.
        vendorName: name supplied by the caller.
        address: starts as an empty string.
        isDummy: starts as 0.
    '''

    def __init__(self, vendorCode, vendorName):
        '''Store the given code and name and apply the default fields.'''
        # Caller-supplied values first, then the fixed defaults.
        self.vendorCode, self.vendorName = vendorCode, vendorName
        self.address, self.isDummy = '', 0


if __name__ == '__main__':
    # Script entry point: configure logging, then run AirBusVendorParser
    # over one fixed vendor file.
    import platform

    # Use the Windows logging configuration when the platform name
    # contains 'window'; otherwise use the default configuration.
    on_windows = 'window' in platform.system().lower()
    logging_cfg = ('crawler.logging.win.cfg' if on_windows
                   else 'crawler.logging.cfg')
    Logging.initLogger(os.path.join('conf', logging_cfg))

    # Settings handed to the parser.
    conf = {
        CrawlerConstants.CONFIG_FILE_DBHOST: 'localhost',
        CrawlerConstants.CONFIG_FILE_DBPORT: 3306,
        CrawlerConstants.CONFIG_FILE_DBUSER: '******',
        CrawlerConstants.CONFIG_FILE_DBPASS: '******',
        CrawlerConstants.CONFIG_FILE_DBNAME: 'airbus',
    }

    vendor_logger = Logging.getLogger(LOGGER_NAME)
    parser = AirBusVendorParser(conf, vendor_logger)
    fileName = 'F:\\tmp\\vendor.txt'
    parser.parse(fileName)
示例#17
0
 def __init__(self, confFile):
     '''
     Initialise the aggregator.

     confFile is handed unchanged to the base-class constructor; its
     interpretation is up to the base class (not visible here).
     '''
     base = super(DuplicateAgg, self)
     base.__init__(confFile)
     # Logger is looked up under the cleaner logger name.
     cleaner_logger = Logging.getLogger(LOGGER_NAME_CLENER)
     self.logger = cleaner_logger
示例#18
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler and cache the request value it needs.

     controller, dbProxy and request are passed straight through to the
     base constructor. request must support item lookup for the
     PARA_CAGE_PREFIX key.
     '''
     klass = NSNCageCatalogCrawler
     super(klass, self).__init__(controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     # Copy the cage prefix into a private attribute.
     cage_prefix = request[klass.PARA_CAGE_PREFIX]
     self.__cagePrefix = cage_prefix
示例#19
0
 def __init__(self, controller, dbProxy, request):
     '''
     Initialise the crawler and cache the request values it needs.

     controller, dbProxy and request are passed straight through to the
     base constructor. request must support item lookup for the
     PARA_CAGE_FILE_PATH and PARA_CAGE_NUM keys.
     '''
     klass = NSNCageFileCrawler
     super(klass, self).__init__(controller, dbProxy, request)
     self.logger = Logging.getLogger(LOGGER_NAME_CRAWL)
     # Copy the file path and cage number into private attributes.
     file_path = request[klass.PARA_CAGE_FILE_PATH]
     self.__cageNumFilePath = file_path
     cage_num = request[klass.PARA_CAGE_NUM]
     self.__cageNum = cage_num