Пример #1
0
        sql = 'insert into vendor (vendor_code, cage_code, cage_name, address, dummy) values ' + ','.join(
            vendorList)

        self.dbProxy.execute(sql)
        self.dbProxy.commit()


class Vendor(object):
    """Plain record describing one vendor.

    Attributes set by the constructor:
        vendorCode: the code passed in by the caller.
        vendorName: the name passed in by the caller.
        address: starts as an empty string.
        isDummy: starts as 0.
    """

    def __init__(self, vendorCode, vendorName):
        # Caller-supplied identity fields.
        self.vendorCode, self.vendorName = vendorCode, vendorName
        # Fields that always start from fixed defaults.
        self.address, self.isDummy = '', 0


if __name__ == '__main__':
    # Ad-hoc driver: initialise logging, build an AirBusVendorParser with
    # hard-coded database settings, and run it on a hard-coded input file.
    import platform

    # Pick the logging configuration file based on the reported platform name.
    runningOnWindows = 'window' in platform.system().lower()
    loggingCfgName = ('crawler.logging.win.cfg' if runningOnWindows
                      else 'crawler.logging.cfg')
    Logging.initLogger(os.path.join('conf', loggingCfgName))

    # Settings handed to the parser, keyed by CrawlerConstants names.
    # NOTE(review): user/password look like masked placeholders - confirm
    # and replace before running.
    dbSettings = {}
    dbSettings[CrawlerConstants.CONFIG_FILE_DBHOST] = 'localhost'
    dbSettings[CrawlerConstants.CONFIG_FILE_DBPORT] = 3306
    dbSettings[CrawlerConstants.CONFIG_FILE_DBUSER] = '******'
    dbSettings[CrawlerConstants.CONFIG_FILE_DBPASS] = '******'
    dbSettings[CrawlerConstants.CONFIG_FILE_DBNAME] = 'airbus'

    vendorParser = AirBusVendorParser(dbSettings,
                                      Logging.getLogger(LOGGER_NAME))
    # The input path is hard-coded to a Windows drive location.
    vendorParser.parse('F:\\tmp\\vendor.txt')
Пример #2
0
            cageName = tds[2].text.strip()
            self.logger.debug('CageNum:%s, CageName:%s', cageNum, cageName)
            self.totalNum += 1

        nextPageDisabled = soup.findAll('li', {'class': 'next disabled'})
        if len(nextPageDisabled) > 0:
            return CrawlerConstants.VAL_STATUS_FINISH
        else:
            nextPage = soup.findAll('li', {'class': 'next'})
            if len(nextPage) > 0:
                return CrawlerConstants.VAL_STATUS_MORE
            else:
                return CrawlerConstants.VAL_STATUS_FINISH


if __name__ == '__main__':
    # Entry point: record this process's PID, initialise logging, start the
    # catalog crawler manager, then start a PIDUtils instance wired to the
    # manager's shutDown callback.
    #
    # The string literal below is a disabled "already running" guard kept
    # verbatim; as a bare expression it has no effect.
    '''
    if PIDUtils.isPidFileExist(LOGGER_NAME_CRAWL):
        print 'Previous process is on-going, please stop it firstly'
        sys.exit(1)
    '''
    PIDUtils.writePid(LOGGER_NAME_CRAWL, os.getpid())

    # A developer-machine absolute path
    # (F:\program\crm\crawler\src\python\conf\crawler.logging.cfg) was used
    # here previously; the relative path is the active one.
    Logging.initLogger('conf/crawler.logging.cfg')

    # Manager configuration file is derived from the crawl logger name.
    # (An earlier variant built NSNCageCrawlerManager from
    # F:\program\crm\crawler\src\python\conf\nsn.cfg.)
    managerCfgPath = 'conf/' + LOGGER_NAME_CRAWL + '.cfg'
    manager = NSNCageCatalogCrawlerManager(managerCfgPath)
    manager.start()

    # NOTE(review): the literal 5 is presumably a polling interval for
    # PIDUtils - confirm against its constructor.
    pidWatcher = PIDUtils(LOGGER_NAME_CRAWL, manager.shutDown, 5,
                          manager.logger)
    pidWatcher.start()
    sys.exit(0)
Пример #3
0
            if len(ref) == 13:
                previousFullWithoutHyphen = ref
            else:
                ref = previousFullWithoutHyphen[:13 - len(ref)] + ref
            ref = ref[:4] + '-' + ref[4:6] + '-' + ref[6:9] + '-' + ref[9:]
            if ref == nsnNum:
                continue
            referenceList.append(ref)

        return referenceList


if __name__ == '__main__':
    # Entry point: record this process's PID, initialise logging, start the
    # cage-file crawler manager, then start a PIDUtils instance wired to the
    # manager's shutDown callback.
    #
    # The string literal below is a disabled "already running" guard kept
    # verbatim; as a bare expression it has no effect.
    '''
    if PIDUtils.isPidFileExist(LOGGER_NAME_CRAWL):
        print 'Previous process is on-going, please stop it firstly'
        sys.exit(1)
    '''
    PIDUtils.writePid(LOGGER_NAME_CRAWL, os.getpid())

    # Both configuration paths are hard-coded absolute Windows locations.
    # The relative alternatives ('conf/crawler.logging.cfg' and
    # 'conf/' + LOGGER_NAME_CRAWL + '.cfg') are currently not used.
    loggingCfgPath = (
        'F:\\program\\crm\\crawler\\src\\python\\conf\\crawler.logging.win.cfg'
    )
    Logging.initLogger(loggingCfgPath)

    managerCfgPath = 'F:\\program\\crm\\crawler\\src\\python\\conf\\nfc.cfg'
    manager = NSNCageFileCrawlerManager(managerCfgPath)
    manager.start()

    # NOTE(review): the literal 5 is presumably a polling interval for
    # PIDUtils - confirm against its constructor.
    pidWatcher = PIDUtils(LOGGER_NAME_CRAWL, manager.shutDown, 5,
                          manager.logger)
    pidWatcher.start()
    sys.exit(0)