    # module-level imports this method relies on:
    #   import csv, logging, os, sys, time
    #   from pkg_resources import resource_filename
    #   from gtfsdb import config, util
    @classmethod
    def load(cls, db, **kwargs):
        """Load method for ORM

        arguments:
            db: instance of gtfsdb.Database

        keyword arguments:
            gtfs_directory: path to unzipped GTFS files
            batch_size: batch size for memory management
        """
        log = logging.getLogger(cls.__module__)
        start_time = time.time()
        batch_size = kwargs.get('batch_size', config.DEFAULT_BATCH_SIZE)
        directory = None
        transport_mode = kwargs.get('transport_mode')
        modified_date = kwargs.get('modified_date')

        # pick the source directory: the unzipped GTFS feed, or the lookup
        # data packaged with gtfsdb
        if cls.datasource == config.DATASOURCE_GTFS:
            directory = kwargs.get('gtfs_directory')
        elif cls.datasource == config.DATASOURCE_LOOKUP:
            directory = resource_filename('gtfsdb', 'data')

        records = []
        file_path = os.path.join(directory, cls.filename)
        if os.path.exists(file_path):
            f = open(file_path, 'r')
            utf8_file = util.UTF8Recoder(f, 'utf-8-sig')
            reader = csv.DictReader(utf8_file)
            reader.fieldnames = [
                field.strip().lower() for field in reader.fieldnames
            ]
            table = cls.__table__
            # try:
            #     db.engine.execute(table.delete())
            # except:
            #     log.debug("NOTE: couldn't delete this table")

            i = 0
            for row in reader:
                # stamp every row with the extra columns supplied by the caller
                row['transport_mode'] = transport_mode
                row['date_modified'] = modified_date

                records.append(cls.make_record(row))
                i += 1
                if i >= batch_size:
                    # flush a full batch, then reset the buffer and counter
                    db.engine.execute(table.insert(), records)
                    sys.stdout.write('*')  # crude progress indicator
                    records = []
                    i = 0
            if len(records) > 0:
                db.engine.execute(table.insert(), records)
            f.close()
        process_time = time.time() - start_time
        log.debug('{0}.load ({1:.0f} seconds)'.format(cls.__name__,
                                                      process_time))
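
A minimal usage sketch (the model class, database URL, and directory below are illustrative assumptions, not taken from the source):

# hypothetical caller: each ORM model inherits load(), so you hand it a
# gtfsdb.Database instance plus the path to an unzipped GTFS feed
from gtfsdb import Database, Route

db = Database(url='sqlite:///gtfs.db')
Route.load(db, gtfs_directory='/path/to/unzipped/gtfs', batch_size=10000)
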
Example #2
    @classmethod
    def load(cls, db, **kwargs):
        """
        Load method for ORM

        arguments:
            db: instance of gtfsdb.Database

        keyword arguments:
            gtfs_directory: path to unzipped GTFS files
            batch_size: batch size for memory management
        """

        # step 0: set up some vars; point the log output at the child of Base that we're processing
        start_time = time.time()
        batch_size = kwargs.get('batch_size', config.DEFAULT_BATCH_SIZE)
        log = logging.getLogger(cls.__module__)

        # step 1: check that we have elements of a file path (a file name and a directory) for the data we'll load
        if cls.filename is None:
            log.info(
                "{0} lacks a 'filename' attribute ... not loading a null file (exit load)."
                .format(cls.__name__))
            return  # note early exit
        if cls.datasource not in (config.DATASOURCE_GTFS, config.DATASOURCE_LOOKUP):
            log.info(
                "{0}.datasource != DATASOURCE_GTFS or DATASOURCE_LOOKUP (exit load)."
                .format(cls.__name__))
            return  # note early exit

        # step 2: load either a GTFS file from the unzipped file or a resource file (from a dir specified in config)
        directory = None
        if cls.datasource == config.DATASOURCE_GTFS:
            directory = kwargs.get('gtfs_directory')
        elif cls.datasource == config.DATASOURCE_LOOKUP:
            directory = resource_filename('gtfsdb', 'data')

        # step 3: load the file
        log.info("load {0}".format(cls.__name__))
        records = []
        file_path = os.path.join(directory, cls.filename)
        if os.path.exists(file_path):
            if sys.version_info >= (3, 0):
                f = open(file_path, 'rb')
            else:
                f = open(file_path, 'r')
            utf8_file = util.UTF8Recoder(f, 'utf-8-sig')
            reader = csv.DictReader(utf8_file)
            reader.fieldnames = [
                field.strip().lower() for field in reader.fieldnames
            ]
            table = cls.__table__
            try:
                # clear any existing rows before re-loading this table
                db.engine.execute(table.delete())
            except Exception:
                log.debug("NOTE: couldn't delete this table")

            i = 0
            for row in reader:
                records.append(cls.make_record(row))
                i += 1
                if i >= batch_size:
                    db.engine.execute(table.insert(), records)
                    sys.stdout.write('*')
                    records = []
                    i = 0
            if len(records) > 0:
                db.engine.execute(table.insert(), records)
            f.close()

        # step 4: done...
        process_time = time.time() - start_time
        log.debug('{0}.load ({1:.0f} seconds)'.format(cls.__name__,
                                                      process_time))
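
The heart of both variants is the same memory-management pattern: buffer make_record() dicts and flush one multi-row INSERT per batch_size rows. Below is a standalone sketch of that pattern using the same SQLAlchemy 1.x engine.execute() API seen above (the function and argument names are illustrative, not part of gtfsdb):

def batch_insert(engine, table, rows, batch_size=10000):
    """Insert an iterable of row dicts in fixed-size batches."""
    records = []
    for row in rows:
        records.append(row)
        if len(records) >= batch_size:
            # executemany-style insert: one statement, many parameter sets
            engine.execute(table.insert(), records)
            records = []
    if records:
        # flush the final partial batch
        engine.execute(table.insert(), records)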