示例#1
0
def main(data_folder):
    """Rebuild the SQLite database in ``data_folder`` from its ``.txt`` files.

    Steps, in order:

    1. Every ``.db`` file found directly in ``data_folder`` is deleted.
    2. A new ``database.db`` is created in ``data_folder``.
    3. Each ``.txt`` file is parsed as CSV (dialect sniffed from its first
       1024 bytes) and loaded into a table named after the file, without the
       extension. The header row supplies the column names.
    4. If a file has both an ``arrival_time`` and a ``departure_time``
       column, the table gets two additional columns, ``arrival_time_sec``
       and ``departure_time_sec``, filled with
       ``utils.convert_time_to_sec`` applied to the original values.
    5. A fixed set of indexes on ``stops``, ``stop_times`` and ``transfers``
       is created.

    The connection is closed even when loading fails part-way through.

    :param data_folder: directory containing the ``.txt`` input files; the
        database file is written to the same directory.
    """
    logger.info("Creating database in {0}".format(data_folder))

    # List the folder once and bucket the regular files by extension.
    txt_files = []
    db_files = []
    for f in listdir(data_folder):
        if not isfile(join(data_folder, f)):
            continue
        extension = splitext(f)[1]
        if extension == ".txt":
            txt_files.append(f)
        elif extension == ".db":
            db_files.append(f)

    # delete all existing database files
    for f in db_files:
        logger.info("Deleting existing database file: {0}".format(join(data_folder, f)))
        remove(join(data_folder, f))

    # Source column -> derived column holding the converted time value.
    search_fields = {"arrival_time": "arrival_time_sec", "departure_time": "departure_time_sec"}

    # create new file
    db_path = join(data_folder, "database.db")
    logger.info("Creating database file: {0}".format(db_path))
    con = sqlite3.connect(db_path)
    try:
        cur = con.cursor()

        for f in txt_files:
            file_path = join(data_folder, f)
            logger.info("Processing file: {0}".format(file_path))

            # we use the file name as table name
            table_name = splitext(f)[0]

            # NOTE: binary mode here and ``unicode`` below follow the
            # Python 2 csv module conventions this file is written for.
            with open(file_path, 'rb') as csvfile:

                # try to determine the csv dialect
                dialect = Sniffer().sniff(csvfile.read(1024))
                csvfile.seek(0)
                reader = DictReader(csvfile, dialect=dialect)

                # ``fieldnames`` is None when no header row could be read,
                # so test truthiness instead of calling len() on it.
                header = reader.fieldnames
                if not header:
                    logger.error("No header read from file: {0}. Ignoring this file.".format(file_path))
                    continue

                # this will be our fields (columns); work on a copy so the
                # reader's own header list is left untouched
                field_names = list(header)

                # The derived columns are only added when both source
                # columns are present in the file.
                if set(search_fields.keys()).issubset(field_names):
                    field_names.extend(search_fields.values())

                fields_in_statement = []
                for field in field_names:
                    if field in __real_columns:
                        fields_in_statement.append(field + " REAL")
                    elif field in __integer_columns:
                        fields_in_statement.append(field + " INTEGER")
                    else:
                        fields_in_statement.append(field)

                statement = "create table {0}({1});".format(table_name, ", ".join(fields_in_statement))
                _execute(cur, statement)

                # The insert statement is the same for every row of this
                # file, so build it once rather than per row.
                placeholders = ", ".join(["?"] * len(field_names))
                insert_statement = "insert into {0} values ({1});".format(table_name, placeholders)

                count = 0
                for row in reader:

                    for source, target in search_fields.items():
                        if source in row:
                            row[target] = utils.convert_time_to_sec(unicode(row[source], 'utf-8'))

                    data = [unicode(str(row[n]), 'utf-8') for n in field_names]

                    cur.execute(insert_statement, data)
                    count += 1
                logger.info("Read {0} rows into {1}".format(count, table_name))

                con.commit()

        index_statements = (
            "CREATE UNIQUE INDEX stops_stop_id_index ON stops (stop_id)",
            "CREATE INDEX stop_times_stop_id_index ON stop_times (stop_id )",
            "CREATE INDEX stop_times_trip_id_index ON stop_times (trip_id )",
            "CREATE INDEX transfers_transfer_time_index ON transfers (min_transfer_time)",
            "CREATE INDEX transfers_transfer_from_stop_id_index ON transfers (from_stop_id)",
            "CREATE INDEX stops_stop_name_idx ON stops (stop_name)",
        )
        for statement in index_statements:
            _execute(cur, statement)

        con.commit()
    finally:
        # Release the database handle even if parsing or SQL execution failed.
        con.close()

    logger.info("Done.")
示例#2
0
 def travel_time(self, time):
     """Store ``time`` on the instance after passing it through ``convert_time_to_sec``.

     :param time: value handed unchanged to ``convert_time_to_sec``
         (presumably a time string that is turned into seconds -- confirm
         against that helper).
     """
     converted = convert_time_to_sec(time)
     self._travel_time = converted
示例#3
0
 def time_window(self, time_window):
     """Store ``time_window`` on the instance after converting it with ``convert_time_to_sec``.

     :param time_window: value handed unchanged to ``convert_time_to_sec``
         (presumably a time string that is turned into seconds -- confirm
         against that helper).
     """
     converted = convert_time_to_sec(time_window)
     self._time_window_sec = converted
示例#4
0
 def start_time(self, time):
     """Store ``time`` as the start time after converting it with ``convert_time_to_sec``.

     :param time: value handed unchanged to ``convert_time_to_sec``
         (presumably a time string that is turned into seconds -- confirm
         against that helper).
     """
     converted = convert_time_to_sec(time)
     self._start_time_sec = converted