Exemplo n.º 1
0
    def test_parse_unicode_data(self):
        """Check that a data file containing non-ASCII text is parsed correctly.

        The expected name for the second row is written as the UTF-8 encoded
        form of u'Barzàne', and ``gen_data()`` called with its default
        arguments is expected to yield a single chunk holding both rows.
        """
        file_format = FileFormat.from_csv('tests/specs/simple_format.csv')
        parser = DataParser('tests/data/simple_format_2015-06-28-unicode.txt', file_format)

        expected_chunks = [[['Foonyor', 1, 1], [u'Barzàne'.encode('utf-8'), 0, -12]]]

        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent unittest versions.
        self.assertEqual(expected_chunks, list(parser.gen_data()))
Exemplo n.º 2
0
    def test_parse_data(self):
        """Check the rows produced by ``gen_data()`` for several chunk sizes.

        The same parser instance is queried three times: with the default
        arguments (all three rows expected in one chunk), with
        ``chunk_size=2`` (two rows, then one) and with ``chunk_size=1``
        (one row per chunk).
        """
        file_format = FileFormat.from_csv('tests/specs/simple_format.csv')
        parser = DataParser('tests/data/simple_format_2015-06-28.txt', file_format)

        # The three rows expected from the fixture file, in file order.
        foonyor = ['Foonyor', 1, 1]
        barzane = ['Barzane', 0, -12]
        quuxitude = ['Quuxitude', 1, 103]

        # assertEqual is the supported spelling; the assertEquals alias is
        # deprecated and no longer exists in recent unittest versions.
        self.assertEqual(
            [[foonyor, barzane, quuxitude]],
            list(parser.gen_data())
        )

        self.assertEqual(
            [[foonyor, barzane], [quuxitude]],
            list(parser.gen_data(chunk_size=2))
        )

        self.assertEqual(
            [[foonyor], [barzane], [quuxitude]],
            list(parser.gen_data(chunk_size=1))
        )
Exemplo n.º 3
0
import argparse
import itertools

from data_parser import DataParser
from database_connector import SimpleDatabaseConnector
from file_formats import FilenamePrefixFormatSelector

# Command-line script: parse a data file and store its rows in a SQL table.
#
# Positional arguments:
#   db_url      database url handed to SimpleDatabaseConnector
#   table_name  name of the table to create
#   data_file   path of the data file to load
parser = argparse.ArgumentParser(description='Process data file and store to SQL.')
parser.add_argument('db_url', help='Database url')
parser.add_argument('table_name', help='Database table to create')
parser.add_argument('data_file', help='Data file to dump')

args = parser.parse_args()

# init
# Load the format specs from 'specs/' and pick the one matching the data file
# (presumably by filename prefix, going by the selector's name -- verify).
format_selector = FilenamePrefixFormatSelector.from_directory('specs/')
# Named file_format rather than format so the builtin format() is not shadowed.
file_format = format_selector.get_format(args.data_file)
data_parser = DataParser(args.data_file, file_format)
db_connector = SimpleDatabaseConnector(args.db_url)

# Parenthesized single-argument print behaves the same as a Python 2 print
# statement and as the Python 3 print() function.
print('Creating table ...')
db_connector.create_table(args.table_name, file_format)  # TODO rollback on failure?

print('Insertion rows ...')
total_inserted = 0
# gen_data() yields the rows in chunks; each chunk is inserted in one call and
# the running total is reported after every chunk.
for rows in data_parser.gen_data():
    db_connector.insert_rows(rows)
    total_inserted += len(rows)
    print('%d rows inserted' % total_inserted)

print('Completed')