def create_clean_csv(csv_file_path):
    """Write a digits-only copy of a CSV file into the ``d3_csv/`` directory.

    Every data row of the input is routed to an output file named
    ``<input base name>_<clean_data.monthly_or_quarterly(row)>.csv``.  A
    column whose header is the empty string is renamed to ``date``, and every
    value is reduced to its digits, ``.`` and ``,`` characters; all other
    characters (date separators, minus signs, letters, ...) are dropped.

    An output file is truncated and given a header row the first time it is
    used during a call; later rows routed to the same file are appended, so
    the result contains the header followed by every cleaned row.

    Args:
        csv_file_path: '/'-separated path of the input CSV.  The last four
            characters of its file name (expected to be ``.csv``) are
            stripped to build the output file name.
    """
    old_file_name = csv_file_path.split('/')[-1][:-4]
    kept_punctuation = ('.', ',')
    # Output files already created (and given a header) during this call.
    started_files = set()

    # newline='' is what the csv module expects for both reading and writing.
    with open(csv_file_path, 'r', newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            # The period is derived from the raw row, before '' is renamed.
            new_file_name = (old_file_name + '_'
                             + clean_data.monthly_or_quarterly(row) + '.csv')

            # The unnamed first column holds the date; give it a real header.
            columns = ['date' if column == '' else column for column in row]
            if '' in row:
                row['date'] = row.pop('')

            # Keep only digits and the decimal/thousands separators.  A short
            # input row yields None for its missing cells; treat it as empty.
            cleaned_row = {
                key: ''.join(
                    char for char in (value or '')
                    if char.isdigit() or char in kept_punctuation
                )
                for key, value in row.items()
            }

            is_new_file = new_file_name not in started_files
            started_files.add(new_file_name)

            # Truncate only on first use; reopening with 'w' for every row
            # would leave just the last row in the file.
            mode = 'w' if is_new_file else 'a'
            with open('d3_csv/' + new_file_name, mode,
                      newline='') as new_csv_file:
                csv_writer = csv.DictWriter(new_csv_file, fieldnames=columns)
                if is_new_file:
                    print(new_file_name)
                    csv_writer.writeheader()
                csv_writer.writerow(cleaned_row)
Exemplo n.º 2
0
def load_to_postgres(csv_file_path):
    """Recreate a Postgres table for a CSV file and load its rows into it.

    The table name is the file name (without ``.csv``) joined by ``_`` to the
    result of ``clean_data.monthly_or_quarterly`` for the first data row.  The
    table is dropped and re-created when that first row is read; a failure
    while doing so is printed and loading carries on.  Every row, the first
    included, is then passed to ``populate_row``.

    Args:
        csv_file_path: '/'-separated path of the CSV file to load.
    """
    with open(csv_file_path, 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        file_name = csv_file_path.split('/')[-1].replace('.csv', '')

        table_created = False
        for row in reader:
            if not table_created:
                # Only the first data row decides the table name.
                table_created = True
                period = clean_data.monthly_or_quarterly(row)
                table_name = file_name + '_' + period

                # probably a bad way to format, but it's a known data source
                create_table_sql = """ DROP TABLE IF EXISTS {table_name};
                            CREATE TABLE {table_name} (
                                metric_date date not null,
                                metric_values jsonb default '[]'::JSON,
                                initial_release_date date not null,
                                final_release_date date not null
                            );
                        """
                try:
                    pg_cursor.execute(
                        create_table_sql.format(table_name=table_name))
                except Exception as error:
                    print(table_name + ' Error: ' + str(error))

            populate_row(table_name=table_name, row=row)
        print('Loaded file ', file_name)
Exemplo n.º 3
0
def create_clean_csv(csv_file_path):
    """Write a digits-only copy of a CSV file into the ``d3_csv/`` directory.

    Every data row of the input is routed to an output file named
    ``<input base name>_<clean_data.monthly_or_quarterly(row)>.csv``.  A
    column whose header is the empty string is renamed to ``date``, and every
    value is reduced to its digits, ``.`` and ``,`` characters; all other
    characters (date separators, minus signs, letters, ...) are dropped.

    An output file is truncated and given a header row the first time it is
    used during a call; later rows routed to the same file are appended, so
    the result contains the header followed by every cleaned row.

    Args:
        csv_file_path: '/'-separated path of the input CSV.  The last four
            characters of its file name (expected to be ``.csv``) are
            stripped to build the output file name.
    """
    old_file_name = csv_file_path.split('/')[-1][:-4]
    kept_punctuation = ('.', ',')
    # Output files already created (and given a header) during this call.
    started_files = set()

    # newline='' is what the csv module expects for both reading and writing.
    with open(csv_file_path, 'r', newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            # The period is derived from the raw row, before '' is renamed.
            new_file_name = (old_file_name + '_'
                             + clean_data.monthly_or_quarterly(row) + '.csv')

            # The unnamed first column holds the date; give it a real header.
            columns = ['date' if column == '' else column for column in row]
            if '' in row:
                row['date'] = row.pop('')

            # Keep only digits and the decimal/thousands separators.  A short
            # input row yields None for its missing cells; treat it as empty.
            cleaned_row = {
                key: ''.join(
                    char for char in (value or '')
                    if char.isdigit() or char in kept_punctuation
                )
                for key, value in row.items()
            }

            is_new_file = new_file_name not in started_files
            started_files.add(new_file_name)

            # Truncate only on first use; reopening with 'w' for every row
            # would leave just the last row in the file.
            mode = 'w' if is_new_file else 'a'
            with open('d3_csv/' + new_file_name, mode,
                      newline='') as new_csv_file:
                csv_writer = csv.DictWriter(new_csv_file, fieldnames=columns)
                if is_new_file:
                    print(new_file_name)
                    csv_writer.writeheader()
                csv_writer.writerow(cleaned_row)
Exemplo n.º 4
0
def load_to_postgres(csv_file_path):
    """Create a table from a CSV file's columns and load its rows into it.

    The table name is the file name (without ``.csv``) joined by ``_`` to the
    result of ``clean_data.monthly_or_quarterly`` for the first data row.
    ``table_from_csv`` is called once with that name and the first row's
    column names, then every row is passed to ``populate_row``.  A file with
    no data rows creates no table.

    Args:
        csv_file_path: '/'-separated path of the CSV file to load.
    """
    with open(csv_file_path, 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        file_name = csv_file_path.split('/')[-1].replace('.csv', '')

        # Pull the first row off separately: it names and shapes the table.
        first_row = next(reader, None)
        if first_row is not None:
            period = clean_data.monthly_or_quarterly(first_row)
            table_name = file_name + '_' + period
            table_from_csv(table_name=table_name,
                           column_names=first_row.keys())

            populate_row(table_name=table_name, row=first_row)
            for remaining_row in reader:
                populate_row(table_name=table_name, row=remaining_row)

        print('Loaded file ', file_name)
def load_to_postgres(csv_file_path):
    """Create a table from a CSV file's columns and load its rows into it.

    The first data row determines the table name (file name without ``.csv``
    plus ``_`` and ``clean_data.monthly_or_quarterly(row)``) and supplies the
    column names given to ``table_from_csv``.  Each row, the first included,
    is then passed to ``populate_row``.

    Args:
        csv_file_path: '/'-separated path of the CSV file to load.
    """
    with open(csv_file_path, 'r') as csv_file:
        reader = csv.DictReader(csv_file)
        file_name = csv_file_path.split('/')[-1].replace('.csv', '')

        table_name = None
        for record in reader:
            # The table is set up lazily, once the first row is available.
            if table_name is None:
                suffix = clean_data.monthly_or_quarterly(record)
                table_name = file_name + '_' + suffix
                table_from_csv(table_name=table_name,
                               column_names=record.keys())

            populate_row(table_name=table_name, row=record)

        print('Loaded file ', file_name)