Example #1
def test_escape_dots(self):
    data = CSVparser.to_dict(current_dir +
                             '/mockups/csv/test_dot_escaping.csv')
    self.assertEqual(data, [{
        "Data$Column2": "data2",
        "Data\uff0eColumn1": "data1"
    }])
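The fullwidth full stop (U+FF0E) in the expected key suggests the parser rewrites '.' in column names, a common workaround for MongoDB's ban on dots in document keys (the ingestion code in Example #8 stores these dicts in MongoDB). A minimal sketch of that idea, assuming the original header was 'Data.Column1'; the real CSVparser may differ:

def escape_key(key):
    # MongoDB forbids '.' anywhere in a key, while '$' is only forbidden
    # as the first character, so 'Data$Column2' passes through unchanged.
    return key.replace('.', '\uff0e')

assert escape_key('Data.Column1') == 'Data\uff0eColumn1'
assert escape_key('Data$Column2') == 'Data$Column2'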
Example #2
def test_latin_1(self):
    data = CSVparser.to_dict(current_dir + '/mockups/csv/test_unicode.csv')
    self.assertEqual(data, [{
        "column1": "data1",
        "column2": "data2"
    }, {
        "column1": "data3",
        "column2": "Siberië"
    }])
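The value 'Siberië' implies the fixture is not plain ASCII and, per the test name, not UTF-8 either. One plausible way a parser supports such files is an encoding fallback; this is a sketch of the idea, not necessarily what CSVparser does:

def read_text(path):
    # Try strict UTF-8 first; Latin-1 maps every possible byte, so the
    # fallback always decodes (e.g. 0xEB becomes 'ë') instead of raising.
    try:
        with open(path, encoding='utf-8') as handle:
            return handle.read()
    except UnicodeDecodeError:
        with open(path, encoding='latin-1') as handle:
            return handle.read()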
Example #3
def test_numerical(self):
    data = CSVparser.to_dict(current_dir + '/mockups/csv/test_numbers.csv')
    self.assertEqual(data, [{
        "column1": "data1",
        "num": 1
    }, {
        "column1": "data3",
        "num": 2
    }])
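The 'num' values come back as integers while 'column1' stays a string, so the parser evidently coerces numeric cells. A hedged sketch of per-cell coercion, assuming int is tried before float:

def coerce_cell(text):
    # Try the narrowest type first; leave non-numeric strings untouched.
    for caster in (int, float):
        try:
            return caster(text)
        except ValueError:
            pass
    return text

assert coerce_cell('1') == 1
assert coerce_cell('data1') == 'data1'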
Example #4
def test_simple(self):
    data = CSVparser.to_dict(current_dir + '/mockups/csv/test_simple.csv')
    self.assertEqual(data, [{
        "column1": "data1",
        "column2": "data2"
    }, {
        "column1": "data3",
        "column2": "data4"
    }])
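The expected list of dicts is exactly what the standard library's csv.DictReader yields for a plain two-column file, so the fixture presumably looks like the inline sample below (the comma delimiter is an assumption):

import csv
import io

sample = 'column1,column2\ndata1,data2\ndata3,data4\n'
rows = [dict(row) for row in csv.DictReader(io.StringIO(sample))]
assert rows == [
    {'column1': 'data1', 'column2': 'data2'},
    {'column1': 'data3', 'column2': 'data4'},
]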
Example #5
def test_special_char_escaping(self):
    data = CSVparser.to_dict(current_dir +
                             '/mockups/schema/specialCharacterTest/test.csv')
    schema = SchemaGenerator.generate_schema(data)
    self.assertDictEqual(deep_sort(schema), deep_sort({
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'Data\uff0eColumn1': {'type': 'string'},
                'Data$Column2': {'type': 'string'}
            },
            'required': ['Data\uff0eColumn1', 'Data$Column2']
        }
    }))
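deep_sort is not defined in these examples; it is presumably applied to both sides so the comparison does not depend on element order, e.g. inside the 'required' list. A minimal sketch of such a helper:

def deep_sort(obj):
    # Recursively sort every list (and normalize nested dicts) so two
    # structurally equal schemas compare equal regardless of ordering.
    if isinstance(obj, dict):
        return {key: deep_sort(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return sorted((deep_sort(item) for item in obj), key=repr)
    return obj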
Example #6
def test_schema_generator(self):
    data = CSVparser.to_dict(current_dir + '/mockups/csv/test_simple.csv')
    schema = SchemaGenerator.generate_schema(data)
    self.assertEqual(schema, {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'column1': {'type': 'string'},
                'column2': {'type': 'string'}
            },
            'required': ['column1', 'column2']
        }
    })
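This test pins down the shape SchemaGenerator emits: a JSON Schema for an array of flat objects with every column required. The core of such a generator is a mapping from Python value types to JSON Schema types; a sketch under that assumption (the real implementation may differ, e.g. by relying on a library such as genson):

def json_type(value):
    # bool must be tested before int/float: isinstance(True, int) is True.
    if isinstance(value, bool):
        return 'boolean'
    if isinstance(value, (int, float)):
        return 'number'
    return 'string'

def generate_schema(records):
    properties = {key: {'type': json_type(value)}
                  for key, value in records[0].items()}
    return {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': properties,
            'required': list(properties),
        },
    }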
Example #7
def test_numpy_float_error(self):
    data = CSVparser.to_dict(current_dir +
                             '/mockups/schema/numpy-float64/float64.csv')
    schema = SchemaGenerator.generate_schema(data)
    self.assertEqual(schema, {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'id': {'type': 'number'},
                'put': {'type': 'number'}
            },
            'required': ['id', 'put']
        }
    })
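The test name points at a classic pitfall: numpy scalar types such as numpy.float64 are not Python floats, so naive isinstance checks or JSON serialization can fail on them. A plausible guard, assuming the fix was to convert numpy scalars to built-ins up front:

import numpy as np

def to_plain(value):
    # np.generic covers all numpy scalars (np.float64, np.int64, ...);
    # .item() converts them to the equivalent built-in Python type.
    if isinstance(value, np.generic):
        return value.item()
    return value

assert isinstance(to_plain(np.float64(1.5)), float)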
Example #8
def run(file_path):
    # Init logging and database
    init_logging()
    client, file_col, schema_col, source_data_col = init_mongodb(config)

    # Set up counters and file index
    successfully_ingested_files = 0
    file_counter = 0
    file_list = DirLister.get_file_list_recursive(file_path)

    logging.info('Processing %d files from %s' % (len(file_list), file_path))

    for file in file_list:
        file_counter += 1
        ProgressBar.update_progress(file_counter / len(file_list),
                                    ('Processing file %s' % file))

        # Get the file stats
        document = {
            'stats': FileStatter.stats(file),
            'filePath': file,
            '_id': file,
            'hash': FileStatter.sha1_from_file(file)
        }

        # Load the data or skip if unable
        if file.lower().endswith('.mif'):
            try:
                data = MIFparser.to_dict(file)
            except ValueError as e:
                logging.error(e)
                # if the data loading doesn't work out, just log the error and skip the file
                continue
        elif file.lower().endswith('.mid'):
            logging.debug('Skipping .mid file.')
            continue  # .mid files are processed along with their parent .mif file
        else:
            try:
                data = CSVparser.to_dict(file)
            except ValueError as e:
                logging.error('CSV parsing error on file %s: %s' % (file, e))
                # if the data loading doesn't work out, just log the error and skip the file
                continue

        # Generate the schema and try to ingest it
        try:
            schema_data = SchemaGenerator.generate_schema(data)
        except Exception as e:
            logging.error('Schema error on file %s: %s' % (file, e))
            continue

        schema_hash = FileStatter.sha1(schema_data)
        schema = {
            '_id': schema_hash,
            'schema': schema_data,
        }

        try:
            schema_col.insert_one(schema)
        except DuplicateKeyError:
            logging.debug('Schema %s was previously processed' % schema_hash)
        except Exception as e:
            logging.error('Ingest schema error on file %s: %s' % (file, e))
            # if the schema loading doesn't work out, just log the error and skip the file
            continue

        # Store the source data
        source_data_doc_sha1 = FileStatter.sha1(data)
        source_data_doc = {'_id': source_data_doc_sha1, 'data': data}

        try:
            source_data_col.insert_one(document=source_data_doc)
        except DuplicateKeyError:
            logging.debug('Source data with sha1 %s was previously processed' %
                          source_data_doc_sha1)
        except Exception as e:
            logging.error('Ingest source data error on file %s: %s' %
                          (file, e))
            continue

        # Finalize the file document with the data reference and the schema reference
        document['data'] = source_data_doc_sha1
        document['schema'] = schema['_id']

        try:
            file_col.insert_one(document=document)
        except DuplicateKeyError:
            logging.warning('File %s was previously processed, skipping' %
                            file)
            # Skip to next file
            continue
        except Exception as e:
            logging.error('Ingest file metadata error on file %s: %s' %
                          (file, e))
            continue

        logging.debug('File %s was successfully ingested' % file)
        successfully_ingested_files += 1

    logging.info('Finished!')
    logging.info('Successfully ingested %d files of %d' %
                 (successfully_ingested_files, len(file_list)))
    client.close()
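init_mongodb is not shown here; from the unpacking at the top of run() it must return a client plus the three collections used for file metadata, schemas, and source data. A sketch under those assumptions (the config keys and collection names are hypothetical):

from pymongo import MongoClient

def init_mongodb(config):
    client = MongoClient(config['mongo_uri'])  # hypothetical config key
    db = client[config['database']]  # hypothetical config key
    return client, db['files'], db['schemas'], db['sourceData']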
Example #9
def test_data_hasher_from_schema_dict(self):
    dictionary = CSVparser.to_dict(current_dir +
                                   '/mockups/schema/caseInsensitiveTest/test.csv')
    sha1 = FileStatter.sha1(SchemaGenerator.generate_schema(dictionary))
    self.assertEqual(sha1, 'a59a9b5c48657c3828c4c308cd057997aa7927fb')
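Hashing a dict to a stable sha1 only works if the dict is serialized deterministically first. A sketch of what FileStatter.sha1 plausibly does; this is an assumption, and the exact serialization is what fixes the digest asserted above:

import hashlib
import json

def sha1(obj):
    # sort_keys makes the serialization, and therefore the digest,
    # independent of dict insertion order.
    payload = json.dumps(obj, sort_keys=True).encode('utf-8')
    return hashlib.sha1(payload).hexdigest()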
Example #10
def test_empty_csv(self):
    with self.assertRaisesRegex(ValueError, 'empty or invalid'):
        CSVparser.to_dict(current_dir + '/mockups/csv/empty.csv')
Example #11
def test_no_csv(self):
    with self.assertRaisesRegex(ValueError, 'invalid csv'):
        CSVparser.to_dict(current_dir + '/mockups/csv/test_no_csv.txt')
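assertRaisesRegex only requires the message to contain the quoted substring, so Examples #10 and #11 pin down two failure modes: an empty file and a file that is not CSV at all. A sketch of validation that would satisfy both tests (the single-column heuristic is an assumption):

def validate_rows(path, rows):
    # The messages need only contain the substrings the tests match on.
    if not rows:
        raise ValueError('%s is empty or invalid' % path)
    if all(len(row) < 2 for row in rows):
        # No row ever split on the delimiter: probably not CSV at all.
        raise ValueError('%s is an invalid csv file' % path)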
Example #12
def test_non_unicode(self):
    data = CSVparser.to_dict(current_dir +
                             '/mockups/csv/test_non_unicode.csv')
    self.assertEqual(data[0]['OMSCHRIJF'], 'mal voor reliëf')