def handle(self, *args, **options): data = os.path.join(settings.BASE_DIR, "data") files = list(all_files(data, "*.csv")) for filepath in files: log("Opening file {}\n".format(filepath), "cyan") log(" Loading data ...\n") copy = CopyMapping( Incident, filepath, dict( date="IncidentDate", year="Year", raw_location="Location", address="Edited Street Address", city_and_state="City and State", full_address="Edited Full Address", latitude="Latitude", longitude="Longitude", accuracy_score="Accuracy Score", accuracy_type="Accuracy Type", number="Number", street="Street", city="City", state="State", county="County", zipcode="Zip", ), ) copy.save() log(" Data loaded!\n", "green")
def handle(self, *args, **kwargs):
    ccpath = kwargs['crimeCatPath']
    tppath = kwargs['targetPlacePath']

    # ccpath = '.../ctype2cc_170627.csv'
    print('loadAncil: crimeCat from %s...' % (ccpath))
    c1 = CopyMapping(
        CrimeCat,
        ccpath,
        # And a dict mapping the model fields to CSV headers
        # CSV header = CType, CC
        dict(ctypeDesc='CType', crimeCat='CC'))
    c1.save()
    print('loadAncil: NCrimeCat=%d' % (CrimeCat.objects.all().count()))

    # tppath = '.../targetPlace.csv'
    print('loadAncil: targetPlace from %s ...' % (tppath))
    c2 = CopyMapping(
        TargetPlace,
        tppath,
        # And a dict mapping the model fields to CSV headers
        # CSV header = placeType, ylat, xlng, name, desc
        dict(placeType='placeType', ylat='ylat', xlng='xlng',
             name='name', desc='desc'))
    c2.save()
    print('loadAncil: NTargetPlace=%d' % (TargetPlace.objects.all().count()))
def test_limited_save(self):
    c = CopyMapping(LimitedMockObject,
                    self.name_path,
                    dict(name='NAME', dt='DATE'))
    c.save()
    self.assertEqual(LimitedMockObject.objects.count(), 3)
    self.assertEqual(
        LimitedMockObject.objects.get(name='BEN').dt, date(2012, 1, 1))
def insertData(apps, schema_editor):
    lang = Lang(code='es', name='Español')
    lang.save()

    curdir = os.path.dirname(os.path.realpath(__file__))
    path = os.path.normpath(os.path.join(curdir, os.pardir, os.pardir,
                                         'data', 'mercado_continuo.csv'))
    c = CopyMapping(
        Symbol,
        path,
        dict(name='nombre', ticker='ticker'),
        static_mapping={'market': 'Mercado Continuo', 'type': 'stock'}
    )
    c.save()

    Symbol(ticker='IBEX35', name='IBEX 35', market='Mercado Continuo',
           type=Symbol.INDEX).save()
    Symbol(ticker='IBEXTR', name='IBEX Total Return', market='Mercado Continuo',
           type=Symbol.INDEX).save()

    invertia_quote_tpl = 'https://www.invertia.com/es/mercados/bolsa/empresas/historico?p_p_id=cotizacioneshistoricas_WAR_ivfrontmarketsportlet&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_resource_id=exportExcel&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=2&_cotizacioneshistoricas_WAR_ivfrontmarketsportlet_startDate={{startDate}}&_cotizacioneshistoricas_WAR_ivfrontmarketsportlet_endDate={{endDate}}&_cotizacioneshistoricas_WAR_ivfrontmarketsportlet_idtel={invertia_key}'
    invertia_dividend_tpl = 'https://www.invertia.com/es/mercados/bolsa/empresas/dividendos/-/empresa/{empresa}/{invertia_key}'

    path = os.path.normpath(os.path.join(curdir, os.pardir, os.pardir,
                                         'data', 'invertia_claves.csv'))
    invertia_keys = csv.reader(open(path))
    next(invertia_keys, None)  # skip the headers
    for row in invertia_keys:
        quote_url = invertia_quote_tpl.format(invertia_key=row[2])
        dividend_url = invertia_dividend_tpl.format(empresa=row[1],
                                                    invertia_key=row[2])
        try:
            symbol_id = Symbol.objects.get(ticker=row[0]).id
            SymbolSource(name='invertia', symbol_id=symbol_id, key=quote_url,
                         type=SymbolSource.QUOTE).save()
            SymbolSource(name='invertia', symbol_id=symbol_id, key=dividend_url,
                         type=SymbolSource.DIVIDEND).save()
        except Symbol.DoesNotExist:
            print('Symbol with ticker', row[0], "(%s)" % row[1],
                  'not found. Continuing...')
            continue

    mcs = Symbol.objects.filter(market='Mercado Continuo').exclude(
        ticker__in=('IBEX35', 'IBEXTR'))
    for symbol in mcs:
        SymbolSource(name='quantmod', symbol_id=symbol.id,
                     key='%s.mc' % symbol.ticker,
                     type=SymbolSource.SPLIT).save()
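# insertData above has the (apps, schema_editor) signature of a Django data
# migration. A sketch of how such a function is typically wired in; the app
# label and dependency are hypothetical:
from django.db import migrations


class Migration(migrations.Migration):
    dependencies = [('symbols', '0001_initial')]
    operations = [migrations.RunPython(insertData)]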
def test_field_value_mapping_and_types(self):
    c = CopyMapping(
        BasicMockObject,
        self.mapping_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
        field_value_mapping={
            'name': {
                'ben': 'Master Ben',
                'joe': 'Padawan Joe',
                'jane': 'Jedi Jane'
            },
            'number': {
                'seven': 7,
                'three': 3,
                'five': 5
            }
        },
        field_copy_types={'number': 'text'}
    )
    c.save()
    self.assertEqual(BasicMockObject.objects.count(), 3)
    self.assertEqual(
        list(BasicMockObject.objects.order_by('name')
             .values_list('name', 'number')),
        [('Jedi Jane', 5), ('Master Ben', 7), ('Padawan Joe', 3)]
    )
    self.assertEqual(
        BasicMockObject.objects.get(name='Master Ben').dt,
        date(2012, 1, 1)
    )
def test_foreignkey_mapping(self):
    c = CopyMapping(
        BasicMockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE')
    )
    c.save()
    self.assertEqual(BasicMockObject.objects.count(), 3)
    c = CopyMapping(
        BasicMockObject,
        self.fk_mapping_path,
        dict(name='NAME', number='NUMBER', dt='DATE', parent='PARENT'),
        field_value_mapping={
            'parent': {
                'ben': BasicMockObject.objects.get(name='ben').pk,
                'joe': BasicMockObject.objects.get(name='joe').pk,
                'jane': BasicMockObject.objects.get(name='jane').pk
            },
        },
        field_copy_types={'parent': 'text'}
    )
    c.save()
    self.assertEqual(BasicMockObject.objects.count(), 6)
    self.assertEqual(
        list(BasicMockObject.objects.order_by('name')
             .values_list('name', 'parent__name')),
        [('ben', None), ('ben junior', 'ben'),
         ('jane', None), ('jane junior', 'jane'),
         ('joe', None), ('joe junior', 'joe')]
    )
def test_static_mapping_ignore_non_mapped_headers(self):
    c = CopyMapping(
        BasicMockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
    )
    c.save()
    self.assertEqual(BasicMockObject.objects.count(), 3)
    c = CopyMapping(
        BasicMockObject,
        self.fk_mapping_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
        static_mapping={
            'parent': BasicMockObject.objects.get(name='ben').pk
        },
        ignore_non_mapped_headers=True
    )
    c.save()
    self.assertEqual(BasicMockObject.objects.count(), 6)
    self.assertEqual(
        list(BasicMockObject.objects.order_by('name')
             .values_list('name', 'parent__name')),
        [('ben', None), ('ben junior', 'ben'),
         ('jane', None), ('jane junior', 'ben'),
         ('joe', None), ('joe junior', 'ben')]
    )
def test_bad_static_values(self):
    with self.assertRaises(ValueError):
        c = CopyMapping(ExtendedMockObject,
                        self.name_path,
                        dict(name='NAME', number='NUMBER', dt='DATE'),
                        encoding='UTF-8',
                        static_mapping=dict(static_bad=1))
        c.save()
def test_encoding_save(self):
    c = CopyMapping(MockObject,
                    self.null_path,
                    dict(name='NAME', number='NUMBER', dt='DATE'),
                    encoding='UTF-8')
    c.save()
    self.assertEqual(MockObject.objects.count(), 5)
    self.assertEqual(MockObject.objects.get(name='BADBOY').number, None)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt, date(2012, 1, 1))
def handle(self, *args, **kwargs):
    c = CopyMapping(
        # Give it the model
        Forum,
        # The path to your CSV
        args[1],
        # And a dict mapping the model fields to CSV headers
        dict(article_title='article_title'))
    # Then save it.
    c.save()
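# The snippet above reads the CSV path from *args, which relies on Django's
# pre-1.8 positional-argument handling. A sketch of the modern equivalent;
# the argument name is illustrative:
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    def add_arguments(self, parser):
        parser.add_argument('csv_path')

    def handle(self, *args, **options):
        c = CopyMapping(Forum, options['csv_path'],
                        dict(article_title='article_title'))
        c.save()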
def test_save_foreign_key(self):
    c = CopyMapping(
        MockObject,
        self.foreign_path,
        dict(name='NAME', number='NUMBER', dt='DATE', parent='PARENT'))
    c.save()
    self.assertEqual(MockObject.objects.count(), 3)
    self.assertEqual(MockObject.objects.get(name='BEN').parent_id, 4)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt, date(2012, 1, 1))
def test_silent_save(self):
    c = CopyMapping(
        MockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
    )
    c.save(silent=True)
    self.assertEqual(MockObject.objects.count(), 3)
    self.assertEqual(MockObject.objects.get(name='BEN').number, 1)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt, date(2012, 1, 1))
def test_static_values(self):
    c = CopyMapping(ExtendedMockObject,
                    self.name_path,
                    dict(name='NAME', number='NUMBER', dt='DATE'),
                    static_mapping=dict(static_val=1, static_string='test'))
    c.save()
    self.assertEqual(
        ExtendedMockObject.objects.filter(static_val=1).count(), 3)
    self.assertEqual(
        ExtendedMockObject.objects.filter(static_string='test').count(), 3)
def handle(self, *args, **kwargs):
    c = CopyMapping(
        # Give it the model
        Person,
        # The path to your CSV
        '/path/to/my/data.csv',
        # And a dict mapping the model fields to CSV headers
        dict(name='NAME', number='NUMBER', dt='DATE')
    )
    # Then save it.
    c.save()
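# A minimal sketch of the kind of model the mapping above assumes; the field
# types are guesses from the CSV headers, not taken from an original project:
from django.db import models


class Person(models.Model):
    name = models.CharField(max_length=500)
    number = models.IntegerField(null=True)
    dt = models.DateField(null=True)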
def test_pipe_save(self):
    c = CopyMapping(
        MockObject,
        self.pipe_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
        delimiter="|",
    )
    c.save()
    self.assertEqual(MockObject.objects.count(), 3)
    self.assertEqual(MockObject.objects.get(name='BEN').number, 1)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt, date(2012, 1, 1))
def handle(self, *args, **options):
    SalaryRecord.objects.all().delete()
    clean_file_path = os.path.join(settings.DATA_DIR, 'berkeley_faculty.csv')
    fields = [
        field.name for field in SalaryRecord._meta.get_fields()
        if field.name != 'id'
    ]
    mapping = {field: field for field in fields}
    c = CopyMapping(SalaryRecord, clean_file_path, mapping)
    c.save()
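# The identity-mapping pattern above generalizes to any model whose CSV
# headers match its field names. A hedged helper sketch (the function is
# ours, not part of django-postgres-copy); it skips the auto primary key,
# which the COPY should not try to fill:
def identity_mapping(model):
    return {f.name: f.name for f in model._meta.fields if f.name != 'id'}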
def test_field_override_save(self):
    c = CopyMapping(
        MockObject,
        self.null_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
    )
    c.save()
    self.assertEqual(MockObject.objects.count(), 5)
    self.assertEqual(MockObject.objects.get(name='BADBOY').number, None)
    self.assertEqual(
        MockObject.objects.get(name='BEN').dt, date(2012, 1, 1))
def test_overload_save(self):
    c = CopyMapping(
        OverloadMockObject,
        self.name_path,
        dict(name='NAME', number='NUMBER', dt='DATE'),
        overloaded_mapping=dict(lower_name='NAME')
    )
    c.save()
    self.assertEqual(OverloadMockObject.objects.count(), 3)
    self.assertEqual(OverloadMockObject.objects.get(name='BEN').number, 1)
    self.assertEqual(OverloadMockObject.objects.get(lower_name='ben').number, 1)
    self.assertEqual(
        OverloadMockObject.objects.get(name='BEN').dt, date(2012, 1, 1))
    omo = OverloadMockObject.objects.first()
    self.assertEqual(omo.name.lower(), omo.lower_name)
def load_postgresql(self): """ Load the file into a PostgreSQL database using COPY """ # Drop all the records from the target model's real table self.cursor.execute('TRUNCATE TABLE "%s" CASCADE' % (self.model._meta.db_table)) c = CopyMapping( self.model, self.csv, dict((f.name, f.db_column) for f in self.model._meta.fields), using=self.database ) c.save(silent=True) # Print out the results if self.verbosity > 2: csv_count = self.get_row_count() model_count = self.model.objects.count() self.finish_load_message(model_count, csv_count)
def load_postgresql(self, model, csv_path): """ Takes a model and a csv_path and loads it into postgresql """ # Drop all the records from the target model's real table self.cursor.execute('TRUNCATE TABLE "%s" CASCADE' % (model._meta.db_table)) c = CopyMapping( model, csv_path, dict((f.name, f.db_column) for f in model._meta.fields), ) c.save(silent=True) # Print out the results csv_count = self.get_row_count(csv_path) model_count = model.objects.count() self.finish_load_message(model_count, csv_count)
def load_postgresql(self, model, csv_path): """ Takes a model and a csv_path and loads it into postgresql """ # Drop all the records from the target model's real table self.cursor.execute('TRUNCATE TABLE "%s" CASCADE' % ( model._meta.db_table )) c = CopyMapping( model, csv_path, dict((f.name, f.db_column) for f in model._meta.fields), ) c.save(silent=True) # Print out the results csv_count = self.get_row_count(csv_path) model_count = model.objects.count() self.finish_load_message(model_count, csv_count)
def test_overload_save(self):
    c = CopyMapping(
        OverloadMockObject,
        self.name_path,
        dict(name='NAME', lower_name='NAME', upper_name='NAME',
             number='NUMBER', dt='DATE'),
    )
    c.save()
    self.assertEqual(OverloadMockObject.objects.count(), 3)
    self.assertEqual(OverloadMockObject.objects.get(name='ben').number, 1)
    self.assertEqual(
        OverloadMockObject.objects.get(lower_name='ben').number, 1)
    self.assertEqual(
        OverloadMockObject.objects.get(upper_name='BEN').number, 1)
    self.assertEqual(
        OverloadMockObject.objects.get(name='ben').dt, date(2012, 1, 1))
    omo = OverloadMockObject.objects.first()
    self.assertEqual(omo.name.lower(), omo.lower_name)
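# The test above maps the same NAME column onto several fields. In
# django-postgres-copy this works because the model can supply a per-field
# SQL transform. A hedged sketch of that hook (the method naming follows the
# library's copy_<field>_template convention; field sizes are guesses):
from django.db import models


class OverloadMockObject(models.Model):
    name = models.CharField(max_length=500)
    lower_name = models.CharField(max_length=500)
    upper_name = models.CharField(max_length=500)

    def copy_lower_name_template(self):
        return 'lower("%(name)s")'

    def copy_upper_name_template(self):
        return 'upper("%(name)s")'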
def load_postgresql(self): """ Load the file into a PostgreSQL database using COPY """ # Drop all the records from the target model's real table self.cursor.execute('TRUNCATE TABLE "%s" CASCADE' % (self.model._meta.db_table)) c = CopyMapping( self.model, self.csv, dict((f.name, f.db_column) for f in self.model._meta.fields), using=self.database, ) c.save(silent=True) # Print out the results if self.verbosity > 2: csv_count = self.get_row_count() model_count = self.model.objects.count() self.finish_load_message(model_count, csv_count)
def import_csv(self, request):
    if 'csv' not in request.FILES:
        return JsonResponse(
            status=status.HTTP_400_BAD_REQUEST,
            data={
                'success': False,
                'errors': ["Missing 'csv' parameter"]
            }
        )
    csv = request.FILES['csv']
    file_path = "/tmp/{0}.csv".format(str(uuid.uuid4()))
    with open(file_path, 'wb+') as destination:
        for chunk in csv.chunks():
            destination.write(chunk)
    try:
        copy_mapping = CopyMapping(
            models.Concept,
            file_path,
            ConceptViewSet.CSV_COLUMN_MAPPING
        )
        copy_mapping.save()
    except (ValueError, DatabaseError):
        expected_columns = ', '.join(ConceptViewSet.CSV_COLUMN_MAPPING.keys())
        return JsonResponse(
            status=status.HTTP_400_BAD_REQUEST,
            data={
                'success': False,
                'errors': ['CSV import error. Expected columns are {0}'.format(
                    expected_columns)]
            }
        )
    finally:
        os.remove(file_path)
    return JsonResponse({
        'success': True,
    })
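# CSV_COLUMN_MAPPING above is a viewset-level constant mapping Concept model
# fields to the CSV headers the upload must contain. A hypothetical sketch of
# its shape (keys and headers are illustrative, not from the original project):
CSV_COLUMN_MAPPING = {
    'name': 'name',
    'definition': 'definition',
}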
def insertRecords(request):
    # Save the data into the Django model
    c = CopyMapping(
        # Give it the model
        Product,
        # The path to your CSV
        'file.csv',
        # And a dict mapping the model fields to CSV headers
        dict(category='category', name='name', image='image', price='price')
    )
    # Then save it.
    c.save()

    # Save the records into the PostgreSQL database directly
    conn = psycopg2.connect("dbname='testdb' user='******' host='localhost' password='******'")
    cur = conn.cursor()
    filesource = 'file.csv'
    reader = csv.reader(open(filesource, 'r'))
    next(reader, None)  # skip the header row
    for row in reader:
        cur.execute(
            "INSERT INTO test_table (category, image, price, name) "
            "VALUES (%s, %s, %s, %s)",
            (row[0], row[1], row[2], row[3]))
    conn.commit()
    conn.close()

    # Used for accessing data from the PostgreSQL database
    conn = psycopg2.connect("dbname='testdb' user='******' host='localhost' password='******'")
    cur = conn.cursor()
    cur.execute("select * from test_table")
    results = []
    rows = cur.fetchall()
    for row in rows:
        results.append(row)
    return render(request, 'insert.html', {})
def handle(self, *args, **options):
    if options['ticker']:
        symbols = [Symbol.objects.get(ticker=options['ticker'])]
    else:
        symbols = Symbol.objects.all().order_by('ticker')

    curdir = os.path.dirname(os.path.realpath(__file__))
    for symbol in symbols:
        path = os.path.normpath(os.path.join(
            curdir, os.pardir, os.pardir, os.pardir,
            'data', 'ohlcv', '%s.csv' % symbol.ticker))
        print('Importing %s' % path)
        try:
            c = CopyMapping(
                # Give it the model
                SymbolQuote,
                # The path to your CSV
                path,
                # And a dict mapping the model fields to CSV headers
                dict(date='date', open='open', close='close',
                     high='high', low='low', volume='volume'),
                static_mapping={'symbol_id': symbol.id}
            )
            c.save()
            self.stdout.write(self.style.SUCCESS(
                'Successfully imported OHLCV rows for %s' % symbol.name))
        except ValueError:
            self.stdout.write(self.style.ERROR(
                'Could not find CSV file for %s in %s' % (symbol.name, path)))
def handle(self, *args, **options): """ For each of the filing types we support, loops through the downloaded CSVs of that type, using the CopyMapping library to dump the records into the raw database, preserving all fields from the original data. """ self.header("Load raw FEC filings") name_to_model = { "F3X": RawF3XFiling, "F3P": RawF3PFiling, "F24": RawF24Filing, "sa": RawContribution, "se": RawIndependentExpenditure, } if options['flush']: self.log('Flushing the DB') for model in name_to_model.values(): model.objects.all().delete() # Characters to strip from filenames to get filing number ignore = ascii_letters + punctuation loaded = 0 for name, model in name_to_model.items(): data_dir = os.path.join(self.DATA_DIR, name) # If, for some reason, we have no filings of this type if not os.path.exists(data_dir): continue # We're making a mapping of CSV to model fields for CopyMapping # In this case, they're identical. mapping = { field: field for field in model._meta.get_all_field_names() } for file_name in os.listdir(data_dir): # Hidden files will mess with us if not file_name.startswith('.'): # Remove our list of ignore characters from the file name # to give us just the filing number. filing_no = int(file_name.translate(None, ignore)) if model.objects.filter(filing_no=filing_no).exists(): self.log( '{} CSV for filing {} is already in the DB'.format( name, filing_no)) else: self.log('Loading {} CSV for filing {}'.format( name, filing_no)) try: c = CopyMapping(model, os.path.join(data_dir, file_name), mapping) c.save(silent=True) loaded += 1 except: self.failure( 'Error on {} CSV for filing {}'.format( name, filing_no)) self.success("Loaded {} CSVs".format(loaded))
def handle(self, *args, **options):
    d = datetime.datetime
    start_time = d.now()
    self.stdout.write('\n')
    initial_message = ('About to loop through all files saved in %s directory '
                       'and update Result Stage table for %s. \n' % (
                           save_date_string, election_date_string))
    self.stdout.write(initial_message)
    for file in os.listdir(file_directory):
        full_file_path = file_directory + '/' + file
        delete_all_resultstage_objects(self)
        start_message = ('Started loading file at ' + str(start_time) +
                         ' for ' + election_date_string)
        c = CopyMapping(
            # Give it the model
            ResultStage,
            # The path to your CSV
            full_file_path,
            # And a dict mapping the model fields to CSV headers
            dict(id='id',
                 unique_id='unique_id',
                 raceid='raceid',
                 racetype='racetype',
                 racetypeid='racetypeid',
                 ballotorder='ballotorder',
                 candidateid='candidateid',
                 description='description',
                 electiondate='electiondate',
                 fipscode='fipscode',
                 first='first',
                 incumbent='incumbent',
                 initialization_data='initialization_data',
                 is_ballot_measure='is_ballot_measure',
                 last='last',
                 lastupdated='lastupdated',
                 level='level',
                 national='national',
                 officeid='officeid',
                 officename='officename',
                 party='party',
                 polid='polid',
                 polnum='polnum',
                 precinctsreporting='precinctsreporting',
                 precinctsreportingpct='precinctsreportingpct',
                 precinctstotal='precinctstotal',
                 reportingunitid='reportingunitid',
                 reportingunitname='reportingunitname',
                 runoff='runoff',
                 seatname='seatname',
                 seatnum='seatnum',
                 statename='statename',
                 statepostal='statepostal',
                 test='test',
                 uncontested='uncontested',
                 votecount='votecount',
                 votepct='votepct',
                 winner='winner'))
        loading_message = 'Loading %s.' % (full_file_path)
        self.stdout.write(loading_message)
        self.stdout.write('\n')
        c.save()
        self.stdout.write('Saved')
        end_time = d.now()
        import_length = str(end_time - start_time)
        end_message = 'Finished copying snapshot at ' + str(end_time)
        self.stdout.write('\n')
        self.stdout.write(end_message)
        self.stdout.write('\n')
        self.stdout.write('Loading this snapshot took ' + import_length)
        self.stdout.write('Sleeping for %s' % sleep_time)
        time.sleep(sleep_time)
        self.stdout.write('--------------------------------------')
def import_report(self, instance, report_id):
    report_base = os.path.join(instance.report_dir, report_id)
    logging.info("[%s] Processing %s", instance.id, report_base)

    # First load the metadata
    with open(report_base + '.json', 'r') as handle:
        meta = json.load(handle)

    try:
        validate(meta)
    except Exception as e:
        logging.exception(e)
        return False

    # Next we'll extract files.
    extracted_files = []
    data_map = {}
    with tarfile.open(report_base + '.tar.gz', 'r') as tar:
        for member in tar:
            guessed = self.fix_name(member)
            if guessed == 'unknown':
                continue
            # fancy safe name.
            tmpname = uuid.uuid4().hex + '.tsv'
            extracted_to = os.path.join(TMPDIR, tmpname)
            logging.info("[%s] Extracting %s to %s, guessed:%s",
                         instance.id, member.name, extracted_to, guessed)
            # Record where the 'params' file is or the 'metrics' file.
            data_map[guessed] = extracted_to
            # Change the archive member's name in order to ensure that it
            # is extracted to somewhere with a safe name.
            member.name = tmpname
            # Extract into TMPDIR.
            tar.extract(member, TMPDIR)
            # Track where we put it for cleanup later.
            extracted_files.append(extracted_to)

    if 'jobs' in data_map:
        c = CopyMapping(
            Job,
            data_map['jobs'],
            dict(external_job_id='id',
                 tool_id='tool_id',
                 tool_version='tool_version',
                 state='state',
                 create_time='create_time'),
            quote_character="\b",
            static_mapping={
                'instance_id': instance.id,
            },
            delimiter='\t'
        )
        c.save()

    if 'params' in data_map:
        c = CopyMapping(
            JobParam,
            data_map['params'],
            dict(external_job_id='job_id', name='name', value='value'),
            quote_character="\b",
            static_mapping={
                'instance_id': instance.id,
            },
            delimiter='\t'
        )
        c.save()

    if 'metric_num' in data_map:
        c = CopyMapping(
            MetricNumeric,
            data_map['metric_num'],
            dict(external_job_id='job_id', plugin='plugin',
                 name='name', value='value'),
            quote_character="\b",
            static_mapping={
                'instance_id': instance.id,
            },
            delimiter='\t'
        )
        c.save()

    if 'datasets' in data_map:
        c = CopyMapping(
            Dataset,
            data_map['datasets'],
            dict(
                external_job_id='job_id',
                external_dataset_id='dataset_id',
                extension='extension',
                file_size='file_size',
                param_name='param_name',
                file_type='type',
            ),
            quote_character="\b",
            static_mapping={
                'instance_id': instance.id,
            },
            delimiter='\t'
        )
        c.save()

    for f in extracted_files:
        try:
            logging.info("[%s] Cleaning up %s", instance.id, f)
            os.unlink(f)
        except Exception as e:
            logging.exception(e)

    if 'users' in meta:
        if 'active' in meta['users']:
            instance.users_recent = meta['users']['active']
        if 'total' in meta['users']:
            instance.users_total = meta['users']['total']
    if 'jobs' in meta:
        if 'ok' in meta['jobs']:
            instance.jobs_run = meta['jobs']['ok']
    instance.last_import = report_id
    instance.save()
    return True
def handle(self, *args, **kwargs):
    # Check if data source is from a directory or a url and assign
    # input_file to selected option
    if kwargs['typesource'] == 'web':
        # Write content from url into csv file
        url = str(kwargs['inputfile'])
        logger.info('URL: {0}'.format(url))
        req = requests.get(url)
        url_content = req.content
        csv_path = str(Path(kwargs['directory'] + '/' + kwargs['inputfile']))
        csv_file = open(csv_path, 'wb')
        csv_file.write(url_content)
        csv_file.close()
        input_file = csv_path
    elif kwargs['typesource'] == 'directory':
        input_file = Path(kwargs['inputfile'])
        logger.info('INPUT FILE: {0}'.format(input_file))
    else:
        logger.info('WARNING (lwf_csv_import.py) non-valid value entered for '
                    '"typesource": {0}'.format(kwargs['typesource']))
        return

    # Get the parent class, assumes parent class is in module within
    # lwf/models directory
    parent_name = kwargs['parentclass'].rsplit('.', 1)[-1]
    package = importlib.import_module("lwf.models." + parent_name)
    parent_class = getattr(package, parent_name)

    # Assign variables used to write csv_temporary
    csv_temporary = Path(kwargs['directory'] + '/' + kwargs['model'] +
                         '_temporary.csv')
    input_fields = parent_class.input_fields
    database_fields = [field.name for field in parent_class._meta.fields]
    date_format = parent_class.date_format
    model_class = None
    written_timestamps = []
    rows_before = 24
    rows_after = 0
    rows_buffer = []
    nead_header = []

    # Write data in input_file into csv_temporary with additional
    # computed fields
    try:
        with open(csv_temporary, 'w', newline='') as sink, \
                open(input_file, 'r') as source:
            sink.write(','.join(database_fields) + '\n')
            records_written = 0
            # Skip number of header lines designated in parent class
            # header line count
            for i in range(parent_class.header_line_count):
                first_lines = source.readline()
                nead_header.append(first_lines)
            next(source, None)
            while True:
                line = source.readline()
                if not line:
                    break
                line_array = [
                    v for v in line.strip().split(parent_class.delimiter)
                    if len(v) > 0
                ]

                # Skip header lines that start with designated parent class
                # header symbol. For example: the '#' character
                if line.startswith(parent_class.header_symbol):
                    nead_header.append(line)
                    continue

                if len(line_array) != len(input_fields):
                    error_msg = "Line has {0} values, header {1} columns ".format(
                        len(line_array), len(input_fields))
                    logger.error(error_msg)
                    raise ValueError(error_msg)

                row = {
                    input_fields[i]: line_array[i]
                    for i in range(len(line_array))
                }

                # Process row and add new computed fields.
                # Check which kind of cleaner should be applied
                if kwargs['parentclass'] == 'LWFMeteo':
                    line_clean = get_lwf_meteo_line_clean(row, date_format)
                elif kwargs['parentclass'] == 'LWFStation':
                    line_clean = get_lwf_station_line_clean(row, date_format)
                else:
                    logger.info('WARNING (lwf_csv_import.py) {0} parentclass '
                                'does not exist'.format(kwargs['parentclass']))
                    return

                # Get the model
                class_name = kwargs['model'].rsplit('.', 1)[-1]
                package = importlib.import_module("lwf.models")
                model_class = getattr(package, class_name)

                # Make timestamp_iso value a UTC timezone aware datetime object
                dt_obj = line_clean['timestamp_iso']
                aware_dt = make_aware(dt_obj)

                # Check if record with identical timestamp already exists in
                # table, otherwise write record to temporary csv file after
                # checking for record with duplicate timestamp
                try:
                    model_class.objects.get(timestamp_iso=aware_dt)
                except model_class.DoesNotExist:
                    if line_clean['timestamp_iso'] not in written_timestamps:
                        # keep timestamps length small
                        written_timestamps = written_timestamps[
                            (-1) * min(len(written_timestamps), 1000):]
                        written_timestamps += [line_clean['timestamp_iso']]

                        # slide the row buffer window
                        rows_buffer = rows_buffer[
                            (-1) * min(len(rows_buffer),
                                       rows_before + rows_after):
                        ] + [line_clean]

                        # check values before and after
                        if len(rows_buffer) > rows_after:
                            sink.write(','.join([
                                "{0}".format(v)
                                for v in rows_buffer[-(1 + rows_after)].values()
                            ]) + '\n')
                            records_written += 1

            # Write nead header configuration file if applicable
            if len(nead_header) > parent_class.header_line_count:
                header_symbol = parent_class.header_symbol
                write_nead_config('lwf', nead_header, kwargs['model'],
                                  kwargs['parentclass'], header_symbol)
    except FileNotFoundError as e:
        logger.info('WARNING (lwf_csv_import.py) file not found {0}, '
                    'exception {1}'.format(input_file, e))
        return

    if model_class is None:
        logger.info('WARNING (lwf_csv_import.py) no data found for {0}'.format(
            kwargs['model']))
        return

    # Assign copy_dictionary from database_fields
    copy_dictionary = {
        database_fields[i]: database_fields[i]
        for i in range(0, len(database_fields))
    }

    # Import processed and cleaned data into Postgres database
    c = CopyMapping(
        # Assign model
        model_class,
        # Temporary CSV with input data and computed fields
        csv_temporary,
        # Dictionary mapping the model fields to CSV fields
        copy_dictionary,
    )
    # Then save it.
    c.save()

    # Log import message
    logger.info('{0} successfully imported, {1} new record(s) written in '
                '{2}'.format(kwargs['inputfile'], records_written,
                             kwargs['model']))

    # Delete csv_temporary
    os.remove(csv_temporary)
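# A hedged example of invoking the importer above as a management command.
# The command name is inferred from the 'lwf_csv_import.py' log prefix and
# the option names from the kwargs it reads; the exact flag syntax may differ:
#   python manage.py lwf_csv_import --typesource directory \
#       --inputfile data/meteo.csv --directory /tmp/lwf \
#       --parentclass LWFMeteo --model lwf_meteo_site_a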
def handle(self, *args, **options): """ For each of the filing types we support, loops through the downloaded CSVs of that type, using the CopyMapping library to dump the records into the raw database, preserving all fields from the original data. """ self.header("Load raw FEC filings") name_to_model = { "F3X": RawF3XFiling, "F3P": RawF3PFiling, "F24": RawF24Filing, "sa": RawContribution, "se": RawIndependentExpenditure, } if options['flush']: self.log('Flushing the DB') for model in name_to_model.values(): model.objects.all().delete() # Characters to strip from filenames to get filing number ignore = ascii_letters + punctuation loaded = 0 for name, model in name_to_model.items(): data_dir = os.path.join(self.DATA_DIR, name) # If, for some reason, we have no filings of this type if not os.path.exists(data_dir): continue # We're making a mapping of CSV to model fields for CopyMapping # In this case, they're identical. mapping = { field: field for field in model._meta.get_all_field_names() } for file_name in os.listdir(data_dir): # Hidden files will mess with us if not file_name.startswith('.'): # Remove our list of ignore characters from the file name # to give us just the filing number. filing_no = int(file_name.translate(None, ignore)) if model.objects.filter(filing_no=filing_no).exists(): self.log( '{} CSV for filing {} is already in the DB'.format( name, filing_no ) ) else: self.log( 'Loading {} CSV for filing {}'.format( name, filing_no ) ) try: c = CopyMapping( model, os.path.join(data_dir, file_name), mapping ) c.save(silent=True) loaded += 1 except: self.failure('Error on {} CSV for filing {}'.format(name, filing_no)) self.success("Loaded {} CSVs".format(loaded))
def handle(self, *args, **kwargs):
    # Assign kwargs from url to variables
    inputfile = kwargs['inputfile']
    typesource = kwargs['typesource']
    directory = kwargs['directory']
    app = kwargs['app']
    model = kwargs['model']

    # Validate app
    if not apps.is_installed(app):
        logger.error(f' ERROR app {app} not found')
        return

    # Validate model
    try:
        model_class = get_model_cl(app, model)
    except AttributeError as e:
        logger.error(f' ERROR model {model} not found, exception {e}')
        return

    # Check if data source is from a directory or a url and assign
    # input_file to selected option
    if typesource == 'web':
        # Assign url to inputfile
        url = str(inputfile)
        # Check if sizelimit kwarg passed
        size_limit = kwargs['sizelimit']
        if size_limit:
            # Get size of file to download in megabytes
            info_url = requests.head(url)
            size_bytes_url = info_url.headers['Content-Length']
            size_mb_url = int(size_bytes_url) / (1024 * 1024)
            # If file to download is over limit then log error and
            # stop processing
            if size_mb_url > int(size_limit):
                logger.error(f' ERROR {url} is larger than maximum size '
                             f'allowed: {size_limit} MB')
                return
        # logger.info(f' Started importing input URL: {url}')
        req = requests.get(url)
        url_content = req.content
        input_file = Path(f'{directory}/{model}_downloaded.csv')
        csv_file = open(input_file, 'wb')
        csv_file.write(url_content)
        csv_file.close()
    elif typesource == 'directory':
        input_file = Path(inputfile)
        logger.info(f' Started importing input file: {input_file}')
    else:
        logger.error(f' ERROR non-valid value entered for "typesource": '
                     f'{typesource}')
        return

    # Get parent class name
    parent_class_name = model_class.__base__.__name__

    # Get line cleaner function
    try:
        line_cleaner = self.get_line_cleaner(parent_class_name)
    except Exception as e:
        logger.error(e)
        return

    # Assign other variables used to write csv_temporary
    csv_temporary = Path(f'{directory}/{model}_temporary.csv')
    input_fields = model_class.input_fields
    database_fields = [
        field.name for field in model_class._meta.fields
        if field.name != 'id'
    ]
    date_format = model_class.date_format
    written_timestamps = []
    rows_before = 24
    rows_after = 0
    rows_buffer = []
    nead_header = []

    # Write data in input_file into csv_temporary with additional
    # computed fields
    try:
        with open(csv_temporary, 'w', newline='') as sink, \
                open(input_file, 'r') as source:
            sink.write(','.join(database_fields) + '\n')
            records_written = 0
            # Skip number of header lines designated in parent class
            # header line count
            for i in range(model_class.header_line_count):
                first_lines = source.readline()
                nead_header.append(first_lines)
            next(source, None)
            while True:
                line = source.readline()
                if not line:
                    break
                line_array = [
                    v for v in line.strip().split(model_class.delimiter)
                    if len(v) > 0
                ]

                # Skip header lines that start with designated parent class
                # header symbol. For example: the '#' character
                if line.startswith(model_class.header_symbol):
                    nead_header.append(line)
                    continue

                if len(line_array) != len(input_fields):
                    error_msg = (f' ERROR: line has {len(line_array)} values, '
                                 f'header has {len(input_fields)} columns')
                    logger.error(error_msg)
                    raise ValueError(error_msg)

                row = {
                    input_fields[i]: line_array[i]
                    for i in range(len(line_array))
                }

                # Process row and add new computed fields
                line_clean = line_cleaner(row, date_format)

                # Make timestamp_iso value a UTC timezone aware datetime object
                dt_obj = line_clean['timestamp_iso']
                aware_dt = make_aware(dt_obj)

                # Check if record with identical timestamp already exists in
                # table, otherwise write record to temporary csv file after
                # checking for record with duplicate timestamp
                try:
                    model_class.objects.get(timestamp_iso=aware_dt)
                except model_class.DoesNotExist:
                    if line_clean['timestamp_iso'] not in written_timestamps:
                        # keep timestamps length small
                        written_timestamps = written_timestamps[
                            (-1) * min(len(written_timestamps), 1000):]
                        written_timestamps += [line_clean['timestamp_iso']]

                        # slide the row buffer window
                        rows_buffer = rows_buffer[
                            (-1) * min(len(rows_buffer),
                                       rows_before + rows_after):
                        ] + [line_clean]

                        # check values before and after
                        if len(rows_buffer) > rows_after:
                            sink.write(','.join([
                                "{0}".format(v)
                                for v in rows_buffer[-(1 + rows_after)].values()
                            ]) + '\n')
                            records_written += 1

            # Write nead header configuration file if applicable
            if nead_header:
                header_symbol = model_class.header_symbol
                write_nead_config(app, nead_header, model, parent_class_name,
                                  header_symbol)
    except FileNotFoundError as e:
        logger.error(f' ERROR file not found {input_file}, exception {e}')
        return

    # Assign copy_dictionary from database_fields
    copy_dictionary = {
        database_fields[i]: database_fields[i]
        for i in range(0, len(database_fields))
    }

    # Import processed and cleaned data into Postgres database
    c = CopyMapping(
        # Assign model
        model_class,
        # Temporary CSV with input data and computed fields
        csv_temporary,
        # Dictionary mapping the model fields to CSV fields
        copy_dictionary,
    )
    # Then save it.
    c.save()

    # Log import message
    logger.info(f' Finished import: {records_written} new records written '
                f'in {model}')

    # Delete csv_temporary
    os.remove(csv_temporary)

    # If file downloaded from web delete it
    if os.path.isfile(f'{directory}/{model}_downloaded.csv'):
        os.remove(f'{directory}/{model}_downloaded.csv')