Exemplo n.º 1
0
    def test_doesnt_mangle_data(self, share_source):
        """Save a RawData built from bytes and check the stored value reads back as text."""
        payload = b'This is just some data'
        record = RawData(source=share_source, app_label='foo', data=payload)
        record.save()

        # Re-read through the manager rather than trusting the in-memory object.
        stored = RawData.objects.first()
        assert stored.data == 'This is just some data'
Exemplo n.º 2
0
    def test_must_have_source(self):
        """Saving a RawData that has no source must raise IntegrityError for source_id."""
        expected_fragment = 'null value in column "source_id" violates not-null constraint'
        orphan = RawData(data='SomeData', app_label='foo')

        with pytest.raises(IntegrityError) as excinfo:
            orphan.save()

        # The first exception argument carries the database error text.
        message = excinfo.value.args[0]
        assert expected_fragment in message
Exemplo n.º 3
0
    def handle(self, *args, **options):
        """Copy raw documents from the ``migration_source`` database into RawData.

        For every requested harvester name, the target app label is looked up
        in ``self.map``, the matching ``webview_document`` rows are read in
        batches through a named cursor, and one ``RawData`` is bulk-created
        per usable row. Rows whose ``raw`` payload or harvest timestamp is
        missing are reported and skipped.

        Options read:
            harvester: iterable of source names; each must be a key of
                ``self.map``.
            all: when truthy and no harvester was given, every key of
                ``self.map`` is migrated.

        Raises:
            KeyError: if a requested harvester is not a key of ``self.map``.
        """
        if not options['harvester'] and options['all']:
            options['harvester'] = list(self.map)

        if not options['harvester']:
            return

        connection = connections['migration_source']

        # This is required to populate the connection object properly
        if connection.connection is None:
            connection.cursor()

        for source in options['harvester']:
            target = self.map[source]
            config = apps.get_app_config(target)

            print('{} -> {}'.format(source, target))
            with transaction.atomic(using='migration_source'):
                with connection.connection.cursor(
                        'scrapi_migration') as cursor:
                    # Bind the source name as a query parameter instead of
                    # formatting it into the SQL text, so quoting is left to
                    # the driver. NOTE(review): assumes the DB-API ``%s``
                    # placeholder style (the named cursor and ``itersize``
                    # suggest psycopg2) -- confirm.
                    cursor.execute(
                        """
                                SELECT "docID", raw
                                FROM webview_document
                                WHERE source = %s
                            """,
                        (source,),
                    )

                    with transaction.atomic():
                        record_count = 0
                        records = cursor.fetchmany(size=cursor.itersize)

                        while records:
                            bulk = []
                            for (doc_id, raw) in records:
                                if (raw is None or raw == 'null'
                                        or raw['timestamps'] is None
                                        or raw['timestamps'][
                                            'harvestFinished'] is None):
                                    print('{} -> {}: {} : raw is null'.
                                          format(source, target, doc_id))
                                    continue
                                harvest_finished = arrow.get(
                                    raw['timestamps']['harvestFinished'])
                                data = raw['doc'].encode()
                                bulk.append(
                                    RawData(
                                        source=config.user,
                                        app_label=config.label,
                                        provider_doc_id=doc_id,
                                        sha256=sha256(data).hexdigest(),
                                        data=data,
                                        date_seen=harvest_finished.datetime,
                                        date_harvested=harvest_finished.
                                        datetime,
                                    ))
                            RawData.objects.bulk_create(bulk)
                            # Progress counts every fetched row, including
                            # the ones skipped above.
                            record_count += len(records)
                            print('{} -> {}: {}'.format(
                                source, target, record_count))
                            records = cursor.fetchmany(
                                size=cursor.itersize)
Exemplo n.º 4
0
    def test_must_have_data(self, share_source):
        """A RawData without data must fail field validation with a 'blank' error."""
        blank_record = RawData(source=share_source, app_label='foo')

        with pytest.raises(exceptions.ValidationError) as excinfo:
            blank_record.clean_fields()
            # Only reached if clean_fields() did not raise.
            blank_record.save()

        data_errors = excinfo.value.message_dict['data']
        assert 'This field cannot be blank.' == data_errors[0]
Exemplo n.º 5
0
def raw_data(share_source):
    """Create, save and return a RawData for ``share_source`` with an empty dict as data."""
    record = RawData(source=share_source, data={})
    record.save()
    return record
Exemplo n.º 6
0
def raw_data(share_source):
    """Build a RawData owned by ``share_source`` with ``{}`` as data, save it and return it."""
    saved = RawData(source=share_source, data={})
    saved.save()
    return saved