Example #1
    def setUp(self):
        super(CaseMultimediaS3DBTest, self).setUp()
        with trap_extra_setup(AttributeError, msg="S3_BLOB_DB_SETTINGS not configured"):
            config = settings.S3_BLOB_DB_SETTINGS

        self.s3db = TemporaryS3BlobDB(config)
        assert get_blob_db() is self.s3db, (get_blob_db(), self.s3db)
Example #2
    def forwards(apps, schema_editor):
        if settings.UNIT_TESTING:
            return

        try:
            get_blob_db()
        except Exception:
            raise MigrationError(
                "Cannot get blob db:\n{error}{message}".format(
                    error=traceback.format_exc(),
                    message=BLOB_DB_NOT_CONFIGURED,
                ))

        try:
            BlobMigrationState.objects.get(slug=slug)
            return  # already migrated
        except BlobMigrationState.DoesNotExist:
            pass

        migrator = MIGRATIONS[slug]
        total = 0
        for doc_type, model_class in doc_type_tuples_to_dict(migrator.doc_types).items():
            total += get_doc_count_by_type(model_class.get_db(), doc_type)
        if total > 500:
            message = MIGRATION_INSTRUCTIONS.format(slug=slug, total=total)
            raise MigrationNotComplete(message)

        # just do the migration if the number of documents is small
        migrated, skipped = migrator.migrate()
        if skipped:
            raise MigrationNotComplete(DOCS_SKIPPED_WARNING.format(skipped))
Example #3
    def setUp(self):
        db1 = TemporaryFilesystemBlobDB()
        assert get_blob_db() is db1, (get_blob_db(), db1)
        data = b'binary data not valid utf-8 \xe4\x94'
        self.not_founds = set()
        self.blob_metas = []

        for type_code in [CODES.form_xml, CODES.multimedia, CODES.data_export]:
            meta = db1.put(BytesIO(data), meta=new_meta(type_code=type_code))
            lost = new_meta(type_code=type_code, content_length=42)
            self.blob_metas.append(meta)
            self.blob_metas.append(lost)
            lost.save()
            self.not_founds.add((
                lost.id,
                lost.domain,
                lost.type_code,
                lost.parent_id,
                lost.key,
            ))

        self.test_size = len(self.blob_metas)
        db2 = TemporaryFilesystemBlobDB()
        self.db = TemporaryMigratingBlobDB(db2, db1)
        assert get_blob_db() is self.db, (get_blob_db(), self.db)
        discard_migration_state(self.slug)
Example #4
    def tearDownClass(cls):
        for group in Group.by_domain(DOMAIN):
            group.delete()
        delete_all_users()

        for _, item_list in cls.item_lists.items():
            item_list[0].delete()
            item_list[1].delete()

        get_blob_db().delete(key=FIXTURE_BUCKET + "/" + DOMAIN)
        cls.domain.delete()
        super(OtaFixtureTest, cls).tearDownClass()
Example #5
    def setUp(self):
        super(BaseMigrationTestCase, self).setUp()
        with trap_extra_setup(AttributeError, msg="S3_BLOB_DB_SETTINGS not configured"):
            config = settings.S3_BLOB_DB_SETTINGS
            self.s3db = TemporaryS3BlobDB(config)
            assert get_blob_db() is self.s3db, (get_blob_db(), self.s3db)

        FormProcessorTestUtils.delete_all_cases_forms_ledgers()
        self.domain_name = uuid.uuid4().hex
        self.domain = create_domain(self.domain_name)
        # all new domains are set complete when they are created
        DomainMigrationProgress.objects.filter(domain=self.domain_name).delete()
        self.assertFalse(should_use_sql_backend(self.domain_name))
Example #6
    def test_hard_delete_forms_and_attachments(self):
        forms = [create_form_for_test(DOMAIN) for i in range(3)]
        form_ids = sorted(form.form_id for form in forms)
        forms = FormAccessorSQL.get_forms(form_ids)
        self.assertEqual(3, len(forms))

        other_form = create_form_for_test('other_domain')
        self.addCleanup(lambda: FormAccessorSQL.hard_delete_forms('other_domain', [other_form.form_id]))

        attachments = sorted(
            get_blob_db().metadb.get_for_parents(form_ids),
            key=lambda meta: meta.parent_id
        )
        self.assertEqual(3, len(attachments))

        deleted = FormAccessorSQL.hard_delete_forms(DOMAIN, form_ids[1:] + [other_form.form_id])
        self.assertEqual(2, deleted)

        forms = FormAccessorSQL.get_forms(form_ids)
        self.assertEqual(1, len(forms))
        self.assertEqual(form_ids[0], forms[0].form_id)

        for attachment in attachments[1:]:
            with self.assertRaises(BlobNotFound):
                attachment.open()

        with attachments[0].open() as content:
            self.assertIsNotNone(content.read())
        other_form = FormAccessorSQL.get_form(other_form.form_id)
        self.assertIsNotNone(other_form.get_xml())
Example #7
def bulk_atomic_blobs(docs):
    """Atomic blobs persistence to be used with ``db.bulk_save(docs)``

    Blobs may be added to or deleted from objects within the context
    body. Blobs previously added with
    ``DeferredBlobMixin.deferred_put_attachment`` will be persisted
    automatically. NOTE this method will persist attachments, but it
    does not save the documents to couch. Call `db.bulk_save(docs)`
    within the context to do that.

    :param docs: A list of model objects.
    """
    save = lambda: None
    contexts = [d.atomic_blobs(save) for d in docs if hasattr(d, "atomic_blobs")]
    with ExitStack() as stack:
        for mgr in contexts:
            stack.enter_context(mgr)
        delete_blobs = []
        for doc in docs:
            if isinstance(doc, DeferredBlobMixin) and doc._deferred_blobs:
                for name, info in list(six.iteritems(doc._deferred_blobs)):
                    if info is not None:
                        doc.put_attachment(name=name, **info)
                    else:
                        meta = doc.external_blobs.pop(name, None)
                        if meta is not None:
                            delete_blobs.append(meta.key)
                        doc._deferred_blobs.pop(name)
                assert not doc._deferred_blobs, doc._deferred_blobs
        yield
        db = get_blob_db()
        for key in delete_blobs:
            db.delete(key=key)
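
A short usage sketch, assuming bulk_atomic_blobs is applied as a context manager (for example via a contextlib.contextmanager decorator in the full module); docs and couch_db below are placeholders:

    # Hypothetical usage: persist deferred blobs and save the documents in one pass.
    with bulk_atomic_blobs(docs):
        couch_db.bulk_save(docs)  # save the docs; the context persists/deletes their blobs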
Example #8
    def fetch_attachment(self, name, stream=False):
        """Get named attachment

        :param stream: When true, return a file-like object that can be
        read at least once (streamers should not expect to seek within
        or read the contents of the returned file more than once).
        """
        db = get_blob_db()
        try:
            meta = self.external_blobs[name]
            blob = db.get(meta.id, self._blobdb_bucket())
        except (KeyError, NotFound):
            if self.migrating_blobs_from_couch:
                return super(BlobMixin, self).fetch_attachment(name, stream=stream)
            raise ResourceNotFound(u"{model} attachment: {name!r}".format(
                                   model=type(self).__name__, name=name))
        if stream:
            return blob

        with blob:
            body = blob.read()
        try:
            body = body.decode("utf-8", "strict")
        except UnicodeDecodeError:
            # Return bytes on decode failure, otherwise unicode.
            # Ugly, but consistent with restkit.wrappers.Response.body_string
            pass
        return body
Example #9
    def test_export_delete(self):
        blobdb = get_blob_db()
        data_files = []
        for domain_name in [self.domain.name, self.domain2.name]:
            data_files.append(DataFile.save_blob(
                BytesIO((domain_name + " csv").encode('utf-8')),
                domain=domain_name,
                filename="data.csv",
                description="data file",
                content_type="text/csv",
                delete_after=datetime.utcnow() + timedelta(minutes=10),
            ))
            EmailExportWhenDoneRequest.objects.create(domain=domain_name)
            self._assert_export_counts(domain_name, 1)

        self.domain.delete()

        with self.assertRaises(NotFound):
            blobdb.get(key=data_files[0].blob_id)

        with blobdb.get(key=data_files[1].blob_id) as f:
            self.assertEqual(f.read(), (self.domain2.name + " csv").encode('utf-8'))

        self._assert_export_counts(self.domain.name, 0)
        self._assert_export_counts(self.domain2.name, 1)
Example #10
 def as_file(self):
     try:
         value = self._fileobj
     except AttributeError:
         value = get_blob_db().get(key=self.name) if self.name else None
         self._fileobj = value
     return value
Example #11
    def delete_old_images():
        start = datetime.utcnow()
        max_age = start - timedelta(days=90)
        db = get_blob_db()

        def _get_query(db_name, max_age=max_age):
            return BlobMeta.objects.using(db_name).filter(
                content_type='image/jpeg',
                type_code=CODES.form_attachment,
                domain='icds-cas',
                created_on__lt=max_age
            )

        run_again = False
        for db_name in get_db_aliases_for_partitioned_query():
            bytes_deleted = 0
            metas = list(_get_query(db_name)[:1000])
            if metas:
                for meta in metas:
                    bytes_deleted += meta.content_length or 0
                db.bulk_delete(metas=metas)
                datadog_counter('commcare.icds_images.bytes_deleted', value=bytes_deleted)
                datadog_counter('commcare.icds_images.count_deleted', value=len(metas))
                run_again = True

        if run_again:
            delete_old_images.delay()
Example #12
    def fetch_attachment(self, name, stream=False):
        """Get named attachment

        :param stream: When true, return a file-like object that can be
        read at least once (streamers should not expect to seek within
        or read the contents of the returned file more than once).
        """
        db = get_blob_db()
        try:
            try:
                key = self.external_blobs[name].key
            except KeyError:
                if self._migrating_blobs_from_couch:
                    return super(BlobMixin, self) \
                        .fetch_attachment(name, stream=stream)
                raise NotFound(name)
            blob = db.get(key=key)
        except NotFound:
            raise ResourceNotFound(
                "{model} {model_id} attachment: {name!r}".format(
                    model=type(self).__name__,
                    model_id=self._id,
                    name=name,
                ))
        if stream:
            return blob

        with blob:
            return blob.read()
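
A brief usage sketch contrasting the two return modes; doc is a placeholder document using this mixin:

    # Hypothetical usage: read fully into memory vs. stream the raw blob.
    body = doc.fetch_attachment("form.xml")             # whole attachment as bytes
    blob = doc.fetch_attachment("form.xml", stream=True)
    with blob:
        data = blob.read()                              # file-like object; read it once, do not seek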
Example #13
    def hard_delete_forms(domain, form_ids, delete_attachments=True):
        assert isinstance(form_ids, list)

        if delete_attachments:
            attachments = list(FormAccessorSQL.get_attachments_for_forms(form_ids))

        with get_cursor(XFormInstanceSQL) as cursor:
            cursor.execute('SELECT hard_delete_forms(%s, %s) AS deleted_count', [domain, form_ids])
            results = fetchall_as_namedtuple(cursor)
            deleted_count = sum([result.deleted_count for result in results])

        if delete_attachments:
            attachments_to_delete = attachments
            if deleted_count != len(form_ids):
                # in the unlikely event that we didn't delete all forms (because they weren't all
                # in the specified domain), only delete attachments for forms that were deleted.
                deleted_forms = set()
                for form_id in form_ids:
                    if not FormAccessorSQL.form_exists(form_id):
                        deleted_forms.add(form_id)

                attachments_to_delete = []
                for attachment in attachments:
                    if attachment.form_id in deleted_forms:
                        attachments_to_delete.append(attachment)

            db = get_blob_db()
            paths = [
                db.get_path(attachment.blob_id, attachment.blobdb_bucket())
                for attachment in attachments_to_delete
            ]
            db.bulk_delete(paths)

        return deleted_count
Example #14
 def __init__(self):
     self.db = get_blob_db()
     self.total_blobs = 0
     self.not_found = 0
     if not isinstance(self.db, MigratingBlobDB):
         raise MigrationError(
             "Expected to find migrating blob db backend (got %r)" % self.db)
Example #15
def test_get_blobdb(self, msg, root=True, blob_dir=None):
    with tempdir() as tmp:
        if root == "file":
            tmp = join(tmp, "file")
            with open(tmp, "w") as fh:
                fh.write("x")
        conf = SharedDriveConfiguration(
            shared_drive_path=tmp if root else root,
            restore_dir=None,
            transfer_dir=None,
            temp_dir=None,
            blob_dir=blob_dir,
        )
        with override_settings(SHARED_DRIVE_CONF=conf, S3_BLOB_DB_SETTINGS=None):
            with assert_raises(mod.Error, msg=re.compile(msg)):
                mod.get_blob_db()
Example #16
def dump_locations(domain, download_id, include_consumption, headers_only, task=None):
    exporter = LocationExporter(domain, include_consumption=include_consumption,
                                headers_only=headers_only, async_task=task)

    fd, path = tempfile.mkstemp()
    writer = Excel2007ExportWriter()
    writer.open(header_table=exporter.get_headers(), file=path)
    with writer:
        exporter.write_data(writer)

    with open(path, 'rb') as file_:
        db = get_blob_db()
        expiry_mins = 60
        db.put(
            file_,
            domain=domain,
            parent_id=domain,
            type_code=CODES.tempfile,
            key=download_id,
            timeout=expiry_mins,
        )

        file_format = Format.from_format(Excel2007ExportWriter.format)
        expose_blob_download(
            download_id,
            expiry=expiry_mins * 60,
            mimetype=file_format.mimetype,
            content_disposition=safe_filename_header('{}_locations'.format(domain), file_format.extension),
            download_id=download_id,
        )
Example #17
 def _get_restore_xml(self):
     db = get_blob_db()
     try:
         blob = db.get(self.restore_blob_id)
     except (KeyError, NotFound) as e:
         # Todo - custom exception
         raise e
     return blob
Example #18
 def __init__(self, *args, **kw):
     super(BlobDbBackendMigrator, self).__init__(*args, **kw)
     self.db = get_blob_db()
     self.total_blobs = 0
     self.not_found = 0
     if not isinstance(self.db, MigratingBlobDB):
         raise MigrationError(
             "Expected to find migrating blob db backend (got %r)" % self.db)
Example #19
    def setUp(self):
        with trap_extra_setup(AttributeError, msg="S3_BLOB_DB_SETTINGS not configured"):
            config = settings.S3_BLOB_DB_SETTINGS

        fsdb = TemporaryFilesystemBlobDB()
        assert get_blob_db() is fsdb, (get_blob_db(), fsdb)
        self.migrate_docs = docs = []
        for i in range(self.test_size):
            doc = SavedBasicExport(configuration=_mk_config("config-%s" % i))
            doc.save()
            doc.set_payload(("content %s" % i).encode('utf-8'))
            docs.append(doc)

        s3db = TemporaryS3BlobDB(config)
        self.db = TemporaryMigratingBlobDB(s3db, fsdb)
        assert get_blob_db() is self.db, (get_blob_db(), self.db)
        BaseMigrationTest.discard_migration_state(self.slug)
Example #20
    def save_for_later(cls, fileobj, timeout, domain, restore_user_id):
        """Save restore response for later

        :param fileobj: A file-like object.
        :param timeout: Minimum content expiration in seconds.
        :returns: A new `CachedResponse` pointing to the saved content.
        """
        name = 'restore-{}.xml'.format(uuid4().hex)
        get_blob_db().put(
            NoClose(fileobj),
            domain=domain,
            parent_id=restore_user_id,
            type_code=CODES.restore,
            key=name,
            timeout=max(timeout // 60, 60),
        )
        return cls(name)
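
A usage sketch, assuming this classmethod lives on the cached restore-response class (class name and values below are placeholders):

    # Hypothetical usage: cache a generated restore payload for roughly an hour.
    with open(restore_path, 'rb') as fileobj:  # placeholder path to the restore XML
        cached = CachedResponse.save_for_later(
            fileobj,
            timeout=60 * 60,
            domain="example-domain",
            restore_user_id=restore_user.user_id,  # placeholder user
        )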
Example #21
def test_get_blobdb(self, msg, root=True, blob_dir=None):
    with tempdir() as tmp:
        if (root == "file" and six.PY3) or (root == b"file" and six.PY2):
            tmp = join(tmp, "file" if six.PY3 else b"file")
            with open(tmp, "w", encoding='utf-8') as fh:
                fh.write("x")
        conf = SharedDriveConfiguration(
            shared_drive_path=tmp if root else root,
            restore_dir=None,
            transfer_dir=None,
            temp_dir=None,
            blob_dir=blob_dir,
        )
        with patch("corehq.blobs._db", new=[]):
            with override_settings(SHARED_DRIVE_CONF=conf, S3_BLOB_DB_SETTINGS=None):
                with assert_raises(mod.Error, msg=re.compile(msg)):
                    mod.get_blob_db()
Example #22
def get_default_blob_size(bucket, blob_id):
    try:
        length = get_blob_db().size(blob_id, bucket)
    except NotFound:
        length = UNKNOWN
    if blob_id.startswith("restore-response-"):
        return BlobSize(UNKNOWN, "restore", length, bucket, blob_id)
    return BlobSize(UNKNOWN, bucket, length, bucket, blob_id)
Example #23
 def __init__(self, slug, couchdb, filename=None):
     super(BlobDbBackendMigrator, self).__init__(slug, couchdb, filename)
     self.db = get_blob_db()
     self.total_blobs = 0
     self.not_found = 0
     if not isinstance(self.db, MigratingBlobDB):
         raise MigrationError(
             "Expected to find migrating blob db backend (got %r)" % self.db)
Example #24
    def delete_content(self):
        db = get_blob_db()
        bucket = self._blobdb_bucket()
        deleted = db.delete(self.blob_id, bucket)
        if deleted:
            self.blob_id = None

        return deleted
Example #25
    def __init__(self, bucket, meta_model):
        """
        :meta_model is a django model used to store meta info
            must contain columns identifier, filename, length
        """

        self._bucket = bucket
        self._db = get_blob_db()
        self._meta_model = meta_model
Example #26
    def __init__(self, *args, **kw):
        super(TemporaryBlobDBMixin, self).__init__(*args, **kw)

        blobs._db.append(self)
        try:
            # verify get_blob_db() returns our new db
            assert blobs.get_blob_db() is self, 'got wrong blob db'
        except:
            self.close()
            raise
Example #27
    def write_content(self, content):
        if not self.name:
            raise InvalidAttachment("cannot save attachment without name")

        db = get_blob_db()
        bucket = self._blobdb_bucket()
        info = db.put(content, self.name, bucket)
        self.md5 = info.md5_hash
        self.content_length = info.length
        self.blob_id = info.identifier
Example #28
def expose_zipped_blob_download(data_path, filename, format, domain):
    """Expose zipped file content as a blob download

    :param data_path: Path to data file. Will be deleted.
    :param filename: File name.
    :param format: `couchexport.models.Format` constant.
    :param domain: Domain name.
    :returns: A link to download the file.
    """
    try:
        _, zip_temp_path = tempfile.mkstemp(".zip")
        with ZipFile(zip_temp_path, 'w') as zip_file_:
            zip_file_.write(data_path, filename)
    finally:
        os.remove(data_path)

    try:
        expiry_mins = 60 * 24
        file_format = Format.from_format(format)
        file_name_header = safe_filename_header(filename, file_format.extension)
        ref = expose_blob_download(
            filename,
            expiry=expiry_mins * 60,
            mimetype=file_format.mimetype,
            content_disposition=file_name_header
        )
        with open(zip_temp_path, 'rb') as file_:
            get_blob_db().put(
                file_,
                domain=domain,
                parent_id=domain,
                type_code=CODES.tempfile,
                key=ref.download_id,
                timeout=expiry_mins
            )
    finally:
        os.remove(zip_temp_path)

    return "%s%s?%s" % (
        get_url_base(),
        reverse('retrieve_download', kwargs={'download_id': ref.download_id}),
        "get_file"  # download immediately rather than rendering page
    )
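
A usage sketch; the path, file name, and domain below are placeholders:

    # Hypothetical usage: zip a temporary data file and expose it as a download link.
    link = expose_zipped_blob_download(
        "/tmp/report-data.csv",  # deleted by the helper
        "report-data.csv",
        Format.CSV,
        "example-domain",
    )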
Example #29
def check_blobdb():
    """Save something to the blobdb and try reading it back."""
    db = get_blob_db()
    contents = "It takes Pluto 248 Earth years to complete one orbit!"
    info = db.put(StringIO(contents))
    with db.get(info.identifier) as fh:
        res = fh.read()
    db.delete(info.identifier)
    if res == contents:
        return ServiceStatus(True, "Successfully saved a file to the blobdb")
    return ServiceStatus(False, "Failed to save a file to the blobdb")
Example #30
    def put_attachment(self, content, name=None, content_type=None,
                       content_length=None, domain=None, type_code=None):
        """Put attachment in blob database

        See `get_short_identifier()` for restrictions on the upper bound
        for number of attachments per object.

        :param content: String or file object.
        """
        db = get_blob_db()

        if name is None:
            name = getattr(content, "name", None)
        if name is None:
            raise InvalidAttachment("cannot save attachment without name")
        if self._id is None:
            raise ResourceNotFound("cannot put attachment on unidentified document")
        if hasattr(self, "domain"):
            if domain is not None and self.domain != domain:
                raise ValueError("domain mismatch: %s != %s" % (self.domain, domain))
            domain = self.domain
        elif domain is None:
            raise ValueError("domain attribute or argument is required")
        old_meta = self.blobs.get(name)

        if isinstance(content, six.text_type):
            content = BytesIO(content.encode("utf-8"))
        elif isinstance(content, bytes):
            content = BytesIO(content)

        # do we need to worry about BlobDB reading beyond content_length?
        meta = db.put(
            content,
            domain=domain or self.domain,
            parent_id=self._id,
            name=name,
            type_code=(self._blobdb_type_code if type_code is None else type_code),
            content_type=content_type,
        )
        self.external_blobs[name] = BlobMetaRef(
            key=meta.key,
            blobmeta_id=meta.id,
            content_type=content_type,
            content_length=meta.content_length,
        )
        if self._migrating_blobs_from_couch and self._attachments:
            self._attachments.pop(name, None)
        if self._atomic_blobs is None:
            self.save()
            if old_meta and old_meta.key:
                db.delete(key=old_meta.key)
        elif old_meta and old_meta.key:
            self._atomic_blobs[name].append(old_meta.key)
        return True
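
A usage sketch on an already-saved document that mixes in this method; doc is a placeholder and is assumed to have a domain attribute:

    # Hypothetical usage: attach a small text payload to a saved document.
    doc.put_attachment(
        "hello world",  # text content is encoded to UTF-8 by the method
        name="greeting.txt",
        content_type="text/plain",
    )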
Example #31
def delete_expired_blobs():
    run_again = False
    bytes_deleted = 0
    for dbname in get_db_aliases_for_partitioned_query():
        expired = list(BlobMeta.objects.using(dbname).filter(
            expires_on__isnull=False,
            expires_on__lt=_utcnow(),
        )[:1000])
        if not expired:
            continue
        if len(expired) == 1000:
            run_again = True
        get_blob_db().bulk_delete(metas=expired)
        log.info("deleted expired blobs: %r", [m.key for m in expired])
        shard_deleted = sum(m.content_length for m in expired)
        bytes_deleted += shard_deleted
        datadog_counter('commcare.temp_blobs.bytes_deleted', value=shard_deleted)

    if run_again:
        delete_expired_blobs.delay()

    return bytes_deleted
Example #32
    def toHttpResponse(self):
        blob_db = get_blob_db()
        file_obj = blob_db.get(self.identifier, self.bucket)
        blob_size = blob_db.size(self.identifier, self.bucket)

        response = StreamingHttpResponse(FileWrapper(file_obj, CHUNK_SIZE),
                                         content_type=self.content_type)

        response['Content-Length'] = blob_size
        response['Content-Disposition'] = self.content_disposition
        for k, v in self.extras.items():
            response[k] = v
        return response
Example #33
 def write_file(self, f, filename, domain):
     identifier = random_url_id(16)
     meta = get_blob_db().put(f,
                              domain=domain,
                              parent_id=domain,
                              type_code=CODES.data_import,
                              key=identifier)
     assert identifier == meta.key, (identifier, meta.key)
     file_meta = self._meta_model(identifier=identifier,
                                  filename=filename,
                                  length=meta.content_length)
     file_meta.save()
     return file_meta
Example #34
def _generate_incremental_export(incremental_export, last_doc_date=None):
    export_instance = incremental_export.export_instance
    export_instance.export_format = Format.UNZIPPED_CSV  # force to unzipped CSV

    # Remove the date period from the ExportInstance, since this is added automatically by Daily Saved exports
    export_instance.filters.date_period = None
    filters = export_instance.get_filters()
    if last_doc_date:
        filters.append(ServerModifiedOnRangeFilter(gt=last_doc_date))

    class LastDocTracker:
        def __init__(self, doc_iterator):
            self.doc_iterator = doc_iterator
            self.last_doc = None
            self.doc_count = 0

        def __iter__(self):
            for doc in self.doc_iterator:
                self.last_doc = doc
                self.doc_count += 1
                yield doc

    with TransientTempfile() as temp_path, metrics_track_errors(
            'generate_incremental_exports'):
        writer = get_export_writer([export_instance],
                                   temp_path,
                                   allow_pagination=False)
        with writer.open([export_instance]):
            query = get_export_query(export_instance, filters)
            query = query.sort('server_modified_on')  # reset sort to this instead of opened_on
            docs = LastDocTracker(query.run().hits)
            write_export_instance(writer, export_instance, docs)

        export_file = ExportFile(writer.path, writer.format)

        if docs.doc_count <= 0:
            return

        new_checkpoint = incremental_export.checkpoint(
            docs.doc_count, docs.last_doc.get('server_modified_on'))

        with export_file as file_:
            db = get_blob_db()
            db.put(file_,
                   domain=incremental_export.domain,
                   parent_id=new_checkpoint.blob_parent_id,
                   type_code=CODES.data_export,
                   key=str(new_checkpoint.blob_key),
                   timeout=24 * 60)
    return new_checkpoint
Example #35
def populate_export_download_task(export_instances, filters, download_id, filename=None, expiry=10 * 60):
    """
    :param expiry:  Time period for the export to be available for download in minutes
    """
    domain = export_instances[0].domain
    with TransientTempfile() as temp_path, datadog_track_errors('populate_export_download_task'):
        export_file = get_export_file(
            export_instances,
            filters,
            temp_path,
            # We don't have a great way to calculate progress if it's a bulk download,
            # so only track the progress for single instance exports.
            progress_tracker=populate_export_download_task if len(export_instances) == 1 else None
        )

        file_format = Format.from_format(export_file.format)
        filename = filename or export_instances[0].name

        with export_file as file_:
            db = get_blob_db()
            db.put(
                file_,
                domain=domain,
                parent_id=domain,
                type_code=CODES.data_export,
                key=download_id,
                timeout=expiry,
            )

            expose_blob_download(
                download_id,
                expiry=expiry * 60,
                mimetype=file_format.mimetype,
                content_disposition=safe_filename_header(filename, file_format.extension),
                download_id=download_id,
            )

    email_requests = EmailExportWhenDoneRequest.objects.filter(
        domain=domain,
        download_id=download_id
    )
    for email_request in email_requests:
        try:
            couch_user = CouchUser.get_by_user_id(email_request.user_id, domain=domain)
        except CouchUser.AccountTypeError:
            pass
        else:
            if couch_user is not None:
                process_email_request(domain, download_id, couch_user.get_email())
    email_requests.delete()
Example #36
 def save_dump_to_blob(data_file_path, data_file_name, result_file_format):
     expiry_mins = 60 * 24
     with open(data_file_path, 'rb') as file_:
         blob_db = get_blob_db()
         blob_db.put(file_, data_file_name, timeout=expiry_mins)
     file_format = Format.from_format(result_file_format)
     file_name_header = safe_filename_header(data_file_name,
                                             file_format.extension)
     blob_dl_object = expose_blob_download(
         data_file_name,
         expiry=expiry_mins * 60,
         mimetype=file_format.mimetype,
         content_disposition=file_name_header)
     return blob_dl_object.download_id
Example #37
    def setUpClass(cls):
        super().setUpClass()

        cls.db = TemporaryFilesystemBlobDB()
        assert get_blob_db() is cls.db, (get_blob_db(), cls.db)
        cls.data = data = b'binary data not valid utf-8 \xe4\x94'
        cls.blob_metas = []
        cls.not_found = set()

        cls.domain_name = str(uuid.uuid4())

        for type_code in [CODES.form_xml, CODES.multimedia, CODES.data_export]:
            for domain in (cls.domain_name, str(uuid.uuid4())):
                meta = cls.db.put(BytesIO(data),
                                  meta=new_meta(domain=domain,
                                                type_code=type_code))
                lost = new_meta(domain=domain,
                                type_code=type_code,
                                content_length=42)
                cls.blob_metas.append(meta)
                cls.blob_metas.append(lost)
                lost.save()
                cls.not_found.add(lost.key)
Example #38
def run_data_pull(data_pull_slug, domain, month, location_id=None, email=None):
    subject = _('Custom ICDS Data Pull')
    try:
        filename = DataExporter(data_pull_slug,
                                "icds-ucr-citus",
                                month=month,
                                location_id=location_id).export()
    except Exception:
        if email:
            message = _("""
                            Hi,
                            Could not generate the requested data pull.
                            The error has been notified. Please report as an issue for quicker followup
                        """)
            send_html_email_async.delay(subject, [email],
                                        message,
                                        email_from=settings.DEFAULT_FROM_EMAIL)
        raise
    else:
        if email and filename:
            db = get_blob_db()
            download_id = DownloadBase.new_id_prefix + make_uuid()
            with open(filename, 'rb') as _file:
                db.put(
                    _file,
                    domain=domain,
                    parent_id=domain,
                    type_code=CODES.data_export,
                    key=download_id,
                    timeout=24 * 60,
                )
            exposed_download = expose_blob_download(
                filename,
                expiry=24 * 60 * 60,
                mimetype=Format.from_format(Format.ZIP).mimetype,
                content_disposition=safe_filename_header(filename),
                download_id=download_id)
            os.remove(filename)
            path = reverse(
                'retrieve_download',
                kwargs={'download_id': exposed_download.download_id})
            link = f"{web.get_url_base()}{path}?get_file"
            message = _("""
            Hi,
            Please download the data from {link}.
            The data is available only for 24 hours.
            """).format(link=link)
            send_html_email_async.delay(subject, [email],
                                        message,
                                        email_from=settings.DEFAULT_FROM_EMAIL)
Example #39
def store_file_in_blobdb(domain, export_file, expired=BLOB_EXPIRATION_TIME):
    db = get_blob_db()
    key = uuid.uuid4().hex
    try:
        kw = {"meta": db.metadb.get(parent_id='AaaFile', key=key)}
    except BlobMeta.DoesNotExist:
        kw = {
            "domain": domain,
            "parent_id": 'AaaFile',
            "type_code": CODES.tempfile,
            "key": key,
            "timeout": expired
        }
    return db.put(export_file, **kw)
Example #40
def diff_form_state(form_id, *, in_couch=False):
    if form_id is None:
        old = {"form_state": "unknown"}
        new = {"form_state": "unknown"}
    else:
        in_couch = in_couch or couch_form_exists(form_id)
        in_sql = sql_form_exists(form_id)
        couch_miss = "missing"
        if not in_couch and get_blob_db().metadb.get_for_parent(form_id):
            couch_miss = MISSING_BLOB_PRESENT
            log.warning("couch form missing, blob present: %s", form_id)
        old = {"form_state": FORM_PRESENT if in_couch else couch_miss}
        new = {"form_state": FORM_PRESENT if in_sql else "missing"}
    return old, new
Example #41
 def process_object(self, object):
     blob_id = object.restore_blob_id
     info = BlobInfo(identifier=blob_id,
                     length=object.content_length,
                     digest=None)
     self.total_blobs += 1
     db = get_blob_db()
     try:
         content = db.get(blob_id)
     except NotFound:
         self.not_found += 1
     else:
         with content:
             self.db.copy_blob(content, info, DEFAULT_BUCKET)
Example #42
 def process_object(self, attachment):
     from_db = get_blob_db()
     bucket = attachment.blobdb_bucket()
     blob_id = attachment.blob_id
     info = BlobInfo(identifier=blob_id, length=attachment.content_length,
                     digest="md5=" + attachment.md5)
     self.total_blobs += 1
     try:
         content = from_db.get(blob_id, bucket)
     except NotFound:
         self.not_found += 1
     else:
         with content:
             self.db.copy_blob(content, info, bucket)
Example #43
 def process_doc(self, doc):
     obj = BlobHelper(doc, self.couchdb)
     bucket = obj._blobdb_bucket()
     assert obj.external_blobs and obj.external_blobs == obj.blobs, doc
     from_db = get_blob_db()
     for name, meta in obj.blobs.iteritems():
         self.total_blobs += 1
         try:
             content = from_db.get(meta.id, bucket)
         except NotFound:
             self.not_found += 1
         else:
             with content:
                 self.db.copy_blob(content, meta.info, bucket)
     return True
Example #44
    def handle(self, zipname, **options):
        from_zip = zipfile.ZipFile(zipname)

        to_db = get_blob_db()

        for filename in from_zip.namelist():
            bucket = '/'.join(filename.split('/')[:-1])
            identifier = filename.split('/')[-1]
            blob = cStringIO.StringIO(from_zip.read(filename))
            # copy_blob only needs the identifier
            blob_info = BlobInfo(identifier=identifier, length="", digest="")
            try:
                to_db.copy_blob(blob, blob_info, bucket)
            except FileExists:
                continue
Example #45
 def store_file_in_blobdb(self, file, expired=EXPIRED):
     db = get_blob_db()
     try:
         kw = {
             "meta": db.metadb.get(parent_id='IcdsFile', key=self.blob_id)
         }
     except BlobMeta.DoesNotExist:
         kw = {
             "domain": DASHBOARD_DOMAIN,
             "parent_id": 'IcdsFile',
             "type_code": CODES.tempfile,
             "key": self.blob_id,
             "timeout": expired
         }
     db.put(file, **kw)
Example #46
 def create(cls, user_id, restore_content, comment=""):
     """
     The method to create a new DemoUserRestore object
     args:
         user_id: the id of the CommCareUser
         restore_content: a string or file-like object of user's restore XML
     """
     restore = cls(
         demo_user_id=user_id,
         restore_comment=comment,
     )
     with AtomicBlobs(get_blob_db()) as db:
         restore._write_restore_blob(restore_content, db)
         restore.save()
     return restore
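
A usage sketch; the user and restore payload below are placeholders:

    # Hypothetical usage: store a practice-mode restore for a CommCareUser.
    restore = DemoUserRestore.create(
        user_id=commcare_user._id,              # placeholder CommCareUser id
        restore_content=BytesIO(restore_xml),   # restore XML as bytes or a file-like object
        comment="initial demo restore",
    )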
Example #47
 def delete_attachment(self, name):
     if self._migrating_blobs_from_couch and self._attachments:
         deleted = bool(self._attachments.pop(name, None))
     else:
         deleted = False
     meta = self.external_blobs.pop(name, None)
     if meta is not None:
         if self._atomic_blobs is None:
             deleted = get_blob_db().delete(key=meta.key) or deleted
         else:
             self._atomic_blobs[name].append(meta.key)
             deleted = True
     if self._atomic_blobs is None:
         self.save()
     return deleted
Example #48
def dump_locations(domain,
                   download_id,
                   include_consumption,
                   headers_only,
                   owner_id,
                   root_location_ids=None,
                   task=None,
                   **kwargs):
    exporter = LocationExporter(domain,
                                include_consumption=include_consumption,
                                root_location_ids=root_location_ids,
                                headers_only=headers_only,
                                async_task=task,
                                **kwargs)

    fd, path = tempfile.mkstemp()
    writer = Excel2007ExportWriter()
    writer.open(header_table=exporter.get_headers(), file=path)
    with writer:
        exporter.write_data(writer)

    with open(path, 'rb') as file_:
        db = get_blob_db()
        expiry_mins = 60
        db.put(
            file_,
            domain=domain,
            parent_id=domain,
            type_code=CODES.tempfile,
            key=download_id,
            timeout=expiry_mins,
        )

        file_format = Format.from_format(Excel2007ExportWriter.format)
        filename = '{}_locations'.format(domain)
        if len(root_location_ids) == 1:
            root_location = SQLLocation.objects.get(
                location_id=root_location_ids[0])
            filename += '_{}'.format(root_location.name)
        expose_blob_download(
            download_id,
            expiry=expiry_mins * 60,
            mimetype=file_format.mimetype,
            content_disposition=safe_filename_header(filename,
                                                     file_format.extension),
            download_id=download_id,
            owner_ids=[owner_id],
        )
Example #49
    def test_cached_global_fixture_user_id(self):
        sandwich = self.make_data_type("sandwich", is_global=True)
        self.make_data_item(sandwich, "7.39")
        frank = self.user.to_ota_restore_user()
        sammy = CommCareUser.create(self.domain, 'sammy', '***', None,
                                    None).to_ota_restore_user()

        fixtures = call_fixture_generator(frank)
        self.assertEqual({item.attrib['user_id']
                          for item in fixtures}, {frank.user_id})
        self.assertTrue(get_blob_db().exists(key=FIXTURE_BUCKET + '/' +
                                             self.domain))

        fixtures = call_fixture_generator(sammy)
        self.assertEqual({item.attrib['user_id']
                          for item in fixtures}, {sammy.user_id})
Example #50
 def save_dump_to_blob(self, temp_path):
     with open(temp_path, 'rb') as file_:
         blob_db = get_blob_db()
         blob_db.put(
             file_,
             self.result_file_name,
             timeout=60 * 48)  # 48 hours
     file_format = Format.from_format(Format.CSV)
     file_name_header = safe_filename_header(
         self.result_file_name, file_format.extension)
     blob_dl_object = expose_blob_download(
         self.result_file_name,
         mimetype=file_format.mimetype,
         content_disposition=file_name_header
     )
     return blob_dl_object.download_id
Example #51
def _store_excel_in_blobdb(report_class, file, domain):
    key = uuid.uuid4().hex
    expired = 60 * 24 * 7  # 7 days
    db = get_blob_db()

    kw = {
        "domain": domain,
        "parent_id": key,
        "type_code": CODES.tempfile,
        "key": key,
        "timeout": expired,
        "properties": {"report_class": report_class}
    }
    file.seek(0)
    db.put(file, **kw)
    return key
Example #52
def check_blobdb():
    """Save something to the blobdb and try reading it back."""
    db = get_blob_db()
    contents = b"It takes Pluto 248 Earth years to complete one orbit!"
    meta = db.put(
        BytesIO(contents),
        domain="<unknown>",
        parent_id="check_blobdb",
        type_code=CODES.tempfile,
    )
    with db.get(key=meta.key) as fh:
        res = fh.read()
    db.delete(key=meta.key)
    if res == contents:
        return ServiceStatus(True, "Successfully saved a file to the blobdb")
    return ServiceStatus(False, "Failed to save a file to the blobdb")
Example #53
def _migrate_form_attachments(sql_form, couch_form):
    """Copy over attachment meta - includes form.xml"""
    attachments = []
    metadb = get_blob_db().metadb

    def try_to_get_blob_meta(parent_id, type_code, name):
        try:
            meta = metadb.get(parent_id=parent_id,
                              type_code=type_code,
                              name=name)
            assert meta.domain == couch_form.domain, (meta.domain,
                                                      couch_form.domain)
            return meta
        except BlobMeta.DoesNotExist:
            return None

    if couch_form._attachments and any(name not in couch_form.blobs
                                       for name in couch_form._attachments):
        _migrate_couch_attachments_to_blob_db(couch_form)

    for name, blob in couch_form.blobs.items():
        type_code = CODES.form_xml if name == "form.xml" else CODES.form_attachment
        meta = try_to_get_blob_meta(sql_form.form_id, type_code, name)

        # there was a bug in a migration causing the type code for many form attachments to be set as form_xml
        # this checks the db for a meta resembling this and fixes it for postgres
        # https://github.com/dimagi/commcare-hq/blob/3788966119d1c63300279418a5bf2fc31ad37f6f/corehq/blobs/migrate.py#L371
        if not meta and name != "form.xml":
            meta = try_to_get_blob_meta(sql_form.form_id, CODES.form_xml, name)
            if meta:
                meta.type_code = CODES.form_attachment
                meta.save()

        if not meta:
            meta = metadb.new(
                domain=couch_form.domain,
                name=name,
                parent_id=sql_form.form_id,
                type_code=type_code,
                content_type=blob.content_type,
                content_length=blob.content_length,
                key=blob.key,
            )
            meta.save()

        attachments.append(meta)
    sql_form.attachments_list = attachments
Example #54
    def get_forms_with_attachments_meta(self, form_ids, ordered=False):
        assert isinstance(form_ids, list)
        if not form_ids:
            return []
        forms = list(self.get_forms(form_ids))

        attachments = sorted(
            get_blob_db().metadb.get_for_parents(form_ids),
            key=lambda meta: meta.parent_id
        )
        forms_by_id = {form.form_id: form for form in forms}
        attach_prefetch_models(forms_by_id, attachments, 'parent_id', 'attachments_list')

        if ordered:
            sort_with_id_list(forms, form_ids, 'form_id')

        return forms
Example #55
    def toHttpResponse(self):
        if self.download_id.startswith(self.new_id_prefix):
            blob_key = self.download_id
        else:
            # legacy key; remove after all legacy blob downloads have expired
            blob_key = "_default/" + self.identifier
        blob_db = get_blob_db()
        file_obj = blob_db.get(key=blob_key, type_code=CODES.tempfile)

        response = StreamingHttpResponse(FileWrapper(file_obj, CHUNK_SIZE),
                                         content_type=self.content_type)

        response['Content-Length'] = file_obj.content_length
        response['Content-Disposition'] = self.content_disposition
        for k, v in self.extras.items():
            response[k] = v
        return response
Example #56
    def handle(self, files, migrate=False, num_workers=10, **options):
        set_max_connections(num_workers)
        blob_db = get_blob_db()
        if not isinstance(blob_db, MigratingBlobDB):
            raise CommandError(
                "Expected to find migrating blob db backend (got %r)" %
                blob_db)
        old_db = blob_db.old_db
        new_db = blob_db.new_db
        ignored = 0

        try:
            pool = Pool(size=num_workers)
            for filepath in files:
                print("Processing {}".format(filepath))
                with open(filepath, encoding='utf-8') as fh:
                    for line in fh:
                        if not line:
                            continue
                        try:
                            rec = json.loads(line)
                        except ValueError:
                            ignored += 1
                            print(("Ignore {}", line))
                            continue
                        pool.spawn(process, rec, old_db, new_db, migrate)

            print("CTRL+C to abort")
            while not pool.join(timeout=10):
                print("waiting for {} workers to finish...".format(len(pool)))
        except KeyboardInterrupt:
            pass

        if ignored:
            print("Ignored {} malformed records".format(ignored))
        for type_code, stats in sorted(Stats.items.items()):
            try:
                group = BLOB_MIXIN_MODELS[type_code].__name__
            except KeyError:
                group = CODES.name_of(type_code, "type_code %s" % type_code)
            total = stats.new + stats.old + stats.noref + stats.lost
            print("{}: checked {} records".format(group, total))
            print("  Found in new db: {}".format(stats.new))
            print("  Found in old db: {}".format(stats.old))
            print("  Not referenced: {}".format(stats.noref))
            print("  Not found: {}".format(stats.lost))
Example #57
def cache_fixture_items_data(io_data, domain, fixure_name, key_prefix):
    db = get_blob_db()
    try:
        kw = {"meta": db.metadb.get(
            parent_id=domain,
            type_code=CODES.fixture,
            name=fixure_name,
        )}
    except BlobMeta.DoesNotExist:
        kw = {
            "domain": domain,
            "parent_id": domain,
            "type_code": CODES.fixture,
            "name": fixure_name,
            "key": key_prefix + '/' + domain,
        }
    db.put(io_data, **kw)
Example #58
def generate_toggle_csv_download(self, tag, download_id, username):
    toggles = _get_toggles_with_tag(tag)
    total = _get_toggle_item_count(toggles)
    current_progress = [0]

    def increment_progress():
        current_progress[0] += 1
        DownloadBase.set_progress(self, current_progress[0], total)

    timeout_mins = 24 * 60
    with TransientTempfile() as temp_path:
        _write_toggle_data(temp_path, toggles, increment_progress)

        with open(temp_path, 'rb') as file:
            db = get_blob_db()
            meta = db.put(
                file,
                domain="__system__",
                parent_id="__system__",
                type_code=CODES.tempfile,
                key=download_id,
                timeout=timeout_mins,
            )

    now = datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S")
    filename = f'{settings.SERVER_ENVIRONMENT}_toggle_export_{now}'
    expose_blob_download(
        download_id,
        expiry=timeout_mins * 60,
        content_disposition=safe_filename_header(filename, ".csv"),
        download_id=download_id,
    )

    user = CouchUser.get_by_username(username)
    if user:
        url = absolute_reverse("retrieve_download", args=[download_id])
        url += "?get_file"
        valid_until = meta.expires_on.replace(
            tzinfo=pytz.UTC).strftime(USER_DATETIME_FORMAT)
        send_HTML_email("Feature Flag download ready",
                        user.get_email(),
                        html_content=inspect.cleandoc(f"""
        Download URL: {url}
        Download Valid until: {valid_until}
        """))
Example #59
def _get_blob_deletion_pillow(pillow_id,
                              couch_db,
                              checkpoint=None,
                              change_feed=None):
    if checkpoint is None:
        checkpoint = PillowCheckpoint(pillow_id)
    if change_feed is None:
        change_feed = CouchChangeFeed(couch_db, include_docs=False)
    return ConstructedPillow(
        name=pillow_id,
        checkpoint=checkpoint,
        change_feed=change_feed,
        processor=BlobDeletionProcessor(get_blob_db(), couch_db.dbname),
        change_processed_event_handler=PillowCheckpointEventHandler(
            checkpoint=checkpoint,
            checkpoint_frequency=KAFKA_CHECKPOINT_FREQUENCY,
        ),
    )
Example #60
def create_form_with_missing_xml(domain_name):
    form = submit_form_locally(TEST_FORM, domain_name).xform
    form = FormAccessors(domain_name).get_form(form.form_id)
    blobs = get_blob_db()
    with mock.patch.object(blobs.metadb, "delete"):
        if isinstance(form, XFormInstance):
            # couch
            form.delete_attachment("form.xml")
            assert form.get_xml() is None, form.get_xml()
        else:
            # sql
            blobs.delete(form.get_attachment_meta("form.xml").key)
            try:
                form.get_xml()
                assert False, "expected BlobNotFound exception"
            except BlobNotFound:
                pass
    return form