Example #1
def couch_blob_helper(doc, *args, **kw):
    obj = BlobHelper(doc, *args, **kw)
    get_domain = DOMAIN_MAP.get(obj.doc_type)
    if get_domain is not None:
        assert not hasattr(obj, "domain"), obj
        obj.domain = get_domain(doc)
    assert hasattr(obj, "domain"), obj.doc_type
    return obj
Example #2
def couch_blob_helper(doc, *args, **kw):
    obj = BlobHelper(doc, *args, **kw)
    get_domain = DOMAIN_MAP.get(obj.doc_type)
    if get_domain is not None:
        assert not hasattr(obj, "domain"), obj
        obj.domain = get_domain(doc)
    elif not hasattr(obj, "domain"):
        obj.domain = None  # will trigger "unknown-domain" error
    return obj
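Both variants above assume a module-level DOMAIN_MAP pairing a doc_type with a callable that derives the document's domain. The mapping itself is not shown on this page; a minimal sketch of its shape, with purely hypothetical entries:

def get_owner_domain(doc):
    # Hypothetical getter (not from the source): derive the domain for a
    # doc type whose documents store it under a different key.
    return doc.get("owner_domain")

DOMAIN_MAP = {
    "SomeDocType": get_owner_domain,  # placeholder doc_type
}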
Example #3
 def _prepare_doc(self, doc):
     if self.load_attachments:
         obj = BlobHelper(doc, self.couchdb)
         doc["_attachments"] = {
             name: {
                 "content_type": meta["content_type"],
                 "content": obj.fetch_attachment(name),
             }
             for name, meta in doc["_attachments"].items()
         }
Example #4
 def _do_migration(self, doc):
     obj = BlobHelper(doc, self.couchdb)
     bucket = obj._blobdb_bucket()
     assert obj.external_blobs and obj.external_blobs == obj.blobs, doc
     for name, meta in obj.blobs.items():
         self.total_blobs += 1
         try:
             content = self.db.old_db.get(meta.id, bucket)
         except NotFound:
             self.not_found += 1
         else:
             with content:
                 self.db.copy_blob(content, meta.info, bucket)
     return True
Example #5
 def migrate(self, doc, couchdb):
     obj = BlobHelper(doc, couchdb)
     bucket = obj._blobdb_bucket()
     assert obj.external_blobs and obj.external_blobs == obj.blobs, doc
     for name, meta in obj.blobs.items():
         self.total_blobs += 1
         try:
             content = self.db.old_db.get(meta.id, bucket)
         except NotFound:
             self.not_found += 1
         else:
             with content:
                 self.db.copy_blob(content, meta.info, bucket)
     return True
Example #6
 def process_doc(self, doc):
     obj = BlobHelper(doc, self.couchdb)
     bucket = obj._blobdb_bucket()
     assert obj.external_blobs and obj.external_blobs == obj.blobs, doc
     from_db = get_blob_db()
     for name, meta in obj.blobs.items():
         self.total_blobs += 1
         try:
             content = from_db.get(meta.id, bucket)
         except NotFound:
             self.not_found += 1
         else:
             with content:
                 self.db.copy_blob(content, meta.info, bucket)
     return True
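Note: Examples #4, #5 and #6 are variants of the same blob-copy loop; they differ only in where the source blob db comes from (self.db.old_db in the first two, get_blob_db() here).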
Example #7
 def _do_migration(self, doc):
     attachments = doc.pop("_attachments")
     external_blobs = doc.setdefault("external_blobs", {})
     obj = BlobHelper(doc, self.couchdb)
     try:
         with obj.atomic_blobs():
             for name, data in list(attachments.items()):
                 if name in external_blobs:
                     continue  # skip attachment already in blob db
                 obj.put_attachment(name=name, **data)
     except ResourceConflict:
         # Do not migrate document if `atomic_blobs()` fails.
         # This is an unlikely state, but could happen if the
         # document is (externally) modified between when the
         # migration fetches and processes the document.
         return False
     return True
Example #8
 def migrate(self, doc, couchdb):
     attachments = doc.pop("_attachments")
     external_blobs = doc.setdefault("external_blobs", {})
     obj = BlobHelper(doc, couchdb)
     try:
         with obj.atomic_blobs():
             for name, data in list(attachments.items()):
                 if name in external_blobs:
                     continue  # skip attachment already in blob db
                 obj.put_attachment(name=name, **data)
     except ResourceConflict:
         # Do not migrate document if `atomic_blobs()` fails.
         # This is an unlikely state, but could happen if the
         # document is (externally) modified between when the
         # migration fetches and processes the document.
         return False
     return True
Example #9
 def __init__(self, doc, database, exclude_attachments=False):
     self._attachments = {}
     self.attachments = {}
     self.database = database
     _attachments = doc.get("_attachments", None) or {}
     _attachments.update(doc.get("external_blobs", None) or {})
     if _attachments:
         if not exclude_attachments:
             self._attachments = _attachments
             obj = BlobHelper(doc, database, None)
             self.attachments = {k: obj.fetch_attachment(k) for k in _attachments}
         if doc.get("_attachments"):
             doc["_attachments"] = {}
         if "external_blobs" in doc:
             doc["external_blobs"] = {}
     self.doc = doc
     del self.doc['_rev']
Example #10
def save(transform, database):
    # this is a fancy save method because we do some special casing
    # with the attachments and with deleted documents
    def save():
        try:
            database.save_doc(transform.doc, force_update=True)
        except ResourceNotFound:
            # this is likely a document that was deleted locally that
            # you later want to copy back over there is a wacky hack
            # that you can use to handle this
            rev = get_deleted_doc_rev(database, transform.doc['_id'])
            transform.doc['_rev'] = rev
            database.save_doc(transform.doc)
    if transform.attachments:
        obj = BlobHelper(transform.doc, database)
        with obj.atomic_blobs(save):
            for name, attach in transform.attachments.items():
                content_type = transform._attachments[name]["content_type"]
                obj.put_attachment(attach, name, content_type=content_type)
    else:
        save()
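A minimal usage sketch, assuming `transform` is an instance of the class whose __init__ appears in Example #9 (the class name DocumentTransform and the database handles are assumptions, not from this page):

# Hypothetical usage: copy one document plus its attachments into
# another couch database; atomic_blobs(save) makes the doc save and the
# blob puts land together.
transform = DocumentTransform(source_doc, source_db)  # assumed class name
save(transform, target_db)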
Example #11
        def return_iterator():
            yield "<restoredata>"
            for result in res['hits']['hits']:
                data_row = result['fields']

#                if data_row['script_case_id'] not in active_patients:
#                    continue
                try:
                    xml_str = (BlobHelper(data_row, db)
                        .fetch_attachment('form.xml')
                        .replace("<?xml version=\'1.0\' ?>", '')
                        .replace("<?xml version='1.0' encoding='UTF-8' ?>", ''))
                    yield xml_str
                except Exception as ex:
                    logging.error("for downloader: error fetching attachment: %s" % ex)
Example #12
def broken_suite_files(build):
    db = Application.get_db()
    error = None
    try:
        suite = BlobHelper(build, db).fetch_attachment('files/suite.xml')
    except ResourceNotFound:
        error = 'build has no attachment files/suite.xml'
    else:
        try:
            validate_suite(suite)
        except SuiteValidationError as e:
            # bind to an outer name: the `as` target is unbound after
            # the except block on Python 3
            error = e
    if error:
        yield '%s\t%s\t%s\t%s\t%s\n' % (
            build.get('built_on'),
            build.get('domain'),
            build['_id'],
            build.get('copy_of'),
            error,
        )
Example #13
def _get_submission_xml(xform, db):
    xml = BlobHelper(xform, db).fetch_attachment('form.xml')
    if isinstance(xml, six.text_type):
        xml = xml.encode('utf-8')
    return xml
Example #14
def _get_submission_xml(xform, db):
    xml = BlobHelper(xform, db, CODES.form_xml).fetch_attachment('form.xml')
    if isinstance(xml, six.text_type):
        xml = xml.encode('utf-8')
    return xml
Example #15
def migrate(slug, doc_type_map, doc_migrator_class, filename=None, reset=False,
            max_retry=2):
    """Migrate blobs

    :param slug: Migration name.
    :param doc_type_map: Dict of `doc_type_name: model_class` pairs.
    :param doc_migrator_class: A `BaseDocMigrator` subclass used to
    migrate documents.
    :param filename: File path for intermediate storage of migration
    data.
    :param reset: If true, reset existing migration state (if any),
    causing all documents to be reconsidered for migration. This does
    not reset the django migration flag, which is set when the
    migration completes successfully.
    :param max_retry: Number of times to retry migrating a document
    before giving up.
    :returns: A tuple `(<num migrated>, <num skipped>)`
    """
    couchdb = next(iter(doc_type_map.values())).get_db()
    assert all(m.get_db() is couchdb for m in doc_type_map.values()), \
        "documents must live in same couch db: %s" % repr(doc_type_map)

    dirpath = None
    if filename is None:
        dirpath = mkdtemp()
        filename = os.path.join(dirpath, "export.txt")

    def encode_content(data):
        if isinstance(data, six.text_type):
            data = data.encode("utf-8")
        # decode so the base64 value is JSON-serializable on Python 3
        return b64encode(data).decode("ascii")

    total = sum(get_doc_count_by_type(couchdb, doc_type)
                for doc_type in doc_type_map)
    print("Migrating {} documents: {}...".format(
        total,
        ", ".join(sorted(doc_type_map))
    ))
    migrated = 0
    skipped = 0
    visited = 0
    start = datetime.now()
    doc_migrator = doc_migrator_class()
    load_attachments = doc_migrator.load_attachments
    blobs_key = doc_migrator.blobs_key
    iter_key = slug + "-blob-migration"
    docs_by_type = ResumableDocsByTypeIterator(couchdb, doc_type_map, iter_key)
    if reset:
        docs_by_type.discard_state()

    with open(filename, 'w') as f:
        for doc in docs_by_type:
            visited += 1
            if doc.get(blobs_key):
                if load_attachments:
                    obj = BlobHelper(doc, couchdb)
                    doc["_attachments"] = {
                        name: {
                            "content_type": meta["content_type"],
                            "content": obj.fetch_attachment(name),
                        }
                        for name, meta in doc["_attachments"].items()
                    }
                    # make copy with encoded attachments for JSON dump
                    backup_doc = dict(doc)
                    backup_doc["_attachments"] = {
                        name: {
                            "content_type": meta["content_type"],
                            "content": encode_content(meta["content"]),
                        }
                        for name, meta in doc["_attachments"].items()
                    }
                else:
                    backup_doc = doc
                f.write('{}\n'.format(json.dumps(backup_doc)))
                f.flush()
                ok = doc_migrator.migrate(doc, couchdb)
                if ok:
                    migrated += 1
                else:
                    try:
                        docs_by_type.retry(doc, max_retry)
                    except TooManyRetries:
                        print("Skip: {doc_type} {_id}".format(**doc))
                        skipped += 1
                if (migrated + skipped) % 100 == 0:
                    elapsed = datetime.now() - start
                    remaining = elapsed / visited * (total - visited)
                    print("Migrated {}/{} of {} documents in {} ({} remaining)"
                          .format(migrated, visited, total, elapsed, remaining))

    doc_migrator.after_migration()

    if dirpath is not None:
        os.remove(filename)
        os.rmdir(dirpath)

    print("Migrated {}/{} of {} documents ({} previously migrated, {} had no attachments)."
        .format(
            migrated,
            visited,
            total,
            total - visited,
            visited - (migrated + skipped)
        ))
    if skipped:
        print(MIGRATIONS_SKIPPED_WARNING.format(skipped))
    else:
        BlobMigrationState.objects.get_or_create(slug=slug)[0].save()
    return migrated, skipped
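Given the docstring above, a hedged sketch of how this entry point might be invoked; the slug, model class, and migrator class below are placeholders, not taken from this page:

# Hypothetical invocation (placeholder names): migrate one doc type's
# attachments into the blob db.
migrated, skipped = migrate(
    slug="mydoc-blob-migration",
    doc_type_map={"MyDocType": MyDocType},
    doc_migrator_class=MyDocMigrator,  # assumed BaseDocMigrator subclass
)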
Example #16
def _get_submission_xml(xform, db):
    return BlobHelper(xform, db, CODES.form_xml).fetch_attachment('form.xml')