Example #1
    def vbucket_states(opts, spec, bucket_dir):
        """Reads all the latest couchstore files in a directory, and returns
           map of state string (e.g., 'active') to map of vbucket_id to doc."""
        vbucket_states = defaultdict(dict)

        for f in latest_couch_files(bucket_dir):
            vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))
            try:
                store = couchstore.CouchStore(f, 'r')
                try:
                    doc_str = store.localDocs['_local/vbstate']
                    if doc_str:
                        doc = json.loads(doc_str)
                        state = doc.get('state', None)
                        if state:
                            vbucket_states[state][vbucket_id] = doc
                        else:
                            return "error: missing vbucket_state from: %s" \
                                % (f), None
                except Exception as e:
                    return ("error: could not read _local/vbstate from: %s" +
                            "; exception: %s") % (f, e), None
                store.close()
            except Exception as e:
                return ("error: could not read couchstore file: %s" +
                        "; exception: %s") % (f, e), None

        if vbucket_states:
            return 0, vbucket_states
        return "error: no vbucket_states in files: %s" % (bucket_dir), None
Example #2
    def vbucket_states(
        opts, spec, bucket_dir
    ) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[Dict[str, Any]]]:
        """Reads all the latest couchstore files in a directory, and returns
           map of state string (e.g., 'active') to map of vbucket_id to doc."""
        vbucket_states: Dict[str, Any] = defaultdict(dict)

        for f in latest_couch_files(bucket_dir):
            vbucket_id = int(re.match(
                SFD_RE, os.path.basename(f)).group(1))  # type: ignore
            try:
                store = couchstore.CouchStore(f, 'r')
                try:
                    doc_str = store.localDocs['_local/vbstate']
                    if doc_str:
                        doc = json.loads(doc_str)
                        state = doc.get('state', None)
                        if state:
                            vbucket_states[state][vbucket_id] = doc
                        else:
                            return f'error: missing vbucket_state from: {f}', None
                except Exception as e:
                    return f'error: could not read _local/vbstate from: {f}; exception: {e}', None
                store.close()
            except Exception as e:
                return f'error: could not read couchstore file: {f}; exception: {e}', None

        if vbucket_states:
            return 0, vbucket_states
        return f'error: no vbucket_states in files: {bucket_dir}', None
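
Both variants assume SFD_RE captures the vbucket id from couchstore file names of the form <vbucket_id>.couch.<revision>. A self-contained check of that assumption (the tool's actual pattern may differ):

    import os
    import re

    SFD_RE = r'^(\d+)\.couch\.\d+$'  # assumed pattern, not copied from the tool

    for name in ('0.couch.1', '512.couch.3', 'master.couch.1'):
        m = re.match(SFD_RE, os.path.basename('/data/my_bucket/' + name))
        print(name, '->', int(m.group(1)) if m else 'no vbucket id')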
Example #3
def open_latest_store(bucket_dir: str, glob_pattern: str, filter_re: str, default_name: str, mode: str = 'c') \
        -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[couchstore.CouchStore], str]:
    store_paths = latest_couch_files(bucket_dir,
                                     glob_pattern=glob_pattern,
                                     filter_re=filter_re)
    if not store_paths:
        if mode == 'r':
            return 0, None, ''
        store_paths = [f'{bucket_dir}/{default_name}']
    if len(store_paths) != 1:
        return f'error: no single, latest couchstore file: {glob_pattern}; found: {store_paths}', None, ''
    try:
        return 0, couchstore.CouchStore(str(store_paths[0]),
                                        mode), store_paths[0]
    except Exception as e:
        return f'error: could not open couchstore file: {store_paths[0]}; exception: {e}', None, ''
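
A hedged usage sketch for the typed variant above; the directory, glob, and filter patterns for the per-bucket master.couch.<rev> file are illustrative guesses, not values lifted from the tool:

    rv, store, path = open_latest_store('/data/my_bucket',  # hypothetical directory
                                        glob_pattern='master.couch.*',
                                        filter_re=r'^master\.couch\.\d+$',
                                        default_name='master.couch.1',
                                        mode='r')
    if rv != 0:
        raise RuntimeError(rv)
    if store is None:
        print('nothing to read yet')  # mode='r' yields (0, None, '') when no file exists
    else:
        print('opened', path)
        store.close()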
Example #4
def open_latest_store(bucket_dir,
                      glob_pattern,
                      filter_re,
                      default_name,
                      mode='c'):
    store_paths = latest_couch_files(bucket_dir,
                                     glob_pattern=glob_pattern,
                                     filter_re=filter_re)
    if not store_paths:
        if mode == 'r':
            return 0, None, None
        store_paths = [bucket_dir + '/' + default_name]
    if len(store_paths) != 1:
        return ("error: no single, latest couchstore file: %s" +
                "; found: %s") % (glob_pattern, store_paths), None, None
    try:
        return 0, couchstore.CouchStore(str(store_paths[0]),
                                        mode), store_paths[0]
    except Exception as e:
        return ("error: could not open couchstore file: %s" +
                "; exception: %s") % (store_paths[0], e), None, None
Example #5
    def loader(self):
        rv, d = data_dir(self.spec)
        if rv != 0:
            self.queue.put((rv, None))
            return

        source_vbucket_state = \
            getattr(self.opts, 'source_vbucket_state', 'active')

        source_nodes = self.source_bucket['nodes']
        if len(source_nodes) != 1:
            self.queue.put((
                f'error: expected 1 node in source_bucket: {self.source_bucket["name"]}',
                None))
            return

        vbucket_states = source_nodes[0].get('vbucket_states', None)
        if not vbucket_states:
            self.queue.put((
                f'error: missing vbucket_states in source_bucket: {self.source_bucket["name"]}',
                None))
            return

        vbuckets = vbucket_states.get(source_vbucket_state, None)
        if vbuckets is None:  # Empty dict is valid.
            self.queue.put((
                f'error: missing vbuckets in source_bucket: {self.source_bucket["name"]}',
                None))
            return

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        store = None
        vbucket_id = None

        # Level of indirection so the nested callback can rebind the batch
        # (a holdover from Python 2, which lacked the nonlocal statement).
        abatch: List[pump.Batch] = [pump.Batch(self)]

        def change_callback(doc_info):
            if doc_info:
                # Handle the new key namespacing for collections
                cid, key = decodeCollectionID(doc_info.id.encode())
                # Only support keys in the _default collection
                if cid != 0:
                    logging.debug('Skipping as not default collection')
                    return

                if self.skip(key, vbucket_id):
                    return

                if doc_info.deleted:
                    cmd = couchbaseConstants.CMD_DCP_DELETE
                    val = b''
                else:
                    cmd = couchbaseConstants.CMD_DCP_MUTATION
                    val = doc_info.getContents(
                        options=couchstore.CouchStore.DECOMPRESS)
                try:
                    rev_meta_bytes = doc_info.revMeta.get_bytes()
                    if len(rev_meta_bytes) == 18:
                        conf_res = 0
                        cas, exp, flg, flex_meta, dtype = struct.unpack(
                            SFD_REV_META, rev_meta_bytes)
                    elif len(rev_meta_bytes) == 19:
                        cas, exp, flg, flex_meta, dtype, conf_res = struct.unpack(
                            SFD_REV_META_PRE_4_6, rev_meta_bytes)
                    else:
                        raise ValueError(
                            'Does not match pre- or post-4.6 format')
                    meta = bytes([doc_info.revSequence])
                    seqno = doc_info.sequence
                    nmeta = 0
                    msg = (cmd, vbucket_id, key, flg, exp, cas, meta, val,
                           seqno, dtype, nmeta, conf_res)
                    abatch[0].append(msg, len(val))
                except Exception as e:
                    self.queue.put((
                        f'error: could not read couchstore file due to unsupported file format version;'
                        f' exception: {e}', None))
                    return

            if (abatch[0].size() >= batch_max_size
                    or abatch[0].bytes >= batch_max_bytes):
                self.queue.put((0, abatch[0]))
                abatch[0] = pump.Batch(self)

        for f in latest_couch_files(f'{d}/{self.source_bucket["name"]}'):
            vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))
            if vbucket_id not in vbuckets:
                continue

            try:
                store = couchstore.CouchStore(f, 'r')
                store.forEachChange(0, change_callback)
                store.close()
            except Exception as e:
                # MB-12270: Some files may be deleted due to compaction. We can
                # safely ignore them and move to next file.
                pass

        if abatch[0].size():
            self.queue.put((0, abatch[0]))
        self.queue.put((0, None))
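
decodeCollectionID above splits a collection id off the front of each stored key; with collections, keys are prefixed with the collection id as an unsigned LEB128 varint. A minimal sketch of that decoding, assuming the standard LEB128 layout (the tool's own helper may differ in detail):

    def decode_collection_id(key: bytes):
        """Split an unsigned-LEB128 collection id prefix from a stored key."""
        cid, shift = 0, 0
        for i, byte in enumerate(key):
            cid |= (byte & 0x7F) << shift  # low seven bits carry the payload
            if not (byte & 0x80):          # a clear high bit ends the varint
                return cid, key[i + 1:]
            shift += 7
        raise ValueError('unterminated LEB128 prefix')

    print(decode_collection_id(b'\x00beer-1'))  # (0, b'beer-1'): the _default collection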
Example #6
    def loader(self):
        rv, d = data_dir(self.spec)
        if rv != 0:
            self.queue.put((rv, None))
            return

        source_vbucket_state = \
            getattr(self.opts, 'source_vbucket_state', 'active')

        source_nodes = self.source_bucket['nodes']
        if len(source_nodes) != 1:
            self.queue.put(("error: expected 1 node in source_bucket: %s" %
                            (self.source_bucket['name']), None))
            return

        vbucket_states = source_nodes[0].get('vbucket_states', None)
        if not vbucket_states:
            self.queue.put(
                ("error: missing vbucket_states in source_bucket: %s" %
                 (self.source_bucket['name']), None))
            return

        vbuckets = vbucket_states.get(source_vbucket_state, None)
        if vbuckets is None:  # Empty dict is valid.
            self.queue.put(("error: missing vbuckets in source_bucket: %s" %
                            (self.source_bucket['name']), None))
            return

        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        store = None
        vbucket_id = None

        # Level of indirection since we can't use the Python 3 nonlocal statement.
        abatch = [pump.Batch(self)]

        def change_callback(doc_info):
            if doc_info:
                key = doc_info.id
                if self.skip(key, vbucket_id):
                    return

                if doc_info.deleted:
                    cmd = memcacheConstants.CMD_TAP_DELETE
                    val = ''
                else:
                    cmd = memcacheConstants.CMD_TAP_MUTATION
                    val = doc_info.getContents(
                        options=couchstore.CouchStore.DECOMPRESS)

                cas, exp, flg = struct.unpack(SFD_REV_META, doc_info.revMeta)
                meta = struct.pack(SFD_REV_SEQ, doc_info.revSequence)
                msg = (cmd, vbucket_id, key, flg, exp, cas, meta, val)
                abatch[0].append(msg, len(val))

            if (abatch[0].size() >= batch_max_size
                    or abatch[0].bytes >= batch_max_bytes):
                self.queue.put((0, abatch[0]))
                abatch[0] = pump.Batch(self)

        for f in latest_couch_files(d + '/' + self.source_bucket['name']):
            vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))
            if vbucket_id not in vbuckets:
                continue

            try:
                store = couchstore.CouchStore(f, 'r')
            except Exception as e:
                self.queue.put(("error: could not open couchstore file: %s"
                                "; exception: %s" % (f, e), None))
                return

            store.forEachChange(0, change_callback)
            store.close()
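
This older variant unpacks only cas, expiry, and flags from revMeta and repacks the revision sequence. A guess at struct formats consistent with that layout, packed big-endian (the real SFD_REV_META / SFD_REV_SEQ constants live in the tool and may differ):

    import struct

    SFD_REV_META = '>QII'  # assumed: 8-byte cas, 4-byte expiry, 4-byte flags (16 bytes total)
    SFD_REV_SEQ = '>Q'     # assumed: 8-byte revision sequence number

    blob = struct.pack(SFD_REV_META, 12345, 0, 2)
    cas, exp, flg = struct.unpack(SFD_REV_META, blob)
    print(len(blob), cas, exp, flg)  # 16 12345 0 2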
Example #7
                        "error: could not read couchstore file due to unsupported file format version;"
                        " exception: %s" % e, None))
                    return

            if (abatch[0].size() >= batch_max_size
                    or abatch[0].bytes >= batch_max_bytes):
                self.queue.put((0, abatch[0]))
                abatch[0] = pump.Batch(self)

        for f in latest_couch_files(d + '/' + self.source_bucket['name']):
            vbucket_id = int(re.match(SFD_RE, os.path.basename(f)).group(1))
            if vbucket_id not in vbuckets:
                continue

            try:
                store = couchstore.CouchStore(f, 'r')
                store.forEachChange(0, change_callback)
                store.close()
            except Exception as e:
                # MB-12270: Some files may be deleted due to compaction. We can
                # safely ignore them and move on to the next file.
                pass

        if abatch[0].size():
            self.queue.put((0, abatch[0]))
        self.queue.put((0, None))


class SFDSink(pump.Sink):
    """Sink for couchstore in couchbase server/file/directory layout."""
    def __init__(self, opts, spec, source_bucket, source_node, source_map,