Example #1
def get_colors(obs, factors, as_df=True):
    v_colors = dict()
    for f in factors:
        v = obs.loc[:, f]
        d = OrderedDict(zip(sorted(v.unique()), colors[f]))
        v_colors[f] = pd.Series(v.map(d).values, index=v.index)
    return v_colors if not as_df else pd.DataFrame(v_colors)
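The excerpt relies on a module-level `colors` dict and imports that are not shown. A minimal usage sketch, with toy stand-ins for those objects (the column name and palette below are assumptions):

from collections import OrderedDict   # used by get_colors above
import pandas as pd

colors = {"condition": ["#1f77b4", "#ff7f0e"]}   # one color per sorted level (assumed)
obs = pd.DataFrame({"condition": ["treated", "control", "treated"]})

color_df = get_colors(obs, factors=["condition"])
# "control" sorts first and gets "#1f77b4"; both "treated" rows get "#ff7f0e"
print(color_df)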
Example #2
def expand_range(value):
    """ receive a value in form a-d,x-y,z and return list of expanded values. 
        Note, all remap functions expect strings so the expanded range will 
        also be a list of strings.
        if unable to expand range then return list with original value
    """
    try:
        result = []
        for v in list(set(value.split(","))):
            r1 = re.search(RANGE_REGEX, v)
            if r1 is None:
                result.append(v)
                continue
            start = int(r1.group("s"))
            end = int(r1.group("e"))
            if start > end:
                for i in range(end, start + 1):
                    result.append("%d" % i)
            else:
                for i in range(start, end + 1):
                    result.append("%d" % i)
        return sorted(set(result))
    except Exception as e:
        logger.warn("failed to expand range (%s): %s" % (value, e))
        return ["%s" % value]
Example #3
    def __init__(self, game, network, train_directory):
        self.game = game
        env_name = '%sNoFrameskip-v4' % game
        env = gym.make(env_name)
        #env = gym.wrappers.Monitor(env, '/tmp/temp_%s' % game, mode='evaluation', force=True)

        vb_file = os.path.join(train_directory, "vb.npy")
        vb = np.load(vb_file)
        parameters_file = sorted(os.listdir(train_directory))[-3]

        self.policy = Policy(env, network, "elu")

        parameters_path = os.path.join(train_directory, parameters_file)
        print('Using parameters file %s \n' % parameters_path)

        with open(parameters_path, 'rb') as f:
            parameters = pickle.load(f)['parameters']

        self.policy.set_parameters(parameters)
        self.policy.set_vb(vb)
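Only the loading side is shown above; the sketch below illustrates the on-disk layout the constructor seems to expect. File names, shapes, and contents here are guesses inferred from the np.load/pickle.load calls, not taken from the original training code.

import os
import pickle
import numpy as np

train_directory = "./train_dir"                      # hypothetical path
os.makedirs(train_directory, exist_ok=True)

# virtual batch consumed by set_vb(); the shape is a placeholder
np.save(os.path.join(train_directory, "vb.npy"), np.zeros((128, 84, 84, 4)))

# checkpoints are pickles holding a dict with a 'parameters' entry;
# sorted(os.listdir(...))[-3] above selects one of the last such files
with open(os.path.join(train_directory, "parameters_000100"), "wb") as f:
    pickle.dump({"parameters": np.zeros(1000, dtype=np.float32)}, f)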
Example #4
    def gen_format_all(self, usage):
        """
        generates string for format all
        """
        format_string = " "
        core_strings = []
        for core, usage in usage.items():
            if core == 'usage_cpu' and self.exclude_average:
                continue
            elif core == 'usage':
                continue

            core = core.replace('usage_', '')
            string = self.formatter.vformat(self.format_all, (),
                                            {'core': core, 'usage': usage})
            core_strings.append(string)

        core_strings = sorted(core_strings)

        return format_string.join(core_strings)
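A standalone sketch of the same loop, assuming `self.formatter` is a `string.Formatter` and `self.format_all` is a template like `'{core}: {usage}%'` (both assumptions; they are set elsewhere in the module):

from string import Formatter

formatter = Formatter()
format_all = "{core}: {usage}%"   # assumed template
usage = {"usage": 37, "usage_cpu": 35, "usage_0": 40, "usage_1": 30}

# mirrors the loop body above: vformat(format_string, args, kwargs)
core_strings = sorted(
    formatter.vformat(format_all, (), {"core": core.replace("usage_", ""), "usage": val})
    for core, val in usage.items()
    if core not in ("usage", "usage_cpu"))
print(" ".join(core_strings))   # -> "0: 40% 1: 30%"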
Example #5
    def gen_format_all(self, usage):
        """
        generates string for format all
        """
        format_string = " "
        core_strings = []
        for core, usage in usage.items():
            if core == 'usage_cpu' and self.exclude_average:
                continue
            elif core == 'usage':
                continue

            core = core.replace('usage_', '')
            string = self.formatter.format(format_string=self.format_all,
                                           core=core,
                                           usage=usage)
            core_strings.append(string)

        core_strings = sorted(core_strings)

        return format_string.join(core_strings)
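The only difference from Example #4 is how the template is handed to the formatter. A short note, as a sketch: on Python 3.7 and later, string.Formatter.format() accepts the format string only positionally, so the keyword call used in this variant raises a TypeError; the positional call below (equivalent to the vformat form in Example #4) is the portable spelling.

from string import Formatter

formatter = Formatter()
# Passing the template by keyword, as this variant does, fails on Python 3.7+:
#   formatter.format(format_string="{core}: {usage}%", core="0", usage=40)  # TypeError
# The positional form works everywhere:
print(formatter.format("{core}: {usage}%", core="0", usage=40))   # -> "0: 40%"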
Example #6
            # subsample
            ds = ds.sample(n=n_cells if n_cells < ds.shape[0] else ds.shape[0],
                           replace=False).loc[:, genes]

            # sort by timepoint, patient
            d = data.obs.loc[ds.index].sort_values(['patient_id', 'timepoint'])
            ds = ds.loc[d.index, :]

            colors = list()
            for f, cmap in [
                ('patient_id', 'tab20'),
                ('timepoint', 'tab20'),
                # ('cell_type', 'tab20')
            ]:
                q = data.obs.loc[ds.index, f]
                qq = sorted(q.unique())
                m = dict(zip(qq, range(len(qq))))
                cmap = plt.get_cmap(cmap)
                colors.append(cmap([m[x] for x in q]))

            g = sns.clustermap(ds.T.dropna().T,
                               row_cluster=False,
                               row_colors=colors,
                               metric="euclidean",
                               xticklabels=False,
                               yticklabels=False,
                               cbar_kws={"label": "Expression\n(Z-score)"},
                               cmap="RdBu_r",
                               center=0,
                               robust=True)
            g.ax_heatmap.set_xlabel("Genes (n = {})".format(ds.shape[1]))
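The row_colors construction above depends on `data`, `genes`, and other objects from the surrounding pipeline. A self-contained, purely illustrative sketch of the same trick on toy data:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
ds = pd.DataFrame(rng.normal(size=(20, 10)))              # 20 cells x 10 genes
obs = pd.DataFrame({"patient_id": rng.choice(["P1", "P2", "P3"], 20),
                    "timepoint": rng.choice(["T0", "T1"], 20)}, index=ds.index)

row_colors = []
for f, cmap_name in [("patient_id", "tab20"), ("timepoint", "tab20")]:
    q = obs[f]
    levels = sorted(q.unique())
    m = dict(zip(levels, range(len(levels))))
    cmap = plt.get_cmap(cmap_name)
    row_colors.append(cmap([m[x] for x in q]))   # one RGBA array per annotation

g = sns.clustermap(ds, row_cluster=False, row_colors=row_colors,
                   cmap="RdBu_r", center=0, robust=True)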
Example #7
def search_entries(entries,
                   regex,
                   ignore_case=False,
                   sort_reg=None,
                   sort_neg=False,
                   retro_option=False,
                   retro_space=False,
                   retro_month=False,
                   retro_full=False,
                   retro_deletion=False):
    """
    loop through entries list and print each full entry that matches regex
    """

    results = []
    logging.debug("regex: %s, ignore_case: %r, negative: %r" %
                  (regex, ignore_case, sort_neg))
    start = time.time()
    matches = 0
    if ignore_case and regex is not None:
        for e in entries:
            matched = False
            if re.search(regex, e, re.IGNORECASE):
                matches += 1
                matched = True
            if (matched and not sort_neg) or (not matched and sort_neg):
                results.append(e)
    elif regex is not None:
        for e in entries:
            matched = False
            if re.search(regex, e):
                matches += 1
                matched = True
            if (matched and not sort_neg) or (not matched and sort_neg):
                results.append(e)
    else:
        # get all entries
        results = entries

    logging.debug("search time: %s" % td(start, time.time()))
    logging.debug("search matches: %d" % matches)
    if len(results) == 0:
        logging.debug("no results to print")
        return

    # sort the 'filtered' results into final result list
    final_results = []
    if sort_reg is not None:
        logging.debug("sort_reg: %s" % sort_reg)
        sort_match_count = 0
        start = time.time()
        ts_results_default = []  # catch all if we don't match any results
        ts_results = {}
        for r in results:
            r1 = re.search(sort_reg, r)
            if r1 is not None:
                sort_match_count += 1
                m = r1.group("m")
                if m not in ts_results:
                    ts_results[m] = []
                ts_results[m].append(r)
            else:
                ts_results_default.append(r)
        # final results is combined ts_results_default and sorted results
        final_results = ts_results_default
        for k in sorted(ts_results):
            final_results += ts_results[k]
        logging.debug("sort attribute time: %s" % td(start, time.time()))
        logging.debug("sort attribute matches: %d" % sort_match_count)
    else:
        final_results = results

    # print all results (peek at the first entry to support the retro options)
    if retro_option:
        retro_peek = results[0].splitlines()[0]
        if "# aaaModLR" in retro_peek:
            new_results = aaa_entry_parse(final_results, retro_full)
        elif "# eventRecord" in retro_peek:
            new_results = event_entry_parse(final_results, retro_full)
        elif "# faultRecord" in retro_peek:
            new_results = fault_entry_parse(final_results, retro_full,
                                            retro_deletion)
        else:
            logging.error("Unable to Determine Record Type")
            new_results = final_results

        if retro_space:
            new_results = space_option(new_results)
        for e in new_results:
            if retro_month:
                print(letter_month(e))
            else:
                print(e)
    # normal non-retro printing
    else:
        for r in final_results:
            print("".join(r))
Example #8
def execute_snapshot(snapshot_id):
    """ perform snapshot operation for provided fabric name and definition name.
        The snapshot will be stored in config["DATA_DIR"]. The progress of the
        collection and final result are saved to the database.

        format of snapshot:
            snapshot.<fabric-name>.<date>.tgz
            /md5checksum                checksum of bundle.tgz
            /bundle.tgz
                /snapshot.json          snapshot object attributes
                /definition.json
                /node-X                 note, node-0 is for global objects
                    /<classname.json>   per-object class collection

    """
    from ..utils import (get_app_config, format_timestamp, pretty_print)
    logger.debug("execute snapshot for: %s" % snapshot_id)

    config = get_app_config()
    src = config.get("TMP_DIR", "/tmp/")
    dst = config.get("DATA_DIR", "/tmp/")

    reg = "topology/pod-[0-9]+/node-(?P<node>[0-9]+)(/|$)"
    reg = re.compile(reg)

    # verify that snapshot object exists, if not abort since no way to alert
    # user of final result
    ts = time.time()
    apic = None
    s = Snapshots.load(_id=snapshot_id)
    if not s.exists():
        logger.warn("snapshot %s not found" % snapshot_id)
        return

    filename = "snapshot.%s.%s.tgz" % (s.fabric, format_timestamp(ts,
                                                                  msec=True))
    filename = get_unique_filename(dst, filename)
    src = "%s/%s" % (src, re.sub("\.tgz$", "", filename))

    # update snapshot state with error message and error status
    def fail(msg="", cleanup=True):
        if len(msg) > 0: s.error = msg
        s.status = "error"
        s.total_time = abs(time.time() - ts)
        logger.debug("snapshot %s failed: %s" % (s._id, s.error))
        if not s.save():
            logger.warn("failed to save snapshot %s state" % s._id)
        if cleanup: tmp_cleanup()
        return

    # clean up working directory
    def tmp_cleanup():
        if os.path.isdir(src):
            logger.debug("cleanup directory %s" % src)
            try:
                shutil.rmtree(src)
            except Exception as e:
                logger.warn("failed to cleanup directory(%s): %s" % (src, e))
        else:
            logger.debug("cleanup directory %s not found" % src)
        if apic is not None: apic.close()

    # update snapshot progress percentage
    def progress(i, total):
        abort_snapshot()  # check for abort signal before progress update
        if total > 0: p = round(i / (1.0 * total), 2)
        else: p = 0
        s.progress = p
        s.total_time = abs(time.time() - ts)
        if not s.save():
            logger.warn("failed to save snapshot %s state" % s._id)
        return

    # user can force snapshot execution to stop by deleting the object
    # abort_snapshot is checked at a regular interval.  If snapshot was deleted
    # then perform cleanup and return True, else return False
    def abort_snapshot():
        _s = Snapshots.load(_id=s._id)
        if not _s.exists():
            logger.debug("snapshot %s has been deleted, aborting" % s._id)
            tmp_cleanup()
            raise Exception("snapshot operation %s aborted" % s._id)

    # write dict data to specified path
    def json_write(path, data):
        if not re.search(r"\.json$", path): path = "%s.json" % path
        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))
        logger.debug("saving to %s" % (path))
        with open("%s" % path, "w") as f:
            json.dump(data, f)

    # init progress, error, status
    s.nodes = []
    s.fabric_domain = ""
    s.progress = 0.0
    s.filename = ""
    s.filesize = 0
    s.start_time = ts
    s.total_time = 0
    s.wait_time = 0
    s.error = ""
    s.status = "running"
    s.save()

    try:
        # setup working directory to store collection outputs
        if os.path.exists(src):
            return fail("tmp directory already exists: %s" % src,
                        cleanup=False)
        os.makedirs(src)

        # get definition managed_objects to collect
        d = Definitions.load(definition=s.definition)
        if not d.exists():
            return fail("unable to read definition: %s" % s.definition)
        managed_objects = d.get_managed_objects()

        # ensure all required objects are within definition.  If not add them
        for r in Remap.REQUIRED_MANAGED_OBJECTS:
            if r not in managed_objects:
                logger.debug("adding required mo to definition: %s" % r)
                mo = ManagedObjects(classname=r)
                mo.analyzer = "exclude"
                managed_objects[mo.classname] = mo.to_json()

        # create apic session
        apic = aci_utils.get_apic_session(s.fabric)
        if apic is None: return fail("unable to connect to %s apic" % s.fabric)

        ret = aci_utils.get_class(apic, "infraCont")
        if ret is not None and len(ret)>0 and "infraCont" in ret[0] and \
            "attributes" in ret[0]["infraCont"] and \
            "fbDmNm" in ret[0]["infraCont"]["attributes"]:
            s.fabric_domain = ret[0]["infraCont"]["attributes"]["fbDmNm"]

        complete_count = 0
        for classname in managed_objects:
            complete_count += 1
            o = managed_objects[classname]
            if len(o["classname"]) == 0:
                logger.warn("%s ignoring empty classname %s" %
                            (s.definition, o["classname"]))
                continue
            _t = time.time()
            response_classes = []
            logger.debug("getting data for class %s" % o["classname"])
            if o["pseudo"]:
                # for pseudo mo, collect each of the classnames defined in the
                # corresponding custom analyzer.  If no custom analyzer then
                # no objects will be collected...
                if o["analyzer"] in ManagedObjects.ANALYZERS and \
                    "classnames" in ManagedObjects.ANALYZERS[o["analyzer"]]:
                    ret = []
                    for ac in ManagedObjects.ANALYZERS[
                            o["analyzer"]]["classnames"]:
                        order = "%s.%s" % (ac, "dn")  # static to dn for now
                        sret = aci_utils.get_class(apic, ac, orderBy=order)
                        if sret is not None:
                            ret += sret
                            response_classes.append(ac)
            elif len(o["uri"]) > 0:
                ret = aci_utils.get(apic, o["uri"])
                response_classes.append(o["classname"])
            else:
                order = "%s.%s" % (o["classname"], o["key"])
                ret = aci_utils.get_class(apic, o["classname"], orderBy=order)
                response_classes.append(o["classname"])
            s.wait_time += abs(time.time() - _t)
            # if failed to get object, might not exists on current version of
            # code.  just continue
            if ret is None:
                progress(complete_count, len(managed_objects) + 1)
                continue

            # need to parse each received object and group based on node-id
            # cache the expected response_classes value (usually the same for
            # all objects except for the pseudo case)
            cc_match = None
            nodes = {}
            for c in ret:
                node = "0"
                if type(c) is dict and len(c) >= 1:
                    cc = None
                    # check for each possible response_class in returned object
                    if cc_match is not None and cc_match in c:
                        cc = c[cc_match]
                    else:
                        for rclass in response_classes:
                            if rclass in c:
                                cc_match = rclass
                                cc = c[cc_match]
                                break
                            else:
                                # check if 'children' is present and first level
                                # child has corresponding object. before child
                                # check, try to extract node-id (i.e., stats
                                # object node-id from original parent object)
                                tc = next(iter(c.values()))
                                if "attributes" in tc and "dn" in tc["attributes"]:
                                    r = reg.search(tc["attributes"]["dn"])
                                    if r is not None: node = r.group("node")
                                if "children" in tc:
                                    for child in tc["children"]:
                                        if rclass in child:
                                            cc = child[rclass]
                                            c = child
                                            break
                    if cc is None:
                        logger.debug("failed to extract data for %s from %s" %
                                     (o["classname"], c))
                        continue
                    if type(cc) is dict and len(cc) >= 1 and "attributes" in cc:
                        if "dn" in cc["attributes"]:
                            r = reg.search(cc["attributes"]["dn"])
                            if r is not None: node = r.group("node")
                        if node not in nodes: nodes[node] = []
                        if node not in s.nodes: s.nodes.append(node)
                        nodes[node].append(c)
                        continue
                logger.debug("skipping unsupported object: %s" % c)

            # for each node, write results to file
            for n in nodes:
                json_write("%s/node-%s/%s" % (src, n, o["classname"]),
                           nodes[n])

            # check for abort and then update progress
            progress(complete_count, len(managed_objects) + 1)

        # sort nodes before saving
        s.nodes = sorted(s.nodes)

        json_write("%s/snapshot" % src, s.to_json())
        json_write("%s/definition" % src, {
            "definition": s.definition,
            "managed_objects": managed_objects
        })

        # bundle all files
        ret = aci_utils.run_command("cd %s ; tar -zcvf bundle.tgz ./*" % src)
        if ret is None: return fail("failed to create snapshot bundle")

        # get md5 for bundle
        md5 = aci_utils.get_file_md5("%s/bundle.tgz" % src)
        if md5 is None: return fail("failed to calculate bundle md5")
        json_write("%s/md5checksum" % src, {"md5checksum": md5})

        # create final bundle with bundle.tgz and md5checksum.json
        cmd = "cd %s ; tar --force-local -zcvf %s " % (src, filename)
        cmd += "./bundle.tgz ./md5checksum.json"
        if aci_utils.run_command(cmd) is None:
            return fail("failed to create snapshot bundle")

        # to prevent race condition of 'delete' operation during compression,
        # perform one last abort check before moving complete file to dst
        abort_snapshot()

        # create the directory if not present
        if not os.path.isdir(dst): os.makedirs(dst)

        # move final bundle to dst directory and cleanup tmp directory
        if aci_utils.run_command("mv %s/%s %s/" % (src, filename, dst)):
            return fail("failed to save snapshot bundle")
        tmp_cleanup()

        # after everything is complete, update progress to 100% and complete
        s.progress = 1.0
        s.status = "complete"
        s.filename = "%s/%s" % (dst, filename)
        s.filesize = os.path.getsize("%s/%s" % (dst, filename))
        s.total_time = abs(time.time() - ts)
        if not s.save(): logger.warn("failed to save snapshot status")
        logger.debug("snapshot %s complete: %s " % (snapshot_id, s))

    except Exception as e:
        logger.debug(traceback.format_exc())
        fail("unexpected error occurred: %s" % e)
Example #9
def gen_gif(path_in,
            pattern='.*',
            fn_out='movie.gif',
            duration=0.5,
            file_range=None,
            repeat={},
            path_out=None,
            user_confirm=True):
    """Generate a gif from a collection of images in a given directory.
    path_in:        path of input images
    pattern         regular expression matching file to include in gif
    fn_out          filename for output gif
    duration        duration between frames in seconds
    file_range      replaces "{number}" in pattern with re matching a range of numbers
    repeat          dict of frame numbers and the number of times those frames should be repeated
    path_out        directory to write gif to
    """
    assert os.path.isdir(path_in)
    if path_out is None:
        path_out = path_in
    assert os.path.isdir(path_out)

    if (file_range is not None) and ('{range}' in fn_out):
        fn_out = fn_out.format(range='{}-{}'.format(file_range[0], file_range[1]))

    if file_range is not None:
        assert '{number}' in pattern, 'Include "{number}" in pattern when using file range'
        pattern = pattern.format(number=regexp_range(*file_range))

    filenames = fn_filter(path_in, pattern)
    filenames = sorted(filenames)

    nframes = len(filenames)
    assert nframes > 0, 'No frames to create gif from'

    if -1 in repeat:  # If repeating final frame, replace '-1' with its index
        repeat[nframes - 1] = repeat[-1]
        repeat.pop(-1)

    if user_confirm:
        print('{} frames will be combined into gif in: {}'.format(
            nframes, os.path.join(path_out, fn_out)))
        if nframes < 60:
            pprint(filenames)
        choice = input('Proceed? [y/n]: ')
        if not choice == 'y':
            print('gif was not produced')
            return  # return without writing the gif

    with imageio.get_writer(os.path.join(path_out, fn_out),
                            mode='I',
                            format='GIF-FI',
                            duration=duration) as writer:

        for i, filename in enumerate(filenames):
            image = imageio.imread(os.path.join(path_in, filename))
            writer.append_data(image)
            if repeat is not None and i in repeat.keys():
                for j in np.arange(repeat[i]):
                    writer.append_data(image)

    print('Wrote gif containing {} frames to: {}'.format(
        nframes, os.path.join(path_out, fn_out)))
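An illustrative call, assuming the helpers `fn_filter` and `regexp_range` from the same module are available and the input frames are named like frame_<number>.png:

# Illustrative call: build a gif from frames whose {number} part falls in the
# 0-99 range, repeating the final frame 10 extra times, with no prompt.
gen_gif("./frames",
        pattern=r"frame_{number}\.png",
        fn_out="movie_{range}.gif",
        duration=0.1,
        file_range=(0, 99),
        repeat={-1: 10},
        user_confirm=False)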
Example #10
def execute_compare(compare_id):
    """ perform comparison between two snapshots """
    logger.debug("execute compare for: %s" % compare_id)

    ts = time.time()    
    c = Compare.load(_id=compare_id)
    if not c.exists():
        logger.warn("compare object %s not found" % compare_id)
        return

    # init progress, error, status to block second process from running
    c.progress = 0.0
    c.start_time = ts
    c.total_time = 0
    c.error = ""
    c.status = "running"
    c.save()

    # delete any previous CompareResults
    CompareResults.delete(_filters={"compare_id":c._id})

    # create a working directory to extract objects
    config = get_app_config()
    tmp = config.get("TMP_DIR", "/tmp/")
    tmp = "%s/compare.%s.%s" % (tmp, c._id, int(ts)) 
    
    # update state with error message and error status
    def fail(msg="", cleanup=True):
        if len(msg)>0: c.error = msg
        c.status = "error"
        c.total_time = abs(time.time() - ts)
        logger.debug("compare %s failed: %s" % (c._id, c.error))
        if not c.save(): logger.warn("failed to save compare %s state"%c._id)
        if cleanup: tmp_cleanup()
        return

    # clean up working directory
    def tmp_cleanup():
        if os.path.isdir(tmp):
            logger.debug("cleanup directory %s" % tmp)
            try: shutil.rmtree(tmp)
            except Exception as e: 
                logger.warn("failed to cleanup directory(%s): %s"%(tmp,e))
        else: logger.debug("cleanup directory %s not found" % tmp)

    # update snapshot progress percentage
    def progress(i, total):
        abort_compare() # check for abort signal before progress update
        if total>0: p = round(i/(1.0*total), 2)
        else: p = 0
        if p > 1: p = 1
        c.progress = p
        c.total_time = abs(time.time() - ts)
        if not c.save(): logger.warn("failed to save compare %s state"%c._id)
        logger.debug("progress %s/%s = %s" % (i, total, p))
        return

    # user can force compare execution to stop by deleting the object
    # abort_compare is checked at a regular interval.  If compare was deleted
    # then perform cleanup and return True, else return False
    def abort_compare():
        _s = Compare.load(_id=c._id)
        if not _s.exists() or _s.status == "abort":
            logger.debug("compare object %s has been aborted"%c._id)
            tmp_cleanup()
            raise Exception("compare operation %s aborted" % c._id)

    # return dict representation of json in provided filename
    def get_json_data(fname):
        with open(fname, "r") as f:
            return json.load(f)

    # load snapshot1 (s1) and snapshot2 (s2) along with definition in compare object
    s1 = Snapshots.load(_id=c.snapshot1)
    s2 = Snapshots.load(_id=c.snapshot2)
    d = Definitions.load(definition=c.definition)
    classnames = []
    if not s1.exists(): return fail("snapshot %s not found" % c.snapshot1)
    if not s2.exists(): return fail("snapshot %s not found" % c.snapshot2)
    if s1.status != "complete":
        return fail("snapshot %s not complete" % c.snapshot1)
    if s2.status != "complete":
        return fail("snapshot %s not complete" % c.snapshot2)
    if not d.exists(): 
        logger.debug("unknown definition provided in compare request: '%s'", c.definition)
    else:
        classnames = d.get_managed_objects()

    try:
        # create tmp directory for extracting files
        if os.path.exists(tmp):
            return fail("tmp directory already exists: %s"%tmp, cleanup=False)
        logger.debug("make directory: %s" % tmp)
        os.makedirs(tmp)

        # extract snapshots to tmp directories
        for index, s in enumerate([s1, s2]):
            td = "%s/%s/%s" % (tmp, index, s._id)
            logger.debug("make directory: %s" % td)
            os.makedirs(td)
            if not os.path.exists(s.filename):
                return fail("snapshot file %s not found" % s.filename)
            shutil.copy(s.filename, td)
            fname = s.filename.split("/")[-1]
            # extract snapshot contents
            cmd = "cd %s ; tar --force-local -zxf %s" % (td, fname)
            if aci_utils.run_command(cmd) is None:
                return fail("failed to extract snapshot %s" % s.id)
            for f in ["bundle.tgz", "md5checksum.json"]:
                if not os.path.exists("%s/%s" % (td, f)):
                    return fail("snapshot %s missing required file %s"%(s.id,f))
            # check bundle against md5
            embedded = get_json_data("%s/md5checksum.json" % td)["md5checksum"]
            md5 = aci_utils.get_file_md5("%s/bundle.tgz" % td)
            if md5 is None or md5!=embedded:
                return fail("snapshot %s invalid md5(%s) expected(%s)" % (
                    s.id, md5, embedded))
            # extract bundle
            cmd = "cd %s ; tar -zxf %s" % (td, "bundle.tgz")
            if aci_utils.run_command(cmd) is None:
                return fail("failed to extract bundle %s" % s.id)
           
        # take the classes present in both the s1 and s2 definitions, using the
        # managed_object definition from s2 (they should always be the same)
        # for the list of objects, attributes, and flags.
        # if a classname is not present in the s1 definition then skip it
        js1 = get_json_data("%s/0/%s/definition.json" % (tmp, s1._id))
        js2 = get_json_data("%s/1/%s/definition.json" % (tmp, s2._id))
        if len(js1)==0: return fail("invalid definition in snapshot %s"%s1._id)
        if len(js2)==0: return fail("invalid definition in snapshot %s"%s2._id)
        s1_classes = [cn for cn in js1["managed_objects"].keys()]
        managed_objects = {}
        for classname in js2["managed_objects"]:
            if classname not in s1_classes:
                logger.debug("skipping %s not in snapshot1" % classname)
                continue
            managed_objects[classname] = js2["managed_objects"][classname]

        # build list of all nodes (join of s1 and s2 nodes)
        nodes = sorted(int(x) for x in set().union(s1.nodes, s2.nodes))

        # create pool to perform work
        pool = Pool(processes=config.get("MAX_POOL_SIZE", 4))

        filtered_nodes = []
        for n in nodes:
            if len(c.nodes)==0 or n in c.nodes: filtered_nodes.append(n)

        # build out work that needs to be completed
        class_work = [] # list of classes to perform 'class' work
        analyzer_work = [] # list of custom analyzers to run
        for classname in managed_objects:
            mo = managed_objects[classname]
            if len(classnames)>0 and classname not in classnames: continue
            if mo["analyzer"]=="" or mo["analyzer"]=="default": class_work.append(classname)
        for analyzer in ManagedObjects.ANALYZERS:
            # skip empty analyzer and default analyzer as they are 'class-work'
            if len(analyzer)==0 or analyzer=="default": continue
            # if a filter is set on the comparison for specific classnames, then
            # only include the analyzer if its name is in that filter
            if len(classnames)==0 or analyzer in classnames: analyzer_work.append(analyzer)

        # think of progress as the sum of all tasks that need to be completed:
        # one task for each class in 'class_work' per node, plus
        # one task for each analyzer in 'analyzer_work' per node, plus
        # two tasks per node for remapping (one for each snapshot)
        total_progress =len(class_work)*len(filtered_nodes) + \
                        len(analyzer_work)*len(filtered_nodes) + \
                        len(filtered_nodes)*2
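        # e.g. with 3 classes in class_work, 2 custom analyzers and 4 filtered
        # nodes, total_progress = 3*4 + 2*4 + 4*2 = 28 units of work (the
        # final 2-per-node term covers the two remap builds per node)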
        current_progress = 0
        progress(current_progress, total_progress)

        # perform comparison for each node
        for i,n in enumerate(filtered_nodes):
            if n!="0" and c.remap:
                # build remap for s1 and s2 
                r1 = Remap(n, "%s/0/%s/node-%s" % (tmp, s1._id, n))
                current_progress+=1
                r2 = Remap(n, "%s/1/%s/node-%s" % (tmp, s2._id, n))
                current_progress+=1
            else: 
                # create disabled remap objects
                r1 = Remap(n)
                r2 = Remap(n) 
                current_progress+=2
            progress(current_progress, total_progress) 
    
            work = []
            # build out classwork
            for classname in class_work:
                o = managed_objects[classname]
                cn = "node-%s/%s.json" % (n, classname)
                f1 = "%s/0/%s/%s" % (tmp, s1._id, cn)
                f2 = "%s/1/%s/%s" % (tmp, s2._id, cn)
                work.append(("class", c, o, f1, f2, r1, r2))

            # build out analyzer work
            f1 = "%s/0/%s/node-%s/" % (tmp, s1._id, n)
            f2 = "%s/1/%s/node-%s/" % (tmp, s2._id, n)
            for analyzer in analyzer_work:
                work.append(("custom", analyzer, c, f1, f2, r1, r2 ))
                
            # start the work using multiprocessing or serialize
            if c.serialize:
                for w in work: 
                    generic_compare(w)
                    current_progress+=1
                    progress(current_progress, total_progress) 
            else:
                pool.map(generic_compare, work)
                current_progress+= len(work)
                progress(current_progress, total_progress) 
                
        # cleanup tmp extracted files
        tmp_cleanup() 

        # create totals by summing over current compareResults
        counters = ["created", "modified", "deleted", "equal"]
        base = {"created":0, "modified":0, "deleted":0, "equal":0}
        total = copy.deepcopy(base)
        per_class = {}  # indexed per classname
        per_node = {}   # indexed per node_id
        results = CompareResults.read(compare_id=c._id)
        for o in results["objects"]:
            if o["classname"] not in per_class:
                per_class[o["classname"]] = copy.deepcopy(base)
                per_class[o["classname"]]["classname"] = o["classname"]
                per_class[o["classname"]]["name"] = o["name"]
            if o["node_id"] not in per_node:
                per_node[o["node_id"]] = copy.deepcopy(base)
                per_node[o["node_id"]]["node_id"] = o["node_id"]
            for counter in counters:
                per_class[o["classname"]][counter]+= o["total"][counter]
                per_node[o["node_id"]][counter]+= o["total"][counter]
                total[counter]+= o["total"][counter]
        # save totals
        c.total = total
        c.total_per_class = [per_class[k] for k in sorted(per_class.keys())]
        c.total_per_node = [per_node[k] for k in sorted(per_node.keys())]

        # after everything is complete, update progress to 100% and complete
        c.progress = 1.0
        c.status = "complete"
        c.total_time = abs(time.time() - ts)
        if not c.save(): logger.warn("failed to save compare status")
        logger.debug("compare complete: %s " % c._id)

    except Exception as e:
        logger.debug(traceback.format_exc())
        fail("unexpected error occurred: %s" % e)