def __init__(self, circuits, model, dataset=None, num_sub_tables=None, num_table_processors=1,
             num_param_dimension_processors=(), param_dimensions=(), param_dimension_blk_sizes=(),
             resource_alloc=None, verbosity=0):
    """
    Build the layout by dividing the unique circuits evenly into atoms.

    Parameters
    ----------
    circuits : list of Circuits or CircuitList
        The circuits to include.  When this is a circuit-list object its
        `op_label_aliases` are applied to obtain the dataset-facing circuits.

    model : Model
        Used to complete each unique circuit via `model.complete_circuit` and
        handed to every atom that is created.

    dataset : DataSet, optional
        Handed unchanged to every atom that is created.

    num_sub_tables : int, optional
        The number of (roughly equal-sized) groups the unique circuits are
        divided into; one atom is created per group.  `None` is treated as a
        single group.

    num_table_processors, num_param_dimension_processors, param_dimensions, \
param_dimension_blk_sizes, resource_alloc, verbosity
        Forwarded unchanged to the base-class constructor.
    """
    unique_circuits, to_unique = self._compute_unique_circuits(circuits)
    aliases = circuits.op_label_aliases if isinstance(circuits, _CircuitList) else None
    ds_circuits = _lt.apply_aliases_to_circuits(unique_circuits, aliases)
    unique_complete_circuits = [model.complete_circuit(c) for c in unique_circuits]

    # Create evenly divided groups of indices into `unique_complete_circuits`.
    # `array_split` needs an actual section count, so the `None` default
    # (which would otherwise raise) is mapped to a single group.
    ngroups = 1 if num_sub_tables is None else num_sub_tables
    groups = [set(sub_array)
              for sub_array in _np.array_split(range(len(unique_complete_circuits)), ngroups)]

    def _create_atom(group):
        # Called (by the base class) once for each element of `groups`.
        return _TermCOPALayoutAtom(unique_complete_circuits, ds_circuits, group, model, dataset)

    super().__init__(circuits, unique_circuits, to_unique, unique_complete_circuits,
                     _create_atom, groups, num_table_processors,
                     num_param_dimension_processors, param_dimensions,
                     param_dimension_blk_sizes, resource_alloc, verbosity)
def apply_aliases(self):
    """
    Expand this circuit list's operation-label aliases.

    Returns
    -------
    list
        A list of :class:`Circuit`s, obtained by applying
        `self.op_label_aliases` to the circuits held by this object.
    """
    held_circuits = self._circuits
    alias_map = self.op_label_aliases
    return _lt.apply_aliases_to_circuits(held_circuits, alias_map)
def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_processors=1,
             num_param_dimension_processors=(), param_dimensions=(), param_dimension_blk_sizes=(),
             resource_alloc=None, verbosity=0):
    """
    Build the layout by grouping circuits that share a SPAM-free core.

    The unique circuits are completed using `model`, indexed by the middle
    ("no-spam") part returned by `model.split_circuit`, and the distinct
    no-spam circuits are then divided into groups.  One atom is created per
    group.

    Parameters
    ----------
    circuits : list of Circuits or CircuitList
        The circuits to include.  When this is a circuit-list object its
        `op_label_aliases` are applied to obtain the dataset-facing circuits.

    model : Model
        Supplies `complete_circuit` and `split_circuit`; also handed to each atom.

    dataset : DataSet, optional
        Handed unchanged to each atom.

    num_sub_trees : int, optional
        When greater than 1, an evaluation tree over the no-spam circuits is
        built and asked for a splitting into this many groups.  Otherwise a
        single group holding every no-spam circuit is used.

    num_tree_processors, num_param_dimension_processors, param_dimensions, \
param_dimension_blk_sizes, resource_alloc, verbosity
        Forwarded unchanged to the base-class constructor (`verbosity - 1`
        is what the tree-splitting step receives).
    """
    unique_circuits, to_unique = self._compute_unique_circuits(circuits)
    if isinstance(circuits, _CircuitList):
        alias_map = circuits.op_label_aliases
    else:
        alias_map = None
    ds_circuits = _lt.apply_aliases_to_circuits(unique_circuits, alias_map)
    unique_complete_circuits = [model.complete_circuit(circ) for circ in unique_circuits]

    # "Unique" refers to circuits *before* completion, so duplicates may exist
    # after completion (e.g. "rho0Gx" and "Gx" could both complete to
    # "rho0GxMdefault_0").  Index the completed circuits by their no-spam part.
    indices_by_nospam = _collections.OrderedDict()
    for index, complete_circ in enumerate(unique_complete_circuits):
        _prep, nospam_circ, _povm = model.split_circuit(complete_circ)
        indices_by_nospam.setdefault(nospam_circ, []).append(index)
    nospam_circuits = list(indices_by_nospam)

    # Decide how the no-spam circuits are grouped (every processor does this).
    # Elements of `groups` contain indices into `nospam_circuits`.
    max_sub_tree_size = None  # removed from being an argument (unused)
    split_requested = (num_sub_trees is not None and num_sub_trees > 1) \
        or max_sub_tree_size is not None
    if not split_requested:
        groups = [set(range(len(nospam_circuits)))]
        helpful_scratch = [set()]
    else:
        circuit_tree = _EvalTree.create(nospam_circuits)
        groups, helpful_scratch = circuit_tree.find_splitting(
            len(nospam_circuits), max_sub_tree_size, num_sub_trees, verbosity - 1)

    def _create_atom(args):
        # `args` is one (group, scratch-group) pair from the list given below.
        group, helpful_scratch_group = args
        return _MatrixCOPALayoutAtom(unique_complete_circuits, nospam_circuits,
                                     indices_by_nospam, ds_circuits,
                                     group, helpful_scratch_group, model, dataset)

    atom_args = list(zip(groups, helpful_scratch))
    super().__init__(circuits, unique_circuits, to_unique, unique_complete_circuits,
                     _create_atom, atom_args, num_tree_processors,
                     num_param_dimension_processors, param_dimensions,
                     param_dimension_blk_sizes, resource_alloc, verbosity)
def __init__(self, circuits, model, dataset=None, max_cache_size=None, num_sub_tables=None,
             num_table_processors=1, num_param_dimension_processors=(), param_dimensions=(),
             param_dimension_blk_sizes=(), resource_alloc=None, verbosity=0):
    """
    Build the layout, optionally splitting the circuits via a prefix table.

    Parameters
    ----------
    circuits : list of Circuits or CircuitList
        The circuits to include.  When this is a circuit-list object its
        `op_label_aliases` are applied to obtain the dataset-facing circuits.

    model : Model
        Supplies `complete_circuit` and `split_circuit`; also handed to each atom.

    dataset : DataSet, optional
        Handed unchanged to each atom.

    max_cache_size : int, optional
        Given to the prefix table (when one is built) and to each atom.

    num_sub_tables : int, optional
        When greater than 1, a prefix table over the POVM-less circuits is
        built and asked for a splitting into this many groups.  Otherwise a
        single group holding every completed circuit is used.

    num_table_processors, num_param_dimension_processors, param_dimensions, \
param_dimension_blk_sizes, resource_alloc, verbosity
        Forwarded unchanged to the base-class constructor.
    """
    unique_circuits, to_unique = self._compute_unique_circuits(circuits)
    if isinstance(circuits, _CircuitList):
        alias_map = circuits.op_label_aliases
    else:
        alias_map = None
    ds_circuits = _lt.apply_aliases_to_circuits(unique_circuits, alias_map)

    unique_complete_circuits = [model.complete_circuit(circ) for circ in unique_circuits]
    unique_povmless_circuits = []
    for complete_circ in unique_complete_circuits:
        unique_povmless_circuits.append(model.split_circuit(complete_circ, split_prep=False)[1])

    max_sub_table_size = None  # was an argument but never used; remove in future
    split_requested = (num_sub_tables is not None and num_sub_tables > 1) \
        or max_sub_table_size is not None
    if not split_requested:
        groups = [set(range(len(unique_complete_circuits)))]
    else:
        circuit_table = _PrefixTable(unique_povmless_circuits, max_cache_size)
        groups = circuit_table.find_splitting(max_sub_table_size, num_sub_tables,
                                              verbosity=verbosity)

    def _create_atom(group):
        # Called (by the base class) once for each element of `groups`.
        return _MapCOPALayoutAtom(unique_complete_circuits, ds_circuits, group, model,
                                  dataset, max_cache_size)

    super().__init__(circuits, unique_circuits, to_unique, unique_complete_circuits,
                     _create_atom, groups, num_table_processors,
                     num_param_dimension_processors, param_dimensions,
                     param_dimension_blk_sizes, resource_alloc, verbosity)

    # For time-dependent calculations: connect unique -> original indices of the
    # final layout now that the base class has created it.  (This cannot be done
    # earlier because the `.circuits` of this local layout may not be *all* the
    # circuits, or in the same order - that is only true in the *global* layout.)
    unique_to_orig = {}
    for orig_i, unique_i in self._to_unique.items():
        unique_to_orig[unique_i] = orig_i  # a later original index overwrites an earlier one

    for atom in self.atoms:
        orig_lookup = atom.orig_indices_by_expcircuit
        for expanded_circuit_i, unique_i in atom.unique_indices_by_expcircuit.items():
            orig_lookup[expanded_circuit_i] = unique_to_orig[unique_i]
def create_from(cls, circuits, model=None, dataset=None, param_dimensions=(), resource_alloc=None):
    """
    Creates a simple layout from a list of circuits.

    Optionally, a model can be used to "complete" (add implied prep or POVM layers)
    circuits, and a dataset to restrict the layout's elements to the observed outcomes.

    Parameters
    ----------
    circuits : list of Circuits
        The circuits to include in the layout.  Note that the produced layout may not
        retain the ordering of these circuits internally, but that its `.global_layout` does.

    model : Model, optional
        A model used to "complete" the circuits (add implied prep and/or POVM layers).
        Usually this is a/the model that will be used to compute outcomes probabilities
        using this layout.  If `None`, then each element of `circuits` is assumed to
        be a complete circuit, i.e., to begin with a state preparation layer and end
        with a POVM layer.  Required when `dataset` is `None`, because the model is
        then the only source of each circuit's outcomes.

    dataset : DataSet, optional
        If not None, restrict what is simplified to only those
        probabilities corresponding to non-zero counts (observed
        outcomes) in this data set.

    param_dimensions : tuple, optional
        A tuple containing, optionally, the parameter-space dimension used when taking first
        and second derivatives with respect to the circuit outcome probabilities.

    resource_alloc : ResourceAllocation, optional
        The resources available for computing circuit outcome probabilities.

    Returns
    -------
    CircuitOutcomeProbabilityArrayLayout

    Raises
    ------
    ValueError
        If both `model` and `dataset` are `None`, so that no outcomes can be determined.
    """
    circuits = circuits if isinstance(circuits, _CircuitList) else _CircuitList(circuits)
    unique_circuits, to_unique = cls._compute_unique_circuits(circuits)
    unique_complete_circuits = [model.complete_circuit(c) for c in unique_circuits] \
        if (model is not None) else unique_circuits[:]
    ds_circuits = _lt.apply_aliases_to_circuits(unique_circuits, circuits.op_label_aliases)

    # Create a dict of the "present outcomes" of each unique circuit, defined as those
    # outcomes for which `dataset` contains data (if `dataset is None`, treat *all*
    # outcomes reported by the model as present).
    # Note: dict keys are integer unique-circuit indices rather than circuits, for
    # hashing speed.
    if dataset is not None:
        present_outcomes = {i: dataset[ds_c].outcomes for i, ds_c in enumerate(ds_circuits)}
    elif model is not None:
        present_outcomes = {i: model.circuit_outcomes(c) for i, c in enumerate(unique_circuits)}
    else:
        raise ValueError("At least one of `model` and `dataset` must be given in order to "
                         "determine the outcomes of each circuit!")

    # Create a dictionary of element indices by concatenating the present outcomes of all
    # the *unique* circuits in order.  `present_outcomes` is keyed by unique-circuit index,
    # so the loop must run over those indices; enumerating `circuits` (which may contain
    # duplicates) would run past the last key.
    elindex_outcome_tuples = _collections.OrderedDict()
    k = 0  # running element offset
    for i in range(len(unique_circuits)):
        outcomes = present_outcomes[i]
        elindex_outcome_tuples[i] = tuple((k + j, outcome) for j, outcome in enumerate(outcomes))
        k += len(outcomes)

    return cls(circuits, unique_circuits, to_unique, elindex_outcome_tuples,
               unique_complete_circuits, param_dimensions, resource_alloc)
def two_delta_logl_per_circuit(model, dataset, circuits=None,
                               min_prob_clip=1e-6, prob_clip_interval=(-1e6, 1e6), radius=1e-4,
                               poisson_picture=True, op_label_aliases=None,
                               dof_calc_method=None, wildcard=None,
                               mdc_store=None, comm=None):
    """
    Twice the per-circuit difference between the maximum and actual log-likelihood.

    Contributions are aggregated over each circuit's outcomes, but no further.

    Optionally (when `dof_calc_method` is not None) returns parallel vectors
    containing the Nsigma (# std deviations from mean) and the p-value relative
    to expected chi^2 distribution for each sequence.

    Parameters
    ----------
    model : Model
        Model of parameterized gates

    dataset : DataSet
        Probability data

    circuits : list of (tuples or Circuits), optional
        Each element specifies a circuit to include in the log-likelihood
        sum.  Default value of None implies all the circuits in dataset
        should be used.

    min_prob_clip : float, optional
        The minimum probability treated normally in the evaluation of the log-likelihood.
        A penalty function replaces the true log-likelihood for probabilities that lie
        below this threshold so that the log-likelihood never becomes undefined (which improves
        optimizer performance).

    prob_clip_interval : 2-tuple or None, optional
        (min,max) values used to clip the probabilities predicted by models during MLEGST's
        search for an optimal model (if not None).  if None, no clipping is performed.

    radius : float, optional
        Specifies the severity of rounding used to "patch" the zero-frequency
        terms of the log-likelihood.

    poisson_picture : boolean, optional
        Whether the log-likelihood-in-the-Poisson-picture terms should be included
        in the returned logl value.

    op_label_aliases : dictionary, optional
        Dictionary whose keys are operation label "aliases" and whose values are tuples
        corresponding to what that operation label should be expanded into before querying
        the dataset. Defaults to the empty dictionary (no aliases defined)
        e.g. op_label_aliases['Gx^3'] = ('Gx','Gx','Gx')

    dof_calc_method : {None, "all", "modeltest"}
        How `model`'s number of degrees of freedom (parameters) are obtained
        when computing the number of standard deviations and p-value relative to
        a chi2_k distribution, where `k` is additional degrees of freedom
        possessed by the maximal model.  If None, then `Nsigma` and `pvalue`
        are not returned (see below).

    wildcard : WildcardBudget
        A wildcard budget to apply to this log-likelihood computation.
        This increases the returned log-likelihood value by adjusting
        (by a maximal amount measured in TVD, given by the budget) the
        probabilities produced by `model` to optimally match the data
        (within the budgetary constraints) evaluating the log-likelihood.

    mdc_store : ModelDatasetCircuitsStore, optional
        An object that bundles cached quantities along with a given model, dataset, and circuit
        list.  If given, `model` and `dataset` and `circuits` should be set to None.

    comm : mpi4py.MPI.Comm, optional
        When not None, an MPI communicator for distributing the computation
        across multiple processors.

    Returns
    -------
    twoDeltaLogL_terms : numpy.ndarray
    Nsigma, pvalue : numpy.ndarray
        Only returned when `dof_calc_method` is not None.

    Raises
    ------
    ValueError
        If `dof_calc_method` is not one of None, "all" or "modeltest".
    """
    from ..objectivefns import objectivefns as _objfns
    obj_cls = _objfns.PoissonPicDeltaLogLFunction if poisson_picture else _objfns.DeltaLogLFunction
    obj = _objfns._objfn(obj_cls, model, dataset, circuits,
                         {'min_prob_clip': min_prob_clip,
                          'radius': radius},
                         {'prob_clip_interval': prob_clip_interval},
                         op_label_aliases, comm, None, ('percircuit',), (), mdc_store)

    if wildcard:
        assert(poisson_picture), "Wildcard budgets can only be used with `poisson_picture=True`"
        obj.percircuit()  # objfn used within wildcard objective fn must be pre-evaluated
        obj = _objfns.LogLWildcardFunction(obj, model.to_vector(), wildcard)

    two_dlogl_percircuit = 2 * obj.layout.allgather_local_array('c', obj.percircuit())

    if dof_calc_method is None:
        return two_dlogl_percircuit
    elif dof_calc_method == "all":
        mdl_dof = model.num_params
    elif dof_calc_method == "modeltest":
        mdl_dof = model.num_modeltest_params
    else:
        raise ValueError("Invalid `dof_calc_method` arg: %s" % dof_calc_method)

    if circuits is not None:
        ds_strs = _lt.apply_aliases_to_circuits(circuits, op_label_aliases)
        num_circuits = len(circuits)
    else:
        ds_strs = None
        # `circuits=None` means "all circuits in the dataset"; the gathered per-circuit
        # array holds one entry per circuit, so its length stands in for `len(circuits)`
        # (which would raise on None).
        num_circuits = len(two_dlogl_percircuit)

    ds_dof = dataset.degrees_of_freedom(ds_strs)
    k = max(ds_dof - mdl_dof, 1)
    # HACK - just take a single average #dof per circuit to use as chi_k distribution!
    # (`max(..., 1)` avoids dividing by zero when there are no circuits at all, in which
    #  case the arrays below are empty and `k` is irrelevant.)
    k = int(_np.ceil(k / (1.0 * max(num_circuits, 1))))

    nsigma = (two_dlogl_percircuit - k) / _np.sqrt(2 * k)
    # chi2.cdf is applied element-wise to the whole array at once.
    pvalue = _np.array(1.0 - _stats.chi2.cdf(two_dlogl_percircuit, k), 'd')
    return two_dlogl_percircuit, nsigma, pvalue
def two_delta_logl(model, dataset, circuits=None,
                   min_prob_clip=1e-6, prob_clip_interval=(-1e6, 1e6), radius=1e-4,
                   poisson_picture=True, op_label_aliases=None,
                   dof_calc_method=None, wildcard=None,
                   mdc_store=None, comm=None):
    """
    Twice the difference between the maximum and actual log-likelihood.

    This is the quantity often called the *log-likelihood-ratio* between the
    "maximal model" (the one that trivially fits the data exactly) and `model`,
    i.e. it is equivalent to `2*(logl_max(...) - logl(...))`.  When
    `dof_calc_method` is not None, the number of standard deviations from the
    mean (Nsigma) and the p-value relative to the expected chi^2 distribution
    are returned as well.  The arguments are a superset of those of
    :function:`logl` and :function:`logl_max`.

    Parameters
    ----------
    model : Model
        Model of parameterized gates

    dataset : DataSet
        Probability data

    circuits : list of (tuples or Circuits), optional
        The circuits to include in the log-likelihood sum.  None (the default)
        means that all the circuits in `dataset` are used.

    min_prob_clip : float, optional
        Probabilities below this threshold are handled by a penalty function
        instead of the true log-likelihood, so that the log-likelihood never
        becomes undefined (which improves optimizer performance).

    prob_clip_interval : 2-tuple or None, optional
        (min,max) values used to clip the probabilities predicted by models
        during MLEGST's search for an optimal model.  If None, no clipping
        is performed.

    radius : float, optional
        The severity of rounding used to "patch" the zero-frequency terms of
        the log-likelihood.

    poisson_picture : boolean, optional
        Whether the log-likelihood-in-the-Poisson-picture terms should be
        included in the computed log-likelihood values.

    op_label_aliases : dictionary, optional
        Maps operation label "aliases" to the tuples they should be expanded
        into before querying the dataset.  Defaults to no aliases, e.g.
        op_label_aliases['Gx^3'] = ('Gx','Gx','Gx')

    dof_calc_method : {None, "all", "modeltest"}
        How `model`'s number of degrees of freedom (parameters) are obtained
        when computing Nsigma and the p-value relative to a chi2_k
        distribution, where `k` is the number of additional degrees of freedom
        possessed by the maximal model.  If None, `Nsigma` and `pvalue` are not
        returned.

    wildcard : WildcardBudget
        A wildcard budget to apply to this computation.  It adjusts (by a
        maximal amount measured in TVD, given by the budget) the probabilities
        produced by `model` to optimally match the data before the
        log-likelihood is evaluated.

    mdc_store : ModelDatasetCircuitsStore, optional
        An object that bundles cached quantities along with a given model,
        dataset, and circuit list.  If given, `model`, `dataset` and `circuits`
        should be set to None.

    comm : mpi4py.MPI.Comm, optional
        When not None, an MPI communicator for distributing the computation
        across multiple processors.

    Returns
    -------
    twoDeltaLogL : float
        2*(loglikelihood(maximal_model,data) - loglikelihood(model,data))
    Nsigma, pvalue : float
        Only returned when `dof_calc_method` is not None.
    """
    from ..objectivefns import objectivefns as _objfns

    if poisson_picture:
        objective_class = _objfns.PoissonPicDeltaLogLFunction
    else:
        objective_class = _objfns.DeltaLogLFunction
    regularization = {'min_prob_clip': min_prob_clip, 'radius': radius}
    penalties = {'prob_clip_interval': prob_clip_interval}
    objective = _objfns._objfn(objective_class, model, dataset, circuits,
                               regularization, penalties,
                               op_label_aliases, comm, None, ('terms',), (), mdc_store)

    if wildcard:
        assert(poisson_picture), "Wildcard budgets can only be used with `poisson_picture=True`"
        objective.terms()  # the wrapped objective must be evaluated before it is wrapped
        objective = _objfns.LogLWildcardFunction(objective, model.to_vector(), wildcard)

    two_delta_logl = 2 * objective.fn()  # gathers internally

    # Without a dof method there is nothing more to compute.
    if dof_calc_method is None:
        return two_delta_logl

    if dof_calc_method == "all":
        model_dof = model.num_params
    elif dof_calc_method == "modeltest":
        model_dof = model.num_modeltest_params
    else:
        raise ValueError("Invalid `dof_calc_method` arg: %s" % dof_calc_method)

    dataset_circuits = None
    if circuits is not None:
        dataset_circuits = _lt.apply_aliases_to_circuits(circuits, op_label_aliases)
    dataset_dof = dataset.degrees_of_freedom(dataset_circuits)

    if dataset_dof <= model_dof:
        _warnings.warn("Max-model params (%d) <= model params (%d)!  Using k == 1." % (dataset_dof, model_dof))
    k = max(dataset_dof - model_dof, 1)

    nsigma = (two_delta_logl - k) / _np.sqrt(2 * k)
    pvalue = 1.0 - _stats.chi2.cdf(two_delta_logl, k)
    return two_delta_logl, nsigma, pvalue