def mutual_information_feature_selection(dataset, features, target, num_reads=5000):
    """Run the MIFS algorithm on a QPU solver"""
    # Set up a QPU sampler that embeds to a fully-connected graph of all the variables
    sampler = DWaveCliqueSampler()

    # For each number of features, k, penalize selection of fewer or more features
    selected_features = np.zeros((len(features), len(features)))

    bqm = mutual_information_bqm(dataset, features, target)

    # Base the penalty on the maximum change in the objective from flipping a single
    # variable; this ensures that the solution will satisfy the constraints.
    penalty = maximum_energy_delta(bqm)

    for k in range(1, len(features) + 1):
        kbqm = add_combination_penalty(bqm, k, penalty)

        sample = sampler.sample(kbqm,
                                label='Example - MI Feature Selection',
                                num_reads=num_reads).first.sample

        for fi, f in enumerate(features):
            selected_features[k - 1, fi] = sample[f]

    return selected_features
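# A minimal usage sketch for the function above. Assumptions: the helpers it calls
# (mutual_information_bqm, add_combination_penalty, maximum_energy_delta) are importable
# from this module, and the formatted Titanic CSV from the demo below is available at the
# path shown; both the path and the num_reads value here are illustrative only.
import pandas as pd

dataset = pd.read_csv('data/formatted_titanic.csv')
features = [c for c in dataset.columns if c != 'survived']

# Row k-1 of the returned array is a 0/1 indicator of the features chosen when
# exactly k features are requested.
selected = mutual_information_feature_selection(dataset, features, 'survived',
                                                num_reads=1000)
print(selected)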
def test_properties(self):
    sampler = DWaveCliqueSampler(failover=True)

    def mocksample(*args, **kwargs):
        # Raise SolverOfflineError on the first call only, then return an empty
        # sample set so the failover path can complete
        count = getattr(mocksample, 'count', 0)

        if count:
            return dimod.SampleSet.from_samples([], energy=0., vartype='SPIN')
        else:
            mocksample.count = count + 1
            raise SolverOfflineError

    sampler.child.sample = mocksample

    G = sampler.target_graph
    qlr = sampler.qpu_linear_range
    qqr = sampler.qpu_quadratic_range

    # Cached properties are reused until failover is triggered
    self.assertIs(G, sampler.target_graph)
    self.assertIs(qlr, sampler.qpu_linear_range)
    self.assertIs(qqr, sampler.qpu_quadratic_range)

    sampler.sample_ising({}, {})

    # The offline error triggered failover, so the properties are recomputed
    self.assertIsNot(G, sampler.target_graph)
    self.assertIsNot(qlr, sampler.qpu_linear_range)
    self.assertIsNot(qqr, sampler.qpu_quadratic_range)
def test_default(self):
    sampler = DWaveCliqueSampler()

    def mocksample(*args, **kwargs):
        raise SolverOfflineError

    sampler.child.sample = mocksample

    with self.assertRaises(SolverOfflineError):
        sampler.sample_ising({}, {})
def test_pegasus(self):
    try:
        sampler = DWaveCliqueSampler(solver=dict(topology__type='pegasus'))
    except (ValueError, ConfigFileError, SolverNotFoundError):
        raise unittest.SkipTest("no Pegasus-structured QPU available")

    dimod.testing.assert_sampler_api(sampler)

    # submit a maximum ferromagnet
    bqm = dimod.AdjVectorBQM('SPIN')
    for u, v in itertools.combinations(sampler.largest_clique(), 2):
        bqm.quadratic[u, v] = -1

    sampler.sample(bqm).resolve()
def test_noretry(self):
    sampler = DWaveCliqueSampler(failover=True, retry_interval=-1)

    def mocksample(*args, **kwargs):
        raise SolverOfflineError

    sampler.child.sample = mocksample

    def mocktrigger(*args, **kwargs):
        raise SolverNotFoundError

    sampler.child.trigger_failover = mocktrigger

    with self.assertRaises(SolverNotFoundError):
        sampler.sample_ising({}, {})
def get_sampler(topology):
    if topology in _SAMPLERS:
        return _SAMPLERS[topology]

    try:
        _SAMPLERS[topology] = DWaveCliqueSampler(
            solver=dict(topology__type=topology.lower()))
        return _SAMPLERS[topology]
    except (ValueError, ConfigFileError, SolverNotFoundError):
        raise unittest.SkipTest(f"no {topology}-structured QPU available")
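# A minimal usage sketch for the cached-sampler helper above. Assumptions: _SAMPLERS is a
# module-level dict (as referenced above), and the test class and method names here are
# illustrative only; the ferromagnet construction mirrors test_pegasus.
_SAMPLERS = {}

class TestCliqueSampling(unittest.TestCase):
    def test_pegasus_ferromagnet(self):
        # get_sampler skips the test automatically if no Pegasus-structured QPU is available
        sampler = get_sampler('Pegasus')

        # Ferromagnetic couplings over the largest clique the sampler can embed
        J = {(u, v): -1
             for u, v in itertools.combinations(sampler.largest_clique(), 2)}

        sampler.sample_ising({}, J, num_reads=10).resolve()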
def run_demo():
    # Read the feature-engineered data into a pandas dataframe
    # Data obtained from http://biostat.mc.vanderbilt.edu/DataSets
    demo_path = os.path.dirname(os.path.abspath(__file__))
    data_path = os.path.join(demo_path, 'data', 'formatted_titanic.csv')
    dataset = pd.read_csv(data_path)

    # Rank the MI between survival and every other variable
    scores = {}
    features = list(set(dataset.columns).difference(('survived',)))

    for feature in features:
        scores[feature] = mutual_information(prob(dataset[['survived', feature]].values), 0)

    labels, values = zip(*sorted(scores.items(), key=lambda pair: pair[1], reverse=True))

    # Plot the MI between survival and every other variable
    plt.figure()
    ax1 = plt.subplot(1, 2, 1)
    ax1.set_title("Mutual Information")
    ax1.set_ylabel('MI Between Survival and Feature')
    plt.xticks(np.arange(len(labels)), labels, rotation=90)
    plt.bar(np.arange(len(labels)), values)

    # The Titanic dataset provides a familiar, intuitive example available in the public
    # domain. In itself, however, it is not a good fit for solving by sampling. Run naively
    # on this dataset, the sampler finds numerous good solutions but is unlikely to find
    # the exact optimal solution. There are many techniques for reformulating problems for
    # the D-Wave system that can improve performance on various metrics, some of which can
    # help narrow down good solutions to closer approach an optimal solution. This demo
    # solves the problem for just the highest-scoring features.

    # Select 8 features with the top MI ranking found above.
    keep = 8

    sorted_scores = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    dataset = dataset[[column[0] for column in sorted_scores[0:keep]] + ["survived"]]
    features = list(set(dataset.columns).difference(('survived',)))

    # Build a QUBO that maximizes MI between survival and a subset of features
    bqm = dimod.BinaryQuadraticModel.empty(dimod.BINARY)

    # Add biases as (negative) MI with survival for each feature
    for feature in features:
        mi = mutual_information(prob(dataset[['survived', feature]].values), 1)
        bqm.add_variable(feature, -mi)

    # Add interactions as (negative) MI with survival for each pair of features
    for f0, f1 in itertools.combinations(features, 2):
        cmi_01 = conditional_mutual_information(prob(dataset[['survived', f0, f1]].values), 1, 2)
        cmi_10 = conditional_mutual_information(prob(dataset[['survived', f1, f0]].values), 1, 2)
        bqm.add_interaction(f0, f1, -cmi_01)
        bqm.add_interaction(f1, f0, -cmi_10)

    # Set up a QPU sampler with a fully-connected graph of all the variables
    sampler = DWaveCliqueSampler()

    # For each number of features, k, penalize selection of fewer or more features
    selected_features = np.zeros((len(features), len(features)))

    # Specify the penalty based on the maximum change in the objective
    # that could occur by flipping a single variable. This ensures
    # that the ground state will satisfy the constraints.
    penalty = maximum_energy_delta(bqm)

    for k in range(1, len(features) + 1):
        kbqm = bqm.copy()
        # Add a constraint that exactly k features are selected, enforced by the penalty
        kbqm.update(dimod.generators.combinations(features, k, strength=penalty))

        sample = sampler.sample(kbqm, num_reads=10000).first.sample

        for fi, f in enumerate(features):
            selected_features[k-1, fi] = sample[f]

    # Plot the best feature set per number of selected features
    ax2 = plt.subplot(1, 2, 2)
    ax2.set_title("Best Feature Selection")
    ax2.set_ylabel('Number of Selected Features')
    ax2.set_xticks(np.arange(len(features)))
    ax2.set_xticklabels(features, rotation=90)
    ax2.set_yticks(np.arange(len(features)))
    ax2.set_yticklabels(np.arange(1, len(features)+1))
    # Set a grid on minor ticks
    ax2.set_xticks(np.arange(-0.5, len(features)), minor=True)
    ax2.set_yticks(np.arange(-0.5, len(features)), minor=True)
    ax2.grid(which='minor', color='black')
    ax2.imshow(selected_features, cmap=colors.ListedColormap(['white', 'red']))

    plots_path = os.path.join(demo_path, "plots.png")
    plt.savefig(plots_path, bbox_inches="tight")
    print("Your plots are saved to {}".format(plots_path))
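# Both mutual_information_feature_selection and run_demo above call maximum_energy_delta,
# which is not shown in this excerpt. The sketch below is one way such a helper could be
# written, following the comment in run_demo (the largest change in the objective that a
# single variable flip can cause); the demo's actual implementation may differ.
def maximum_energy_delta(bqm):
    """Upper bound on the energy change from flipping any single variable (a sketch)."""
    return max(abs(bqm.get_linear(v))
               + sum(abs(bias) for bias in bqm.adj[v].values())
               for v in bqm.variables)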
class MockPegasusDWaveSampler(MockDWaveSampler):
    def __init__(self, **config):
        super().__init__()

        self.properties.update(topology=dict(shape=[6], type='pegasus'))

        G = dnx.pegasus_graph(6)
        self.nodelist = list(G.nodes)
        self.edgelist = list(G.edges)


with unittest.mock.patch('dwave.system.samplers.clique.DWaveSampler',
                         MockChimeraDWaveSampler):
    chimera_sampler = DWaveCliqueSampler()

with unittest.mock.patch('dwave.system.samplers.clique.DWaveSampler',
                         MockPegasusDWaveSampler):
    pegasus_sampler = DWaveCliqueSampler()


@dimod.testing.load_sampler_bqm_tests(chimera_sampler)
@dimod.testing.load_sampler_bqm_tests(pegasus_sampler)
class TestDWaveCliqueSampler(unittest.TestCase):
    def test_api(self):
        dimod.testing.assert_sampler_api(chimera_sampler)
        dimod.testing.assert_sampler_api(pegasus_sampler)

    def test_clique(self):
        self.assertEqual(len(chimera_sampler.clique(2)), 2)
for e in G.edges():
    # edge_weights[e] = 2*rng.binomial(1, .5) - 1
    edge_weights[e] = rng.standard_normal()

H = {}
for n in G.nodes():
    col = []
    for m in G.nodes():
        if (n, m) in G.edges():
            col.append(edge_weights[(min(n, m), max(n, m))])
        else:
            col.append(0)
    H[n] = col

pd.DataFrame.from_dict(H).to_csv("Clique_Gauss_Hamiltonian_{k}".format(k=i))

t_1 = time.time()

# Sampler for sparse graphs
# sampleset = EmbeddingComposite(DWaveSampler()).sample_ising({}, edge_weights, num_reads=1000)

# Sampler for clique graphs
sampleset = DWaveCliqueSampler().sample_ising({}, edge_weights, num_reads=1000)

t_2 = time.time()

sampleset.to_pandas_dataframe().to_csv("Clique_Gauss_Sampler_Data_{k}".format(k=i))

time_data = pd.read_csv("Time_Data")
time_data["Clique_Gauss_Hamiltonian_{k}".format(k=i)] = [t_2 - t_1]
time_data.to_csv("Time_Data")
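# The timing loop above reads a "Time_Data" CSV before writing to it, so the file must
# exist before the first iteration. A minimal setup sketch; the "trial" column is a
# placeholder assumption, only the file name comes from the code above.
import pandas as pd

pd.DataFrame({"trial": [0]}).to_csv("Time_Data", index=False)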