def test_knn_memory():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    dim = 3
    n = 20
    np.random.seed(47)
    bags = Features([np.random.randn(np.random.randint(30, 100), dim)
                     for _ in xrange(n)])

    tdir = tempfile.mkdtemp()
    div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
    Ks = (3, 4)
    est = KNNDivergenceEstimator(div_funcs=div_funcs, Ks=Ks, memory=tdir)
    res1 = est.fit_transform(bags)

    with LogCapture('skl_groups.divergences.knn', level=logging.INFO) as l:
        res2 = est.transform(bags)
    assert len(l.records) == 0
    assert np.all(res1 == res2)

    with LogCapture('skl_groups.divergences.knn', level=logging.INFO) as l:
        res3 = est.fit_transform(bags)
    for r in l.records:
        assert not r.message.startswith("Getting divergences")
    assert np.all(res1 == res3)
def test_knn_kl():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    # verified by hand
    # Dhat(P||Q) = \log m/(n-1) + d/n \sum_{i=1}^n \log \nu_k(i)/\rho_k(i)
    x = np.reshape([0., 1, 3], (3, 1))
    y = np.reshape([.2, 1.2, 3.2, 7.2], (4, 1))

    n = x.shape[0]
    m = y.shape[0]

    x_to_y = np.log(m / (n - 1)) + 1 / n * (
        np.log(1.2 / 3) + np.log(.8 / 2) + np.log(1.8 / 3))
    y_to_x = np.log(n / (m - 1)) + 1 / m * (
        np.log(.8 / 3) + np.log(1.2 / 2) + np.log(2.2 / 3) + np.log(6.2 / 6))

    msg = "got {}, expected {}"
    est = KNNDivergenceEstimator(div_funcs=['kl'], Ks=[2], clamp=False)
    res = est.fit_transform([x, y]).squeeze()
    assert res[0, 0] == 0
    assert res[1, 1] == 0
    assert np.allclose(res[0, 1], x_to_y), msg.format(res[0, 1], x_to_y)
    assert np.allclose(res[1, 0], y_to_x), msg.format(res[1, 0], y_to_x)
def divergence_gen(gen, gt_db, batch=1000, metric='kl', normalize=False,
                   n_bins=100, whitening=True, classes=None, **kwargs):
    """Given a generator and the ground-truth signals it tries to approximate,
    measure the discrepancy between the generated and the ground-truth signals."""
    # Generate some samples.
    batch = gt_db.shape[0]
    if classes is None:
        gen_samples = gen_images(gen, n=batch, batchsize=batch)
    else:
        # Conditional case.
        gen_csamples, n_ms = [], int(batch // len(classes) + 10)
        for cl in classes:
            x = gen_images_with_condition(gen, n=n_ms, c=cl, batchsize=n_ms)
            gen_csamples.append(x)
        gen_csamples = np.concatenate(gen_csamples, 0)
        gen_samples = gen_csamples[:gt_db.shape[0]]
    if len(gt_db.shape) != 2:
        gt_db = gt_db.reshape((batch, -1))
    if len(gen_samples.shape) != 2:
        gen_samples = gen_samples.reshape((batch, -1))
    if gen_samples.dtype == np.uint8:
        gen_samples = gen_samples.astype(np.float32)
    if normalize:
        # Given that gen_images are in the [0, 255] range, normalize
        # the images to the [-1, 1] range for the KNN.
        gen_samples1 = gen_samples / 127.5 - 1
    else:
        gen_samples1 = gen_samples

    if metric == 'ndb':
        global ndb
        if ndb is None:
            ndb = NDB(training_data=gt_db, number_of_bins=n_bins,
                      whitening=whitening)
        metric_val = ndb.evaluate(gen_samples)
        chainer.reporter.report({'ndb': metric_val['NDB']})
        chainer.reporter.report({'JS': metric_val['JS']})
        diver = metric_val['NDB']
    else:
        # Define an estimator (e.g. KL divergence).
        est = KNNDivergenceEstimator(div_funcs=[metric], Ks=[3], clamp=False)
        # Fit and return the result.
        res_diver = est.fit_transform([gt_db, gen_samples])
        try:
            diver = res_diver[0, 1]
        except Exception:
            diver = res_diver[0][0][0, 1]
        chainer.reporter.report({'kl': diver})
    return diver
def kNNdiv_Kernel(X_white, kernel, Knn=3, div_func='renyi:.5', Nref=None,
                  compwise=True, njobs=1, W_ica_inv=None):
    ''' `div_func` kNN divergence estimate between some data X_white and
    a distribution specified by kernel. '''
    if isinstance(Knn, int):
        Knns = [Knn]
    elif isinstance(Knn, list):
        Knns = Knn

    # If component-wise, there should be one kernel per component,
    # i.e. X_white.shape[1] kernels in total.
    if compwise:
        if X_white.shape[1] != len(kernel):
            raise ValueError

    # Construct the reference "bag".
    if compwise:
        ref_dist = np.zeros((Nref, X_white.shape[1]))
        for icomp in range(X_white.shape[1]):
            samp = kernel[icomp].sample(Nref)
            if isinstance(samp, tuple):
                ref_dist[:, icomp] = samp[0].flatten()
            else:
                ref_dist[:, icomp] = samp.flatten()
    else:
        samp = kernel.sample(Nref)
        if isinstance(samp, tuple):
            ref_dist = samp[0]
        else:
            ref_dist = samp
    if W_ica_inv is not None:
        ref_dist = np.dot(ref_dist, W_ica_inv.T)

    # Estimate the divergence.
    kNN = KNNDivergenceEstimator(div_funcs=[div_func], Ks=Knns,
                                 version='slow', clamp=False, n_jobs=njobs)
    feat = Features([X_white, ref_dist])
    div_knn = kNN.fit_transform(feat)
    if len(Knns) == 1:
        return div_knn[0][0][0][1]
    div_knns = np.zeros(len(Knns))
    for i in range(len(Knns)):
        div_knns[i] = div_knn[0][i][0][1]
    return div_knns
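# Hedged usage sketch for kNNdiv_Kernel (illustrative only): the `kernel`
# argument is assumed to be anything with a .sample(n) method, e.g. one
# sklearn KernelDensity per component when compwise=True. Requires skl-groups
# and FLANN, as in the snippets above; all data below is made up.
import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
X_white = rng.randn(500, 2)
kernels = [KernelDensity(bandwidth=0.5).fit(X_white[:, [i]])
           for i in range(X_white.shape[1])]
div = kNNdiv_Kernel(X_white, kernels, Knn=3, div_func='kl',
                    Nref=500, compwise=True)
print(div)  # scalar KL estimate between the data and the KDE reference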
def test_knn_js():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    # verified by hand
    x = np.reshape([0., 1, 3, 6], (4, 1))
    n = 4

    y = np.reshape([.2, 1.2, 3.2, 6.2, 10.2], (5, 1))
    m = 5

    M = 2

    right_js = (
        np.log(n + m - 1) + psi(M)
        + 1 / (2 * n) * (  # x weight is 1/7, y weight is 4/35, quantile 1/4
            np.log(.2) - psi(1)     # 0   => .2(y), 1(x)
            + np.log(.8) - psi(2)   # 1   => 1.2(y), .2(y)
            + np.log(1.8) - psi(2)  # 3   => 3.2(y), 1.2(y)
            + np.log(2.8) - psi(2)  # 6   => 6.2(y), 3.2(y)
        )
        + 1 / (2 * m) * (  # x weight is 5/36, y weight is 1/9, quantile 1/4
            np.log(.2) - psi(1)     # .2   => 0(x)
            + np.log(1) - psi(2)    # 1.2  => 1(x), .2(y)
            + np.log(2) - psi(2)    # 3.2  => 3(x), 1.2(y)
            + np.log(3) - psi(2)    # 6.2  => 6(x), 3.2(y)
            + np.log(4.2) - psi(2)  # 10.2 => 6.2(y), 6(x)
        )
        - 1 / 2 * np.log(n - 1)
        - 1 / (2 * n) * (np.log(3) + np.log(2) + np.log(3) + np.log(5))
        - 1 / 2 * np.log(m - 1)
        - 1 / (2 * m) * (np.log(3) + np.log(2) + np.log(3)
                         + np.log(4) + np.log(7))
    )

    msg = "got {}, expected {}"
    est = KNNDivergenceEstimator(div_funcs=['js'], Ks=[2], clamp=False)
    res = est.fit([x]).transform([y])
    assert res.shape == (1, 1, 1, 1)
    res = res[0, 0, 0, 0]
    assert np.allclose(res, right_js, atol=1e-6), msg.format(res, right_js)
def distribution_divergence(X_s, X_l, k=10):
    """
    Computes the l2 and js divergences from samples of two distributions.
    The implementation uses `skl-groups`, which implements non-parametric
    estimation of divergences.

    Args:
        + X_s: a numpy array containing the point cloud in state space
        + X_l: a numpy array containing the point cloud in latent space
    """
    # We discard cases with too large dimensions
    if X_s.shape[1] > 50:
        return {'l2_divergence': -1., 'js_divergence': -1.}
    # We instantiate the divergence object
    div = KNNDivergenceEstimator(div_funcs=['l2', 'js'], Ks=[k],
                                 n_jobs=4, clamp=True)
    # We turn both data to float32
    X_s = X_s.astype(np.float32)
    X_l = X_l.astype(np.float32)
    # We generate Features
    f_s = Features(X_s, n_pts=[X_s.shape[0]])
    f_l = Features(X_l, n_pts=[X_l.shape[0]])
    # We create the knn graph
    div.fit(X=f_s)
    # We compute the divergences
    l2, js = div.transform(X=f_l).squeeze()
    # We construct the returned dictionary
    output = {'l2_divergence': l2, 'js_divergence': js}
    return output
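# Hedged usage sketch for distribution_divergence (illustrative only): two
# random point clouds of the same dimension; names and sizes are made up.
import numpy as np

rng = np.random.RandomState(0)
X_state = rng.randn(400, 8)         # "state space" point cloud
X_latent = rng.randn(400, 8) + 0.5  # "latent space" point cloud
divs = distribution_divergence(X_state, X_latent, k=10)
print(divs['l2_divergence'], divs['js_divergence'])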
def kNNdiv_gauss(X_white, cov_X, Knn=3, div_func='renyi:.5', gauss=None,
                 Nref=None, njobs=1):
    ''' `div_func` kNN divergence estimate between X_white and a reference
    Gaussian with covariance matrix cov_X. '''
    if gauss is None:
        if Nref is None:
            raise ValueError
        # Gaussian reference distribution
        gauss = np.random.multivariate_normal(
            np.zeros(X_white.shape[1]), cov_X, size=Nref)
    if gauss.shape[1] != X_white.shape[1]:
        raise ValueError(
            'dimensions of X_white and the Gaussian reference distribution do not match')

    if isinstance(Knn, int):
        Knns = [Knn]
    elif isinstance(Knn, list):
        Knns = Knn

    kNN = KNNDivergenceEstimator(div_funcs=[div_func], Ks=Knns,
                                 version='slow', clamp=False, n_jobs=njobs)
    feat = Features([X_white, gauss])
    div_knn = kNN.fit_transform(feat)
    if len(Knns) == 1:
        return div_knn[0][0][0][1]
    div_knns = np.zeros(len(Knns))
    for i in range(len(Knns)):
        div_knns[i] = div_knn[0][i][0][1]
    return div_knns
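# Hedged usage sketch for kNNdiv_gauss (illustrative only): compare whitened
# data against a Gaussian reference with the same covariance.
import numpy as np

rng = np.random.RandomState(0)
X_white = rng.randn(1000, 3)
cov_X = np.cov(X_white.T)
div = kNNdiv_gauss(X_white, cov_X, Knn=3, div_func='renyi:.5', Nref=1000)
print(div)  # scalar Renyi-0.5 divergence estimate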
def test_knn_sanity_slow():
    if not have_flann:
        raise SkipTest("No flann, so skipping knn tests.")

    dim = 3
    n = 20
    np.random.seed(47)
    bags = Features([np.random.randn(np.random.randint(30, 100), dim)
                     for _ in xrange(n)])

    # just make sure it runs
    div_funcs = ('kl', 'js', 'renyi:.9', 'l2', 'tsallis:.8')
    Ks = (3, 4)
    est = KNNDivergenceEstimator(div_funcs=div_funcs, Ks=Ks)
    res = est.fit_transform(bags)
    assert res.shape == (len(div_funcs), len(Ks), n, n)
    assert np.all(np.isfinite(res))

    # test that JS blows up when there's a huge difference in bag sizes
    # (so that K is too low)
    assert_raises(
        ValueError,
        partial(est.fit_transform, bags + [np.random.randn(1000, dim)]))

    # test fit() and then transform() with JS, with different-sized test bags
    est = KNNDivergenceEstimator(div_funcs=('js',), Ks=(5,))
    est.fit(bags, get_rhos=True)
    with LogCapture('skl_groups.divergences.knn', level=logging.WARNING) as l:
        res = est.transform([np.random.randn(300, dim)])
    assert res.shape == (1, 1, 1, len(bags))
    assert len(l.records) == 1
    assert l.records[0].message.startswith('Y_rhos had a lower max_K')

    # test that passing a div func more than once raises
    def blah(df):
        est = KNNDivergenceEstimator(div_funcs=[df, df])
        return est.fit(bags)
    assert_raises(ValueError, lambda: blah('kl'))
    assert_raises(ValueError, lambda: blah('renyi:.8'))
    assert_raises(ValueError, lambda: blah('l2'))
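# Minimal indexing sketch mirroring the shape assertion in the test above
# (assuming skl-groups and FLANN are installed; import paths follow skl-groups):
# fit_transform returns an array of shape (len(div_funcs), len(Ks), n, n).
import numpy as np
from skl_groups.features import Features
from skl_groups.divergences import KNNDivergenceEstimator

rng = np.random.RandomState(0)
bags = Features([rng.randn(50, 3) for _ in range(5)])
est = KNNDivergenceEstimator(div_funcs=('kl', 'js'), Ks=(3, 4))
res = est.fit_transform(bags)   # shape (2, 2, 5, 5)
kl_k4_0_to_1 = res[0, 1, 0, 1]  # 'kl', K=4, divergence from bag 0 to bag 1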
def kNNdiv_general(X, Y, Knn=3, div_func='kl', alpha=None, njobs=1):
    """ kNN divergence estimate for samples drawn from any two arbitrary
    distributions (e.g. div_func='renyi:.5'). """
    if Y.shape[1] != X.shape[1]:
        raise ValueError('dimensions of X and Y do not match')

    if isinstance(Knn, int):
        Knns = [Knn]
    elif isinstance(Knn, list):
        Knns = Knn

    if alpha is not None:
        div_func = div_func + ':%s' % alpha

    kNN = KNNDivergenceEstimator(div_funcs=[div_func], Ks=Knns,
                                 version='slow', clamp=False, n_jobs=njobs)
    feat = Features([X, Y])
    div_knn = kNN.fit_transform(feat)
    if len(Knns) == 1:
        return div_knn[0][0][0][1]
    div_knns = np.zeros(len(Knns))
    for i in range(len(Knns)):
        div_knns[i] = div_knn[0][i][0][1]
    return div_knns
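# Hedged usage sketch for kNNdiv_general (illustrative only): two samples of
# matching dimension; the 'renyi:<alpha>' form is built via the alpha argument.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(800, 4)
Y = 1.2 * rng.randn(800, 4)
d_kl = kNNdiv_general(X, Y, Knn=3, div_func='kl')
d_renyi = kNNdiv_general(X, Y, Knn=[3, 5], div_func='renyi', alpha=0.5)
print(d_kl, d_renyi)  # scalar, then one estimate per K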
def computePairwiseSimilarities2(patients, y):
    """
    Compute the pairwise similarity between bags using Dougal's code.

    Inputs:
    - patients: the collection of patient features
    - y: labels (number of abnormal nodes) for each patient, used to fit
      the KNNDivergenceEstimator

    Returns:
    - sims: the pairwise similarities between each patient
    * Note: sims is an NxN symmetric matrix, where N is the number of patients
    """
    # pass the features and labels to scikit-learn Features
    # note: the learning methods won't use the labels; this is for convenience
    feats = Features(patients, labels=y)  # directly from Dougal

    # estimate the distances between the bags (patients) with
    # KNNDivergenceEstimator (KL divergence, 3 nearest neighbors);
    # PairwisePicker((0, 0)) picks out the single divergence matrix (first
    # div_func, first K) from the estimator's 4-D output, and Symmetrize,
    # RBFize, and ProjectPSD turn it into a valid (PSD) kernel
    # TODO: rewrite this to actually use PairwisePicker correctly next time
    distEstModel = Pipeline([
        ('divs', KNNDivergenceEstimator(div_funcs=['kl'], Ks=[3],
                                        n_jobs=-1, version='fast')),
        ('pick', PairwisePicker((0, 0))),
        ('symmetrize', Symmetrize()),
        ('rbf', RBFize(gamma=1, scale_by_median=True)),
        ('project', ProjectPSD())
    ])

    # return the pairwise similarities between the bags (patients)
    sims = distEstModel.fit_transform(feats)
    return sims
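# Hedged usage sketch for computePairwiseSimilarities2 (illustrative only):
# each "patient" is a bag of feature vectors (rows), and y gives one label
# per patient. The data below is synthetic and only illustrates the shapes.
import numpy as np

rng = np.random.RandomState(0)
patients = [rng.randn(rng.randint(40, 80), 5) for _ in range(10)]
y = rng.randint(0, 3, size=10)
sims = computePairwiseSimilarities2(patients, y)
print(sims.shape)  # (10, 10) symmetric, PSD similarity matrix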
def computeSubjSubjKernel(subjects, div='KL', numNeighbors=3):
    """
    Start by computing the pairwise similarities between subjects using
    Dougal's code. Then, for KL and HE, symmetrize, RBFize, and project the
    similarities onto a positive semi-definite space.

    Inputs:
    - subjects: the collection of patient features
    - div: which divergence to use. Options are
        - 'KL': Kullback-Leibler divergence, 'kl' in the function (default)
        - 'HE': Hellinger divergence, 'hellinger' in the function
        - 'MMD': Maximum Mean Discrepancy, calls another function
    - numNeighbors: how many neighbors to look at. Default is 3

    Returns:
    - kernel: the kernel calculated from the pairwise similarities between
      each subject
    * Note: kernel is an NxN symmetric matrix, where N is the number of subjects
    """
    # pass the features to scikit-learn Features
    feats = Features(subjects)  # directly from Dougal

    # specify the divergence to use
    if div == 'KL':
        # estimate the distances between the bags (patients) with
        # KNNDivergenceEstimator (KL divergence, numNeighbors nearest
        # neighbors); PairwisePicker((0, 0)) picks out the single divergence
        # matrix from the estimator's 4-D output, and Symmetrize makes it
        # symmetric (RBFize/ProjectPSD are applied by hand below)
        distEstModel = Pipeline([
            ('divs', KNNDivergenceEstimator(div_funcs=['kl'],
                                            Ks=[numNeighbors],
                                            n_jobs=-1, version='fast')),
            ('pick', PairwisePicker((0, 0))),
            ('symmetrize', Symmetrize())
            # ('rbf', RBFize(gamma=1, scale_by_median=True)),
            # ('project', ProjectPSD())
        ])

        # the pairwise similarities between the bags (patients)
        sims = distEstModel.fit_transform(feats)

        # Great, we have the similarities and they're symmetric.
        # Now RBFize them, but do the scale-by-median by hand
        rbf = RBFize(gamma=1, scale_by_median=False)
        simsMedian = np.median(sims[np.triu_indices_from(sims)])
        medianScaledSims = sims / simsMedian
        rbfedSims = rbf.fit_transform(medianScaledSims)

        # Final step in building the kernel: project the RBF'ed similarities
        # onto a positive semi-definite space
        psd = ProjectPSD()
        kernel = psd.fit_transform(rbfedSims)

    elif div == 'HE':
        # same as the KL branch, but with the Hellinger divergence
        distEstModel = Pipeline([
            ('divs', KNNDivergenceEstimator(div_funcs=['hellinger'],
                                            Ks=[numNeighbors],
                                            n_jobs=-1, version='fast')),
            ('pick', PairwisePicker((0, 0))),
            ('symmetrize', Symmetrize())
            # ('rbf', RBFize(gamma=1, scale_by_median=True)),
            # ('project', ProjectPSD())
        ])

        # the pairwise similarities between the bags (patients)
        sims = distEstModel.fit_transform(feats)

        # Now RBFize them; the median scaling is skipped here
        rbf = RBFize(gamma=1, scale_by_median=False)
        simsMedian = np.median(sims[np.triu_indices_from(sims)])
        # medianScaledSims = sims / simsMedian
        # rbfedSims = rbf.fit_transform(medianScaledSims)
        rbfedSims = rbf.fit_transform(sims)

        # Final step in building the kernel: project the RBF'ed similarities
        # onto a positive semi-definite space
        psd = ProjectPSD()
        kernel = psd.fit_transform(rbfedSims)

    elif div == 'MMD':
        # start by getting the median pairwise squared distance between
        # subjects, used as a heuristic for choosing the bandwidth of the
        # inner RBF kernel
        subset = np.vstack(feats)
        subset = subset[np.random.choice(subset.shape[0],
                                         min(2000, subset.shape[0]),
                                         replace=False)]
        subsetSquaredDists = euclidean_distances(subset, squared=True)
        featsMedianSquaredDist = np.median(
            subsetSquaredDists[np.triu_indices_from(subsetSquaredDists,
                                                    k=numNeighbors)],
            overwrite_input=True)

        # now we need to determine gamma (the scaling factor, inverse of sigma).
        # This was initially done in the library, but Kayhan believes there's
        # a multiplication instead of a division, so it's being done by hand
        firstGamma = 1 / featsMedianSquaredDist

        # calculate the MMDs
        mmds, mmkDiagonals = mmd.rbf_mmd(feats, gammas=firstGamma,
                                         squared=True, ret_X_diag=True)

        # now let's turn the squared MMD distances into a kernel:
        # symmetrize them
        sym = Symmetrize()
        mmds = sym.fit_transform(mmds)
        # get the median squared MMD distance
        mmdMedianSquaredDist = np.median(
            mmds[np.triu_indices_from(mmds, k=numNeighbors)])
        kernel = np.exp(np.multiply(mmds, -1 / mmdMedianSquaredDist))

    else:
        print("Error: divergence entered is not valid.")
        return -1

    return kernel
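# Hedged usage sketch for computeSubjSubjKernel (illustrative only): the
# resulting PSD kernel can be fed straight into an SVM with
# kernel='precomputed'. All data and sizes below are made up.
import numpy as np
from sklearn.svm import SVC

rng = np.random.RandomState(0)
subjects = [rng.randn(rng.randint(40, 80), 5) for _ in range(20)]
labels = rng.randint(0, 2, size=20)
K = computeSubjSubjKernel(subjects, div='KL', numNeighbors=3)
clf = SVC(kernel='precomputed').fit(K, labels)
print(clf.score(K, labels))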
def train_KNNDivergence(divergence, X_tr, y_tr, X_ts, y_ts, k=5, C=1, name=''):
    """
    Parameters
    ----------
    divergence: string
        Type of divergence to use when estimating the distance among
        distributions. Options: 'kl', 'renyi:.8', 'tsallis:.8', 'hellinger',
        'bc', 'l2', 'linear', 'jensen-shannon'.
    X_tr: array-like
        Training data
    y_tr: array-like
        Training output
    X_ts: array-like
        Test data
    y_ts: array-like
        Test output
    k: int, optional, default=5
        Number of k-nearest neighbours to use for the estimation of the
        distances.
    C: float, optional, default=1
        Regularization parameter for the SVM.
    """
    import warnings
    import numpy as np
    import pandas as pd
    from sklearn.pipeline import Pipeline
    from sklearn.svm import SVC, SVR
    from sklearn.metrics import accuracy_score, mean_squared_error
    from skl_groups.divergences import KNNDivergenceEstimator
    from skl_groups.kernels import PairwisePicker, Symmetrize, RBFize, ProjectPSD

    warnings.simplefilter('ignore')
    pipeline = [
        ('divs', KNNDivergenceEstimator(div_funcs=[divergence], Ks=[k])),
        ('pick', PairwisePicker((0, 0))),
        ('symmetrize', Symmetrize()),
        ('rbf', RBFize(gamma=1, scale_by_median=True)),
        ('project', ProjectPSD()),
    ]
    classification = (isinstance(y_tr[0][0], str)
                      or isinstance(y_tr[0][0], bool)
                      or isinstance(y_tr[0][0], np.bool_))
    if classification:
        pipeline.append(('svm', SVC(C=C, kernel='precomputed')))
    else:
        pipeline.append(('svm', SVR(C=C, kernel='precomputed')))
    model = Pipeline(pipeline)

    X_tr = [x for x in X_tr]
    y_tr = [y for y in y_tr]
    X_ts = [x for x in X_ts]
    y_ts = [y for y in y_ts]

    model.fit(X_tr, y_tr)
    preds = model.predict(X_ts)
    pd.DataFrame.from_dict({
        'preds': preds,
        'labels': np.array(y_ts).flatten()
    }).to_csv(name + '.csv')

    if classification:
        train_score = accuracy_score(np.array(y_tr).flatten(),
                                     model.predict(X_tr))
        test_score = accuracy_score(np.array(y_ts).flatten(), preds)
    else:
        train_score = mean_squared_error(y_tr, model.predict(X_tr))
        test_score = mean_squared_error(y_ts, model.predict(X_ts))
    # wandb.log({'train_mse': train_score, 'test_mse': test_score})
    # print(train_score, test_score)
    return train_score, test_score
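# Hedged usage sketch for train_KNNDivergence (illustrative only, assuming
# skl-groups and FLANN are installed): bags of samples per instance and one
# regression target per bag, wrapped as 1-element arrays so that y_tr[0][0]
# is a float and the SVR branch is taken. The name 'knn_div_demo' and all
# data below are made up; a CSV of predictions is written as a side effect.
import numpy as np

rng = np.random.RandomState(0)
bags = [rng.randn(rng.randint(30, 60), 4) for _ in range(30)]
targets = [np.array([b.mean()]) for b in bags]
train_mse, test_mse = train_KNNDivergence('kl', bags[:20], targets[:20],
                                          bags[20:], targets[20:],
                                          k=5, C=1, name='knn_div_demo')
print(train_mse, test_mse)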