def test_private_gradvalues_noise(self):
    """Check that the noise in the private gradients has the expected std.

    For every combination of norm-accumulation mode, microbatch size, noise
    multiplier and clipping norm, the jitted private gradient function is
    evaluated ``n_runs`` times on the same data.  The per-coordinate sample
    standard deviation is then compared with
    ``l2_norm_clip * noise_multiplier / (ntrain // microbatch)`` using a
    two-sided chi-square test: the expected value must be accepted, while
    values 25% larger and 25% smaller must be rejected.
    """
    n_runs = 100
    significance = 0.0001
    dof = n_runs - 1

    def std_is_plausible(observed_std, hypothesised_std):
        # Two-sided chi-square check of the sample variance against the
        # hypothesised variance, applied to every gradient coordinate.
        statistic = dof * observed_std**2 / hypothesised_std**2
        cdf_values = chi2.cdf(statistic, dof)
        return (np.all(significance <= cdf_values)
                and np.all(cdf_values <= 1.0 - significance))

    settings = (
        (accumulate, microbatch, noise_multiplier, l2_norm_clip)
        for accumulate in [True, False]
        for microbatch in [1, 10, self.ntrain]
        for noise_multiplier in [0.1, 10.0]
        for l2_norm_clip in [0.01, 0.1]
    )
    for accumulate, microbatch, noise_multiplier, l2_norm_clip in settings:
        private_gv = objax.Jit(
            objax.privacy.dpsgd.PrivateGradValues(
                self.loss,
                self.model_vars,
                noise_multiplier,
                l2_norm_clip,
                microbatch,
                batch_axis=(0, 0),
                use_norm_accumulation=accumulate))

        # Repeat the run, keeping one flattened gradient vector per run.
        flat_grads = []
        for _ in range(n_runs):
            grads, values = private_gv(self.data, self.labels)
            flattened = np.concatenate([g.reshape(-1) for g in grads])
            flat_grads.append(flattened)
            # The returned loss value itself must match the plain loss.
            np.testing.assert_allclose(
                values[0],
                self.loss(self.data, self.labels)[0],
                atol=1e-7)
        flat_grads = np.array(flat_grads)

        # Empirical std across runs versus the std the mechanism should add.
        observed = np.std(flat_grads, axis=0, ddof=1)
        expected = l2_norm_clip * noise_multiplier / (
            self.ntrain // microbatch)

        # The correct standard deviation has to pass the test ...
        self.assertTrue(std_is_plausible(observed, expected))
        # ... and clearly wrong ones (too large, too small) have to fail.
        self.assertFalse(std_is_plausible(observed, 1.25 * expected))
        self.assertFalse(std_is_plausible(observed, 0.75 * expected))
def chiSqQuant1(x, y, num_states_x, num_states_y):
    """Chi-square test of independence between two discrete samples.

    Args:
        x: sequence of observed states of the first variable.
        y: sequence of observed states of the second variable.
        num_states_x: number of states the first variable can take.
        num_states_y: number of states the second variable can take.

    Returns:
        Tuple ``(p_value, T)`` where ``T`` is the chi-square statistic and
        ``p_value`` its upper-tail probability.  When either variable has a
        single state the test is skipped and ``(1, 0)`` is returned.
    """
    if num_states_x == 1 or num_states_y == 1:
        return (1, 0)

    # Re-code the observed values as consecutive integer labels.
    _, x = np.unique(x, return_inverse=True)
    _, y = np.unique(y, return_inverse=True)
    x = x - min(x)
    y = y - min(y)

    # hist3 is defined elsewhere in the file; presumably it returns the
    # (num_states_x, num_states_y) table of joint counts -- TODO confirm.
    n_mat = hist3(x, y, range(num_states_x), range(num_states_y))
    p = np.sum(n_mat, axis=1)  # row totals
    w = np.sum(n_mat, axis=0)  # column totals
    nullerp = len(p) - np.count_nonzero(p)  # number of empty rows
    nullerw = len(w) - np.count_nonzero(w)  # number of empty columns
    lengthX = float(len(x))

    T = 0
    for i in range(num_states_x):
        for j in range(num_states_y):
            # Cells in an empty row or column have zero expected count and
            # are left out of the statistic.
            if p[i] != 0 and w[j] != 0:
                n_star = p[i] * w[j] / lengthX
                T += (n_mat[i][j] - n_star) ** 2 / n_star

    # Empty rows/columns do not contribute degrees of freedom.
    degrees = (num_states_x - 1 - nullerp) * (num_states_y - 1 - nullerw)
    if degrees == 0:
        degrees = 1

    # Survival function instead of 1 - cdf: the subtraction rounds to 0.0
    # for strongly dependent data, sf keeps the small p-value.
    result = chi2.sf(T, degrees)
    return (result, T)
def BartlettTest(x, y):
    """Bartlett's test for equal variances of two samples.

    Args:
        x: first sample (sequence of numbers, at least two values).
        y: second sample (sequence of numbers, at least two values).

    Returns:
        Tuple ``(X2, pValue)``: the Bartlett statistic and its upper-tail
        probability under a chi-square distribution with ``k - 1 = 1``
        degree of freedom.
    """
    # Analyze data
    sizes = np.array([len(x), len(y)])
    variances = np.array([np.var(x, ddof=1), np.var(y, ddof=1)])
    k = sizes.size
    dof = sizes - 1          # per-sample degrees of freedom, N_i - 1
    pooled_dof = dof.sum()   # N - k
    pooled_var = np.sum(dof * variances) / pooled_dof

    # Test statistic
    numerator = (pooled_dof * np.log(pooled_var)
                 - np.sum(dof * np.log(variances)))
    denominator = 1 + (np.sum(1.0 / dof) - 1.0 / pooled_dof) / (3 * (k - 1))
    X2 = numerator / denominator

    # Upper-tail probability; sf avoids the cancellation of 1 - cdf, which
    # returns exactly 0.0 when the variances differ strongly.
    pValue = chi2.sf(X2, k - 1)
    return X2, pValue
def chiSqQuant1(x, y, num_states_x, num_states_y):
    """Return ``(p_value, T)`` of a chi-square independence test.

    ``x`` and ``y`` are equally long sequences of discrete observations and
    ``num_states_x`` / ``num_states_y`` the number of states each variable
    can take.  ``T`` is the chi-square statistic built from the joint count
    table and ``p_value`` its upper-tail probability.  If either variable
    has only one state, ``(1, 0)`` is returned without testing.
    """
    if num_states_x == 1 or num_states_y == 1:
        return (1, 0)

    # Map observations to integer labels starting at zero.
    _, x = np.unique(x, return_inverse=True)
    _, y = np.unique(y, return_inverse=True)
    x = x - min(x)
    y = y - min(y)

    # hist3 comes from elsewhere in the file; it is assumed to yield the
    # joint count table indexed [state_x][state_y] -- verify against hist3.
    n_mat = hist3(x, y, range(num_states_x), range(num_states_y))
    row_totals = np.sum(n_mat, axis=1)
    col_totals = np.sum(n_mat, axis=0)
    empty_rows = len(row_totals) - np.count_nonzero(row_totals)
    empty_cols = len(col_totals) - np.count_nonzero(col_totals)
    n_obs = float(len(x))

    T = 0
    for i in range(num_states_x):
        if row_totals[i] == 0:
            # Whole row unobserved: every expected count is zero.
            continue
        for j in range(num_states_y):
            if col_totals[j] == 0:
                continue
            expected = row_totals[i] * col_totals[j] / n_obs
            T += (n_mat[i][j] - expected) ** 2 / expected

    # States that never occur are removed from the degrees of freedom;
    # a degenerate table falls back to one degree of freedom.
    degrees = (num_states_x - 1 - empty_rows) * (num_states_y - 1 - empty_cols)
    if degrees == 0:
        degrees = 1

    # chi2.sf gives the tail probability directly; 1 - chi2.cdf would round
    # to 0.0 for very large statistics.
    return (chi2.sf(T, degrees), T)
def gating_ok(self, MHD, sensor):
    """Return True if a measurement lies inside the sensor's gate.

    ``MHD`` is squared and passed through the chi-square CDF; the
    measurement is accepted when that probability is below the gating
    threshold configured in ``params`` for the sensor type.  Only sensors
    named 'lidar' (2 degrees of freedom) and 'camera' (1 degree of freedom)
    are configured here.
    """
    dof = None
    threshold = None
    if sensor.name == 'lidar':
        # Tuning note kept from the original author: a larger gate works
        # better for lidar, i.e. the lidar noise is a bit underestimated,
        # hence the dedicated lidar threshold.
        dof, threshold = 2, params.gating_threshold_lidar
    if sensor.name == 'camera':
        threshold, dof = params.gating_threshold, 1

    probability = chi2.cdf(MHD * MHD, dof)
    if sensor.name == 'lidar':
        print("lidar chisqr = {}".format(probability))

    return True if probability < threshold else False
def Qstatistic(series, m, ljung_box=False):
    """Returns 'Q_m' and the corresponding pvalue.

    Q here refers to the Box and Pierce (1970) statistic.
    (Inspired by CLM p.49)

    @param series: The series on which to compute Q.
    @param m: The order up to which sum squared autocorrelations.
    @param ljung_box: If True, Q will be corrected according to
                      Ljung and Box (1978).
    @return: Q_m, pvalue
    """
    T = len(series)

    Q_m = 0.0
    for k in range(1, m + 1):  # lags 1..m inclusive
        # Ljung-Box weights lag k by 1/(T-k); Box-Pierce uses unit weights.
        weight = 1.0 / (T - k) if ljung_box else 1.0
        rho_k = autocorrelation(series, k, biased=True)[0]
        Q_m += weight * rho_k**2

    Q_m *= T
    if ljung_box:
        Q_m *= (T + 2)

    # Q_m is asymptotically distributed Chi^2(m).  The survival function is
    # used rather than 1 - cdf, which rounds to 0.0 for large Q_m.
    return Q_m, chi2.sf(abs(Q_m), m)
def Qstatistic(series, m, ljung_box=False):
    """Returns 'Q_m' and the corresponding pvalue.

    Q here refers to the Box and Pierce (1970) statistic, i.e. T times the
    sum of the first m squared autocorrelations.
    (Inspired by CLM p.49)

    @param series: The series on which to compute Q.
    @param m: The order up to which sum squared autocorrelations.
    @param ljung_box: If True, Q will be corrected according to
                      Ljung and Box (1978): lag k is weighted by 1/(T-k)
                      and the sum is scaled by T*(T+2).
    @return: Q_m, pvalue
    """
    T = len(series)

    def lag_weight(k):
        # Finite-sample correction of Ljung and Box; 1.0 for Box-Pierce.
        return 1.0 / (T - k) if ljung_box else 1.0

    # range() excludes its upper bound, hence m + 1.
    weighted_sum = sum(
        (lag_weight(k) * autocorrelation(series, k, biased=True)[0]**2
         for k in range(1, m + 1)),
        0.0)

    scale = T * (T + 2) if ljung_box else T
    Q_m = weighted_sum * scale

    # Q_m is asymptotically distributed Chi^2(m); sf() keeps precision in
    # the far tail where 1 - cdf() would cancel to zero.
    return Q_m, chi2.sf(abs(Q_m), m)
def LikelihoodRatioTest(Model1, Model2, Delta_DOF):
    """Return the p-value of a likelihood-ratio test between two models.

    Args:
        Model1: restricted model; must expose its log-likelihood as ``llf``.
        Model2: unrestricted model; must expose its log-likelihood as ``llf``.
        Delta_DOF: difference in the number of free parameters, used as the
            degrees of freedom of the chi-square reference distribution.

    Returns:
        Upper-tail probability of ``2 * (Model2.llf - Model1.llf)``.
    """
    LRT = 2 * (Model2.llf - Model1.llf)
    # Survival function rather than 1 - cdf: the subtraction rounds to 0.0
    # for large statistics, sf returns the actual (tiny) p-value.
    return chi2.sf(LRT, Delta_DOF)
def g_huber2(t, r, qH=0.8, cH=None, bH=None, aH=None):
    """Evaluate the density generator g(t) of the Huber distribution.

    Accepted argument combinations:
        t, r
        t, r, qH
        t, r, cH, bH, aH  -- lets the caller precompute the constants once
                             instead of recomputing them on every call.

    Args:
        t : 1darray of size N, squared Mahalanobis distances
        r : int, dimension
        qH : float, tuning parameter, standard value 0.8, choose qH > 0.701
        cH : float, tuning parameter
        bH : float, tuning parameter
        aH : float, tuning parameter

    Returns:
        g: 1darray of size N, g(t) of Huber distribution

    Raises:
        ValueError: if only some of cH, bH, aH are supplied
    """
    n_missing = sum(const is None for const in (cH, bH, aH))
    if n_missing not in (0, 3):
        raise ValueError("Incorrect combination of inputs")

    if n_missing == 3:
        # Derive the constants from the quantile parameter qH.
        def upper_gamma(a, x):
            # Unnormalised upper incomplete gamma function.
            return gammaincc(a, x) * gamma(a)

        cH = np.sqrt(chi2.ppf(qH, r))
        bH = chi2.cdf(cH**2, r + 2) + cH**2 / r * (1 - chi2.cdf(cH**2, r))
        aH = gamma(r / 2) / np.pi**(r / 2) / (
            (2 * bH)**(r / 2) *
            (gamma(r / 2) - upper_gamma(r / 2, cH**2 / (2 * bH))) +
            (2 * bH * cH**r * np.exp(-cH**2 / (2 * bH))) / (cH**2 - bH * r))

    central = t <= cH**2
    tail = t > cH**2

    g = np.zeros(len(t))
    # Exponential centre, power-law tails beyond cH**2.
    g[central] = aH * np.exp(-t[central] / (2 * bH))
    g[tail] = aH * (np.exp(1) * t[tail] / cH**2)**(-cH**2 / (2 * bH))
    return g
def main():
    """Flag Mahalanobis-distance outliers among a few test vectors.

    Reads the dataset selected by the hard-coded ``fileName``, computes the
    Mahalanobis distance of each test vector to the data (via the
    ``mahalanobis`` helper defined elsewhere in the file), compares it with
    the chi-square critical value for p = 0.001 and 3 degrees of freedom,
    and prints the resulting table.

    Returns:
        0 on completion.  Exits through ``sys.exit`` if ``fileName`` is not
        one of the known datasets.
    """
    bound = ''.ljust(40, '*')
    start = ' [ STARTED ] '.center(40, '*')
    end = ' [ FINISHED ] '.center(40, '*')
    print(bound + '\n' + start + '\n' + bound)

    fileName = 'lesson7_mahal_diamonds.csv'

    if fileName == 'lesson7_mahal_dataset_0.csv':
        # Read data
        # Taken from https://jamesmccaffrey.wordpress.com/2017/11/09/example-of-calculating-the-mahalanobis-distance/
        # We assumed that the three variables are independent
        data = pd.read_csv(fileName)
        test = pd.DataFrame([[66, 640, 44], [69, 595, 38]],
                            columns=['height', 'score', 'age'])
    elif fileName == 'lesson7_mahal_diamonds.csv':
        # Read data
        # Taken from https://www.machinelearningplus.com/statistics/mahalanobis-distance/
        data = pd.read_csv(fileName).iloc[:, [0, 4, 6]]
        # Explicit copy: result columns are added to `test` below, which on
        # a slice of `data` triggers pandas' chained-assignment warning.
        test = data[['carat', 'depth', 'price']].head(5).copy()
    else:
        sys.exit('[ERROR]: File not found!')

    # Mahalanobis distance
    # NOTE(review): the comparison below treats this value as the *squared*
    # distance -- confirm against the mahalanobis() helper.
    test['mahalanobis'] = mahalanobis(test, data, None)

    # Probability (1-p) with which we are certain that, when the
    # squared Mahalanobis distance is greater than the critical
    # value (cv) associated with this probability and the proper DOF,
    # then the test vector (tv) is an outlier
    p = 0.001
    dof = 3
    cv = chi2.ppf(1 - p, dof)

    test['critical-value'] = cv
    # Survival function instead of 1 - cdf so that extreme outliers keep a
    # non-zero p-value.
    test['p_value'] = chi2.sf(test['mahalanobis'], dof)
    test['outlier'] = np.where(test['mahalanobis'] > cv, 'true', 'false')

    print(test)
    print(bound + '\n' + end + '\n' + bound)
    return 0
def eta_huber2(t, r, qH=0.8, cH=None, bH=None):
    """Evaluate eta(t) of the Huber distribution.

    Accepted argument combinations:
        t, r
        t, r, qH
        t, r, cH, bH  -- lets the caller precompute the constants once
                         instead of recomputing them on every call.

    Args:
        t : 1darray of size N, squared Mahalanobis distances
        r : int, dimension
        qH : float, tuning parameter, standard value 0.8, choose qH > 0.701
        cH : float, tuning parameter
        bH : float, tuning parameter

    Returns:
        eta: 1darray of size N, eta(t) of Huber distribution

    Raises:
        ValueError: if exactly one of cH and bH is supplied
    """
    n_missing = sum(const is None for const in (cH, bH))
    if n_missing == 1:
        raise ValueError("Incorrect combination of inputs")

    if n_missing == 2:
        # Derive the constants from the quantile parameter qH.
        cH = np.sqrt(chi2.ppf(qH, r))
        bH = chi2.cdf(cH**2, r + 2) + cH**2 / r * (1 - chi2.cdf(cH**2, r))

    # eta is zero up to cH**2 and only non-zero in the tails.
    tail = t > cH**2
    eta = np.zeros(len(t))
    eta[tail] = -cH**2 / (2 * bH * t[tail]**2)
    return eta
def chi2stats(data):
    """Print chi-square diagnostics for one fit and plot data against model.

    ``data`` is a 7-tuple ``(xs, stdevs, x, k, l, df, name)``.  The function
    prints the name, the chi-square value returned by ``chi2_old``, the
    value ``chi_squared.isf(.05, df)`` and one minus the CDF at the sample
    value, then plots the observations as dots together with the model
    curve and shows the figure.
    """
    observations, sigmas, x, k, l, df, name = data
    sample_chi2 = chi2_old(x, k, l, observations, sigmas)

    print(name)
    print(sample_chi2)
    critical_value = chi_squared.isf(.05, df)
    print(critical_value)
    upper_tail = 1 - chi_squared.cdf(sample_chi2, df)
    print(upper_tail)

    plt.plot(observations, "o")
    model_curve = [model(x, k, l, idx) for idx in range(len(observations))]
    plt.plot(model_curve)
    plt.title(name)
    plt.show()
def chisquare_pvalue(obs, ex):
    """
    Given a 2x2 contingency table both observed and expected, returns the
    corresponding chisquared p-value.

    @param obs An array (list of lists or numpy array) of observed values
    @param ex An array (list of lists or numpy array) of expected values
    @return The upper-tail probability of the Pearson statistic under a
            chi-square distribution with 1 degree of freedom
    """
    # float() keeps the division exact for integer tables on Python 2 too.
    statistic = sum(
        (obs[i][j] - ex[i][j]) ** 2 / float(ex[i][j])
        for i in range(2)
        for j in range(2)
    )
    # Survival function instead of 1 - cdf: the latter rounds to 0.0 once
    # the p-value drops below machine precision.
    return chi2.sf(statistic, 1)
def _fisher_hsic_test(self, X, R, max_p_stat):
    """Conduct statistical test by HSIC and Fisher's method.

    With a single column in ``X`` the result of ``hsic_test_gamma(X, R)`` is
    returned directly.  Otherwise every column of ``X`` is tested against
    ``R`` separately and the p-values are combined as ``sum(-2 * log(p))``,
    which is referred to a chi-square distribution with ``2 * n_features``
    degrees of freedom.

    Returns the pair ``(fisher_p, fisher_stat)``.

    NOTE(review): the nesting below (``fisher_p`` computed inside the
    ``else`` branch, after the loop) was reconstructed from a collapsed
    source line -- confirm against the original file.
    """
    fisher_stat = 0
    n_features = X.shape[1]

    if n_features == 1:
        # hsic_test_gamma is defined elsewhere; its result is unpacked here
        # as (statistic, p-value).
        fisher_stat, fisher_p = hsic_test_gamma(X, R)
    else:
        for i in range(n_features):
            _, hsic_p = hsic_test_gamma(X[:, [i]], R)
            # A p-value of exactly zero would give log(0); treat it as an
            # infinite contribution instead.
            fisher_stat += np.inf if hsic_p == 0 else -2 * np.log(hsic_p)

            # Stop accumulating once the statistic exceeds the caller's
            # bound; remaining columns are not evaluated.
            if fisher_stat > max_p_stat:
                break
        fisher_p = 1 - chi2.cdf(fisher_stat, df=2 * n_features)

    return fisher_p, fisher_stat
def gating_ok(self, MHD, sensor):
    """Tell whether a measurement falls inside the gate of ``sensor``.

    The squared ``MHD`` is converted to a chi-square CDF value (2 degrees
    of freedom for 'lidar', 1 for 'camera') and compared against the
    matching threshold from ``params``.  Returns True when the CDF value is
    below the threshold, otherwise False.
    """
    sensor_kind = sensor.name
    if sensor_kind == 'camera':
        gate_val = params.gating_threshold
        df = 1
    elif sensor_kind == 'lidar':
        df = 2
        gate_val = params.gating_threshold_lidar
    else:
        # No gate is configured for other sensor names.
        df = None
        gate_val = None

    squared_distance = MHD * MHD
    per = chi2.cdf(squared_distance, df)
    if sensor_kind == 'lidar':
        print("lidar chisqr = {}".format(per))

    if not per < gate_val:
        return False
    return True
def jarque_bera(self, alpha=0.05):
    """Returns the Jarque-Bera test of normality based on kurtosis and skewness

    Requires > 2000 samples
    Returns test statistics and the p-value

    The statistic is ``n / 6 * (skewness**2 + (kurtosis - 3)**2 / 4)`` and
    the p-value is its upper-tail probability under a chi-square
    distribution with 2 degrees of freedom (see scipy.stats.jarque_bera).
    If scipy could not be imported (``chi2 is None``) the p-value is the
    string "scipy missing".

    ``alpha`` is accepted for interface compatibility but not used.
    """
    self._finalize()
    # Float literals keep the arithmetic exact under Python 2 integer
    # division as well (1/4 would otherwise be 0 there).
    JB = self.vcount / 6.0 * (
        self.vskewness ** 2 + (self.vkurtosis - 3) ** 2 / 4.0)
    if chi2 is None:
        p = "scipy missing"
    else:
        # sf() instead of 1 - cdf(): no cancellation for large JB values.
        p = chi2.sf(JB, 2)
    return JB, p
def survdiff(time, status, group, weight_type=None, strata=None, **kwargs):
    """
    Test for the equality of two survival distributions.

    Parameters:
    -----------
    time : array-like
        The event or censoring times.
    status : array-like
        The censoring status variable, status=1 indicates that the
        event occurred, status=0 indicates that the observation was
        censored.
    group : array-like
        Indicators of the two groups
    weight_type : string
        The following weight types are implemented:
            None (default) : logrank test
            fh : Fleming-Harrington, weights by S^(fh_p),
                 requires exponent fh_p to be provided as keyword
                 argument; the weights are derived from S defined at
                 the previous event time, and the first weight is
                 always 1.
            gb : Gehan-Breslow, weights by the number at risk
            tw : Tarone-Ware, weights by the square root of the number
                 at risk
    strata : array-like
        Optional stratum indicators for a stratified test

    Returns
    --------
    chisq : The chi-square (1 degree of freedom) distributed test
            statistic value
    pvalue : The p-value for the chi^2 test

    Raises
    ------
    ValueError : if `group` does not contain exactly two distinct values
    """

    # TODO: extend to handle more than two groups

    time = np.asarray(time)
    status = np.asarray(status)
    group = np.asarray(group)

    gr = np.unique(group)
    if len(gr) != 2:
        raise ValueError("logrank only supports two groups")

    if strata is None:
        obs, var = _survdiff(time, status, group, weight_type, gr,
                             **kwargs)
    else:
        # Stratified test: observed-minus-expected and variance are summed
        # over the strata before forming the statistic.
        strata = np.asarray(strata)
        stu = np.unique(strata)
        obs, var = 0., 0.
        for st in stu:
            # could be more efficient?
            ii = (strata == st)
            obs1, var1 = _survdiff(time[ii], status[ii], group[ii],
                                   weight_type, gr, **kwargs)
            obs += obs1
            var += var1

    zstat = obs / np.sqrt(var)

    # The chi^2 test statistic and p-value.  The survival function is used
    # instead of 1 - cdf, which rounds to 0.0 for very large statistics.
    chisq = zstat**2
    pvalue = chi2.sf(chisq, 1)

    return chisq, pvalue
def _main():
    """Compare a hand-built network with a stacked ensemble on synthetic
    multilabel data.

    Generates a multilabel classification problem, prints label
    correlations, trains one fixed Keras network and reports its
    performance, then trains ``n_ensembles`` randomly configured networks,
    stacks logistic regressions on their predictions per label, and prints
    the classification report of the stacked predictions.  Returns 0.

    NOTE(review): the loops use ``xrange``, which exists only on Python 2.
    """
    # Pick an arbitrary number of labels
    n_classes = 10
    # Number of samples to generate
    n_samples = 10000
    X, y = make_multilabel_classification(n_samples=n_samples,
                                          n_features=50,
                                          n_classes=n_classes,
                                          n_labels=5,
                                          length=50,
                                          allow_unlabeled=False,
                                          sparse=False,
                                          return_indicator='dense',
                                          return_distributions=False,
                                          random_state=None)
    # X is (10000,50), y is (10000,10) with an average of 5 labels per sample
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Just dump the correlation matrix to confirm that there is some inter-label correlation
    # along with the t-statistics and chi2 values to show that it is not insignificant
    df = pd.DataFrame(y)
    print('n_samples={}, corr'.format(n_samples))
    # t-statistics for correlation coefficients of 0.05 and 0.1
    tvalue1 = 0.05 / np.sqrt((1 - 0.05**2) / (n_samples - 2))
    tvalue2 = 0.1 / np.sqrt((1 - 0.1**2) / (n_samples - 2))
    print('{}/sqrt( (1-{}**2)/(n_samples-2) ) == {}'.format(
        0.05, 0.05, tvalue1))
    print('{}/sqrt( (1-{}**2)/(n_samples-2) ) == {}'.format(0.1, 0.1, tvalue2))
    # NOTE(review): these print chi2.cdf of the t-statistic with 2 degrees
    # of freedom (a CDF value, not a statistic) -- confirm this is intended.
    print('X-squared value: {}, {}'.format(0.05, chi2.cdf(tvalue1, 2)))
    print('X-squared value: {}, {}'.format(0.1, chi2.cdf(tvalue2, 2)))
    print(df.corr())

    input_dim = X_train.shape[1]

    # First, build some hand-tooled network, obviously guessing at the optimal meta-parameters
    print('hand-tooling single fully-connected network...')
    model = Sequential()
    model.add(Dense(1000, activation='relu', input_dim=input_dim))
    model.add(Dropout(0.1))
    model.add(Dense(600, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(n_classes, activation='sigmoid'))

    sgd = SGD(lr=0.01, decay=1e-6, nesterov=True, momentum=0.9)
    model.compile(loss='binary_crossentropy', optimizer=sgd)

    model.fit(X_train, y_train, epochs=5, batch_size=int(n_samples * 0.2))

    predictions = model.predict(X_test)
    # Sigmoid outputs are rounded to 0/1 before scoring.
    print(classification_report(y_test, predictions.round()))

    # Now, specify some random, also non-optimal ranges to play with and create several
    # models within this space. In the end, we will combine the predictions from each
    # of them in two-stages. 1. Output n_ensembles predictions for each of the n class
    # labels. These predictions train a ridge on its particular class label. 2. Combine
    # all of the ridge predictions using another ridge and produce the predictions.
    n_ensembles = 100
    print(
        'ridge regression of {} semi-random estimators...'.format(n_ensembles))
    models = []
    for _ in xrange(n_ensembles):
        # Only 'neuralnet' can be chosen, so the 'forest' branch below is
        # currently never taken.
        estimator_type = random.choice(['neuralnet'])
        if estimator_type == 'neuralnet':
            model = Sequential()
            # Random depth, width and dropout for each hidden layer.
            for i in xrange(np.random.randint(2, 8)):
                n_neurons = np.random.randint(400, 1200)
                n_dropout = random.choice([0.1, 0.2, 0.3, 0.4, 0.5])
                model.add(
                    Dense(n_neurons, activation='relu', input_dim=input_dim))
                model.add(Dropout(n_dropout))
            model.add(Dense(n_classes, activation='sigmoid'))
            sgd = SGD(lr=random.choice([0.01, 0.02]),
                      decay=1e-6,
                      nesterov=random.choice([True, False]),
                      momentum=random.choice([0.8, 0.9]))
            model.compile(loss='binary_crossentropy', optimizer=sgd)
            model.fit(X_train,
                      y_train,
                      epochs=np.random.randint(5, 8),
                      batch_size=int(n_samples *
                                     random.choice([0.1, 0.2, 0.3])))
        elif estimator_type == 'forest':
            model = RandomForestClassifier(
                n_estimators=random.choice(range(300, 600, 50)))
            model.fit(X_train, y_train)
        models.append(model)

    def ridgeit(X, models):
        # Horizontally stack the predictions of every item obtained by
        # iterating over `models`.
        ridge_predictions = np.array([])
        for model in models:
            X_ridge = model.predict(X)
            if len(ridge_predictions):
                ridge_predictions = np.hstack((ridge_predictions, X_ridge))
            else:
                ridge_predictions = X_ridge
        return ridge_predictions

    print('building ridges...')
    # One logistic regression per (label, base model) pair, plus one
    # combining logistic regression per label.
    ridge = {
        i: {model: LogisticRegression(penalty='l2')
            for model in models}
        for i in xrange(n_classes)
    }
    ensemble = {i: LogisticRegression(penalty='l2') for i in xrange(n_classes)}
    for label in xrange(n_classes):
        y_ridge = y_train[:, label]
        for model in models:
            X_ridge = model.predict(X_train)
            ridge[label][model].fit(X_ridge, y_ridge)
        # NOTE(review): ridge[label] is a dict keyed by base model, so
        # ridgeit() iterates over the base models and stacks *their*
        # predictions; the logistic regressions fitted just above are not
        # used for prediction here -- confirm this is intended.
        ridge_predictions = ridgeit(X_train, ridge[label])
        ensemble[label].fit(ridge_predictions, y_ridge)

    print('making predictions...')
    y_predictions = np.zeros(y_test.shape)
    for label in xrange(n_classes):
        ridge_predictions = ridgeit(X_test, ridge[label])
        predictions = ensemble[label].predict(ridge_predictions)
        y_predictions[:, label] = predictions

    print(classification_report(y_test, y_predictions))

    return 0
def GW_CPA_test(loss1: np.ndarray,
                loss2: np.ndarray,
                tau: int,
                alpha: float = 0.05,
                conditional: bool = False,
                verbose: bool = True):
    """
    Giacomini-White Conditional Predictive Ability Test

    Parameters
    ----------
    loss1: numpy array of shape (t, 1)
        losses of model 1
    loss2: numpy array of shape (t, 1)
        losses of model 2
    tau: int
        the past information treated as 'available' for the test.
    alpha: float
        nominal risk level used for the critical value.
    conditional: boolean,
        True for the conditional (GW) test, which adds the lagged loss
        difference as an instrument; False for the unconditional (DM) test.
    verbose: boolean,
        True if prints of test are needed

    Returns
    -------
    test_stat: test statistic of the predictive ability test, signed by
        the mean loss difference
    crit_val: critical value of the chi-square test at level ``alpha``
    p_val: p-value of the test
    """

    assert len(loss1) == len(loss2)

    lossdiff = loss1 - loss2
    t = len(loss1)
    instruments = np.ones_like(loss1)

    if conditional:
        # Instruments: a constant and the loss difference lagged by tau.
        instruments = np.hstack((instruments[:t - tau], lossdiff[:-tau]))
        lossdiff = lossdiff[tau:]
        t = t - tau

    reg = instruments * lossdiff

    if tau == 1:
        # One-step horizon: statistic is t * R^2 of regressing a vector of
        # ones on the instrumented loss differences.
        res_beta = np.linalg.lstsq(reg, np.ones(t), rcond=None)[0]
        # Residuals kept one-dimensional: subtracting the (t,) fit from a
        # (t, 1) column would broadcast to a (t, t) matrix.
        err = np.ones(t) - reg.dot(res_beta)
        r2 = 1 - np.mean(err**2)
        test_stat = t * r2
    else:
        # Multi-step horizon: Wald statistic with a Newey-West covariance
        # estimate (helper defined elsewhere in the file).
        zbar = np.mean(reg, axis=0)
        n_lags = tau - 1
        omega = Newey_West(Z=reg, n_lags=n_lags)
        test_stat = np.expand_dims(t * zbar, axis=0).dot(
            np.linalg.inv(omega)).dot(zbar)

    # The sign records which model has the lower average loss.
    test_stat *= np.sign(np.mean(lossdiff))

    q = reg.shape[1]
    crit_val = chi2.ppf(1 - alpha, df=q)
    # Survival function instead of 1 - cdf to keep tiny p-values non-zero.
    p_val = chi2.sf(test_stat, q)

    s = '+' if np.mean(loss1 - loss2) > 0 else '-'

    if verbose:
        if conditional:
            print('\nConditional test:\n')
        if not conditional:
            print('\nUnconditional test:\n')
        print(f'Forecast horizon: {tau}, Nominal Risk Level: {alpha}')
        print(f'Test-statistic: {test_stat} ({s})')
        print(f'Critical value: {crit_val}')
        print(f'p-value: {p_val}\n')

    return test_stat, crit_val, p_val
lambda freq, M, T: PlanckMod(freq, D, M, T, beta=k), freq[nonan], flux[i][::2][nonan], sigma=flux[i][1::2][nonan], p0=data4[i, 1::2], maxfev=max_iter) # Chi-squared Statistic # model flux values flux_chi2 = PlanckMod(freq[nonan], data[i, 11], *popt[j, i], beta=k) # reduced chi-sq chi2[j, i] = sum( ((flux_chi2 - flux[i][::2][nonan]) / flux[i][1::2][nonan])** 2) / len(nonan[0]) # probability of chi-sq with (no. of datapoints) d.o.f. prob[j, i] = chisq.cdf(len(nonan[0]) * chi2[j, i], len(nonan[0])) # median chi-sq value CHI2[j, 0] = np.median(chi2[j][np.where(np.isnan(chi2[j]) == False)]) # mean chi-sq value CHI2[j, 1] = np.mean(chi2[j][np.where(np.isnan(chi2[j]) == False)]) # Best-fit parameters bopt = np.argmin(CHI2[:, 0]) # arg of optimal beta (based on median value) M, T = popt[bopt].T # mass & temperature fits # fit uncertainties dM, dT = np.array([np.sqrt(np.diag(pcov[bopt, i])) for i in range(len(data))]).T logM = np.log10(M / Msol) # log mass in solar units alldata = np.column_stack((T, logM)) # all data (used in HistPlot method) # Exporting best-fit Parameters
& ~wrong_predictions[j, :, :], axis=0) b_not_a = np.sum(~wrong_predictions[i, :, :] & wrong_predictions[j, :, :], axis=0) # An alterantive to the standard McNemar test is to include a # continuity correction term, resulting in: # mcnemar_corr_score = np.square(np.abs(a_not_b - b_not_a) - 1) / (a_not_b + b_not_a) # I tested both and came the conclusion, that we cannot reject the null hypotesis # for neither. The standard test however provide results that are easier to visualize. mcnemar_score[k, :] = np.square(a_not_b - b_not_a) / (a_not_b + b_not_a) k += 1 mcnemar_name += [names[i] + " vs " + names[j]] mcnemar = pd.DataFrame(1 - chi2.cdf(mcnemar_score, 1), index=mcnemar_name, columns=diagnosis) # p-value # Save results mcnemar.to_excel("./outputs/tables/mcnemar.xlsx", float_format='%.3f') mcnemar.to_csv("./outputs/tables/mcnemar.csv", float_format='%.3f') # %% Kappa score classifiers (Supplementary Table 2(a)) names = ["DNN", "cardio.", "emerg.", "stud."] predictors = [y_neuralnet, y_cardio, y_emerg, y_student] kappa_name = [] kappa_score = np.empty((6, 6)) k = 0 for i in range(4):
L_max = 2 * K_true # search range # Tukey: cT = 4.685 # t: nu = 3 # Huber: qant = 0.8 igamma = lambda a, b: gammaincc(a, b) * gamma(a) cH = np.sqrt(chi2.ppf(qant, r)) bH = chi2.cdf(cH**2, r + 2) + cH**2 / r * (1 - chi2.cdf(cH**2, r)) aH = gamma(r / 2) / np.pi**(r / 2) / ( (2 * bH)**(r / 2) * (gamma(r / 2) - igamma(r / 2, cH**2 / (2 * bH))) + (2 * bH * cH**r * np.exp(-cH**2 / (2 * bH))) / (cH**2 - bH * r)) #%% Density definitions g = [ partial(t6.g_gaus, r=r), partial(t6.g_t, r=r, nu=nu), partial(t6.g_huber2, r=r, cH=cH, bH=bH, aH=aH) ] rho = [ partial(t6.rho_gaus, r=r), partial(t6.rho_t, r=r, nu=nu),
def my_ancova(outcomelist,maindf,df14,df13=None,loglambdas=None,xvars=['hh_size','child_total','Loc'],outcomekind='Outcome',missing_y=0):
    # Purpose: stack every outcome in `outcomelist` into one regression of
    # the df14 outcome on the controls in `xvars` (plus, when given, the
    # df13 baseline value and loglambda), then compare the residuals of the
    # TUP and CTL groups.
    #
    # Returns (resultdf, sedf, tstats, chi2test): group means and their
    # difference, standard errors, t-statistics, and a formatted string
    # with the joint chi-square test.
    #
    # NOTE(review): Python 2 code (print statements).  It also reads a
    # module-level `df` holding the 'TUP'/'CTL' columns, calls `ols` from
    # elsewhere in the file, and writes the regressor matrix to
    # /tmp/myX.df as a side effect.

    # De-duplicate the controls; this also copies the default list so the
    # `+=` below never mutates the shared default argument.
    xvars=list(set(xvars))
    maindf=maindf.copy()
    if not loglambdas is None:
        maindf['loglambda']=loglambdas['lambda']
        xvars+=['loglambda']
        print xvars
        print "loglambdas shape ", loglambdas.shape

    bothdf=[]
    for x in outcomelist:
        # Outcome column, renamed to its capitalised name and stacked so
        # the outcome name becomes the second index level.
        ydf=df14[[x]]
        ydf.rename(columns={x:("%s" % x).capitalize()},inplace=True)
        ydf=ydf.stack()
        if not df13 is None:
            bdf=df13[[x]].rename(columns={x:("%s" % x).capitalize()}).stack()
        # Controls get outcome-specific names (e.g. hh_size_<Outcome>) so
        # each outcome has its own coefficients in the pooled regression.
        rdict=dict(zip(xvars,["%s_%s" % (s,("%s" % x).capitalize()) for s in xvars]))
        xdf=maindf[xvars]
        xdf.index=pd.MultiIndex.from_tuples([(i,("%s" % x).capitalize()) for i in xdf.index])
        if not df13 is None:
            xdf=pd.concat([xdf,pd.DataFrame({'Baseline_%s' % ("%s" % x).capitalize():bdf})],axis=1)
        locations=pd.get_dummies(xdf['Loc'],prefix='Loc_%s' % ("%s" % x).capitalize())
        del xdf['Loc']
        xdf.rename(columns=rdict,inplace=True)
        xdf=xdf.join(locations)
        xdf.replace(to_replace=np.NaN,value=0,inplace=True)

        # Add row to restrict location dummies to sum to one
        ydf=pd.concat([ydf,pd.DataFrame([0],index=[(0,("%s" % x).capitalize())])])
        #xdf=pd.concat([xdf,pd.DataFrame([s.startswith('Loc_')+0. for s in xdf.columns],index=xdf.columns,columns=[(0,("%s" % x).capitalize())]).T])
        # Restriction row: 1.0 for every location dummy, 0.0 elsewhere.
        R=[s.startswith('Loc_')+0. for s in xdf.columns]
        R=pd.DataFrame(R,index=xdf.columns,columns=[(0,("%s" % x).capitalize())]).T
        xdf=pd.concat([xdf,R])

        # The outcome is stored under column label 0.
        xdf[0]=ydf
        if missing_y==0:
            # Missing outcomes are treated as zero; otherwise the rows are
            # removed by the dropna below.
            xdf[0].fillna(value=0,inplace=True)

        xdf.dropna(how='any',inplace=True)
        bothdf.append(xdf)

    mydf=pd.concat(bothdf).fillna(value=0)

    # NOTE(review): this takes every column after the first as regressors,
    # so it assumes the outcome column (label 0) sorts first after concat
    # -- confirm.
    X=mydf.iloc[:,1:]
    X.to_pickle('/tmp/myX.df')

    y=mydf[[0]]

    b,se=ols(X,y)

    # Regression residuals, indexed by household and outcome.
    e=y-X.dot(b.T)
    e.rename(columns={0:'Resid'},inplace=True)
    e.index.names=['HH',outcomekind]

    testdf=pd.merge(df[['TUP','CTL']].reset_index(),e.reset_index(),how='outer',on=['HH'])
    testdf.set_index(['HH',outcomekind],inplace=True)
    testdf.dropna(inplace=True)

    # Residuals multiplied by the TUP / CTL columns, one column per outcome.
    TUP=testdf['TUP'].mul(testdf['Resid'])
    TUP=TUP.unstack()
    Control=testdf['CTL'].mul(testdf['Resid']).unstack()

    e=(e-e.mean()).unstack()

    # Test of significant differences between treatment and control:
    # Weighting matrix:
    A=np.matrix((TUP-Control).cov().as_matrix()).I
    g=np.matrix((TUP-Control).mean())
    J=e.shape[0]*g*A*g.T # Chi2 statistic

    p=1-chi2.cdf(J,e.shape[1])

    chi2test="Chi2 test: %f (%f)" % (J,p)

    # Observation count per outcome, excluding the added restriction row.
    N=pd.Series([d.shape[0]-1 for d in bothdf],index=[d.index.levels[1][0] for d in bothdf])

    resultdf=pd.DataFrame({'TUP':TUP.mean(),'CTL':Control.mean(),'$N$':N})
    sedf=pd.DataFrame({'TUP':TUP.std()/np.sqrt(resultdf['$N$']),'CTL':Control.std()/np.sqrt(resultdf['$N$'])})
    resultdf['Diff.']=resultdf['TUP']-resultdf['CTL']
    sedf['Diff.']=np.sqrt(sedf['TUP']**2 + sedf['CTL']**2)
    sedf[r'$\log\lambda$']=e.std().as_matrix()/np.sqrt(resultdf['$N$'])

    tstats=pd.DataFrame({'TUP':resultdf['TUP']/sedf['TUP'],
                         'CTL':resultdf['CTL']/sedf['CTL'],
                         'Diff.':resultdf['Diff.']/sedf['Diff.']})

    if not loglambdas is None:
        # Report the loglambda coefficients alongside the group means.
        llb=b.filter(like='loglambda_')
        resultdf[r'$\log\lambda$']=llb.iloc[0,:].as_matrix()
        tstats[r'$\log\lambda$']=resultdf['$\log\lambda$'].as_matrix()/sedf[r'$\log\lambda$']

    return resultdf,sedf,tstats,chi2test
from numpy import mean
from numpy import var
import pandas as pd
from scipy.stats import chisquare
from scipy.stats import norm
from scipy.stats.distributions import chi2

# calculating the p-value for problem 1:
# chi2.sf is the upper-tail probability (1 - cdf) without the subtraction.
print("example 1: ", chi2.sf(22.15, 2))

# Same problem from the raw counts: observed, expected, and ddof=3 so the
# reference distribution has 6 - 1 - 3 = 2 degrees of freedom.
print(
    "example 1: ",
    chisquare([138, 83, 64, 64, 67, 84],
              [115.14, 85.5, 84.36, 86.86, 64.5, 63.64], 3))

# calculating the p-value for problem 2:
print("example2: ", 1.0 - norm.cdf(1.28))

# example 3:
# sep=r"\s+" splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True and behaves the same.
mpg = pd.read_csv("../data/jp-us-mpg.dat", sep=r"\s+")
print("example 3: ", mpg.head())

print("Japan: ", mean(mpg["Japan"].dropna()))
print("USA: ", mean(mpg["US"].dropna()))

japan = mpg["Japan"].dropna()
us = mpg["US"].dropna()

# Rescale the variance by n / (n - 1) to obtain the unbiased sample variance.
jp_var = (var(japan) * len(japan)) / (float(len(japan) - 1))
us_var = (var(us) * len(us)) / (float(len(us) - 1))
def chi2_approx(calc_stat, x, y):
    """Compute a test statistic and its chi-square approximated p-value.

    Args:
        calc_stat: callable ``calc_stat(x, y)`` returning the statistic.
        x: array whose first dimension is the sample size ``n``.
        y: second sample, passed through to ``calc_stat`` unchanged.

    Returns:
        Tuple ``(stat, pvalue)`` where ``pvalue`` is the upper-tail
        probability of ``stat * n + 1`` under a chi-square distribution
        with 1 degree of freedom.
    """
    n = x.shape[0]
    stat = calc_stat(x, y)
    # Survival function instead of 1 - cdf: the subtraction rounds to 0.0
    # for large n * stat, sf keeps the small p-value.
    pvalue = chi2.sf(stat * n + 1, 1)
    return stat, pvalue
def survdiff(time, status, group, weight_type=None, strata=None,
             entry=None, **kwargs):
    """
    Test for the equality of two survival distributions.

    Parameters
    ----------
    time : array_like
        The event or censoring times.
    status : array_like
        The censoring status variable, status=1 indicates that the
        event occurred, status=0 indicates that the observation was
        censored.
    group : array_like
        Indicators of the two groups
    weight_type : str
        The following weight types are implemented:
            None (default) : logrank test
            fh : Fleming-Harrington, weights by S^(fh_p),
                 requires exponent fh_p to be provided as keyword
                 argument; the weights are derived from S defined at
                 the previous event time, and the first weight is
                 always 1.
            gb : Gehan-Breslow, weights by the number at risk
            tw : Tarone-Ware, weights by the square root of the number
                 at risk
    strata : array_like
        Optional stratum indicators for a stratified test
    entry : array_like
        Entry times to handle left truncation. The subject is not in
        the risk set on or before the entry time.

    Returns
    -------
    chisq : The chi-square distributed test statistic value; the degrees
            of freedom are the number of distinct groups minus one
    pvalue : The p-value for the chi^2 test
    """

    time = np.asarray(time)
    status = np.asarray(status)
    group = np.asarray(group)

    gr = np.unique(group)

    if strata is None:
        obs, var = _survdiff(time, status, group, weight_type, gr,
                             entry, **kwargs)
    else:
        # Stratified test: accumulate observed-minus-expected and its
        # covariance over the strata.
        strata = np.asarray(strata)
        stu = np.unique(strata)
        obs, var = 0., 0.
        for st in stu:
            # could be more efficient?
            ii = (strata == st)
            # NOTE(review): `entry` is passed whole while the other arrays
            # are restricted to the stratum -- confirm _survdiff expects
            # that.
            obs1, var1 = _survdiff(time[ii], status[ii], group[ii],
                                   weight_type, gr, entry, **kwargs)
            obs += obs1
            var += var1

    # (O - E).T * V^(-1) * (O - E)
    chisq = obs.dot(np.linalg.solve(var, obs))
    # Survival function instead of 1 - cdf, which rounds to 0.0 for very
    # large statistics.
    pvalue = chi2.sf(chisq, len(gr) - 1)

    return chisq, pvalue
mcnemar_score = np.empty((6, 6)) k = 0 for i in range(4): for j in range(i+1, 4): a_not_b = np.sum(wrong_predictions[i, :, :] & ~wrong_predictions[j, :, :], axis=0) b_not_a = np.sum(~wrong_predictions[i, :, :] & wrong_predictions[j, :, :], axis=0) # An alterantive to the standard McNemar test is to include a # continuity correction term, resulting in: # mcnemar_corr_score = np.square(np.abs(a_not_b - b_not_a) - 1) / (a_not_b + b_not_a) # I tested both and came the conclusion, that we cannot reject the null hypotesis # for neither. The standard test however provide results that are easier to visualize. mcnemar_score[k, :] = np.square(a_not_b - b_not_a) / (a_not_b + b_not_a) k += 1 mcnemar_name += [names[i] + " vs " + names[j]] mcnemar = pd.DataFrame(1-chi2.cdf(mcnemar_score, 1), index=mcnemar_name, columns=diagnosis) # p-value # Save results mcnemar.to_excel("./outputs/tables/mcnemar.xlsx", float_format='%.3f') mcnemar.to_csv("./outputs/tables/mcnemar.csv", float_format='%.3f') # %% Kappa score classifiers (Supplementary Table 2(a)) names = ["DNN", "cardio.", "emerg.", "stud."] predictors = [y_neuralnet, y_cardio, y_emerg, y_student] kappa_name = [] kappa_score = np.empty((6, 6)) k = 0 for i in range(4): for j in range(i+1, 4): y_pred_1 = predictors[i]
def main():
    """Generate the tables and figures comparing the DNN with human raters.

    Reads the annotation CSV files from ``INPUT_ANNOTATION_DIR`` and the model
    outputs listed in ``MODEL_OUTPUT_LIST``, then writes score tables,
    precision-recall plots, confusion matrices, bootstrap box plots, McNemar
    p-values and kappa scores below ``OUTPUT_DIR``.

    Relies on module-level names that are not defined in this function
    (``diagnosis``, ``score_fun``, ``predictor_names``, ``get_scores``,
    ``get_optimal_precision_recall``, ``affer_results``, ...). Returns nothing.
    """
    # %% Read datasets
    # Get two annotators
    y_cardiologist1 = pd.read_csv(INPUT_ANNOTATION_DIR + '/cardiologist1.csv').values
    y_cardiologist2 = pd.read_csv(INPUT_ANNOTATION_DIR + '/cardiologist2.csv').values
    # Get true values
    y_true = pd.read_csv(INPUT_ANNOTATION_DIR + '/gold_standard.csv').values
    # Number of label columns. It must be bound here: `nclasses` is assigned
    # again in the alternative-splits loop further down, which makes it a local
    # name for the whole function, so the earlier uses (confusion matrices,
    # first bootstrap) would otherwise fail with an unbound-variable error.
    nclasses = np.shape(y_true)[1]
    # Get residents and students performance
    y_cardio = pd.read_csv(INPUT_ANNOTATION_DIR + '/cardiology_residents.csv').values
    y_emerg = pd.read_csv(INPUT_ANNOTATION_DIR + '/emergency_residents.csv').values
    y_student = pd.read_csv(INPUT_ANNOTATION_DIR + '/medical_students.csv').values
    # get y_score for different models
    y_score_list = [np.load(m_output) for m_output in MODEL_OUTPUT_LIST]

    # %% Get average model
    # Get micro average precision
    micro_avg_precision = [average_precision_score(y_true[:, :6], y_score[:, :6], average='micro')
                           for y_score in y_score_list]
    # get ordered index
    index = np.argsort(micro_avg_precision)
    print('Micro average precision')
    print(np.array(micro_avg_precision)[index])
    # Select the reference model: the first entry of the argsort order.
    # NOTE(review): the previous comment said "6th best model (immediately
    # above median) out of 10 different models", but the code takes index[0],
    # i.e. the lowest micro average precision under np.argsort's ascending
    # order - confirm which one is intended.
    k_dnn_best = index[0]
    y_score_best = y_score_list[k_dnn_best]
    # Get threshold that yields the best precision recall
    _, _, threshold = get_optimal_precision_recall(y_true, y_score_best)
    mask = y_score_best > threshold
    # Get neural network prediction
    # This data was also saved in INPUT_ANNOTATION_DIR + '/dnn.csv'
    y_neuralnet = np.zeros_like(y_score_best)
    y_neuralnet[mask] = 1

    # %% Generate table with scores for the average model (Table 2)
    scores_list = []
    for y_pred in [y_neuralnet, y_cardio, y_emerg, y_student]:
        # Compute scores
        scores = get_scores(y_true, y_pred, score_fun)
        # Put them into a data frame
        scores_df = pd.DataFrame(scores, index=diagnosis, columns=score_fun.keys())
        # Append
        scores_list.append(scores_df)
    # Concatenate dataframes
    scores_all_df = pd.concat(scores_list, axis=1, keys=['DNN', 'cardio.', 'emerg.', 'stud.'])
    # Change multiindex levels
    scores_all_df = scores_all_df.swaplevel(0, 1, axis=1)
    scores_all_df = scores_all_df.reindex(level=0, columns=score_fun.keys())
    # Save results
    scores_all_df.to_excel(OUTPUT_DIR + "/tables/scores.xlsx", float_format='%.3f')
    scores_all_df.to_csv(OUTPUT_DIR + "/tables/scores.csv", float_format='%.3f')

    # %% Plot precision recall curves (Figure 2)
    for k, name in enumerate(diagnosis):
        precision_list = []
        recall_list = []
        threshold_list = []
        average_precision_list = []
        fig, ax = plt.subplots()
        lw = 2
        t = ['bo', 'rv', 'gs', 'kd']
        for j, y_score in enumerate(y_score_list):
            # Get precision-recall curve
            precision, recall, threshold = precision_recall_curve(y_true[:, k], y_score[:, k])
            recall[np.isnan(recall)] = 0  # change nans to 0
            precision[np.isnan(precision)] = 0  # change nans to 0
            # Plot if it is the chosen model
            if j == k_dnn_best:
                ax.plot(recall, precision, color='blue', alpha=0.7)
            # Compute average precision
            average_precision = average_precision_score(y_true[:, k], y_score[:, k])
            precision_list += [precision]
            recall_list += [recall]
            average_precision_list += [average_precision]
            threshold_list += [threshold]
        # Plot shaded region containing maximum and minimum from other executions
        recall_all = np.concatenate(recall_list)
        recall_all = np.sort(recall_all)  # sort
        recall_all = np.unique(recall_all)  # remove repeated entries
        recall_vec = []
        precision_min = []
        precision_max = []
        for r in recall_all:
            p_max = [max(precision[recall == r]) for recall, precision in zip(recall_list, precision_list)]
            p_min = [min(precision[recall == r]) for recall, precision in zip(recall_list, precision_list)]
            # Each recall value is emitted twice so the envelope is drawn
            # as a step between the per-model maxima and minima.
            recall_vec += [r, r]
            precision_min += [min(p_max), min(p_min)]
            precision_max += [max(p_max), max(p_min)]
        ax.plot(recall_vec, precision_min, color='blue', alpha=0.3)
        ax.plot(recall_vec, precision_max, color='blue', alpha=0.3)
        ax.fill_between(recall_vec, precision_min, precision_max,
                        facecolor="blue", alpha=0.3)
        # Plot iso-f1 curves
        f_scores = np.linspace(0.1, 0.95, num=15)
        for f_score in f_scores:
            x = np.linspace(0.0000001, 1, 1000)
            y = f_score * x / (2 * x - f_score)
            ax.plot(x[y >= 0], y[y >= 0], color='gray', ls=':', lw=0.7, alpha=0.25)
        # Plot the operating point of each predictor
        for npred in range(4):
            ax.plot(scores_list[npred]['Recall'][k], scores_list[npred]['Precision'][k],
                    t[npred], label=predictor_names[npred])
        plt.xticks(fontsize=16)
        plt.yticks(fontsize=16)
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.02])
        if k in [3, 4, 5]:
            ax.set_xlabel('Recall (Sensitivity)', fontsize=17)
        if k in [0, 3]:
            ax.set_ylabel('Precision (PPV)', fontsize=17)
        # plt.title('Precision-Recall curve (' + name + ')')
        if k == 0:
            plt.legend(loc="lower left", fontsize=17)
        else:
            ax.legend().remove()
        plt.tight_layout()
        plt.savefig(OUTPUT_DIR + '/figures/precision_recall_{0}.pdf'.format(name))

    # %% Confusion matrices (Supplementary Table 1)
    M = [[confusion_matrix(y_true[:, k], y_pred[:, k], labels=[0, 1])
          for k in range(nclasses)] for y_pred in [y_neuralnet, y_cardio, y_emerg, y_student]]

    M_xarray = xr.DataArray(np.array(M),
                            dims=['predictor', 'diagnosis', 'true label', 'predicted label'],
                            coords={'predictor': ['DNN', 'cardio.', 'emerg.', 'stud.'],
                                    'diagnosis': diagnosis,
                                    'true label': ['not present', 'present'],
                                    'predicted label': ['not present', 'present']})
    confusion_matrices = M_xarray.to_dataframe('n')
    confusion_matrices = confusion_matrices.reorder_levels([1, 2, 3, 0], axis=0)
    confusion_matrices = confusion_matrices.unstack()
    confusion_matrices = confusion_matrices.unstack()
    confusion_matrices = confusion_matrices['n']
    confusion_matrices.to_excel(OUTPUT_DIR + "/tables/confusion matrices.xlsx", float_format='%.3f')
    confusion_matrices.to_csv(OUTPUT_DIR + "/tables/confusion matrices.csv", float_format='%.3f')

    # %% Compute scores and bootstrapped version of these scores
    bootstrap_nsamples = 1000
    percentiles = [2.5, 97.5]
    scores_resampled_list = []
    scores_percentiles_list = []
    for y_pred in [y_neuralnet, y_cardio, y_emerg, y_student]:
        # Compute bootstrapped samples (the seed is reset for every predictor,
        # so all predictors are scored on the same resampled rows)
        np.random.seed(123)  # NEVER change this =P
        n, _ = np.shape(y_true)
        samples = np.random.randint(n, size=n * bootstrap_nsamples)
        # Get samples
        y_true_resampled = np.reshape(y_true[samples, :], (bootstrap_nsamples, n, nclasses))
        y_doctors_resampled = np.reshape(y_pred[samples, :], (bootstrap_nsamples, n, nclasses))
        # Apply functions
        scores_resampled = np.array([get_scores(y_true_resampled[i, :, :], y_doctors_resampled[i, :, :], score_fun)
                                     for i in range(bootstrap_nsamples)])
        # Sort scores (independently along the bootstrap axis)
        scores_resampled.sort(axis=0)
        # Append
        scores_resampled_list.append(scores_resampled)

        # Compute percentiles index
        i = [int(p / 100.0 * bootstrap_nsamples) for p in percentiles]
        # Get percentiles
        scores_percentiles = scores_resampled[i, :, :]
        # Convert percentiles to a dataframe
        scores_percentiles_df = pd.concat([pd.DataFrame(x, index=diagnosis, columns=score_fun.keys())
                                           for x in scores_percentiles], keys=['p1', 'p2'], axis=1)
        # Change multiindex levels
        scores_percentiles_df = scores_percentiles_df.swaplevel(0, 1, axis=1)
        scores_percentiles_df = scores_percentiles_df.reindex(level=0, columns=score_fun.keys())
        # Append
        scores_percentiles_list.append(scores_percentiles_df)
    # Concatenate dataframes
    scores_percentiles_all_df = pd.concat(scores_percentiles_list, axis=1, keys=predictor_names)
    # Change multiindex levels
    scores_percentiles_all_df = scores_percentiles_all_df.reorder_levels([1, 0, 2], axis=1)
    scores_percentiles_all_df = scores_percentiles_all_df.reindex(level=0, columns=score_fun.keys())

    # %% Print box plot (Supplementary Figure 1)
    # Convert to xarray
    scores_resampled_xr = xr.DataArray(np.array(scores_resampled_list),
                                       dims=['predictor', 'n', 'diagnosis', 'score_fun'],
                                       coords={
                                           'predictor': predictor_names,
                                           'n': range(bootstrap_nsamples),
                                           'diagnosis': ['1dAVb', 'RBBB', 'LBBB', 'SB', 'AF', 'ST'],
                                           'score_fun': list(score_fun.keys())})
    # One box plot per score function
    for sf in score_fun:
        fig, ax = plt.subplots()
        f1_score_resampled_xr = scores_resampled_xr.sel(score_fun=sf)
        # Convert to dataframe
        f1_score_resampled_df = f1_score_resampled_xr.to_dataframe(name=sf).reset_index(level=[0, 1, 2])
        # Plot seaborn
        ax = sns.boxplot(x="diagnosis", y=sf, hue="predictor", data=f1_score_resampled_df)
        # Save results
        plt.xticks(fontsize=16)
        plt.yticks(fontsize=16)
        plt.xlabel("")
        plt.ylabel("", fontsize=16)
        if sf == "F1 score":
            plt.legend(fontsize=17)
        else:
            ax.legend().remove()
        plt.tight_layout()
        plt.savefig(OUTPUT_DIR + '/figures/boxplot_bootstrap_{}.pdf'.format(sf))

    scores_resampled_xr.to_dataframe(name='score').to_csv(OUTPUT_DIR + '/figures/boxplot_bootstrap_data.txt')

    # %% McNemar test (Supplementary Table 3)
    # Get correct and wrong predictions for each of them (cm >= 2 correspond to wrong predictions)
    wrong_predictions = np.array([affer_results(y_true, y_pred)[4] >= 2
                                  for y_pred in [y_neuralnet, y_cardio, y_emerg, y_student]])

    # Compute McNemar score
    names = ["DNN", "cardio.", "emerg.", "stud."]
    mcnemar_name = []
    mcnemar_score = np.empty((6, 6))
    k = 0
    for i in range(4):
        for j in range(i+1, 4):
            # Discordant counts: wrong for i but not j, and the reverse.
            a_not_b = np.sum(wrong_predictions[i, :, :] & ~wrong_predictions[j, :, :], axis=0)
            b_not_a = np.sum(~wrong_predictions[i, :, :] & wrong_predictions[j, :, :], axis=0)
            # An alternative to the standard McNemar test is to include a
            # continuity correction term, resulting in:
            # mcnemar_corr_score = np.square(np.abs(a_not_b - b_not_a) - 1) / (a_not_b + b_not_a)
            # I tested both and came to the conclusion that we cannot reject the null hypothesis
            # for either. The standard test, however, provides results that are easier to visualize.
            # NOTE(review): the denominator is zero when a pair has no
            # discordant entries for a diagnosis - confirm this cannot occur.
            mcnemar_score[k, :] = np.square(a_not_b - b_not_a) / (a_not_b + b_not_a)
            k += 1
            mcnemar_name += [names[i] + " vs " + names[j]]

    mcnemar = pd.DataFrame(1-chi2.cdf(mcnemar_score, 1), index=mcnemar_name, columns=diagnosis)  # p-value

    # Save results
    mcnemar.to_excel(OUTPUT_DIR + "/tables/mcnemar.xlsx", float_format='%.3f')
    mcnemar.to_csv(OUTPUT_DIR + "/tables/mcnemar.csv", float_format='%.3f')

    # %% Kappa score classifiers (Supplementary Table 2(a))

    names = ["DNN", "cardio.", "emerg.", "stud."]
    predictors = [y_neuralnet, y_cardio, y_emerg, y_student]
    kappa_name = []
    kappa_score = np.empty((6, 6))
    k = 0
    for i in range(4):
        for j in range(i+1, 4):
            y_pred_1 = predictors[i]
            y_pred_2 = predictors[j]
            # Get "confusion matrix"
            negative_negative, positive_positive, positive_negative, negative_positive, _ = \
                affer_results(y_pred_1, y_pred_2)
            p_p = positive_positive.sum(axis=0)
            p_n = positive_negative.sum(axis=0)
            n_p = negative_positive.sum(axis=0)
            n_n = negative_negative.sum(axis=0)
            total_sum = p_p + p_n + n_p + n_n
            # Relative agreement
            r_agree = (p_p + n_n) / total_sum
            # Empirical probability of both saying yes
            p_yes = (p_p + p_n) * (p_p + n_p) / total_sum**2
            # Empirical probability of both saying no
            p_no = (n_n + n_p) * (n_n + p_n) / total_sum**2
            # Empirical probability of agreement
            p_agree = p_yes + p_no
            # Kappa score
            kappa_score[k, :] = (r_agree - p_agree) / (1 - p_agree)
            k += 1
            kappa_name += [names[i] + " vs " + names[j]]

    kappa = pd.DataFrame(kappa_score, index=kappa_name, columns=diagnosis)  # kappa scores
    # Save results
    kappa.to_excel(OUTPUT_DIR + "/tables/kappa.xlsx", float_format='%.3f')
    kappa.to_csv(OUTPUT_DIR + "/tables/kappa.csv", float_format='%.3f')

    # %% Kappa score dataset generation (Supplementary Table 2(b))

    # Compute kappa score
    kappa_list = []
    names_list = []
    raters = [('DNN', y_neuralnet), ('Cert. cardiol. 1', y_cardiologist1), ('Certif. cardiol. 2', y_cardiologist2)]
    for r1, r2 in combinations(raters, 2):
        name1, y1 = r1
        name2, y2 = r2
        negative_negative, positive_positive, positive_negative, negative_positive, _ = \
            affer_results(y1, y2)
        p_p = positive_positive.sum(axis=0)
        p_n = positive_negative.sum(axis=0)
        n_p = negative_positive.sum(axis=0)
        n_n = negative_negative.sum(axis=0)
        total_sum = p_p + p_n + n_p + n_n
        # Relative agreement
        r_agree = (p_p + n_n) / total_sum
        # Empirical probability of both saying yes
        p_yes = (p_p + p_n) * (p_p + n_p) / total_sum ** 2
        # Empirical probability of both saying no
        p_no = (n_n + n_p) * (n_n + p_n) / total_sum ** 2
        # Empirical probability of agreement
        p_agree = p_yes + p_no
        # Kappa score
        kappa = (r_agree - p_agree) / (1 - p_agree)
        kappa_list.append(kappa)
        names_list.append('{} vs {}'.format(name1, name2))

    kappas_annotators_and_DNN = pd.DataFrame(np.stack(kappa_list), columns=diagnosis, index=names_list)
    print(kappas_annotators_and_DNN)
    kappas_annotators_and_DNN.to_excel(OUTPUT_DIR + "/tables/kappas_annotators_and_DNN.xlsx", float_format='%.3f')
    kappas_annotators_and_DNN.to_csv(OUTPUT_DIR + "/tables/kappas_annotators_and_DNN.csv", float_format='%.3f')

    # %% Compute scores and bootstrapped version of these scores on alternative splits
    bootstrap_nsamples = 1000
    scores_resampled_list = []
    scores_percentiles_list = []
    for name in ['normal_order', 'date_order', 'individual_patients', 'base_model']:
        print(name)
        # Get data ('base_model' reuses the scores selected above instead of
        # loading a file)
        yn_true = y_true
        yn_score = np.load('./dnn_predicts/other_splits/model_'+name+'.npy') if not name == 'base_model' else y_score_best
        # Compute threshold
        nclasses = np.shape(yn_true)[1]
        opt_precision, opt_recall, threshold = get_optimal_precision_recall(yn_true, yn_score)
        mask_n = yn_score > threshold
        yn_pred = np.zeros_like(yn_score)
        yn_pred[mask_n] = 1
        # Compute bootstrapped samples
        np.random.seed(123)  # NEVER change this =P
        n, _ = np.shape(yn_true)
        samples = np.random.randint(n, size=n * bootstrap_nsamples)
        # Get samples
        y_true_resampled = np.reshape(yn_true[samples, :], (bootstrap_nsamples, n, nclasses))
        y_doctors_resampled = np.reshape(yn_pred[samples, :], (bootstrap_nsamples, n, nclasses))
        # Apply functions
        scores_resampled = np.array([get_scores(y_true_resampled[i, :, :], y_doctors_resampled[i, :, :], score_fun)
                                     for i in range(bootstrap_nsamples)])
        # Sort scores
        scores_resampled.sort(axis=0)
        # Append
        scores_resampled_list.append(scores_resampled)

        # Compute percentiles index
        i = [int(p / 100.0 * bootstrap_nsamples) for p in percentiles]
        # Get percentiles
        scores_percentiles = scores_resampled[i, :, :]
        # Convert percentiles to a dataframe
        scores_percentiles_df = pd.concat([pd.DataFrame(x, index=diagnosis, columns=score_fun.keys())
                                           for x in scores_percentiles], keys=['p1', 'p2'], axis=1)
        # Change multiindex levels
        scores_percentiles_df = scores_percentiles_df.swaplevel(0, 1, axis=1)
        scores_percentiles_df = scores_percentiles_df.reindex(level=0, columns=score_fun.keys())
        # Append
        scores_percentiles_list.append(scores_percentiles_df)

    # %% Print box plot on alternative splits (Supplementary Figure 2 (a))
    scores_resampled_xr = xr.DataArray(np.array(scores_resampled_list),
                                       dims=['predictor', 'n', 'diagnosis', 'score_fun'],
                                       coords={
                                           'predictor': ['random', 'by date', 'by patient', 'original DNN'],
                                           'n': range(bootstrap_nsamples),
                                           'diagnosis': ['1dAVb', 'RBBB', 'LBBB', 'SB', 'AF', 'ST'],
                                           'score_fun': list(score_fun.keys())})
    # Remove everything except f1_score
    sf = 'F1 score'
    fig, ax = plt.subplots()
    f1_score_resampled_xr = scores_resampled_xr.sel(score_fun=sf)
    # Convert to dataframe
    f1_score_resampled_df = f1_score_resampled_xr.to_dataframe(name=sf).reset_index(level=[0, 1, 2])
    # Plot seaborn
    ax = sns.boxplot(x="diagnosis", y=sf, hue="predictor", data=f1_score_resampled_df,
                     order=['1dAVb', 'SB', 'AF', 'ST', 'RBBB', 'LBBB'],
                     palette=sns.color_palette("Set1", n_colors=8))
    # Separate and shade the last two diagnosis groups on the x axis
    plt.axvline(3.5, color='black', ls='--')
    plt.axvline(5.5, color='black', ls='--')
    plt.axvspan(3.5, 5.5, alpha=0.1, color='gray')
    # Save results
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    plt.xlabel("")
    plt.ylabel("F1 score", fontsize=16)
    plt.legend(fontsize=17)
    plt.ylim([0.4, 1.05])
    plt.xlim([-0.5, 5.5])
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR + '/figures/boxplot_bootstrap_other_splits_{0}.pdf'.format(sf))
    f1_score_resampled_df.to_csv(OUTPUT_DIR + '/figures/boxplot_bootstrap_other_splits_data.txt', index=False)
# Fragment of a larger script: `df`, `e`, `bothdf` and `get_loglambdas` are
# bound before this point and are not visible here.

# Join the TUP/CTL columns of `df` onto the reset-index form of `e` by 'HH',
# then index the result by ('HH', 'Good').
testdf = pd.merge(df[['TUP', 'CTL']].reset_index(), e.reset_index(), how='outer', on=['HH'])
testdf.set_index(['HH', 'Good'], inplace=True)
# Multiply each indicator column by the 'Resid' column and unstack the
# innermost index level into columns.
TUP = testdf['TUP'].mul(testdf['Resid']).dropna().unstack()
CTL = testdf['CTL'].mul(testdf['Resid']).dropna().unstack()

e = (e - e.mean()).unstack()

# Test of significant differences between treatment and control:
# Weighting matrix:
# (`.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.)
A = np.matrix((TUP - CTL).cov().values).I
g = np.matrix((TUP - CTL).mean())
# g*A*g.T is a 1x1 matrix; .item() extracts the scalar so that the "%f"
# formatting below does not rely on implicit array-to-float conversion.
J = (e.shape[0] * g * A * g.T).item()  # Chi2 statistic
# Upper-tail probability; sf avoids the cancellation in 1 - cdf.
p = chi2.sf(J, e.shape[1])

chi2test = "Chi2 test: %f (%f)" % (J, p)

N = pd.Series([d.shape[0] - 1 for d in bothdf], index=[d.index.levels[1][0] for d in bothdf])

resultdf = pd.DataFrame({'TUP': TUP.mean(), 'CTL': CTL.mean(), '$N$': N})
sedf = pd.DataFrame({'TUP': TUP.std() / np.sqrt(resultdf['$N$']),
                     'CTL': CTL.std() / np.sqrt(resultdf['$N$'])})
resultdf['Diff.'] = resultdf['TUP'] - resultdf['CTL']
# Standard error of the difference, combining the two columns in quadrature.
sedf['Diff.'] = np.sqrt((sedf['TUP']**2) + (sedf['CTL']**2))

# Use svd (with missing data) to construct beta & log lambda
myb, myl = get_loglambdas(e, TEST=True)

myb.index = myb.index.droplevel(0)
def Mscat(X, loss, losspar=None, invCx=None, printitn=0, MAX_ITER=1000, EPS=1.0e-5):
    """Compute an M-estimator of scatter by fixed-point iteration.

    Parameters
    ----------
    X : array_like, shape (n, p)
        Data matrix with one observation per row; real or complex.
    loss : {'Huber', 't-loss'}
        Loss function defining the weight function.
    losspar : float, optional
        For 'Huber': quantile q with 0 < q < 1 (default 0.9).
        For 't-loss': degrees of freedom v >= 0 (default 3); v = 0
        selects Tyler's M-estimator.
    invCx : array_like, optional
        Initial inverse scatter matrix. When omitted, the pseudo-inverse of
        the sample covariance matrix ``X^H X / n`` is used.
    printitn : int, optional
        Print the convergence measure every `printitn` iterations
        (0 disables printing).
    MAX_ITER : int, optional
        Maximum number of iterations (at least 1).
    EPS : float, optional
        Stop once the convergence measure is at most `EPS`.

    Returns
    -------
    C : ndarray
        Scatter matrix from the last iteration.
    invC : ndarray
        Pseudo-inverse of `C`.
    i : int
        Zero-based index of the last iteration executed.
    flag : bool
        ``True`` when the last permitted iteration (``MAX_ITER - 1``) was
        reached.

    Raises
    ------
    ValueError
        If `loss` is not recognised, `losspar` is invalid for the chosen
        loss, or `MAX_ITER` is smaller than 1.
    """
    def tloss_consistency_factor(p, v):
        '''
        computes the consistency factor b = (1/p) E[|| x ||^2 u_v( ||x||^2)] when
        x ~N_p(0,I).
        '''
        sfun = lambda x: (x**(p / 2) / (v + x) * np.exp(-x / 2))
        c = 2**(p / 2) * sp.special.gamma(p / 2)
        q = (1 / c) * sp.integrate.quad(sfun, 0, np.inf)[0]
        return ((v + p) / p) * q  # consistency factor

    X = np.asarray(X)
    n, p = X.shape
    realdata = np.isrealobj(X)

    if MAX_ITER < 1:
        # Without at least one pass there is no C to return.
        raise ValueError('MAX_ITER must be at least 1 and not %s' % MAX_ITER)

    # SCM initial start. `is None` is required here: `invCx == None` on an
    # array compares elementwise and cannot be used as a condition.
    if invCx is None:
        invC = np.linalg.pinv(X.conj().T @ X / n)
    else:
        invC = np.copy(invCx)

    if loss == 'Huber':
        ufun = lambda t, c: ((t <= c) + (c / t) * (t > c))  # weight function u(t)
        q = 0.9 if losspar is None else losspar
        if np.isreal(q) and np.isfinite(q) and 0 < q and q < 1:
            if realdata:
                upar = chi2.ppf(q, df=p)  # threshold for Huber's weight u(t;.)
                b = chi2.cdf(upar, p + 2) + (upar / p) * (1 - q)  # consistency factor
            else:
                upar = chi2.ppf(q, 2 * p) / 2
                b = chi2.cdf(2 * upar, 2 * (p + 1)) + (upar / p) * (1 - q)
        else:
            raise ValueError(
                'losspar is a real number in [0,1] and not %s for Huber-loss' % q)
        const = 1 / (b * n)
    elif loss == 't-loss':
        # d.o.f v=3 is used as the default parameter for t-loss
        # otherwise use d.o.f. v that was given
        upar = 3 if losspar is None else losspar
        if not np.isreal(upar) or not np.isfinite(upar) or upar < 0:
            # Report the offending value itself (the Huber-only name `q` is
            # not defined on this branch).
            raise ValueError(
                'losspar should be a real number greater 0 and not %s for t-loss' % upar)
        if upar == 0:
            # Tylers M-estimator
            ufun = lambda t, v: 1 / t
            const = p / n
        elif realdata:
            # this is for real data
            ufun = lambda t, v: 1 / (v + t)  # weight function
            b = tloss_consistency_factor(p, upar)
            const = (upar + p) / (b * n)
        else:
            # this is for complex data
            ufun = lambda t, v: 1 / (v + 2 * t)  # weight function
            b = tloss_consistency_factor(2 * p, upar)
            const = (upar + 2 * p) / (b * n)
    else:
        raise ValueError("unknown loss %r; expected 'Huber' or 't-loss'" % (loss,))

    for i in range(MAX_ITER):
        t = np.real(np.sum((X @ invC) * np.conj(X), axis=1))  # norms
        C = const * X.conj().T @ (X * ufun(t, upar)[:, None])
        # Convergence measure: largest absolute row sum of I - invC @ C.
        d = np.max(np.sum(np.abs(np.eye(p) - invC @ C), axis=1))
        if printitn > 0 and (i + 1) % printitn == 0:
            print("At iter = %d, dis=%.6f\n" % (i, d))
        invC = np.linalg.pinv(C)
        if d <= EPS:
            break
    else:
        # Reached only when the loop ran out of iterations without meeting
        # the tolerance (the former `i == MAX_ITER` test could never be true).
        print("WARNING! Slow convergence: the error of the solution is %f\n'" % d)

    return C, invC, i, i == MAX_ITER - 1