def __init__(self, params):
    super().__init__(params)
    self.loc = params[0]
    self.scale = np.exp(params[1])
    self.var = self.scale**2
    self.df = np.exp(params[2])
    self.dist = dist(loc=self.loc, scale=self.scale, df=self.df)
def __init__(self, params, temp_scale=1.0):
    self.loc = params[0]
    self.scale = np.exp(params[1] / temp_scale) + 1e-8
    self.var = self.scale**2 + 1e-8
    self.shp = self.loc.shape
    self.dist = dist(loc=self.loc, scale=self.scale)
def __init__(self, params):
    # save the parameters
    self._params = params
    # create other objects that will be useful later
    self.logmu = params[0]
    self.mu = np.exp(self.logmu)
    self.dist = dist(mu=self.mu)
def __init__(self, params):
    super().__init__(params)
    self.loc = params[0]
    self.scale = np.exp(params[1])
    self.var = self.scale**2
    # fixed df
    self.df = np.ones_like(self.loc) * self.fixed_df
    self.dist = dist(loc=self.loc, scale=self.scale, df=self.df)
def __init__(self, params, temp_scale=1.0):
    mu = params[0]
    logsigma = params[1]
    sigma = np.exp(logsigma)
    self.mu = mu
    self.sigma = sigma
    self.dist = dist(s=sigma, scale=np.exp(mu))
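# Hedged sketch (illustration, not from the original source): the scipy.stats.lognorm
# parameterization used above encodes a Normal(mu, sigma) on the log scale as
# lognorm(s=sigma, scale=np.exp(mu)); a quick check of the mean identity
# E[X] = exp(mu + sigma**2 / 2) with made-up values:
import numpy as np
from scipy.stats import lognorm

mu, sigma = 0.5, 0.3
d = lognorm(s=sigma, scale=np.exp(mu))
assert np.isclose(d.mean(), np.exp(mu + sigma**2 / 2))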
def __init__(self, params):
    super().__init__(params)
    self.loc = params[0]
    # Sankalp: adding the clipping line on 10/16/2020.
    q = np.clip(params[1], a_min=-1e1, a_max=1e1)
    self.scale = np.exp(q)
    # Sankalp: adding the clipping line on 10/16/2020.
    self.scale = np.clip(self.scale, a_min=0, a_max=1e1)
    self.var = np.float_power(self.scale, 2)
    self.dist = dist(loc=self.loc, scale=self.scale)
def generate_mv_surv(N, mu0, mu1, S00, S11, S01):
    mu = [mu0, mu1]
    # covariance matrix must be symmetric, so the off-diagonal entries are both S01
    S = [[S00, S01], [S01, S11]]
    R = dist(mean=mu, cov=S)
    raw = np.array([R.rvs() for _ in range(N)])
    T = np.min(raw, axis=1)
    E = raw[:, 0] < raw[:, 1]
    print('Prevalence:', np.mean(E))
    print('mu_E:', mu0)
    print('Marginal Mean of E:', np.mean(T[np.where(E == 1)]))
    print('mu_C:', mu1)
    print('Marginal Mean of C:', np.mean(T[np.where(E == 0)]))
    Y = Y_join(T, E)
    return Y
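# Hedged usage sketch (assumptions: `dist` above is scipy.stats.multivariate_normal
# and Y_join packs the observed times T and event indicators E into the survival
# target): draw 1000 correlated event/censoring times with covariance 0.5.
Y = generate_mv_surv(N=1000, mu0=1.0, mu1=1.2, S00=1.0, S11=1.0, S01=0.5)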
def __init__(self, params):
    self.scale = np.exp(params[0])
    self.dist = dist(scale=self.scale)
def update_records(self, update_messages, human):
    if not update_messages:
        return self
    grouped_update_messages = self.group_by_received_at(update_messages)
    for received_at, update_messages in grouped_update_messages.items():

        # num days x num clusters
        cluster_cards = np.zeros((max(self.clusters_by_day.keys()) + 1, max(self.clusters.keys()) + 1))
        update_cards = np.zeros((max(self.clusters_by_day.keys()) + 1, 1))

        # figure out the cardinality of each day's message set
        for day, clusters in self.clusters_by_day.items():
            for cluster_id, messages in clusters.items():
                cluster_cards[day][cluster_id] = len(messages)
        for update_message in update_messages:
            update_cards[update_message.day] += 1

        # find the nearest cardinality cluster
        perfect_signatures = np.where((cluster_cards == update_cards).all(axis=0))[0]
        if not any(perfect_signatures):
            # calculate the wasserstein distance between every signature
            scores = []
            for cluster_idx in range(cluster_cards.shape[1]):
                scores.append(dist(cluster_cards[:, cluster_idx], update_cards.reshape(-1)))
            best_cluster = int(np.argmin(scores))

            # for each day
            for day in range(len(update_cards)):
                cur_cardinality = int(cluster_cards[day, best_cluster])
                target_cardinality = int(update_cards[day])

                # if (and while) the cardinality is not what it should be, as determined by the update_messages
                while cur_cardinality - target_cardinality != 0:
                    # print(f"day: {day}, cur_cardinality: {cur_cardinality}, target_cardinality: {target_cardinality}")
                    # if we need to remove messages from this cluster on this day,
                    if cur_cardinality > target_cardinality:
                        best_score = -1
                        best_message = None
                        new_cluster_id = None

                        # then for each message in that day/cluster,
                        for message in self.clusters_by_day[day][best_cluster]:
                            for cluster_id, messages in self.clusters_by_day[day].items():
                                if cluster_id == best_cluster:
                                    continue
                                # and for each alternative cluster on that day
                                for candidate_cluster_message in messages:
                                    # check if it's a good cluster to move this message to
                                    score = self.score_two_messages(decode_message(candidate_cluster_message), message)
                                    if score > best_score or not best_message:
                                        best_message = message
                                        new_cluster_id = cluster_id

                        # if there are no other clusters on that day make a new cluster
                        if not best_message:
                            best_message = message
                            message = decode_message(message)
                            new_cluster_id = hash_to_cluster(message)

                        best_message = decode_message(best_message)
                        # for the message which best fits another cluster, move it there
                        self.update_record(best_cluster, new_cluster_id, best_message, best_message)
                        cur_cardinality -= 1
                        # print(f"removing from cluster {best_cluster} to cluster {new_cluster_id} on day {day}")

                    # otherwise we need to add messages to this cluster/day
                    else:
                        # so look for messages which closely match our update messages, and add them
                        for update_message in update_messages:
                            if update_message.day == day:
                                break
                        best_score = -2
                        best_message = None
                        old_cluster_id = None
                        for cluster_id, messages in self.clusters_by_day[day].items():
                            for message in messages:
                                score = self.score_two_messages(update_message, message)
                                if score > best_score and cluster_id != best_cluster:
                                    best_message = message
                                    old_cluster_id = cluster_id

                        best_message = decode_message(best_message)
                        updated_message = Message(best_message.uid, update_message.new_risk, best_message.day, best_message.unobs_id)
                        # print(f"adding from cluster {old_cluster_id} to cluster {best_cluster} on day {day}")
                        self.update_record(old_cluster_id, best_cluster, best_message, updated_message)
                        cur_cardinality += 1
        else:
            best_cluster = self.score_clusters(update_messages, perfect_signatures)
            for update_message in update_messages:
                best_score = -1
                best_message = self.clusters_by_day[update_message.day][best_cluster][0]
                for risk_message in self.clusters_by_day[update_message.day][best_cluster]:
                    score = self.score_two_messages(update_message, risk_message)
                    if score > best_score:
                        best_message = risk_message
                best_message = decode_message(best_message)
                updated_message = Message(best_message.uid, update_message.new_risk, best_message.day, best_message.unobs_id)
                self.update_record(best_cluster, best_cluster, best_message, updated_message)
    return self
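# Hedged toy sketch (illustration only): the cardinality matching above appears to
# use a distance such as scipy.stats.wasserstein_distance to compare each cluster's
# per-day message counts against the update-message counts; the made-up arrays below
# show how the closest-count cluster is selected.
import numpy as np
from scipy.stats import wasserstein_distance

cluster_cards = np.array([[2.0, 0.0],
                          [1.0, 3.0]])   # rows: days, columns: clusters
update_cards = np.array([2.0, 1.0])      # update-message counts per day
scores = [wasserstein_distance(cluster_cards[:, c], update_cards)
          for c in range(cluster_cards.shape[1])]
best_cluster = int(np.argmin(scores))     # cluster 0 matches exactly here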
def __init__(self, params):
    self._params = params
    self.loc = params[0]
    self.scale = np.exp(params[1])
    self.dist = dist(s=self.scale, scale=np.exp(self.loc))
    self.eps = 1e-5
def __init__(self, params):
    self.loc = params[0]
    self.var = np.ones_like(self.loc)
    self.scale = np.ones_like(self.loc)
    self.shape = self.loc.shape
    self.dist = dist(loc=self.loc, scale=self.scale)
def __init__(self, params):
    self._params = params
    self.loc = params[0]
    self.logscale = params[1]
    self.scale = np.exp(params[1])
    self.dist = dist(loc=self.loc, scale=self.scale)
def __init__(self, params):
    self.logit = params[0]
    self.prob = sp.special.expit(self.logit)
    self.dist = dist(p=self.prob)
def __init__(self, params, temp_scale=1.0):
    self.loc = params[0]
    self.scale = np.exp(params[1])
    self.dist = dist(s=self.scale, scale=np.exp(self.loc))
        'lognorm': res['lognorm']
    }
    results = pd.concat([results, pd.DataFrame(out, index=[0])], axis=0, sort=False)
    results.reset_index(drop=True).to_csv(logfile, index=False)
    # joint[i] = res['joint']
    # lognorm[i] = res['lognorm']

# plt.plot(covs, joint, label='joint')
# plt.plot(covs, single, label='lognormal')
# plt.ylabel('est. E')
# plt.xlabel('cov(E, C)')
# plt.legend()
# plt.show()

mu = [mu0, mu1]
S = [[S00, S01], [S10, S11]]
R = dist(mean=mu, cov=S)
raw = np.array([R.rvs() for _ in range(N)])
T = np.min(raw, axis=1)
E = raw[:, 0] < raw[:, 1]
print('Prevalence:', np.mean(E))
print('mu_E:', mu0)
print('Marginal Mean of E:', np.mean(T[np.where(E == 1)]))
print('mu_C:', mu1)
print('Marginal Mean of C:', np.mean(T[np.where(E == 0)]))
def __init__(self, params):  # pylint: disable=super-init-not-called
    self._params = params
    self.scale = np.exp(params[0])
    self.dist = dist(scale=self.scale)
def __init__(self, params):
    self.loc = 0
    self.scale = np.exp(params[0])
    self.shp = self.scale.shape
    self.dist = dist(scale=self.scale)
"passer_player_id", "defteam", ]) target = "pass_touchdown" X = df.drop(columns=[target]).values Y = df[target].values X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=SEED) # baseline not using predictor data avg_tds = np.mean(Y_train) y_dist = dist(avg_tds) naive_NLL = -y_dist.logpmf(Y_test).mean() print("Mean squared error using only the mean: {:.4f}".format( mean_squared_error(np.repeat(avg_tds, len(Y_test)), Y_test))) print( "Poisson negative log liklihood without using predictor variables: {:.4f}" .format(naive_NLL)) ngb = NGBRegressor(Dist=Poisson) ngb.fit(X_train, Y_train) Y_preds = ngb.predict(X_test) Y_dists = ngb.pred_dist(X_test)
def __init__(self, params):
    self.loc = params[0]
    self.scale = np.exp(params[1])
    self.var = self.scale**2
    self.dist = dist(loc=self.loc, scale=self.scale)