def recover(data):
    mu = np.mean(data)
    sigma = np.var(data)
    init_params = [(mu + 0.1, sigma), (mu - 0.1, sigma)]
    # Unpack each (mean, variance) pair so the two components start from
    # distinct initial parameters instead of two identical (mu, sigma) copies.
    weight, distributions, ll = mixem.em(
        data, [NormalDistribution(mu, sigma) for mu, sigma in init_params])
    print(weight, distributions, ll)

def train_lambda(data):
    weights, distributions, ll = mixem.em(np.array(data), [
        mixem.distribution.NormalDistribution(0, 1),
        mixem.distribution.NormalDistribution(3, 4),
        mixem.distribution.NormalDistribution(7, 8),
    ])
    return weights

def recover(data):
    init_params = np.random.choice(data, size=2)
    weight, distributions, ll = mixem.em(
        data, [ExponentialDistribution(l) for l in init_params])
    print(weight, distributions, ll)

def fitDataBMM(data, depth, lowerLimit, upperLimit, init_proportions, components=2):
    """Fit data and return a Binomial Mixture Model."""
    if components > 9:
        logging.error('Too many components specified. Max components 9.')
        exit()

    # Init distros
    distros = []
    for i in range(components):
        distros.append(BinomialDistribution(init_proportions[i], depth))

    # Format data as pairs of successes and trials
    data_points = []
    for x, y in zip(data[:, :-2].flatten(), np.repeat(data[:, -1], 4)):
        # filter on each proportion
        if lowerLimit < x < upperLimit:
            charCount = x * 0.01 * y  # convert proportion to count
            data_points.append([charCount, y])

    data = np.array(data_points)

    weights, distros, log_like = mixem.em(data, distros,
                                          initial_weights=None,
                                          progress_callback=None,
                                          max_iterations=500,
                                          tol_iters=200,
                                          tol=0.1)

    return BinomialMixture(weights, distros, log_like)

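# A minimal usage sketch for fitDataBMM, assuming fitDataBMM, BinomialDistribution,
# and BinomialMixture are importable from the module above. The array below is
# fabricated purely for illustration: four proportion columns, one unused column,
# and a final read-depth column, matching the slicing fitDataBMM performs.
import numpy as np

example = np.array([
    # p1,   p2,   p3,   p4,  other, depth
    [10.0, 45.0, 52.0,  5.0, 0.0, 100],
    [12.0, 48.0, 55.0,  8.0, 0.0, 200],
])

bmm = fitDataBMM(example, depth=100, lowerLimit=1.0, upperLimit=99.0,
                 init_proportions=[0.1, 0.5], components=2)
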
def recover(data):
    weight, distributions, ll = mixem.em(data, [
        GeometricDistribution(0.8),
        GeometricDistribution(0.1),
    ])
    print(weight, distributions, ll)

def train_lambda(data):
    # plt.scatter(np.array(range(data.shape[0])), data)
    # plt.show()
    weights, distributions, ll = mixem.em(np.sort(np.array(data)), [
        mixem.distribution.NormalDistribution(0, 1),
        mixem.distribution.NormalDistribution(0.3, 5),
        mixem.distribution.NormalDistribution(1, 9),
    ])
    return weights

def recover(data):
    mu = np.mean(data)
    sigma = np.var(data)
    init_params = [(np.array([mu + 0.1]), np.diag([sigma])),
                   (np.array([mu - 0.1]), np.diag([sigma]))]
    weight, distributions, ll = mixem.em(
        data,
        [MultivariateNormalDistribution(mu, sigma) for mu, sigma in init_params])
    print(weight, distributions, ll)

def train_lambda(data):
    '''
    train_lambda takes training data and returns the lambdas which will be
    used in the EM algorithm. We use an implementation of the
    Expectation-Maximization (EM) algorithm called "mixem" to tune the
    interpolation parameters lambda.

    https://pypi.python.org/pypi/mixem
    '''
    weights, distributions, ll = mixem.em(np.sort(np.array(data)), [
        mixem.distribution.NormalDistribution(0, 1),
        mixem.distribution.NormalDistribution(0.3, 5),
        mixem.distribution.NormalDistribution(1, 9),
    ])
    return weights

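# A minimal sketch of calling train_lambda, assuming numpy and mixem are
# available as in the module above. The synthetic data is fabricated purely
# for illustration; real callers would pass their own training statistics.
import numpy as np

rng = np.random.default_rng(0)
scores = np.concatenate([rng.normal(0, 1, 500),
                         rng.normal(0.3, 5, 300),
                         rng.normal(1, 9, 200)])
lambdas = train_lambda(scores)  # mixture weights, one per component, summing to 1
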
def recover(data):
    mu = np.mean(data)
    sigma = np.var(data)
    init_params = [
        (np.array([mu + 0.1]), np.diag([sigma])),
        (np.array([mu - 0.1]), np.diag([sigma])),
    ]
    # start = time.time()
    weight, distributions, ll = mixem.em(
        data,
        [MultivariateNormalDistribution(mu, sigma) for mu, sigma in init_params])
    print(weight, distributions, ll)

def main():
    data = pd.read_csv(os.path.join(os.path.dirname(__file__), "faithful.csv"))
    data = np.array(data)

    init_params = [
        (np.array((2, 50)), np.identity(2)),
        (np.array((4, 80)), np.identity(2)),
    ]

    weight, distributions, ll = mixem.em(
        data,
        [MultivariateNormalDistribution(mu, sigma) for mu, sigma in init_params],
        initial_weights=[0.3, 0.7])
    print(weight, distributions, ll)

def main():
    startTime = time.time()
    folderName = "assignment-2-data-files/"
    fileName = "P1M1L1.txt"
    data, lengthData = readTrainTxt(folderName + fileName)
    data = takeBatches(data, 1000)
    init_params = [(0, 2), (4, 2), (8, 2), (12, 2)]
    weight, distributions, ll = mixem.em(
        data, [NormalDistribution(mu, sigma) for (mu, sigma) in init_params])
    print(weight, distributions, ll)
    print(time.time() - startTime)

def recover(data):
    mu = [np.mean(data[0, :]), np.mean(data[1, :])]
    sigma = [np.var(data[0, :]), np.var(data[1, :])]
    # print(mu, sigma)
    init_params = [
        (np.array((mu[0] - 1, mu[0] + 1)), np.identity(2)),
        (np.array((mu[1] - 1, mu[1] + 1)), np.identity(2)),
    ]
    start = time.time()
    weight, distributions, ll, iteration = mixem.em(data, [
        MultivariateNormalDistribution(mu, sigma) for mu, sigma in init_params
    ])
    # print(weight, distributions, ll)
    # print('iterate time: ' + str(time.time() - start) + ' seconds')
    return weight, distributions, iteration, (time.time() - start)

def get_normals_mixture(x, data):
    dist_list_norm = [
        mixem.distribution.NormalDistribution(mu=0.5, sigma=1),
        mixem.distribution.NormalDistribution(mu=2, sigma=1),
    ]
    norm_mixture = Data_Rep(data, dist_list_norm)  # removed ref to train set
    norm_weights, norm_dists, norm_log_l = mixem.em(data, dist_list_norm,
                                                    max_iterations=200,
                                                    progress_callback=None)
    post_scipy_dist_1_norm, post_scipy_dist_2_norm = norm_mixture.scipy_dists
    norm_pdf1 = [post_scipy_dist_1_norm.pdf(i) for i in x]
    norm_pdf2 = [post_scipy_dist_2_norm.pdf(i) for i in x]
    norm_joint_pdf = [
        norm_weights[0] * norm_pdf1[i] + norm_weights[1] * norm_pdf2[i]
        for i in range(len(norm_pdf1))
    ]
    return norm_mixture, norm_joint_pdf

def organize_data(x, data1, data2, dist1, dist2):
    data = np.concatenate((data1, data2))
    np.random.shuffle(data)
    # train_set = data[1000:]
    # val_set = data[:1000]
    # sorted_val = np.array(sorted(val_set))
    dist_list = [dist1, dist2]
    mixture = Data_Rep(data, dist_list)
    post_scipy_dist_1, post_scipy_dist_2 = mixture.scipy_dists
    # took out train/val split here
    weights, distributions, log_l = mixem.em(data, dist_list,
                                             max_iterations=200,
                                             progress_callback=None)
    pdf1 = [post_scipy_dist_1.pdf(i) for i in x]
    pdf2 = [post_scipy_dist_2.pdf(i) for i in x]
    joint_pdf = [
        weights[0] * pdf1[i] + weights[1] * pdf2[i] for i in range(len(pdf1))
    ]
    return mixture, joint_pdf, data

def _mixture_cdf(self, data, dist_list):
    self.weights, self.distributions, self.log_l = mixem.em(
        data, dist_list, max_iterations=200, progress_callback=None)
    self.scipy_dists = self.get_scipy_dists(self.distributions)
    return lambda query: sum(
        w * dist.cdf(query)
        for w, dist in zip(self.weights, self.scipy_dists))

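# A self-contained sketch of the same idea outside the Data_Rep class: fit a
# two-component normal mixture with mixem, convert the fitted components to
# frozen scipy.stats distributions, and build a mixture CDF as a weighted sum.
# The conversion assumes the fitted NormalDistribution objects expose mu and
# sigma attributes; all other names here are illustrative only.
import numpy as np
import scipy.stats
import mixem
from mixem.distribution import NormalDistribution

data = np.concatenate([np.random.normal(0, 1, 500),
                       np.random.normal(4, 2, 500)])
weights, dists, log_l = mixem.em(
    data,
    [NormalDistribution(-1, 1), NormalDistribution(3, 1)],
    max_iterations=200, progress_callback=None)
scipy_dists = [scipy.stats.norm(loc=d.mu, scale=d.sigma) for d in dists]
mixture_cdf = lambda q: sum(w * d.cdf(q) for w, d in zip(weights, scipy_dists))
print(mixture_cdf(2.0))
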
# prepare some data
data = pd.read_csv("faithful.csv")
# print(data.head())

# output to static HTML file
output_file("Fitting_Data_Contour.html", title="Old Faithful Data")

# create a new plot with a title and axis labels
'''
fig = figure(title="Old Faithful Data",
             x_axis_label="Eruption duration (minutes)",
             y_axis_label="Waiting time (minutes)")
fig.scatter(x=data.eruptions, y=data.waiting)
show(fig)
'''

weights, distributions, ll, iteration = mixem.em(np.array(data), [
    mixem.distribution.MultivariateNormalDistribution(np.array((2, 50)), np.identity(2)),
    mixem.distribution.MultivariateNormalDistribution(np.array((4, 80)), np.identity(2)),
])

N = 100
x = np.linspace(np.min(data.eruptions), np.max(data.eruptions), num=N)
y = np.linspace(np.min(data.waiting), np.max(data.waiting), num=N)
xx, yy = np.meshgrid(x, y, indexing="ij")
# print(x, y)

# Convert meshgrid into a ((N*N), 2) array of coordinates
xxyy = np.array([xx.flatten(), yy.flatten()]).T

# Compute model probabilities
p = mixem.probability(xxyy, weights, distributions).reshape((N, N))
# print(p)

fig2 = figure(title="Fitted Old Faithful Data")
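# A sketch of how the computed probability grid might be rendered from here,
# assuming bokeh's image glyph; the palette choice and the transpose
# orientation are assumptions, not taken from the original script.
fig2.image(image=[p.T],  # transpose so image rows map to the waiting-time axis
           x=np.min(data.eruptions), y=np.min(data.waiting),
           dw=np.max(data.eruptions) - np.min(data.eruptions),
           dh=np.max(data.waiting) - np.min(data.waiting),
           palette="Spectral11")
show(fig2)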