def GenerateOutcomes(x, z, num_cont, num_bin):
    """Simulate outcomes for the four (t, a) combinations of every observation.

    The original author describes this as the generating procedure of
    Algorithm 2 by Madras.

    For each row of the horizontally concatenated ``[x, z]`` matrix a fresh
    coefficient vector is sampled, four means are computed, and one
    unit-variance normal draw is taken around each mean. A histogram of the
    four outcome samples is shown before returning.

    Args:
        x: 2-D array of covariates. The coefficient vector is built as
            continuous, then binary, then z coefficients, so the continuous
            columns are expected to come first in ``x``.
        z: 2-D array; its number of columns sets how many z coefficients
            are sampled.
        num_cont: number of coefficients sampled for continuous covariates.
        num_bin: number of coefficients sampled for binary covariates.

    Returns:
        A tuple ``(y_all, mu_all, y_column, mu_column)``: the sampled
        outcomes and their means, each with one row per observation and one
        column per (t, a) combination, followed by the comma-separated
        column names of each array.
    """
    # Column order shared by the plot labels, the stacked arrays and the
    # returned column-name strings.
    y_names = ('y_t0_a0', 'y_t1_a0', 'y_t0_a1', 'y_t1_a1')
    mu_names = ('mu_t0_a0', 'mu_t1_a0', 'mu_t0_a1', 'mu_t1_a1')

    # Constants of the procedure (attributed to Madras in the original code).
    offset = -11
    a_effect = 6
    n_z = z.shape[1]

    covariates = np.concatenate((x, z), 1)
    shift = np.ones(covariates.shape[1]) * .5

    samples = {name: [] for name in y_names}
    means = {name: [] for name in mu_names}

    # Row-by-row because every observation gets its own coefficient sample.
    for row in covariates:
        # The three draws happen in this order: continuous, binary, z.
        coef = np.concatenate(
            (
                choice([0, .1, .2, .3, .4], num_cont,
                       p=[.5, .125, .125, .125, .125]),
                choice([0, .1, .2, .3, .4], num_bin,
                       p=[.6, .1, .1, .1, .1]),
                choice([.4, .6], n_z, p=[.5, .5]),
            ),
            0,
        )

        exp_mean = np.matmul(np.exp(row + shift), coef)
        lin_mean = np.matmul(row, coef) - offset
        row_means = (
            exp_mean,             # t0, a0
            lin_mean,             # t1, a0
            exp_mean + a_effect,  # t0, a1
            lin_mean + a_effect,  # t1, a1
        )

        for mu_name, mu in zip(mu_names, row_means):
            means[mu_name].append(mu)
        for y_name, mu in zip(y_names, row_means):
            samples[y_name].append(np.random.normal(mu, 1, 1)[0])

    # Overlaid histograms of the four sampled outcome columns.
    plt.figure()
    plt.title('Generated data')
    for y_name in y_names:
        plt.hist(samples[y_name], label=y_name, alpha=0.5, bins=20)
    plt.legend()
    plt.show()

    y_all = np.vstack([samples[name] for name in y_names]).T
    mu_all = np.vstack([means[name] for name in mu_names]).T

    # Column-name strings follow the stacking order used above.
    y_column = ', '.join(y_names)
    mu_column = ', '.join(mu_names)
    return y_all, mu_all, y_column, mu_column
def draw_hist(heights):
    """Show a histogram of ``heights`` split into 100 bins.

    ``heights`` is the raw quantitative data to plot; unlike qualitative
    data, no frequency counting is done beforehand, the binning is left to
    ``plt.hist``.
    """
    plt.hist(heights, bins=100)
    plt.title('Heights Of Male Students')
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.show()
def draw_cumulative_hist(heights):
    """Show a normalised cumulative step histogram of ``heights``.

    ``heights`` is the raw quantitative data to plot. It is split into
    20 bins and drawn as a step curve (``histtype='step'``) that
    accumulates from bin to bin (``cumulative=True``).

    ``density=True`` requests the normalised histogram. It replaces the
    ``normed`` keyword, which was deprecated in Matplotlib 2.1 and removed
    in 3.1.
    """
    plt.hist(heights, 20, density=True, histtype='step', cumulative=True)
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.title('Heights Of Male Students')
    plt.show()
''' import json from textblob import TextBlob from wordcloud import WordCloud import matplotlib.pylot as plt # Get the JSON data tweetFile = open("tweets.json", "r") tweetData = json.load(tweetFile) tweetFile.close() polarity_values = [] for tweet in tweetData: tweets.append(tweet["text"]) giant_string = " ".join(tweets) tb = TextBlob(tweet_text) print("{}: {}".format(tweet_text, tb.polarity)) polarity_values.append(tb.polarity) # bins = [-1, -0.5, 0, 0.5, 1] plt.hist(polarity_values, bins) plt.title("tweet polarity") plt.ylabel("Count of tweets") plt.xlabel("Polarity") plt.show()
# Pull features and labels out of the additional data set (`mydat` is
# defined earlier in the script).
X_add = mydat['features']
y_add = mydat['labels']

# Load the extra train / test splits stored as pickled dicts.
# NOTE(review): pickle.load can execute arbitrary code; only use it on
# files from a trusted source.
with open('./mydata/train.p', mode='rb') as f:
    mytrain = pickle.load(f)
X_mytrain = mytrain['features']
y_mytrain = mytrain['labels']

with open('./mydata/test.p', mode='rb') as f:
    mytest = pickle.load(f)
X_mytest = mytest['features']
y_mytest = mytest['labels']

# Extend the base sets with the extra samples: features are joined along
# the first axis, labels are joined with np.append, which flattens both
# inputs when no axis is given.
X_train = np.concatenate((X_train_, X_mytrain), axis=0)
y_train = np.append(y_train_, y_mytrain)
X_test = np.concatenate((X_test_, X_mytest), axis=0)
y_test = np.append(y_test_, y_mytest)

# Distribution of the merged training labels.
plt.hist(y_train, bins=50, color='#FF69B4')

#==============================================================================
# Failed attempt to train the model by incrementally increasing the number of
# near-zero training examples.
#==============================================================================
#def limit(X, y, s = 700):
#    bad = [k for k,v in enumerate(y) if v in [0, -.25, .25]]
#    good = list(set(range(0, len(y)))-set(bad))
#    new = good + [bad[i] for i in np.random.randint(0,len(bad),s)]
#    X,y = X[new,], y[new]
#    return X, y
#