# Build the test inputs from the combined train/test series
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)

X_test = []
for i in range(60, 80):
    X_test.append(inputs[i - 60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

predicted_stock_price = regressor.predict(X_test)
# Inverting the scaling normalization
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

# Plotting the graphs
plt.plot(real_stock_price, color='red', label='Real Google Stock Price')
plt.plot(predicted_stock_price, color='blue', label='Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()
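# Note: the snippet above assumes `sc`, `regressor`, `dataset_train`,
# `dataset_test` and `real_stock_price` already exist. A minimal sketch of that
# setup follows; the CSV file names are assumptions, and `regressor` would be
# an LSTM model trained on 60-timestep windows, whose construction is omitted:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

dataset_train = pd.read_csv('Google_Stock_Price_Train.csv')  # hypothetical file name
dataset_test = pd.read_csv('Google_Stock_Price_Test.csv')    # hypothetical file name
real_stock_price = dataset_test['Open'].values

sc = MinMaxScaler(feature_range=(0, 1))
# the scaler is fitted on the training data only, so the same transform
# can be applied to the test inputs above
training_set_scaled = sc.fit_transform(dataset_train[['Open']].values)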
# Using the Elbow Method to find the optimal number of clusters
from sklearn.cluster import KMeans
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
    kmeans.fit(x)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.show()

# Applying K-Means to the dataset
kmeans = KMeans(n_clusters=5, init='k-means++', max_iter=300, n_init=10, random_state=0)
y_kmeans = kmeans.fit_predict(x)

# Visualising the Clusters (remaining clusters sketched below)
plt.scatter(x[y_kmeans == 0, 0], x[y_kmeans == 0, 1], s=50, c='red', label='Cluster 1')
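# The original snippet cuts off after the first cluster; the remaining clusters
# and the centroids would follow the same pattern. A sketch (colors and cluster
# labels are placeholders, not taken from the original):
colors = ('red', 'blue', 'green', 'cyan', 'magenta')
for cluster in range(1, 5):
    plt.scatter(x[y_kmeans == cluster, 0], x[y_kmeans == cluster, 1],
                s=50, c=colors[cluster], label='Cluster {}'.format(cluster + 1))
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=200, c='yellow', label='Centroids')
plt.legend()
plt.show()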
# Count how many words appear in qlist in total, and how many distinct words
# there are (simple whitespace tokenization is enough for English)
from nltk.stem import WordNetLemmatizer

qlist, alist = read_corpus(qa_corpus_path)
q_dict = get_dict(qlist)
word_total_q = sum(q_dict.values())
n_distinctive_words_q = len(q_dict)
print('There are {} words and {} distinct tokens in the question texts'.format(
    word_total_q, n_distinctive_words_q))

# TODO: count the frequency of every word in qlist, sort the frequencies,
# and plot them with matplotlib (y axis = word frequency)
plt.bar(np.arange(10000), sorted(q_dict.values(), reverse=True)[100:10100])
plt.ylabel('Frequency')
plt.xlabel('Word Rank')
plt.title('Word Frequencies of the Question Corpus')
plt.show()

a_dict = get_dict(alist)
print('The 10 most frequent words in the question list (qlist) are:\n{}'.format(
    '|'.join(get_topk(10, q_dict))))

class TextNormalizer:
    def __init__(self, stopwords, filter_vocab, re_cleaners):
        self.lemmatizer = WordNetLemmatizer()
        self.filter_vocab = filter_vocab
        self.stopwords = stopwords
        self.re_cleaners = re_cleaners
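# `read_corpus`, `get_dict` and `get_topk` are used above but not defined in
# this snippet. A plausible minimal implementation of the two counting helpers,
# as an assumption rather than the original code:
from collections import Counter

def get_dict(texts):
    """Whitespace-tokenize a list of sentences and count word occurrences."""
    counter = Counter()
    for sentence in texts:
        counter.update(sentence.split())
    return counter

def get_topk(k, word_dict):
    """Return the k most frequent words in a word->count mapping."""
    return [word for word, _ in Counter(word_dict).most_common(k)]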
voltage_list = []
with open('teraterm.txt', 'r') as f:
    for line in f:
        data_string = '0x' + line.strip()
        print('\nData received: %s' % data_string)
        data = int(data_string, 0)
        code = data >> 8         # upper bits hold the ADC code
        last_byte = data & 0xFF  # low byte encodes the DF setting
        voltage = code_to_voltage(code, 5)
        print('Voltage calculated: %f V' % voltage)
        voltage_list.append(voltage)
        if last_byte == 0x85:
            DF = 256
        elif last_byte == 0xA5:
            DF = 1024
        elif last_byte == 0xC5:
            DF = 4096
        elif last_byte == 0xE5:
            DF = 16384
        else:
            DF = 0
        print('DF: %d' % DF)

plt.plot(voltage_list)
plt.axis([0, 500, -5, 5])
plt.xlabel('Samples')
plt.ylabel('Voltage (V)')
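# `code_to_voltage` is not defined in the snippet. Given the -5 V..+5 V plot
# range, a plausible sketch for a bipolar two's-complement ADC code is below;
# the 16-bit width and the coding scheme are assumptions about the hardware:
def code_to_voltage(code, vref, bits=16):
    """Convert a two's-complement ADC code to a voltage in the +/-vref range."""
    if code >= 1 << (bits - 1):   # interpret the MSB as the sign bit
        code -= 1 << bits
    return vref * code / float(1 << (bits - 1))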
plt.show()

# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_set, Y_set = X_test, Y_test
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2,
             classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green', 'blue')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(Y_set)):
    plt.scatter(X_set[Y_set == j, 0], X_set[Y_set == j, 1],
                c=ListedColormap(('red', 'green', 'blue'))(i), label=j)
plt.xlabel('LD1')
plt.ylabel('LD2')
plt.title('K-NN (Test set)')
plt.legend()
plt.show()
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values  # all the inputs of January 2017
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)  # scale with the scaler fitted on the training set

X_test = []
for i in range(60, 80):  # the test set has 20 days
    X_test.append(inputs[i - 60:i, 0])  # the 60 previous values (column 0) for each day
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
# shape: (batch size = total days, timesteps = 60, input size = 1); a third
# dimension > 1 would hold extra indicators, e.g. the price of a correlated stock

predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)  # go back to the unscaled prices

# Using matplotlib to plot the data
plt.plot(real_stock_price, color='red', label='Real Google Stock Price')
plt.plot(predicted_stock_price, color='blue', label='Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()  # include the legend in the chart (no arguments needed)
plt.show()
# Accuracy could be improved by changing the scoring method, e.g. to
# 'accuracy' or 'neg_mean_squared_error' (a sketch follows below)
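# A sketch of the scoring idea mentioned above: wrapping the model builder in
# scikit-learn's grid search with scoring='neg_mean_squared_error'. The builder
# function, layer sizes, and parameter grid are assumptions, not the original
# code; X_train and y_train are assumed to be the original training arrays:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

def build_regressor(optimizer='adam'):
    # hypothetical builder: recreates a small LSTM like the one used above
    model = Sequential()
    model.add(LSTM(units=50, input_shape=(60, 1)))
    model.add(Dense(units=1))
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

grid_search = GridSearchCV(estimator=KerasRegressor(build_fn=build_regressor),
                           param_grid={'batch_size': [16, 32], 'epochs': [50, 100]},
                           scoring='neg_mean_squared_error', cv=3)
grid_search = grid_search.fit(X_train, y_train)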
df = pd.read_stata('C:\\Users\\Inspiron\\Desktop\\fp.dta')
# Examine the size of the database
df.shape
# Examine the first rows of the database
df.head()
# Histogram for a visual analysis of the distribution of billionaires per country
numbil0_2008 = df[(df['year'] == 2008) &
                  (df['country'] != 'United States')].loc[:, 'numbil0']
plt.subplots(figsize=(12, 8))
plt.hist(numbil0_2008, bins=30)
plt.xlim(left=0)
plt.grid()
plt.xlabel('Number of billionaires in 2008')
plt.ylabel('Count')
plt.savefig('histogram.png')
plt.show()
# Thompson Sampling for ad click-through optimisation
dataset = pd.read_csv('Ads_CTR_Optimisation.csv')
import random

N = 10000  # number of rounds
d = 10     # number of ads
ads_selected = []
numbers_of_rewards_1 = [0] * d  # clicks observed per ad
numbers_of_rewards_0 = [0] * d  # non-clicks observed per ad
total_reward = 0
for n in range(0, N):
    ad = 0
    max_random = 0
    for i in range(0, d):
        # draw from the Beta posterior of each ad's click rate and pick the winner
        random_beta = random.betavariate(numbers_of_rewards_1[i] + 1,
                                         numbers_of_rewards_0[i] + 1)
        if random_beta > max_random:
            max_random = random_beta
            ad = i
    ads_selected.append(ad)
    reward = dataset.values[n, ad]
    if reward == 1:
        numbers_of_rewards_1[ad] += 1
    else:
        numbers_of_rewards_0[ad] += 1
    total_reward += reward

plt.hist(ads_selected)
plt.title('Histogram of ads selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()
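# To see what the posterior sampling buys, a quick baseline on the same data:
# picking an ad uniformly at random each round. This comparison is a sketch
# added for illustration, not part of the original snippet:
random_total_reward = 0
for n in range(0, N):
    random_total_reward += dataset.values[n, random.randrange(d)]
print('Thompson Sampling total reward: %d' % total_reward)
print('Random selection total reward:  %d' % random_total_reward)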