dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis = 0)
inputs = dataset_total[len(dataset_total)-len(dataset_test) - 60 : ].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs) 

X_test = []
for i in range(60,80):
    X_test.append(inputs[i-60:i, 0])

X_test = np.array(X_test)
X_test = np.reshape(X_test,(X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = regressor.predict(X_test)

# inverting the scaling to get prices back in the original units
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

# Plotting the graphs
plt.plot(real_stock_price, color = 'red', label = 'Real Google Stock Price')
plt.plot(predicted_stock_price, color = 'blue', label = 'Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()
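# The scaler `sc`, the 60-timestep look-back, and `regressor` come from a training
# step that is not shown in this snippet. A minimal sketch of that preprocessing,
# assuming a MinMaxScaler and the same 60-day windows (names are illustrative):
from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(dataset_train[['Open']].values)

X_train, y_train = [], []
for i in range(60, len(training_set_scaled)):
    X_train.append(training_set_scaled[i - 60:i, 0])  # previous 60 days
    y_train.append(training_set_scaled[i, 0])         # the day to predict
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))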

# Using the Elbow Method to find the optimal number of clusters
from sklearn.cluster import KMeans
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i,
                    init='k-means++',
                    max_iter=300,
                    n_init=10,
                    random_state=0)
    kmeans.fit(x)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.show()
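# Both the elbow loop above and the clustering below assume `x` is a NumPy array
# of numeric features (two columns, given the 2-D scatter plot later). A minimal
# sketch of how it might have been prepared; the file name and column indices are
# illustrative assumptions, not from the source:
import pandas as pd

customers = pd.read_csv('customers.csv')  # hypothetical input file
x = customers.iloc[:, [3, 4]].values      # two numeric feature columns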

# Applying K-Means to the dataset
kmeans = KMeans(n_clusters=5,
                init='k-means++',
                max_iter=300,
                n_init=10,
                random_state=0)
y_kmeans = kmeans.fit_predict(x)

# Visualising the Clusters
plt.scatter(x[y_kmeans == 0, 0],
            x[y_kmeans == 0, 1],
            s=50,
            c='red',
            label='Cluster 1')
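# The visualisation in the source is cut off after the first cluster. A hedged
# sketch of how it typically continues: the remaining clusters, the centroids,
# and the legend (colours and label text are illustrative).
for cluster, colour in zip(range(1, 5), ('blue', 'green', 'cyan', 'magenta')):
    plt.scatter(x[y_kmeans == cluster, 0],
                x[y_kmeans == cluster, 1],
                s=50,
                c=colour,
                label='Cluster {}'.format(cluster + 1))
plt.scatter(kmeans.cluster_centers_[:, 0],
            kmeans.cluster_centers_[:, 1],
            s=200,
            c='yellow',
            label='Centroids')
plt.title('Clusters')
plt.legend()
plt.show()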
# Example 3
# Count how many words appear in qlist in total, and how many distinct words there are
# Simple tokenisation is needed here; English text is split on whitespace
qlist, alist = read_corpus(qa_corpus_path)
q_dict = get_dict(qlist)
word_total_q = sum(q_dict.values())
n_distinctive_words_q = len(q_dict)
print('There are {} words and {} distinctive tokens in question texts'.format(
    word_total_q, n_distinctive_words_q))
print(word_total_q)

# TODO: count the frequency of each word in qlist and sort the frequencies
# Plot with matplotlib's plot/bar functions; y is the word frequency

plt.bar(np.arange(10000), sorted(q_dict.values(), reverse=True)[100:10100])
plt.ylabel('Frequency')
plt.xlabel('Word Rank')
plt.title('Word Frequencies of the Question Corpus')
plt.show()

a_dict = get_dict(alist)
print('The 10 most frequent words in the question list (qlist) are:\n{}'.format(
    '|'.join(get_topk(10, q_dict))))
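
# get_dict and get_topk are project helpers that are not shown in this snippet.
# A minimal sketch of what they might look like, assuming the simple whitespace
# tokenisation described in the comments above:
from collections import Counter


def get_dict(texts):
    """Count word frequencies over a list of texts, splitting on whitespace."""
    counts = Counter()
    for text in texts:
        counts.update(text.split())
    return counts


def get_topk(k, word_dict):
    """Return the k most frequent words in a frequency dictionary."""
    return [word for word, _ in Counter(word_dict).most_common(k)]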


from nltk.stem import WordNetLemmatizer


class TextNormalizer:
    def __init__(self, stopwords, filter_vocab, re_cleaners):
        self.lemmatizer = WordNetLemmatizer()  # WordNet-based lemmatiser
        self.filter_vocab = filter_vocab       # vocabulary used to filter tokens
        self.stopwords = stopwords             # stopwords to drop
        self.re_cleaners = re_cleaners         # regex clean-up rules
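
    # Only the constructor is shown in the source. A hedged sketch of a
    # `normalize` method this class might expose, using the attributes set in
    # __init__; it assumes `re_cleaners` is a list of (pattern, replacement)
    # pairs and that `filter_vocab` holds the tokens to keep, neither of which
    # is stated in the source.
    def normalize(self, text):
        import re
        for pattern, repl in self.re_cleaners:        # regex clean-ups first
            text = re.sub(pattern, repl, text)
        tokens = [self.lemmatizer.lemmatize(t) for t in text.lower().split()]
        return [t for t in tokens
                if t not in self.stopwords and t in self.filter_vocab]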
# Example 4
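# code_to_voltage is not defined in this snippet. A minimal sketch of what such a
# helper might look like, assuming a bipolar ADC whose signed conversion code spans
# +/- vref; the 24-bit width is an illustrative assumption, not from the source.
def code_to_voltage(code, vref, bits=24):
    full_scale = 1 << (bits - 1)
    if code >= full_scale:       # treat the code as a two's-complement value
        code -= 1 << bits
    return vref * code / float(full_scale)
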
voltage_list = []
f = open('teraterm.txt', 'r')
for line in f:
    data_string = '0x' + line.strip()
    print('\nData received: %s' % data_string)

    data = int(data_string, 0)   # parse the hex string
    code = data >> 8             # bits above the lowest byte form the conversion code
    last_byte = data & 0xFF      # the lowest byte selects the DF value below
    voltage = code_to_voltage(code, 5)
    print('Voltage calculated: %f V' % voltage)
    voltage_list.append(voltage)

    # Map the status byte to DF
    if last_byte == 0x85:
        DF = 256
    elif last_byte == 0xA5:
        DF = 1024
    elif last_byte == 0xC5:
        DF = 4096
    elif last_byte == 0xE5:
        DF = 16384
    else:
        DF = 0
    print('DF : %d' % DF)
f.close()

plt.plot(voltage_list)
plt.axis([0, 500, -5, 5])
plt.xlabel('Samples')
plt.ylabel('Voltage (V)')
# Example 5
plt.show()

# Visualising the test set results
from matplotlib.colors import ListedColormap
X_set, Y_set = X_test, Y_test
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1,
              step=0.01))
plt.contourf(X1,
             X2,
             classifier.predict(np.array([X1.ravel(),
                                          X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75,
             cmap=ListedColormap(('red', 'green', 'blue')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(Y_set)):
    plt.scatter(X_set[Y_set == j, 0],
                X_set[Y_set == j, 1],
                c=ListedColormap(('red', 'green', 'blue'))(i),
                label=j)
plt.xlabel("LD1")
plt.ylabel("LD2")
plt.title("KNN (on test set)")
plt.legend()
plt.show()
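
# The plot above assumes `classifier`, X_test, and Y_test already exist, with the
# two plotted features being linear discriminants (hence the LD1/LD2 labels). A
# minimal sketch of that earlier step, assuming scikit-learn; the variable names
# and hyperparameters are illustrative:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.neighbors import KNeighborsClassifier

lda = LDA(n_components=2)
X_train = lda.fit_transform(X_train, Y_train)  # reduce to two discriminants
X_test = lda.transform(X_test)

classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier.fit(X_train, Y_train)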
# Example 6
inputs = dataset_total[len(dataset_total) - len(dataset_test) -
                       60:].values  # all the inputs needed to predict January 2017
inputs = inputs.reshape(-1, 1)  # reshape into a single column
inputs = sc.transform(inputs)  # apply the same scaling used during training

X_test = []
for i in range(60, 80):  # the test set has 20 days
    X_test.append(inputs[i - 60:i, 0])  # column 0 and the previous 60 rows for each day

X_test = np.array(X_test)
X_test = np.reshape(
    X_test, (X_test.shape[0], X_test.shape[1], 1)
)  # (batch size = total days, timesteps = 60, input size = 1); a new indicator such as the price of a correlated stock could be added as an extra input dimension

predicted_stock_price = regressor.predict(X_test)
# invert the scaling to get prices back in the original units
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

# using matplotlib to plot the data
plt.plot(real_stock_price, color='red',
         label='Real Google Stock Price')  # data and its label
plt.plot(predicted_stock_price, color='blue',
         label='Predicted Google Stock Price')  # data and its label
plt.title('Google Stock Price Prediction')  # title
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()  # include the legend in the chart
plt.show()
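
# The `regressor` used in both stock-price snippets is not defined here. A minimal
# sketch of a stacked LSTM it could be, assuming Keras and the (60, 1) input shape
# implied by the reshapes above; layer sizes and training settings are illustrative:
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense

regressor = Sequential()
regressor.add(LSTM(units=50, return_sequences=True, input_shape=(60, 1)))
regressor.add(Dropout(0.2))
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))
regressor.add(Dense(units=1))
regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.fit(X_train, y_train, epochs=100, batch_size=32)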

## We can increase the accuracy by changing the scoring method to 'accuracy' or 'neg_mean_squared_error'
df = pd.read_stata('C:\\Users\\Inspiron\\Desktop\\fp.dta')

## examining the size of the dataset
df.shape

## examining the first rows of the dataset
df.head()

## using a histogram for visual analysis of the distribution of billionaires per country
numbil0_2008 = df[(df['year'] == 2008) &
                  (df['country'] != 'United States')].loc[:, 'numbil0']

plt.subplots(figsize=(12, 8))
plt.hist(numbil0_2008, bins=30)
plt.xlim(left=0)
plt.grid()
plt.xlabel('Number of billionaires in 2008')
plt.ylabel('Count')
plt.savefig('histogram.png')
plt.show()
dataset = pd.read_csv('Ads_CTR_Optimisation.csv')

import random

# Thompson Sampling: in each round, draw a sample from every ad's Beta posterior
# and show the ad with the highest sampled value
N = 10000  # number of rounds
d = 10     # number of ads
ads_selected = []
numbers_of_rewards_1 = [0] * d  # times each ad was shown and rewarded
numbers_of_rewards_0 = [0] * d  # times each ad was shown and not rewarded
total_reward = 0
for n in range(0, N):
    ad = 0
    max_random = 0
    for i in range(0, d):
        # draw from Beta(successes + 1, failures + 1) for ad i
        random_beta = random.betavariate(numbers_of_rewards_1[i] + 1,
                                         numbers_of_rewards_0[i] + 1)
        if random_beta > max_random:
            max_random = random_beta
            ad = i
    ads_selected.append(ad)
    reward = dataset.values[n, ad]
    # update the posterior counts for the selected ad
    if reward == 1:
        numbers_of_rewards_1[ad] = numbers_of_rewards_1[ad] + 1
    else:
        numbers_of_rewards_0[ad] = numbers_of_rewards_0[ad] + 1
    total_reward = total_reward + reward
    
plt.hist(ads_selected)
plt.title('Histogram of ads selections')
plt.xlabel('Ads')
plt.ylabel('Number of times each ad was selected')
plt.show()
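
# Quick sanity check: the estimated reward rate for each ad can be read straight
# from the counters maintained in the loop above.
for i in range(d):
    selections = numbers_of_rewards_1[i] + numbers_of_rewards_0[i]
    if selections:
        print('Ad %d: estimated reward rate %.3f over %d selections'
              % (i, numbers_of_rewards_1[i] / selections, selections))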