def compressed_test():
    train_data, train_labels = utils.read_from_csv(
        one_hot=True, filename='./data/fashion-mnist_train.csv', header=1)
    test_data, test_labels = utils.read_from_csv(
        one_hot=True, filename='./data/10.csv', header=0)
    run_model(train_data, train_labels, test_data, test_labels)
def split_train_test():
    data, labels = utils.read_from_csv(
        one_hot=True, filename='./data/fashion-mnist_train.csv', header=1)
    train_data = data[:-train_test_split]
    train_labels = labels[:-train_test_split]
    test_data = data[-train_test_split:]
    test_labels = labels[-train_test_split:]
    run_model(train_data, train_labels, test_data, test_labels)
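
# The helpers above assume utils.read_from_csv returns (data, labels) with
# optional one-hot encoding. A minimal sketch of what such a reader might look
# like; the column layout (label first, 784 pixels after) and the one-hot
# behaviour are assumptions, not the actual utils module:
import numpy as np

def read_from_csv_sketch(filename, one_hot=False, header=1, num_classes=10):
    """Hypothetical reader: label in column 0, pixel values in the rest."""
    rows = np.loadtxt(filename, delimiter=',', skiprows=header)
    labels = rows[:, 0].astype(int)
    data = rows[:, 1:] / 255.0  # scale pixel intensities to [0, 1]
    if one_hot:
        labels = np.eye(num_classes)[labels]  # index rows of identity matrix
    return data, labels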
def run(ques):
    """Scrape data, classify the question, and open the matching URLs."""
    scrape()
    df = pd.read_csv("data_set.csv")
    pred_tag = Linear_SVC(df, ques)
    retval = cosine.tf_idf(read_from_csv(filter_this=pred_tag), check_with=ques)
    with open('f_data.pickle', 'rb') as handle:
        b = pickle.load(handle)
    for i in retval:
        webbrowser.open_new_tab(b[i]['url'])
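
# cosine.tf_idf is not shown here; a minimal sketch of TF-IDF plus cosine
# similarity ranking with scikit-learn. Returning the indices of the top
# matches is an assumption about what the real helper does:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def tf_idf_sketch(docs, check_with, top_n=5):
    """Rank docs by cosine similarity to the query; return their indices."""
    vectorizer = TfidfVectorizer()
    doc_vecs = vectorizer.fit_transform(docs)        # fit vocabulary on docs
    query_vec = vectorizer.transform([check_with])   # embed query in same space
    scores = cosine_similarity(query_vec, doc_vecs).ravel()
    return scores.argsort()[::-1][:top_n]            # best matches first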
def compress():
    images, labels = utils.read_from_csv(False, './data/fashion-mnist_train.csv')
    for k in k_vals:
        o = open(str(k) + '.csv', 'w')
        compressed = np.zeros(images.shape)
        for i in range(0, images.shape[0]):
            if i % 1000 == 0:
                print(i)  # progress indicator
            img = np.reshape(images[i], (28, 28))
            lbl = labels[i]
            u, d, v = np.linalg.svd(img, full_matrices=True, compute_uv=True)
            d = np.diag(d)
            uk = u[:, :k]    # first k columns of U (m x m becomes m x k)
            dk = d[:k, :k]   # first k rows and columns of d (m x n becomes k x k)
            vk = v[:k, :]    # first k rows of Vt (n x n becomes k x n)
            k_approx = np.matmul(np.matmul(uk, dk), vk)
            # rescale values to fit between 0 and 1
            k_approx_rescaled = (k_approx - np.min(k_approx)) / (
                np.max(k_approx) - np.min(k_approx))
            shrunk = np.uint8(k_approx_rescaled * 255)
            # im = Image.fromarray(shrunk)
            # im.show()
            to_save = np.reshape(shrunk, (1, 784))
            output = str(lbl) + ','
            for entry in to_save:
                for elem in entry:
                    output += str(elem) + ','
            output = output[:-1] + '\n'
            o.write(output)
        o.close()
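
# A self-contained sketch of the rank-k SVD approximation used in compress(),
# runnable without utils or the Fashion-MNIST CSV; the random image is a
# stand-in for a real 28x28 sample:
import numpy as np

img = np.random.rand(28, 28)  # stand-in for one Fashion-MNIST image
k = 10
u, d, v = np.linalg.svd(img, full_matrices=True)
k_approx = u[:, :k] @ np.diag(d[:k]) @ v[:k, :]  # keep k largest singular values
assert k_approx.shape == (28, 28)
# reconstruction error shrinks as k grows toward 28
print(np.linalg.norm(img - k_approx))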
    for epoch in range(0, 10):
        print('epoch', epoch)
        batched = utils.generate_batches(train_data, train_labels, batch_size=100)
        i = 0
        for batch in batched:
            if i % 10 == 0:
                print('running batch', i, '...')
            sess.run(train_step, feed_dict={x: batch[0], y_: batch[1]})
            i += 1
    return sess, y, y_, x


def test(sess, y, y_, test_data, test_labels, x):
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(sess.run(accuracy, feed_dict={x: test_data, y_: test_labels}))


data, labels = utils.read_from_csv(one_hot=True, filename='./data/fashion-mnist_train.csv')
train_data = data[:-train_test_split]
train_labels = labels[:-train_test_split]
test_data = data[-train_test_split:]
test_labels = labels[-train_test_split:]
sess, y, y_, x = train(train_data, train_labels)
test(sess, y, y_, test_data, test_labels, x)
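
# utils.generate_batches is assumed above; a minimal sketch of a batching
# helper that yields (data, labels) pairs. The real implementation may
# shuffle differently or not at all:
import numpy as np

def generate_batches_sketch(data, labels, batch_size=100):
    idx = np.random.permutation(len(data))  # shuffle once per epoch
    for start in range(0, len(data), batch_size):
        batch_idx = idx[start:start + batch_size]
        yield data[batch_idx], labels[batch_idx]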
        })
        # Check to make sure the datatype of the attribute is the same
        # as what we're adding.
        pass

        # Look up the page URI
        try:
            get_page_json = parse_json(api.page.get)
            page_uri = get_page_json(name=pagename)['objects'][0]['resource_uri']
        except IndexError:
            # No page with that name, let's continue
            continue
        api.page_info.post({
            'page': page_uri,
            'attribute': attribute,
            'value': value
        })


if len(sys.argv) > 1:
    print("Parsing data from file " + sys.argv[1])
    extracted_data = read_from_csv(open(sys.argv[1], "r"))
else:
    print("Usage: %s [data_file.csv]" % sys.argv[0])
    sys.exit(1)  # no input file, nothing to import

for pagename, data in extracted_data.items():
    import_on(pagename, data)
import csv

import numpy as np

from neuron import Neuron
from som import Som
from utils import read_from_csv, normalize

n1, n2, n3 = read_from_csv('Iris.csv')
norm_data = normalize(n1)
som = Som(10, norm_data)
som.fit(100, 1e-2)
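
# normalize() is imported from utils but not shown; a plausible min-max
# normalisation sketch. Column-wise scaling to [0, 1] is an assumption about
# what the real helper does, and constant columns would need special handling:
import numpy as np

def normalize_sketch(data):
    data = np.asarray(data, dtype=float)
    mins = data.min(axis=0)
    maxs = data.max(axis=0)
    return (data - mins) / (maxs - mins)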
def users_of_file(path):
    (head, users) = read_from_csv(path, func=lambda x: x.split(',')[0])
    return set(users)
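
# This variant of read_from_csv applies func to every raw line; a sketch of
# what such a reader might look like. Returning (header, mapped_rows) is an
# assumption based on the call sites above:
def read_from_csv_func_sketch(path, func=lambda line: line):
    with open(path) as f:
        lines = [line.rstrip('\n') for line in f]
    head, rows = lines[0], lines[1:]   # first line is the header
    return head, [func(row) for row in rows]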
def sort_train_user():
    (head, data) = read_from_csv(TRAIN_USER)
    data_sorted = sort_raw_data(data)
    write_to_csv(TRAIN_USER_SORTED, head, data_sorted)
def run(ques):
    parse_and_store()
    print(cosine.tf_idf(read_from_csv(), check_with=ques))
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn import metrics

import numpy as np

import utils

log_reg = LogisticRegression()
naive_bayes = GaussianNB()
knn = KNeighborsClassifier()
decision_tree = DecisionTreeClassifier()
svm = SVC()

print('reading data...')
features, labels = utils.read_from_csv(one_hot=False, filename='./data/fashion-mnist_train.csv')

print('fitting...')
log_reg.fit(features[:-100], labels[:-100])
print(log_reg)
log_reg_pred = log_reg.predict(features[-100:])
print('Logistic Regression Results:')
print('- - - - - - - - - - - - - - - - - - - - - - - - - - - -')
print(metrics.classification_report(labels[-100:], log_reg_pred))
print(metrics.confusion_matrix(labels[-100:], log_reg_pred))

naive_bayes.fit(features[:-100], labels[:-100])
print(naive_bayes)
nb_pred = naive_bayes.predict(features[-100:])
print('Naive Bayes Results:')
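
# The remaining classifiers are instantiated above but their evaluation is cut
# off here; a sketch of how the comparison presumably continues, reusing the
# same train/test split (the loop and its report formatting are assumptions):
for name, clf in [('KNN', knn), ('Decision Tree', decision_tree), ('SVM', svm)]:
    clf.fit(features[:-100], labels[:-100])
    pred = clf.predict(features[-100:])
    print(name + ' Results:')
    print(metrics.classification_report(labels[-100:], pred))
    print(metrics.confusion_matrix(labels[-100:], pred))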