def test_init_file(self): log = Diary(self.TXT_PATH, async=False) log.info(self.INFO) log.close() with open(self.TXT_PATH) as f: line = f.readline() self.assertTrue(self.INFO in line)
def test_init_does_not_exist(self): log = Diary(self.NO_EXIST_PATH, async=False) log.info(self.INFO) log.close() with open(self.NO_EXIST_PATH) as f: line = f.readline() self.assertTrue(self.INFO in line)
def test_warn_log_trace(self): log = Diary(self.WARNINGS_LOG_PATH, async=False) log.warn(self.INFO, log_trace=True) log.close() with open(log.log_file.name) as f: self.assertTrue( "logged(event, *args, **kwargs)" in f.read())
class TestDiary(TestCase):
    """Tests for looking up students in a ``Diary`` built from ``data.json``."""

    def setUp(self):
        """Create a reference student and the diary under test."""
        self.EPSILON = 0.01
        # Each Subject takes a name and a pair of lists; presumably grades
        # and attendance flags -- confirm against the Subject class.
        sub1 = Subject("Biologia",
                       [[3.5, 2.5, 5.0], [1, 1, 1, 1, 1, 1, 1, 1, 0]])
        sub2 = Subject("Matematyka",
                       [[2.5, 3.0, 2.5], [1, 1, 0, 1, 0, 1, 0, 1, 1]])
        self.subjects = [sub1, sub2]
        self.andrzej = Student("Andrzej Abacki", self.subjects)
        self.diary = Diary("AGH", 2016, "data.json")

    def test_init(self):
        """The diary's 'Andrzej Abacki' matches the reference student."""
        found = self.diary.get_student("Andrzej Abacki")
        self.assertEqual(found.name, self.andrzej.name)
        self.assertEqual(len(found.subjects), len(self.andrzej.subjects))

    def test_franciszek_compute_total_average(self):
        """Total average of 'Franciszek Fabacki' is 3.83 to two places."""
        # assertAlmostEquals is a deprecated alias (removed in Python 3.12);
        # the third positional argument was `places`, spelled out here.
        self.assertAlmostEqual(
            self.diary.get_student(
                "Franciszek Fabacki").compute_total_average(),
            3.83, places=2)

    def test_get_not_existing_student(self):
        """Looking up an unknown name yields ``None``."""
        self.assertIsNone(self.diary.get_student("Klima Nima"))
def test_unicode_PY2_DB_error(self): if not _PY2: return unicode_str = u"\u3002" log = Diary(self.INIT_DIR, async=False, file_name="unicode_test.log", db_name="unicode.db") with self.assertRaises(ValueError, msg="diary does not support logging unicode strings into a database in python2"): log.log(unicode_str)
def setUp(self):
    """Create the reference student and the diary used by the tests."""
    self.EPSILON = 0.01
    # Each Subject takes a name and a pair of lists; presumably grades and
    # attendance flags -- confirm against the Subject class.
    self.subjects = [
        Subject("Biologia", [[3.5, 2.5, 5.0], [1, 1, 1, 1, 1, 1, 1, 1, 0]]),
        Subject("Matematyka", [[2.5, 3.0, 2.5], [1, 1, 0, 1, 0, 1, 0, 1, 1]]),
    ]
    self.andrzej = Student("Andrzej Abacki", self.subjects)
    self.diary = Diary("AGH", 2016, "data.json")
def test_log_event_instance(self): mock_level = "CRITICAL" log = Diary(self.INIT_DIR, db_name="events.db", async=False) e = Event(self.INFO, level=mock_level) log.log(e) self.assertEquals(e.level, mock_level) log.close() with DiaryDB(log.db_file.name) as db: db.assert_event_logged(self.INFO, level=mock_level)
def test_custom_format_init(self):
    """A ``log_format`` passed to the constructor is used for file output.

    The first line of the log file must equal ``emergency_format`` applied
    to the last logged event plus a newline; the event must also be present
    in the database.
    """
    logger = Diary(self.API_DIR, log_format=emergency_format,
                   file_name="EmergencyLogs2.log",
                   db_name="EmergencyDB2.db")
    logger.log(self.INFO)
    logger.close()
    with open(logger.log_file.name) as f:
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(f.readline(),
                         emergency_format(logger.last_logged_event) + '\n')
    with DiaryDB(logger.db_file.name) as db:
        db.assert_event_logged(self.INFO)
def test_queue_join(self): trials = 10 log = Diary(self.INIT_DIR, async=True, db_name="QUEUE_TEST.db") for i in range(trials): log.log(i) log.close() self.assertFalse(log.thread.is_alive()) with DiaryDB(log.db_file.name) as db: entries = db.cursor.execute("SELECT * FROM logs") self.assertEquals(len(entries.fetchall()), trials)
def test_log_event_formatted(self): log = Diary(self.INIT_DIR, file_name="formatted.log", async=False) e = Event(self.INFO, "LEVEL") e.set_formatter("{info}|{level}") log.log(e) log.close() with open(log.log_file.name) as f: self.assertEquals("{info}|{level}\n".format(info=self.INFO, level="LEVEL"), f.readline()) e.set_formatter(None) # Set Event formatter back to None to not upset other tests
def test_diary_print(self):
    """Logging with ``also_print=True`` still writes the entry to the file.

    Only the file output is verified; stdout is hard to capture reliably,
    so the test just relies on nothing raising while printing.
    """
    log = Diary(self.INIT_DIR, file_name="printing.log", also_print=True)
    info_to_log = "hello there world!!!"
    log.info(info_to_log)
    log.close()
    with open(log.log_file.name) as f:
        # assertIn reports both operands on failure.
        self.assertIn(info_to_log, f.readline())
def test_custom_format_event(self): class FormattedEvent(Event): formatter = "|{dt}|{info}|{level_str}|" logger = Diary(self.API_DIR, file_name="formatted.txt", db_name="formattedDB.db", event=FormattedEvent, async=False) logger.log(self.INFO) logger.close() with open(logger.log_file.name) as f: self.assertEquals(f.readline(), logger.last_logged_event.formatted() + '\n') with DiaryDB(logger.db_file.name) as db: db.assert_event_logged(self.INFO, "INFO")
def test_log_event_in_init(self): class PrettyEvent(Event): formatter = "{info}|{level_str}" log = Diary(self.INIT_DIR, file_name="pretty.log", db_name="prettyevents.db", async=False, event=PrettyEvent) log.log(self.INFO) log.close() with DiaryDB(log.db_file.name) as db: db.assert_event_logged(self.INFO) with open(log.log_file.name) as f: self.assertEquals("{info}|{level}\n".format(info=self.INFO, level="INFO"), f.readline())
def test_unicode_PY2(self): if not _PY2: return unicode_str = u"\u3002" log = Diary(os.path.join(self.INIT_DIR, "unicode_test.log"), async=False, encoding="utf-8") log.log(unicode_str) log.close() with codecs.open(log.log_file.name, encoding=log.encoding) as f: line = f.readline() self.assertTrue(unicode_str in line)
def test_unicode_PY3(self): if _PY2: return unicode_str = u"\u3002" log = Diary(self.INIT_DIR, file_name="unicode_test.log", async=False) log.log(unicode_str) log.close() with codecs.open(log.log_file.name, encoding=log.encoding) as f: line = f.readline() self.assertTrue(unicode_str in line)
def test_unicode_event_formatted(self): class PrettyEvent(Event): formatter = "{info}|{level_str}" unicode_str = u"\u3002" log = Diary(os.path.join(self.INIT_DIR, "unicode_test.log"), async=False, encoding="utf-8", event=PrettyEvent) log.log(unicode_str) log.close() with codecs.open(log.log_file.name, encoding=log.encoding) as f: line = f.readline() self.assertTrue(unicode_str in line)
def do_it(self, ev):
    """Split the chosen text file and publish it through the Diary API.

    Reads login, password, diary id, file name and split type from the
    form widgets. If login, diary id or file name is empty, a message box
    is shown and nothing else happens. Otherwise the file is loaded, fixed
    up with ``util.fix_characters`` and split:

    * split type ``1``: one post plus comments; each piece is stored next
      to the source file and then sent (post first, then every comment);
    * any other value: several posts, each stored and then sent.

    Any exception raised while logging in, splitting, storing or sending
    is shown to the user in a message box instead of propagating.

    :param ev: event object supplied by the caller; not used.
    """
    login = self.login_edit.get()
    password = self.password_edit.get()
    diary_id = self.diary_id_edit.get()
    filename = self.filename_edit.get()
    split_type = self.split_type.get()

    # Checked in this order; the first empty field wins.
    required_fields = (
        (login, "Логин не задан"),
        (diary_id, "Адрес сообщества не задан"),
        (filename, "Путь к файлу не задан"),
    )
    for value, complaint in required_fields:
        if not value:
            messagebox.showinfo("Error", complaint)
            return

    api = Diary()
    try:
        api.login(login, password)
        loaded = util.load(filename)
        prefix = os.path.splitext(filename)[0]
        header, text = find_header(util.fix_characters(loaded))

        if split_type == 1:
            post, comments = split_text_with_comments(header, text)
            util.store(prefix + "_post.txt", post)
            for number, comment in enumerate(comments, 1):
                util.store(prefix + "_comment_%d.txt" % number, comment)

            # Send to diary
            post_id = api.new_post(post, diary_id)
            for comment in comments:
                api.add_comment(post_id, comment)

            if len(comments) > 0:
                done_message = "Пост успешно опубликован, тексты комментариев ищите в файлах *comment_N.txt"
            else:
                done_message = "Пост успешно опубликован"
            messagebox.showinfo("Info", done_message)
        else:
            posts = split_text_with_posts(header, text)
            for number, post in enumerate(posts, 1):
                util.store(prefix + "_post_%d.txt" % number, post)

            # Send to diary
            for post in posts:
                api.new_post(post, diary_id)
            messagebox.showinfo("Info", "Посты успешно опубликованы. Тексты продублированы в файлы *post_N.txt")
    except Exception as e:
        messagebox.showinfo("Error", str(e))
def test_log(self): FILE_NAME = "test_log.txt" log = Diary(self.INIT_DIR, async=False, file_name=FILE_NAME) self.assertTrue(exists_with_ext(os.path.join( self.INIT_DIR, FILE_NAME ), '.txt') ) log.log(self.INFO) log.logdb.assert_event_logged(self.INFO, level="INFO", limit=1) log.close() self.assertEquals(os.path.split(log.log_file.name)[-1], FILE_NAME) with open(os.path.join(self.INIT_DIR, FILE_NAME)) as f: self.assertTrue(self.INFO in f.readline())
def __init__(self):
    """Build the main 'Diary' window and wire its widgets together.

    Creates the diary model and the text view, header, search bar and side
    box widgets, hands them all to a ``Linker``, gives that linker to the
    header, side box and search bar, and lays the widgets out: the header
    becomes the title bar, and the main box holds a left column (search bar
    above side box), a separator and the text view.
    """
    Gtk.Window.__init__(self, title='Diary')
    self.set_default_size(800, 600)
    # Closing the window ends the GTK main loop.
    self.connect('destroy', Gtk.main_quit)
    main_box = Gtk.Box()
    size_group = Gtk.SizeGroup(Gtk.SizeGroupMode.HORIZONTAL)
    # initializing all relevant classes
    diary = Diary("Test User")
    textview = Textview()
    header = Header()
    searchbar = Searchbar()
    sidebox = Sidebox()
    # transfer classes to linker
    linker = Linker(diary, header, sidebox, searchbar, textview)
    # transfer linker to header, sidebox and searchbar
    header.set_connection_linker(linker)
    sidebox.set_connection_linker(linker)
    sidebox.update_year()
    searchbar.set_connection_linker(linker)
    searchbar.set_revealer_signal()
    # connect size_group to header and sidebox
    header.set_size_group(size_group)
    sidebox.set_size_group(size_group)
    sidebox.set_revealer_signal()
    # setup relevant buttons in header
    header.set_backbutton()
    header.set_forwardbutton()
    header.set_searchbutton()
    header.set_addbutton()
    header.set_editbutton()
    self.set_titlebar(header)
    # create new side_box to add searchbar and sidebox
    # and add it to beginning of mainbox
    side_box = Gtk.VBox()
    side_box.pack_start(searchbar, False, False, 0)
    side_box.pack_start(sidebox, True, True, 0)
    side_box.set_hexpand(False)
    main_box.pack_start(side_box, False, False, 0)
    # add separator between side_box and textview
    separator = Gtk.HSeparator()
    separator.set_size_request(1, 0)
    main_box.pack_start(separator, False, False, 0)
    # add textview to end of mainbox (it is the last child packed)
    main_box.pack_start(textview, False, True, 0)
    self.add(main_box)
def _retrieve_from_diary(year, number):
    """Scrape every configured section of one diary issue.

    Builds the issue's base URL from ``URL_BASE``, ``year`` and ``number``,
    creates a ``Diary`` for it and, for each key of ``SECTIONS`` in sorted
    order, registers the section on the diary and lets ``WScrap`` scrape
    the section's page into it.

    :param year: year of the issue, passed to ``compose_url`` and ``Diary``.
    :param number: number of the issue, passed the same way.
    :return: the populated ``Diary`` instance.
    """
    url_base = compose_url(URL_BASE, year, number)
    dia = Diary(year, number, url_base)
    # Parenthesised single-argument form prints the same text on Python 2
    # and stays valid syntax on Python 3.
    print("Retrieving contents from: %s" % url_base)
    for section in sorted(SECTIONS):
        url = compose_url(url_base, section)
        dia.add_section(SECTIONS[section])
        WScrap.scrap_page(url, dia)
    return dia
def test_custom_everything(self):
    """Custom event class, custom DB class and an explicit level together.

    A ``UserEvent`` is logged with ``level=critical``; the first file line
    must equal the event's ``formatted()`` text plus a newline, and the
    matching ``user_activity`` row must carry the event's timestamp, level
    string, info text and user name in columns 0-3.
    """
    logger = Diary(self.API_DIR, file_name="withlevel.txt",
                   db_name="level_user_events.db", db=UserActivityDB,
                   event=UserEvent)
    event_to_log = UserEvent(self.INFO, user_name="super")
    logger.log(event_to_log, level=critical)
    logger.close()
    with open(logger.log_file.name) as f:
        # The original used assertTrue(expected, actual): the second
        # argument is only the failure message, so nothing was compared
        # and the check could never fail.
        self.assertEqual(event_to_log.formatted() + '\n', f.readline())
    with UserActivityDB(logger.db_file.name) as db:
        entries = db.cursor.execute("""SELECT * FROM user_activity WHERE log=(?) AND level LIKE (?) AND user=(?)""",
                                    (event_to_log.info,
                                     event_to_log.level_str,
                                     event_to_log.user_name))
        entry = entries.fetchone()
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(entry[0], event_to_log.dt)
        self.assertEqual(entry[1], event_to_log.level_str)
        self.assertEqual(entry[2], event_to_log.info)
        self.assertEqual(entry[3], event_to_log.user_name)
def test_write(self): FILE_NAME = "test_write.txt" log = Diary(self.INIT_DIR, async=False, file_name=FILE_NAME) simple_event = Event(self.INFO, "LEVEL") self.assertTrue(exists_with_ext(os.path.join( self.INIT_DIR, FILE_NAME ), '.txt') ) log._write(simple_event) log.logdb.assert_event_logged(self.INFO, level="LEVEL") log.close() self.assertEquals(os.path.split(log.log_file.name)[-1], FILE_NAME) self.assertIs(log.last_logged_event, simple_event) with open(os.path.join(self.INIT_DIR, FILE_NAME)) as f: self.assertTrue(self.INFO in f.readline())
def initialize_diary():
    """Create the 'digits_vs_letters' results diary with two notebooks.

    The diary is created under ``results`` with ``overwrite=False`` and SVG
    figures; verbose ``training`` and ``validation`` notebooks are added,
    in that order.

    :return: the configured ``Diary`` instance.
    """
    experiment_diary = Diary(name='digits_vs_letters', path='results',
                             overwrite=False, fig_format='svg')
    for notebook_name in ('training', 'validation'):
        experiment_diary.add_notebook(notebook_name, verbose=True)
    return experiment_diary
def test_custom_event(self):
    """A custom event class handles both plain strings and event instances.

    The file must contain the plain "Start logging" text and the formatted
    form of the last logged event; the event's info must be in the
    database at level "INFO".
    """
    logger = Diary(self.API_DIR, file_name="UserEvents.txt", event=UserEvent)
    logger.log("Start logging")
    logger.info(UserEvent(self.INFO, user_name="admin"))  # Directly log events
    logger.close()
    with open(logger.log_file.name) as f:
        contents = f.read()
    # assertIn shows the file contents on failure, which assertTrue did not.
    self.assertIn("Start logging", contents)
    self.assertIn(logger.last_logged_event.formatted(), contents)
    with DiaryDB(logger.db_file.name) as db:
        db.assert_event_logged(self.INFO, "INFO")
def initialize_diary():
    """Build a ``Diary`` filled with random scores and random attendance.

    ``NUMBER_OF_SCORES`` scores are added, each for a randomly chosen
    class, student and score value (drawn in that order). Then every
    (class, date, student) combination gets an attendance entry with a
    coin flip via ``random.choice([True, False])``.

    :return: the populated ``Diary`` instance.
    """
    diary = Diary(STUDENTS, CLASSES, DATES)

    for _ in xrange(NUMBER_OF_SCORES):
        # Draw order (class, student, score) determines the random stream.
        picked_class = random.choice(CLASSES)
        picked_student = random.choice(STUDENTS)
        picked_score = random.choice(SCORES)
        diary.add_score(picked_class, picked_student, picked_score)

    for clazz in CLASSES:
        for date in DATES:
            for student in STUDENTS:
                was_present = random.choice([True, False])
                if was_present:
                    diary.add_attendance(clazz, student, date)

    return diary
def test_custom_db_formatted_event(self):
    """A custom DB class stores the extra ``user`` column of a custom event.

    After logging a plain string and then a ``UserEvent`` at debug level,
    the file must contain both texts and the matching ``user_activity``
    row must carry the event's timestamp, level string, info text and user
    name in columns 0-3.
    """
    logger = Diary(self.API_DIR, file_name="withdb.txt",
                   db_name="user_events.db", db=UserActivityDB,
                   event=UserEvent)
    logger.log("Starting app")
    event_to_log = UserEvent("Super user logged in", user_name="super")
    logger.debug(event_to_log)
    logger.close()
    with open(logger.log_file.name) as f:
        contents = f.read()
    # assertIn shows the file contents on failure, which assertTrue did not.
    self.assertIn("Starting app", contents)
    self.assertIn(logger.last_logged_event.formatted(), contents)
    with UserActivityDB(logger.db_file.name) as db:
        entries = db.cursor.execute("""SELECT * FROM user_activity WHERE log=(?) AND level LIKE (?) AND user=(?)""",
                                    (event_to_log.info,
                                     event_to_log.level_str,
                                     event_to_log.user_name))
        entry = entries.fetchone()
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(entry[0], event_to_log.dt)
        self.assertEqual(entry[1], event_to_log.level_str)
        self.assertEqual(entry[2], event_to_log.info)
        self.assertEqual(entry[3], event_to_log.user_name)
def test_levels_setting_levels(self): log = Diary(self.INIT_DIR, db_name="levels.db", async=False) e = Event(self.INFO, level="") log.info(e) self.assertIs(e.level, levels.info) log.warn(e) self.assertIs(e.level, levels.warn) log.error(e) self.assertIs(e.level, levels.error) log.debug(e) self.assertIs(e.level, levels.debug) log.close() with DiaryDB(log.db_file.name) as db: db.assert_event_logged(self.INFO, level="INFO", limit=4) db.assert_event_logged(self.INFO, level="WARN", limit=4) db.assert_event_logged(self.INFO, level="ERROR", limit=4) db.assert_event_logged(self.INFO, level="DEBUG", limit=4)
def main():
    """Run the 'hempstalk' one-class experiment and export a results table.

    For every dataset, Monte-Carlo iteration, test fold and class, the
    training rows of that class are used to fit a single-component GMM, a
    ``BackgroundCheck`` wrapping a ``OneClassSVM`` and two bagged
    decision-tree ensembles (one trained against GMM samples, one against
    data from ``reject.create_reject_data``). The AUC of six scoring
    schemes is recorded per class, then combined into a prior-weighted AUC
    per fold and summarised as mean and standard deviation per dataset and
    model before being passed to ``export_results``.

    NOTE(review): this text was recovered from a flattened listing, so the
    nesting shown below is the most plausible reading -- confirm against
    the original file.
    """
    dataset_names = ['diabetes', 'ecoli', 'glass', 'heart-statlog',
                     'ionosphere', 'iris', 'letter', 'mfeat-karhunen',
                     'mfeat-morphological', 'mfeat-zernike', 'optdigits',
                     'pendigits', 'sonar', 'vehicle', 'waveform-5000']
    data = Data(dataset_names=dataset_names)
    diary = Diary(name='hempstalk', path='results', overwrite=False,
                  fig_format='svg')
    diary.add_notebook('cross_validation')
    # Columns for the DataFrame
    columns=['Dataset', 'MC iteration', 'N-fold id', 'Actual class', 'Model',
             'AUC', 'Prior']
    # Create a DataFrame to record all intermediate results
    df = pd.DataFrame(columns=columns)
    mc_iterations = 10
    n_folds = 10
    # NOTE(review): `gammas` is not read anywhere in this function; the SVM
    # below uses a fixed gamma=0.5.
    gammas = {"diabetes":0.00005, "ecoli":0.1, "glass":0.005,
              "heart-statlog":0.0001, "ionosphere":0.00005, "iris":0.0005,
              "letter":0.000005, "mfeat-karhunen":0.0001,
              "mfeat-morphological":0.0000001, "mfeat-zernike":0.000001,
              "optdigits":0.00005, "pendigits":0.000001, "sonar":0.001,
              "vehicle":0.00005, "waveform-5000":0.001}
    for i, (name, dataset) in enumerate(data.datasets.iteritems()):
        print('Dataset number {}'.format(i))
        data.sumarize_datasets(name)
        for mc in np.arange(mc_iterations):
            skf = StratifiedKFold(dataset.target, n_folds=n_folds,
                                  shuffle=True)
            test_folds = skf.test_folds
            for test_fold in np.arange(n_folds):
                x_train, y_train, x_test, y_test = separate_sets(
                        dataset.data, dataset.target, test_fold, test_folds)
                n_training = np.alen(y_train)
                # NOTE(review): the three w_auc_fold_* accumulators are
                # never updated or read below.
                w_auc_fold_dens = 0
                w_auc_fold_bag = 0
                w_auc_fold_com = 0
                prior_sum = 0
                for actual_class in dataset.classes:
                    tr_class = x_train[y_train == actual_class, :]
                    # Drop columns that hold a single distinct value within
                    # this class's training rows.
                    tr_class_unique_values = [np.unique(tr_class[:,column]).shape[0] for column in range(tr_class.shape[1])]
                    cols_keep = np.where(np.not_equal(tr_class_unique_values,1))[0]
                    tr_class = tr_class[:,cols_keep]
                    x_test_cleaned = x_test[:,cols_keep]
                    # 1 where the test row belongs to the current class.
                    t_labels = (y_test == actual_class).astype(int)
                    # NOTE(review): both operands are integers; under
                    # Python 2 without `from __future__ import division`
                    # this floors to 0 -- confirm the module enables true
                    # division.
                    prior = np.alen(tr_class) / n_training
                    # Skip classes with fewer than two training rows or
                    # with no positive test rows.
                    if np.alen(tr_class) > 1 and not all(t_labels == 0):
                        prior_sum += prior
                        # NOTE(review): n_c is computed but not used below.
                        n_c = tr_class.shape[1]
                        if n_c > np.alen(tr_class):
                            n_c = np.alen(tr_class)
                        # Train a Density estimator
                        model_gmm = GMM(n_components=1, covariance_type='diag')
                        model_gmm.fit(tr_class)
                        sv = OneClassSVM(nu=0.1, gamma=0.5)
                        bc = BackgroundCheck(estimator=sv)
                        bc.fit(tr_class)
                        svm_scores = bc.predict_proba(x_test_cleaned)[:, 1]
                        # Generate artificial data
                        new_data = model_gmm.sample(np.alen(tr_class))
                        # Train a Bag of Trees
                        bag = BaggingClassifier(
                                base_estimator=DecisionTreeClassifier(),
                                n_estimators=10)
                        # Real rows are labelled 1, generated rows 0.
                        new_data = np.vstack((tr_class, new_data))
                        y = np.zeros(np.alen(new_data))
                        y[:np.alen(tr_class)] = 1
                        bag.fit(new_data, y)
                        # Combine the results
                        probs = bag.predict_proba(x_test_cleaned)[:, 1]
                        scores = model_gmm.score(x_test_cleaned)
                        # Odds p/(1-p) times the density score shifted so
                        # that its minimum is 0; the clip avoids dividing
                        # by zero.
                        com_scores = (probs / np.clip(1.0 - probs, np.float32(1e-32), 1.0)) * (scores-scores.min())
                        # Generate our new data
                        # FIXME solve problem with #samples < #features
                        pca=True
                        if tr_class.shape[0] < tr_class.shape[1]:
                            pca=False
                        our_new_data = reject.create_reject_data(
                                tr_class, proportion=1,
                                method='uniform_hsphere', pca=pca,
                                pca_variance=0.99, pca_components=0,
                                hshape_cov=0, hshape_prop_in=0.99,
                                hshape_multiplier=1.5)
                        our_new_data = np.vstack((tr_class, our_new_data))
                        y = np.zeros(np.alen(our_new_data))
                        y[:np.alen(tr_class)] = 1
                        # Train Our Bag of Trees
                        our_bag = BaggingClassifier(
                                base_estimator=DecisionTreeClassifier(),
                                n_estimators=10)
                        our_bag.fit(our_new_data, y)
                        # Combine the results
                        our_probs = our_bag.predict_proba(x_test_cleaned)[:, 1]
                        our_comb_scores = (our_probs / np.clip(1.0 - our_probs, np.float32(1e-32), 1.0)) * (scores-scores.min())
                        # Scores for the Density estimator
                        auc_dens = roc_auc_score(t_labels, scores)
                        # Scores for the Bag of trees
                        auc_bag = roc_auc_score(t_labels, probs)
                        # Scores for the Combined model
                        auc_com = roc_auc_score(t_labels, com_scores)
                        # Scores for our Bag of trees (trained on our data)
                        auc_our_bag = roc_auc_score(t_labels, our_probs)
                        # Scores for our Combined model (our bag of trees
                        # combined with the density scores)
                        auc_our_comb = roc_auc_score(t_labels, our_comb_scores)
                        # Scores for the Background Check with SVM
                        auc_svm = roc_auc_score(t_labels, svm_scores)
                        # Create a new DataFrame to append to the original one
                        dfaux = pd.DataFrame([[name, mc, test_fold, actual_class, 'Combined', auc_com, prior],
                                              [name, mc, test_fold, actual_class, 'P(T$|$X)', auc_bag, prior],
                                              [name, mc, test_fold, actual_class, 'P(X$|$A)', auc_dens, prior],
                                              [name, mc, test_fold, actual_class, 'Our Bagg', auc_our_bag, prior],
                                              [name, mc, test_fold, actual_class, 'Our Combined', auc_our_comb, prior],
                                              [name, mc, test_fold, actual_class, 'SVM_BC', auc_svm, prior]],
                                             columns=columns)
                        df = df.append(dfaux, ignore_index=True)
                        # generate_and_save_plots(t_labels, scores, diary, name, mc, test_fold,
                        #                         actual_class, 'P(X$|$A)')
                        # generate_and_save_plots(t_labels, probs, diary, name, mc, test_fold,
                        #                         actual_class, 'P(T$|$X)')
                        # generate_and_save_plots(t_labels, com_scores, diary, name, mc, test_fold,
                        #                         actual_class, 'Combined')
                        # generate_and_save_plots(t_labels, our_probs, diary, name, mc, test_fold,
                        #                         actual_class, 'Our_Bagg')
                        # generate_and_save_plots(t_labels, our_comb_scores, diary, name, mc, test_fold,
                        #                         actual_class, 'Our_Combined')
                        # generate_and_save_plots(t_labels, svm_scores, diary,
                        #                         name, mc, test_fold,
                        #                         actual_class, 'SVM_BC')
    # Convert values to numeric
    df = df.convert_objects(convert_numeric=True)
    # Group everything except classes
    dfgroup_classes = df.groupby(by=['Dataset', 'MC iteration', 'N-fold id',
                                     'Model'])
    # Compute the Prior sum for each dataset, iteration and fold
    df['Prior_sum'] = dfgroup_classes['Prior'].transform(np.sum)
    # Compute the individual weighted AUC per each class and experiment
    df['wAUC'] = df.Prior * df.AUC / df.Prior_sum
    # Sum the weighted AUC of each class per each experiment
    series_wAUC = dfgroup_classes['wAUC'].sum()
    # Transform the series to a DataFrame
    df_wAUC = series_wAUC.reset_index(inplace=False)
    # Compute mean and standard deviation of wAUC per Dataset and model
    final_results = df_wAUC.groupby(['Dataset', 'Model'])['wAUC'].agg([np.mean, np.std])
    # Transform the series to a DataFrame
    final_results.reset_index(inplace=True)
    # Represent the results in a table format
    final_table = final_results.pivot_table(values=['mean', 'std'],
                                            index=['Dataset'],
                                            columns=['Model'])
    # Export the results in a csv and LaTeX file
    export_results(final_table)
y = np.hstack((np.ones(np.alen(x)), np.zeros(np.alen(r)))).T model_rej.fit(xr, y) return model_rej def train_classifier_model(x, y): model_clas = svm.SVC(probability=True) #model_clas = tree.DecisionTreeClassifier(max_depth=3) model_clas = model_clas.fit(x, y) return model_clas if __name__ == "__main__": diary = Diary(name='test_rgrpg', path='results', overwrite=False, fig_format='svg') diary.add_notebook('training') diary.add_notebook('validation') # for i in [6]: #range(1,4): n_iterations = 1 n_thresholds = 100 accuracies = np.empty((n_iterations, n_thresholds)) recalls = np.empty((n_iterations, n_thresholds)) for example in [2, 3, 4, 5, 6, 7, 8, 9]: np.random.seed(42) print('Runing example = {}'.format(example)) for iteration in range(n_iterations): #####################################################
def sgd_optimization_gauss(learning_rate=0.13, n_epochs=1000, batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model on synthetic data, recording diagnostics in a ``Diary``.

    After every epoch the model scores are used to fit an isotonic
    regression on the training set, and contour lines, reliability
    diagrams, histograms, accuracy and error curves are saved as figures.

    NOTE(review): this text was recovered from a flattened listing, so the
    nesting shown below is the most plausible reading -- confirm against
    the original file.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in each minibatch
    """
    #datasets = load_data(dataset)
    diary = Diary(name='experiment', path='results')
    diary.add_notebook('training')
    diary.add_notebook('validation')
    diary.add_notebook('data')
    samples=[4000,10000]
    diary.add_entry('data', ['samples', samples])
    diary.add_entry('data', ['num_classes', len(samples)])
    diary.add_entry('data', ['batch_size', batch_size])
    #means=[[0,0],[5,5]]
    #cov=[[[1,0],[0,1]],[[3,0],[0,3]]]
    #diary.add_entry('data', ['means', means])
    #diary.add_entry('data', ['covariance', cov])
    #datasets = generate_gaussian_data(means, cov, samples)
    datasets = generate_opposite_cs_data(samples)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    diary.add_entry('data', ['train_size', len(train_set_y.eval())])
    diary.add_entry('data', ['valid_size', len(valid_set_y.eval())])
    diary.add_entry('data', ['test_size', len(test_set_y.eval())])
    pt = PresentationTier()
    pt.plot_samples(train_set_x.eval(), train_set_y.eval())
    # compute number of minibatches for training, validation and testing
    # NOTE(review): the results feed xrange()/range() below, so `/` must be
    # integer division here (Python 2 semantics) -- confirm.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
    # Build a delta x delta grid spanning the first two training features;
    # it is scored every epoch to draw contour lines.
    delta = 20
    x_min = numpy.min(train_set_x.eval(),axis=0)
    x_max = numpy.max(train_set_x.eval(),axis=0)
    x1_lin = numpy.linspace(x_min[0], x_max[0], delta)
    x2_lin = numpy.linspace(x_min[1], x_max[1], delta)
    MX1, MX2 = numpy.meshgrid(x1_lin, x2_lin)
    x_grid = numpy.asarray([MX1.flatten(),MX2.flatten()]).T
    grid_set_x = theano.shared(numpy.asarray(x_grid,
                                             dtype=theano.config.floatX),
                               borrow=True)
    # NOTE(review): n_grid_batches is not used below.
    n_grid_batches = grid_set_x.get_value(borrow=True).shape[0] / batch_size
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    # allocate symbolic variables for the data
    index = T.lscalar() # index to a [mini]batch
    x = T.matrix('x') # the input data, one example per row
    y = T.ivector('y') # the labels are presented as 1D vector of
                       # [int] labels
    # construct the logistic regression class
    # input width comes from the training data, and the number of outputs
    # from the largest training label
    n_in = train_set_x.eval().shape[-1]
    n_out = max(train_set_y.eval()) + 1
    classifier = LogisticRegression(input=x, n_in=n_in, n_out=n_out)
    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)
    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})
    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
    # Scores
    grid_scores_model = theano.function(inputs=[],
            outputs=classifier.scores(),
            givens={
                x: grid_set_x})
    training_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )
    validation_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )
    # compute the gradient of cost with respect to theta = (W,b)
    g_w = T.grad(cost=cost, wrt=classifier.w)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.w, classifier.w - learning_rate * g_w),
               (classifier.b, classifier.b - learning_rate * g_b)]
    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    # Accuracy
    validation_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    training_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    # Loss
    # NOTE(review): these two use classifier.errors(y), the same output as
    # test_model/validate_model, yet the resulting curve is later plotted
    # with the label 'cross-entropy' -- confirm which metric is intended.
    training_error_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    validation_error_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000 # look as this many examples regardless
    patience_increase = 2 # wait this much longer when a new best is
                          # found
    improvement_threshold = 0.995 # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch
    print('Creating error and accuracy vectors')
    # One slot per epoch; index 0 is never written because `epoch` is
    # incremented before its first use as an index.
    error_train = numpy.zeros(n_epochs+1)
    error_val = numpy.zeros(n_epochs+1)
    accuracy_train = numpy.zeros(n_epochs+1)
    accuracy_val = numpy.zeros(n_epochs+1)
    # Results for Isotonic Regression
    error_train_ir = numpy.zeros(n_epochs+1)
    error_val_ir = numpy.zeros(n_epochs+1)
    accuracy_train_ir = numpy.zeros(n_epochs+1)
    accuracy_val_ir = numpy.zeros(n_epochs+1)
    # NOTE(review): best_params and CS are assigned but not used below.
    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    # NOTE(review): time.clock() was removed in Python 3.8; this function is
    # Python 2 code (print statements, xrange).
    start_time = time.clock()
    ir = IsotonicRegression(increasing=True, out_of_bounds='clip',
                            y_min=0, y_max=1)
    done_looping = False
    epoch = 0
    CS = None
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            # The returned cost is not used; the call matters for the
            # parameter updates it performs.
            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number (note: the name shadows the builtin `iter`)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                    (epoch, minibatch_index + 1, n_train_batches,
                    this_validation_loss * 100.))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of best'
                       ' model %f %%') %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))
            if patience <= iter:
                done_looping = True
                break
        # ---- per-epoch diagnostics ----
        scores_grid = grid_scores_model()
        fig = pt.update_contourline(grid_set_x.eval(), scores_grid, delta)
        diary.save_figure(fig, filename='contour_lines', extension='svg')
        scores_train = numpy.asarray([training_scores_model(i) for i
                                      in range(n_train_batches)]).flatten()
        scores_val = numpy.asarray([validation_scores_model(i) for i
                                    in range(n_valid_batches)]).flatten()
        print('Learning Isotonic Regression from TRAINING set')
        ir.fit(scores_train, train_set_y.eval())
        scores_train_ir = ir.predict(scores_train)
        print('IR predict validation probabilities')
        scores_val_ir = ir.predict(scores_val)
        scores_set = (scores_train, scores_val, scores_train_ir, scores_val_ir)
        labels_set = (train_set_y.eval(), valid_set_y.eval(),
                      train_set_y.eval(), valid_set_y.eval())
        legend = ['train', 'valid', 'iso. train', 'iso. valid']
        fig = pt.plot_reliability_diagram(scores_set, labels_set, legend)
        diary.save_figure(fig, filename='reliability_diagram', extension='svg')
        # TODO add reliability map
        # NOTE(review): `(scores_train)` is not a tuple, just scores_train
        # itself, and `prob_set` is never used: the call below still passes
        # the four-element labels_set and legend -- confirm the intended
        # arguments.
        scores_set = (scores_train)
        prob_set = (train_set_y.eval())
        fig = pt.plot_reliability_map(scores_set, labels_set, legend)
        diary.save_figure(fig, filename='reliability_map', extension='svg')
        fig = pt.plot_histogram_scores(scores_set)
        diary.save_figure(fig, filename='histogram_scores', extension='svg')
        # Performance
        accuracy_train[epoch] = numpy.asarray([training_accuracy_model(i) for i
                                    in range(n_train_batches)]).flatten().mean()
        accuracy_val[epoch] = numpy.asarray([validation_accuracy_model(i) for i
                                    in range(n_valid_batches)]).flatten().mean()
        error_train[epoch] = numpy.asarray([training_error_model(i) for i
                                    in range(n_train_batches)]).flatten().mean()
        error_val[epoch] = numpy.asarray([validation_error_model(i) for i
                                    in range(n_valid_batches)]).flatten().mean()
        accuracy_train_ir[epoch] = compute_accuracy(scores_train_ir,
                                                    train_set_y.eval())
        accuracy_val_ir[epoch] = compute_accuracy(scores_val_ir,
                                                  valid_set_y.eval())
        error_train_ir[epoch] = compute_cross_entropy(scores_train_ir,
                                                      train_set_y.eval())
        error_val_ir[epoch] = compute_cross_entropy(scores_val_ir,
                                                    valid_set_y.eval())
        diary.add_entry('training', [error_train[epoch], accuracy_train[epoch]])
        diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])
        # NOTE(review): the slice [1:epoch] stops before the value just
        # stored at index `epoch`, so each plot lags one epoch -- confirm
        # whether [1:epoch+1] was intended.
        accuracy_set = (accuracy_train[1:epoch], accuracy_val[1:epoch],
                        accuracy_train_ir[1:epoch], accuracy_val_ir[1:epoch])
        fig = pt.plot_accuracy(accuracy_set, legend)
        diary.save_figure(fig, filename='accuracy', extension='svg')
        error_set = (error_train[1:epoch], error_val[1:epoch],
                     error_train_ir[1:epoch], error_val_ir[1:epoch])
        fig = pt.plot_error(error_set, legend, 'cross-entropy')
        diary.save_figure(fig, filename='error', extension='svg')
    pt.update_contourline(grid_set_x.eval(), scores_grid, delta, clabel=True)
    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
                 (best_validation_loss * 100., test_score * 100.))
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
def test_debug(self): DB_NAME = 'levels.db' log = Diary(self.INIT_DIR, async=False, db_name=DB_NAME) log.debug(self.INFO) log.logdb.assert_event_logged(self.INFO, "DEBUG", 1) log.close()
from diary import Diary


def _run_demo():
    """Read the diary file, store one dated entry, then read it again."""
    book = Diary()
    book.file_read()
    book.file_safe('2001-10-01', 'Hahaha nothing')
    book.file_read()


if __name__ == '__main__':
    _run_demo()
# Experiment hyperparameters. `train_size`, `optimizer`, `binarize` and
# `add_noise` are expected to be defined before this point.
num_epochs = 30
batch_size = 5000
inner_batch_size = 5000
nb_classes = 2
noise_proportion = 0.25
score_lin = np.linspace(0, 1, 100)
minibatch_method = 'lineal'  # 'random', 'lineal'
n_hidden = [25, 25]
output_activation = 'sigmoid'  # 'isotonic_regression' # sigmoid
# Two classes use the binary loss; anything else the categorical one.
if nb_classes == 2:
    loss = 'binary_crossentropy'
else:
    loss = 'categorical_crossentropy'

# Record every setting in the 'hyperparameters' notebook of the diary.
diary = Diary(name='experiment', path='results')
diary.add_notebook('hyperparameters')
diary.add_entry('hyperparameters', ['train_size', train_size])
diary.add_entry('hyperparameters', ['num_classes', nb_classes])
diary.add_entry('hyperparameters', ['batch_size', batch_size])
diary.add_entry('hyperparameters', ['inner_batch_size', inner_batch_size])
diary.add_entry('hyperparameters', ['minibatch_method', minibatch_method])
diary.add_entry('hyperparameters', ['output_activation', output_activation])
diary.add_entry('hyperparameters', ['loss', loss])
diary.add_entry('hyperparameters',
                ['optimizer', optimizer.get_config()['name']])
# .items() exists on both Python 2 and Python 3 dicts; .iteritems() is
# Python 2 only.
for key, value in optimizer.get_config().items():
    diary.add_entry('hyperparameters', [key, value])
diary.add_entry('hyperparameters', ['binarize', binarize])
diary.add_entry('hyperparameters', ['add_noise', add_noise])
diary.add_entry('hyperparameters', ['noise', noise_proportion])
diary.add_notebook('training')
def main(dataset_names=None, estimator_type="kernel", mc_iterations=1,
         n_folds=10, seed_num=42):
    """Score 'BC', 'O-norm' and 'T-norm' one-class decompositions per fold.

    For every dataset, Monte Carlo iteration and stratified test fold, the
    test split is passed through ``generate_outliers`` and three
    ``OcDecomposition`` models are evaluated on it. Each accuracy is written
    to the 'validation' notebook of the ``results_Tax2008`` diary and
    appended to a results frame that is handed to ``export_summary``.

    Args:
        dataset_names: Names of the datasets to load. Defaults to
            ``['glass', 'hepatitis', 'ionosphere', 'vowel']``.
        estimator_type: Estimator used by the background check: one of
            ``"svm"``, ``"gmm"``, ``"gmm3"`` or ``"kernel"``.
        mc_iterations: Number of Monte Carlo repetitions per dataset.
        n_folds: Number of stratified folds per repetition.
        seed_num: Value passed to ``np.random.seed`` before each dataset.

    Raises:
        ValueError: If ``estimator_type`` is not a supported name.
    """
    if dataset_names is None:
        dataset_names = ['glass', 'hepatitis', 'ionosphere', 'vowel']

    # Per-dataset values; currently only referenced by the commented-out
    # tuning code further down.
    bandwidths_o_norm = {
        'glass': 0.09,
        'hepatitis': 0.105,
        'ionosphere': 0.039,
        'vowel': 0.075
    }
    bandwidths_bc = {
        'glass': 0.09,
        'hepatitis': 0.105,
        'ionosphere': 0.039,
        'vowel': 0.0145
    }
    bandwidths_t_norm = {
        'glass': 0.336,
        'hepatitis': 0.015,
        'ionosphere': 0.0385,
        'vowel': 0.0145
    }
    tuned_mus = {
        'glass': [0.094, 0.095, 0.2, 0.0, 0.0, 0.1],
        'vowel': [0.0, 0.0, 0.5, 0.5, 0.5, 0.0]
    }
    tuned_ms = {
        'glass': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        'vowel': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    }

    # Bandwidths actually used for every dataset.
    bandwidth_o_norm = 0.05
    bandwidth_t_norm = 0.05
    bandwidth_bc = 0.05

    # Diary to save the partial and final results
    diary = Diary(name='results_Tax2008', path='results', overwrite=False,
                  fig_format='svg')
    # Hyperparameters for this experiment (folds, iterations, seed)
    diary.add_notebook('parameters', verbose=True)
    # Summary for each dataset
    diary.add_notebook('datasets', verbose=False)
    # Partial results for validation
    diary.add_notebook('validation', verbose=True)
    # Final results
    diary.add_notebook('summary', verbose=True)

    columns = ['dataset', 'method', 'mc', 'test_fold', 'acc']
    df = MyDataFrame(columns=columns)

    diary.add_entry('parameters', [
        'seed', seed_num, 'mc_it', mc_iterations, 'n_folds', n_folds,
        'estimator_type', estimator_type, 'bw_o', bandwidth_o_norm, 'bw_t',
        bandwidth_t_norm, 'bw_bc', bandwidth_bc
    ])

    data = Data(dataset_names=dataset_names)
    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for name, dataset in data.datasets.items():
        if name in ['letter', 'shuttle']:
            dataset.reduce_number_instances(0.1)
    export_datasets_description_to_latex(data, path=diary.path)

    for name, dataset in data.datasets.items():
        # Re-seed before each dataset.
        np.random.seed(seed_num)
        dataset.print_summary()
        diary.add_entry('datasets', [dataset.__str__()])
        # accuracies_tuned = np.zeros(mc_iterations * n_folds)
        # if name in bandwidths_o_norm.keys():
        #     bandwidth_o_norm = bandwidths_o_norm[name]
        #     bandwidth_t_norm = bandwidths_t_norm[name]
        #     bandwidth_bc = bandwidths_bc[name]
        # else:
        #     bandwidth_o_norm = np.mean(bandwidths_o_norm.values())
        #     bandwidth_t_norm = np.mean(bandwidths_t_norm.values())
        #     bandwidth_bc = np.mean(bandwidths_bc.values())
        for mc in np.arange(mc_iterations):
            skf = StratifiedKFold(dataset.target, n_folds=n_folds,
                                  shuffle=True)
            test_folds = skf.test_folds
            for test_fold in np.arange(n_folds):
                x_train, y_train, x_test, y_test = separate_sets(
                    dataset.data, dataset.target, test_fold, test_folds)
                # if name in ['glass', 'hepatitis', 'ionosphere', 'thyroid',
                #             'iris', 'heart-statlog', 'diabetes', 'abalone',
                #             'mushroom', 'spambase']:
                x_test, y_test = generate_outliers(x_test, y_test)
                # elif name == 'vowel':
                #     x_train = x_train[y_train <= 5]
                #     y_train = y_train[y_train <= 5]
                #     y_test[y_test > 5] = 6
                # elif dataset.n_classes > 2:
                #     x_train = x_train[y_train <= dataset.n_classes/2]
                #     y_train = y_train[y_train <= dataset.n_classes/2]
                #     y_test[y_test > dataset.n_classes/2] = dataset.n_classes+1
                # else:
                #     continue

                if estimator_type == "svm":
                    est = OneClassSVM(nu=0.5, gamma=1.0 / x_train.shape[1])
                elif estimator_type == "gmm":
                    est = GMM(n_components=1)
                elif estimator_type == "gmm3":
                    est = GMM(n_components=3)
                elif estimator_type == "kernel":
                    est = MyMultivariateKernelDensity(kernel='gaussian',
                                                      bandwidth=bandwidth_bc)
                else:
                    # Without this branch an unsupported name only surfaced
                    # later as an unbound-local error on ``est``.
                    raise ValueError(
                        "Unsupported estimator_type: %r" % (estimator_type,))

                estimators = None
                bcs = None
                if estimator_type == "kernel":
                    estimators, bcs = fit_estimators(
                        MyMultivariateKernelDensity(kernel='gaussian',
                                                    bandwidth=bandwidth_bc),
                        x_train, y_train)

                # Untuned background check
                bc = BackgroundCheck(estimator=est, mu=0.0, m=1.0)
                oc = OcDecomposition(base_estimator=bc)
                if estimators is None:
                    oc.fit(x_train, y_train)
                else:
                    oc.set_estimators(bcs, x_train, y_train)
                accuracy = oc.accuracy(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'BC', 'mc', mc, 'test_fold',
                    test_fold, 'acc', accuracy
                ])
                df = df.append_rows([[name, 'BC', mc, test_fold, accuracy]])

                e = MyMultivariateKernelDensity(kernel='gaussian',
                                                bandwidth=bandwidth_o_norm)
                oc_o_norm = OcDecomposition(base_estimator=e,
                                            normalization="O-norm")
                if estimators is None:
                    oc_o_norm.fit(x_train, y_train)
                else:
                    oc_o_norm.set_estimators(estimators, x_train, y_train)
                accuracy_o_norm = oc_o_norm.accuracy(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'O-norm', 'mc', mc,
                    'test_fold', test_fold, 'acc', accuracy_o_norm
                ])
                df = df.append_rows(
                    [[name, 'O-norm', mc, test_fold, accuracy_o_norm]])

                e = MyMultivariateKernelDensity(kernel='gaussian',
                                                bandwidth=bandwidth_t_norm)
                oc_t_norm = OcDecomposition(base_estimator=e,
                                            normalization="T-norm")
                if estimators is None:
                    oc_t_norm.fit(x_train, y_train)
                else:
                    oc_t_norm.set_estimators(estimators, x_train, y_train)
                accuracy_t_norm = oc_t_norm.accuracy(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'T-norm', 'mc', mc,
                    'test_fold', test_fold, 'acc', accuracy_t_norm
                ])
                df = df.append_rows(
                    [[name, 'T-norm', mc, test_fold, accuracy_t_norm]])

                # Tuned background check
                # if name in tuned_mus.keys():
                #     mus = tuned_mus[name]
                #     ms = tuned_ms[name]
                # else:
                #     mus = None
                #     ms = None
                # bc = BackgroundCheck(estimator=est, mu=0.0, m=1.0)
                # oc_tuned = OcDecomposition(base_estimator=bc)
                # oc_tuned.fit(x_train, y_train, mus=mus, ms=ms)
                # accuracy_tuned = oc_tuned.accuracy(x_test, y_test, mus=mus,
                #                                    ms=ms)
                # accuracies_tuned[mc * n_folds + test_fold] = accuracy_tuned
                # diary.add_entry('validation', ['dataset', name,
                #                                'method', 'BC-tuned',
                #                                'mc', mc,
                #                                'test_fold', test_fold,
                #                                'acc', accuracy_tuned])
                # df = df.append_rows([[name, 'BC-tuned', mc, test_fold,
                #                       accuracy_tuned]])

    # NOTE(review): the reviewed text had lost its indentation, so whether
    # this call ran once at the end or once per dataset could not be
    # determined -- confirm against version control.
    export_summary(df, diary)
def test_init_new_db(self): log = Diary(self.NEW_DB_PATH, async=False) log.info(self.INFO) log.close() with DiaryDB(self.NEW_DB_PATH) as db: db.assert_event_logged(self.INFO, level="INFO", limit=1)
try: perms = [ perm['name'] for perm in vk.method('groups.getTokenPermissions')['permissions'] ] if 'manage' not in perms or 'messages' not in perms: call_exit('У ключа недостаточно прав') except ApiError: call_exit('Неверный ключ доступа') try: vk.method('groups.getOnlineStatus', {'group_id': parser['Vk']['group_id']}) except Exception: call_exit('В настройках группы отключены сообщения или неверный id группы') d = Diary(parser['Diary']['diary_login'], parser['Diary']['diary_password'], session) try: d.auth() except ValueError: call_exit('Неверный логин или пароль') except requests.exceptions.HTTPError: call_exit('Электронный дневник не работает. Попробуйте запустить позже') payload = { 'group_id': parser['Vk']['group_id'], 'enabled': 1, 'api_version': '5.92', 'message_new': 1 } try: vk.method('groups.setLongPollSettings', payload)
from diary import Diary
import os
import time


def is_down(website, timeout=10):
    """Return True when a single ping to ``website`` exits with a non-zero status.

    ``timeout`` is passed to ping's ``-w`` option.
    """
    command = 'ping -c 1 -w {timeout} {website}'.format(
        timeout=timeout, website=website
    )
    exit_status = os.system(command)
    return exit_status != 0


# Create a logger with an output file
logger = Diary("google_status.txt")
# If a logger should point to a db give it a db
# logger = Diary("status.db")

# Poll forever, logging the status and then pausing five seconds.
while True:
    if not is_down("google.com"):
        logger.log("Google is up.")
    else:
        logger.error("GOOGLE IS DOWN!")
    time.sleep(5)
def test_custom_level(self):
    """Log with the ``critical`` level and compare the written line to the standard format."""
    logger = Diary(os.path.join(self.API_DIR))
    logger.log("URGENT ATTENTION NEEDED", level=critical)
    logger.close()
    with open(logger.log_file.name) as f:
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(f.readline(),
                         formats.standard(logger.last_logged_event) + '\n')
def test_set_db_exc(self):
    """set_db() should raise ValueError when the Diary has no database file."""
    log = Diary(self.TXT_PATH)
    try:
        self.assertIsNone(log.db_file)
        with self.assertRaises(
                ValueError,
                msg="Cannot set a database without a database file"):
            log.set_db()
    finally:
        # The logger was previously never closed here; close it on every
        # path so its resources do not outlive the test.
        log.close()
def main(dataset_names=None, estimator_type="gmm", mc_iterations=20,
         n_folds=5, n_ensemble=100, seed_num=42):
    """Compare the confident-classifier ensemble ('our') with the 'Li2014' ensemble.

    For every dataset, Monte Carlo iteration and stratified test fold, two
    ``Ensemble`` models are fitted and their accuracy and log-loss are
    written to the 'validation' notebook of the ``results_Li2014`` diary and
    appended to a results frame that is handed to ``export_summary``.

    Args:
        dataset_names: Names of the datasets to load. Defaults to the union
            of the dataset lists defined below.
        estimator_type: Estimator for the background check: one of
            ``"svm"``, ``"gmm"``, ``"gmm3"`` or ``"mymvn"``.
        mc_iterations: Number of Monte Carlo repetitions per dataset.
        n_folds: Number of stratified folds per repetition.
        n_ensemble: Value passed to ``Ensemble`` as ``n_ensemble``.
        seed_num: Value passed to ``np.random.seed`` before each dataset.

    Raises:
        ValueError: If ``estimator_type`` is not a supported name.
    """
    if dataset_names is None:
        # All the datasets used in Li2014
        datasets_li2014 = [
            'abalone', 'balance-scale', 'credit-approval', 'dermatology',
            'ecoli', 'german', 'heart-statlog', 'hepatitis', 'horse',
            'ionosphere', 'lung-cancer', 'libras-movement', 'mushroom',
            'diabetes', 'landsat-satellite', 'segment', 'spambase', 'wdbc',
            'wpbc', 'yeast'
        ]
        datasets_hempstalk2008 = [
            'diabetes', 'ecoli', 'glass', 'heart-statlog', 'ionosphere',
            'iris', 'letter', 'mfeat-karhunen', 'mfeat-morphological',
            'mfeat-zernike', 'optdigits', 'pendigits', 'sonar', 'vehicle',
            'waveform-5000'
        ]
        datasets_others = [
            'diabetes', 'ecoli', 'glass', 'heart-statlog', 'ionosphere',
            'iris', 'letter', 'mfeat-karhunen', 'mfeat-morphological',
            'mfeat-zernike', 'optdigits', 'pendigits', 'sonar', 'vehicle',
            'waveform-5000', 'scene-classification', 'tic-tac', 'autos',
            'car', 'cleveland', 'dermatology', 'flare', 'page-blocks',
            'segment', 'shuttle', 'vowel', 'zoo', 'abalone', 'balance-scale',
            'credit-approval', 'german', 'hepatitis', 'lung-cancer'
        ]
        # Datasets that we can add but need to be reduced
        datasets_to_add = ['MNIST']
        # De-duplicate names that appear in more than one list.
        dataset_names = list(
            set(datasets_li2014 + datasets_hempstalk2008 + datasets_others))

    # Diary to save the partial and final results
    diary = Diary(name='results_Li2014', path='results', overwrite=False,
                  fig_format='svg')
    # Hyperparameters for this experiment (folds, iterations, seed)
    diary.add_notebook('parameters', verbose=True)
    # Summary for each dataset
    diary.add_notebook('datasets', verbose=False)
    # Partial results for validation
    diary.add_notebook('validation', verbose=True)
    # Final results
    diary.add_notebook('summary', verbose=True)

    columns = ['dataset', 'method', 'mc', 'test_fold', 'acc', 'logloss']
    df = MyDataFrame(columns=columns)

    diary.add_entry('parameters', [
        'seed', seed_num, 'mc_it', mc_iterations, 'n_folds', n_folds,
        'n_ensemble', n_ensemble, 'estimator_type', estimator_type
    ])

    data = Data(dataset_names=dataset_names)
    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for name, dataset in data.datasets.items():
        if name in ['letter', 'shuttle']:
            dataset.reduce_number_instances(0.1)
    export_datasets_description_to_latex(data, path=diary.path)

    for name, dataset in data.datasets.items():
        # Re-seed before each dataset.
        np.random.seed(seed_num)
        dataset.print_summary()
        diary.add_entry('datasets', [dataset.__str__()])
        for mc in np.arange(mc_iterations):
            skf = StratifiedKFold(dataset.target, n_folds=n_folds,
                                  shuffle=True)
            test_folds = skf.test_folds
            for test_fold in np.arange(n_folds):
                x_train, y_train, x_test, y_test = separate_sets(
                    dataset.data, dataset.target, test_fold, test_folds)

                # Binary discriminative classifier
                sv = SVC(kernel='linear', probability=True)
                # Density estimator for the background check
                if estimator_type == "svm":
                    gamma = 1.0 / x_train.shape[1]
                    est = OneClassSVM(nu=0.1, gamma=gamma)
                elif estimator_type == "gmm":
                    est = GMM(n_components=1)
                elif estimator_type == "gmm3":
                    est = GMM(n_components=3)
                elif estimator_type == "mymvn":
                    est = MyMultivariateNormal()
                else:
                    # Without this branch an unsupported name only surfaced
                    # later as an unbound-local error on ``est``.
                    raise ValueError(
                        "Unsupported estimator_type: %r" % (estimator_type,))

                # Multiclass discriminative model with one-vs-one binary class.
                ovo = OvoClassifier(base_classifier=sv)
                classifier = ConfidentClassifier(classifier=ovo,
                                                 estimator=est, mu=0.5,
                                                 m=0.5)
                ensemble = Ensemble(base_classifier=classifier,
                                    n_ensemble=n_ensemble)
                # classifier = ConfidentClassifier(classifier=sv,
                #                                  estimator=est, mu=0.5,
                #                                  m=0.5)
                # ovo = OvoClassifier(base_classifier=classifier)
                # ensemble = Ensemble(base_classifier=ovo,
                #                     n_ensemble=n_ensemble)
                # fit() returns the bootstrap samples, which are passed to
                # the Li2014 ensemble below.
                xs_bootstrap, ys_bootstrap = ensemble.fit(x_train, y_train)
                accuracy = ensemble.accuracy(x_test, y_test)
                log_loss = ensemble.log_loss(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'our', 'mc', mc, 'test_fold',
                    test_fold, 'acc', accuracy, 'logloss', log_loss
                ])
                df = df.append_rows(
                    [[name, 'our', mc, test_fold, accuracy, log_loss]])

                # Li2014: EP-CC model
                # The classification confidence is used in learning the weights
                # of the base classifier as well as in weighted voting.
                ensemble_li = Ensemble(n_ensemble=n_ensemble, lambd=1e-8)
                ensemble_li.fit(x_train, y_train, xs=xs_bootstrap,
                                ys=ys_bootstrap)
                accuracy_li = ensemble_li.accuracy(x_test, y_test)
                log_loss_li = ensemble_li.log_loss(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'Li2014', 'mc', mc,
                    'test_fold', test_fold, 'acc', accuracy_li, 'logloss',
                    log_loss_li
                ])
                df = df.append_rows(
                    [[name, 'Li2014', mc, test_fold, accuracy_li,
                      log_loss_li]])

    # NOTE(review): the reviewed text had lost its indentation, so whether
    # this call ran once at the end or once per dataset could not be
    # determined -- confirm against version control.
    export_summary(df, diary)
# - get students average score in class # - hold students name and surname # - Count total attendance of student # The default interface for interaction should be python interpreter. # Please, use your imagination and create more functionalities. # Your project should be able to handle entire school. # If you have enough courage and time, try storing (reading/writing) # data in text files (YAML, JSON). # If you have even more courage, try implementing user interface. # #Try to expand your implementation as best as you can. #Think of as many features as you can, and try implementing them. #Make intelligent use of pythons syntactic sugar (overloading, iterators, generators, etc) #Most of all: CREATE GOOD, RELIABLE, READABLE CODE. #The goal of this task is for you to SHOW YOUR BEST python programming skills. #Impress everyone with your skills, show off with your code. # #Your program must be runnable with command "python task.py". #Show some usecases of your library in the code (print some things) # #When you are done upload this code to your github repository. # #Delete these comments before commit! #Good luck. from diary import Diary, Student, SchoolClass diary = Diary() schoolclass = SchoolClass("biology") student = Student("majlosz", "ef") #schoolclass.add_students([student])
def test_init_no_ext(self): log = Diary(self.NO_EXT_PATH, async=False) log.info(self.INFO) log.close() with open(self.NO_EXT_PATH) as f: self.assertTrue(self.INFO in f.readline())
def main(dataset_names=None):
    """Evaluate a background-check one-class decomposition per dataset and fold.

    For every dataset, Monte Carlo iteration and stratified test fold, an
    ``OcDecomposition`` over a ``BackgroundCheck`` is fitted and its accuracy
    is written to the 'validation' notebook of the ``results_Krawczyk2015``
    diary. A pivot table with the mean and standard deviation of the
    accuracy per dataset and method is written to the 'summary' notebook.

    Args:
        dataset_names: Names of the datasets to load. Defaults to the list
            defined below.
    """
    if dataset_names is None:
        dataset_names = [
            'autos',
            'car',
            'cleveland',
            'dermatology',
            'ecoli',
            'flare',
            'glass',
            'led7digit',
            'lymphography',
            'nursery',
            'page-blocks',
            'pendigits',
            'satimage',
            'segment',
            #'shuttle',
            'vehicle',
            'vowel',
            'yeast',
            'zoo',
            'auslan'
        ]

    seed_num = 42
    mc_iterations = 5
    n_folds = 2
    estimator_type = "svm"

    # Diary to save the partial and final results
    diary = Diary(name='results_Krawczyk2015', path='results',
                  overwrite=False, fig_format='svg')
    # Hyperparameters for this experiment (folds, iterations, seed)
    diary.add_notebook('parameters', verbose=True)
    # Summary for each dataset
    diary.add_notebook('datasets', verbose=False)
    # Partial results for validation
    diary.add_notebook('validation', verbose=True)
    # Final results
    diary.add_notebook('summary', verbose=True)

    columns = ['dataset', 'method', 'mc', 'test_fold', 'acc']
    df = MyDataFrame(columns=columns)

    diary.add_entry('parameters', [
        'seed', seed_num, 'mc_it', mc_iterations, 'n_folds', n_folds,
        'estimator_type', estimator_type
    ])

    data = Data(dataset_names=dataset_names)
    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for name, dataset in data.datasets.items():
        # Re-seed before each dataset.
        np.random.seed(seed_num)
        dataset.print_summary()
        diary.add_entry('datasets', [dataset.__str__()])
        for mc in np.arange(mc_iterations):
            skf = StratifiedKFold(dataset.target, n_folds=n_folds,
                                  shuffle=True)
            test_folds = skf.test_folds
            for test_fold in np.arange(n_folds):
                x_train, y_train, x_test, y_test = separate_sets(
                    dataset.data, dataset.target, test_fold, test_folds)

                if estimator_type == "svm":
                    est = OneClassSVM(nu=0.5, gamma=0.5)
                elif estimator_type == "gmm":
                    est = GMM(n_components=3)

                bc = BackgroundCheck(estimator=est)
                oc = OcDecomposition(base_estimator=bc)
                oc.fit(x_train, y_train)
                accuracy = oc.accuracy(x_test, y_test)
                diary.add_entry('validation', [
                    'dataset', name, 'method', 'our', 'mc', mc, 'test_fold',
                    test_fold, 'acc', accuracy
                ])
                df = df.append_rows([[name, 'our', mc, test_fold, accuracy]])

    # NOTE(review): the reviewed text had lost its indentation, so whether
    # this summary ran once at the end or once per dataset could not be
    # determined -- confirm against version control.
    df = df.convert_objects(convert_numeric=True)
    table = df.pivot_table(values=['acc'], index=['dataset'],
                           columns=['method'], aggfunc=[np.mean, np.std])
    diary.add_entry('summary', [table])
class FileProcessDB(DiaryDB):
    """DiaryDB subclass that writes events to a ``files`` table."""

    def create_tables(self):
        """Create the ``files`` table if it does not already exist."""
        self.cursor.execute(''' CREATE TABLE IF NOT EXISTS files (inputDT TIMESTAMP, level TEXT, info TEXT, path TEXT, success INT) ''')

    def log(self, event):
        """Insert one row built from the event's dt, level_str, info, path and success."""
        # Using the connection as a context manager wraps the insert in a
        # transaction.
        with self.conn:
            self.cursor.execute(''' INSERT INTO files(inputDT, level, info, path, success) VALUES(?, ?, ?, ?, ?)''',
                                (event.dt, event.level_str, event.info,
                                 event.path, event.success))


logger = Diary("file_info", db_name="file_processes.db", db=FileProcessDB,
               file_name="file_processes.log")
target_dir = "data"
# files_to_process = os.listdir(target_dir)
files_to_process = range(10)  # We will pretend the numbers up to 10 are files
for f in files_to_process:
    # Evaluate process_file once per file. It used to be called again for
    # every branch test, so a file could be processed up to three times and
    # the branches could see different results.
    status = process_file(f)
    if status == 1:
        e = FileProcessEvent("Success!", 1, f)
        logger.info(e)
    elif status == 2:
        e = FileProcessEvent("The goal was not achieved", 2, f)
        logger.warn(e)
    elif status == 3:
        # NOTE(review): the reviewed text stopped after this assignment;
        # confirm whether this event should be logged like the others.
        e = FileProcessEvent("An error occurred", 3, f)