def test_recommendation(self):
    movies = utils.load_movie_list()
    my_ratings = np.zeros(len(movies))
    my_ratings[1 - 1] = 4
    my_ratings[98 - 1] = 2
    my_ratings[7 - 1] = 3
    my_ratings[12 - 1] = 5
    my_ratings[54 - 1] = 4
    my_ratings[64 - 1] = 5
    my_ratings[66 - 1] = 3
    my_ratings[69 - 1] = 5
    my_ratings[183 - 1] = 4
    my_ratings[226 - 1] = 5
    my_ratings[355 - 1] = 5
    R = utils.load_from_file('data/R.bin').astype(float)
    Y = utils.load_from_file('data/Y.bin')
    Y = np.column_stack((my_ratings, Y))
    R = np.column_stack((my_ratings != 0, R))
    model = recommender.Recommender(Y=Y, R=R, reg=10, num_features=10)
    model.learn(maxiter=1000, verbose=True, normalize=False, tol=1e-1)
    user_id = 0
    rated_ids = [i for i in range(Y.shape[0]) if R[i, user_id] == 1]
    print("USER {} HAS RATED:".format(user_id))
    for i in rated_ids:
        print(" RATED <{:.1f}> FOR '{}'".format(Y[i, user_id], movies[i]))
    recommendations = model.recommendations(user_id=user_id)
    print("RECOMMENDATIONS:")
    for (i, rating) in recommendations:
        print(" <{:.1f}> {}".format(rating, movies[i]))

def main():
    R = utils.load_from_file('data/R.bin').astype(float)
    Y = utils.load_from_file('data/Y.bin')
    # reg_list = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]
    reg_list = [1e3, 1e4]
    num_features_list = [45, 50]
    model = Recommender(Y=Y, R=R)
    # for reg in reg_list:
    #     print("::: Trying reg = {}".format(reg))
    #     model.learn(verbose=True, reg=reg, num_features=DEFAULT_NUM_FEATURES, maxiter=DEFAULT_MAX_ITER)
    #     rmse = model.rmse()
    #     mae = model.mae()
    #     with open("log.csv", "a", newline='') as csvfile:
    #         csvwriter = csv.writer(csvfile)
    #         csvwriter.writerow([DEFAULT_NUM_FEATURES, reg, rmse, mae])
    for num_features in num_features_list:
        print("::: Trying num_feature = {}".format(num_features))
        model.learn(verbose=True, reg=DEFAULT_REG, num_features=num_features, maxiter=DEFAULT_MAX_ITER)
        rmse = model.rmse()
        mae = model.mae()
        with open("log.csv", "a", newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow([num_features, DEFAULT_REG, rmse, mae])

def init():
    global igrice
    global ramovi
    global procesori
    global graficke
    igrice = utils.load_from_file('data/igrice.txt')
    ramovi = utils.load_from_file('data/ramovi.txt')
    graficke = utils.load_from_file('data/graficke.txt')
    procesori = utils.load_from_file('data/procesori.txt')

def test_learn_and_save(self):
    # num_users, num_movies, num_features = 10, 10, 5
    R = utils.load_from_file('data/R.bin').astype(float)
    Y = utils.load_from_file('data/Y.bin')
    model = recommender.Recommender(Y=Y, R=R, reg=10, num_features=10)
    model.learn(maxiter=10, verbose=True)
    X, Theta = model.X, model.Theta
    filename = "models/recommender.bin"
    model.save(filename)
    model = recommender.Recommender.load(filename)
    np.testing.assert_almost_equal(X, model.X, decimal=2)
    np.testing.assert_almost_equal(Theta, model.Theta, decimal=2)

def evaluate(args):
    config = Config(args)
    train, test, word_to_id, id_to_word, embeddings = utils.load_from_file()
    config.word_to_id = word_to_id
    config.id_to_word = id_to_word
    with tf.Graph().as_default():
        logger.info('Building model...')
        start = time.time()
        model = RNNModel(config, embeddings)
        logger.info('took %.2f seconds', time.time() - start)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        with tf.Session() as session:
            session.run(init)
            saver.restore(session, model.config.model_path)
            sentences, masks, predictions = model.output(session, train)
    originals, predictions = lookup_words(predictions, sentences, id_to_word)
    output = zip(originals, masks, predictions)
    with open('eval_results.txt', 'w') as f:
        utils.save_results(f, output)

def train(args):
    config = Config(args)
    train, test, id_to_word, embedding_lookup, embeddings = utils.load_from_file()
    config.id_to_word = id_to_word
    config.embedding_lookup = embedding_lookup
    utils.save(config.output_path, embedding_lookup, id_to_word)
    handler = logging.FileHandler(config.log_output)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
    logging.getLogger().addHandler(handler)
    with tf.Graph().as_default():
        logger.info('Building model...')
        start = time.time()
        model = RNNModel(config, embeddings)
        logger.info('took %.2f seconds', time.time() - start)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        with tf.Session() as session:
            session.run(init)
            model.fit(session, saver, train, test)
            sentences, masks, predictions = model.output(session, train)
    originals, predictions = lookup_words(predictions, sentences, id_to_word)
    output = zip(originals, masks, predictions)
    with open('results.txt', 'w') as f:
        utils.save_results(f, output)

def step3_fill_lengths():
    """Retrieve the lengths of the pages via APIs"""
    cuisines = load_from_file('data/cuisines_langs.dat')
    # TODO: refactor grouping together pages, do only one request for every xyz.wikipedia.org
    params = {'action': 'query', 'prop': 'info', 'format': 'json'}
    skipped = []
    for kk, vv in tqdm(cuisines.items()):
        for lang_prefix, page in tqdm(vv['languages'].items()):
            if lang_prefix != 'en':
                wiki_url = page['wiki_url']
                api_url = f'https://{wiki_url}/w/api.php'
                params['titles'] = page['title']
                with requests.Session() as session:
                    post = session.post(api_url, params)
                if post.ok:
                    res = post.json()
                else:
                    print("Issue in POST call")
                    print(f"{api_url}\n{params}")
                    continue  # no response to parse for this page
                page_data = res['query']['pages'][next(iter(res['query']['pages']))]
                if 'length' in page_data:
                    vv['languages'][lang_prefix]['length'] = page_data['length']
                else:
                    skipped.append((kk, lang_prefix))
    if skipped:
        for page, lang in skipped:
            print(f"[Skip] {page} in language {lang} (unavailable length)")
    save_to_file('data/cuisines_length.dat', cuisines)

def step2_populate_other_languages():
    """Get URLs and titles of cuisines in multiple languages"""
    cuisines_raw = load_from_file('data/cuisines_raw.dat')
    wiki_url = 'https://en.wikipedia.org/w/api.php'
    params = {
        'action': 'query',
        'prop': 'langlinks|info',
        'llprop': 'url',
        'lllimit': 'max',
        'format': 'json'
    }
    print("Getting links for every cuisine for every language...")
    for vv in tqdm(cuisines_raw.values()):
        pageid = vv['pageid']
        params['pageids'] = pageid
        with requests.Session() as session:
            post = session.post(wiki_url, params)
            res = post.json()
        res_info = res['query']['pages'][pageid]
        if 'langlinks' in res_info:
            # Use a separate loop variable so the outer `vv` is not shadowed
            vv['languages'] = {
                ll['lang']: {
                    'title': ll['*'],
                    'wiki_url': strip_url(ll['url'])
                }
                for ll in res_info['langlinks']
            }
            vv['languages']['en'] = {}
            vv['languages']['en']['length'] = res_info['length']
            vv['languages']['en']['title'] = res['query']['pages'][pageid]['title']
    save_to_file('data/cuisines_langs.dat', cuisines_raw)

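# Hedged illustration of the MediaWiki response shape that step2_populate_other_languages
# relies on, inferred only from the keys it reads (the page id, titles, lengths and URLs
# below are made-up placeholders, not data from the project): with prop=langlinks|info and
# llprop=url, each page entry carries 'title' and 'length', plus a 'langlinks' list whose
# items expose 'lang', '*' (the localized title) and 'url'.
example_langlinks_response = {
    'query': {
        'pages': {
            '12345': {                           # keyed by page id, as a string
                'pageid': 12345,
                'title': 'Italian cuisine',      # -> vv['languages']['en']['title']
                'length': 98765,                 # -> vv['languages']['en']['length']
                'langlinks': [
                    {'lang': 'it', '*': 'Cucina italiana',
                     'url': 'https://it.wikipedia.org/wiki/Cucina_italiana'},
                ],
            }
        }
    }
}
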
def load(self):
    try:
        file_path = join(INT_TEMP_SAVE_FOLDER,
                         next(reversed(sorted(listdir(INT_TEMP_SAVE_FOLDER)))))
        self._surface.deserialise(load_from_file(file_path))
        print('[OKAY] file has been loaded from:', file_path)
    except StopIteration:
        print('[FAIL] there is no file in:', INT_TEMP_SAVE_FOLDER)

def __init__(self):
    self.stop_words = set(stopwords.words('english'))
    self.SVC_pipeline = utils.load_from_file("lsvc_pipeline")
    self.categories = [
        'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
    ]

def test_cf_cost_regularization(self):
    # print("Loading dataset...")
    R = utils.load_from_file('data/R.bin')
    Y = utils.load_from_file('data/Y.bin')
    # print("Loading pre-trained parameters...")
    _ = loadmat('data/movie_params.mat')
    X = _.get('X')
    Theta = _.get('Theta')
    # reduce dataset
    num_users = 4
    num_movies = 5
    num_features = 3
    X = X[:num_movies, :num_features]
    Theta = Theta[:num_users, :num_features]
    Y = Y[:num_movies, :num_users]
    R = R[:num_movies, :num_users]
    params = np.append(X.flatten(), Theta.flatten())
    cost = utils.cf_cost(params=params, Y=Y, R=R, num_features=num_features, reg=1.5)[0]
    self.assertAlmostEqual(31.34, cost, places=2)

def test_cf_cost(self):
    # print("Loading dataset...")
    R = utils.load_from_file('data/R.bin')
    Y = utils.load_from_file('data/Y.bin')
    # print("Loading pre-trained parameters...")
    _ = loadmat('data/movie_params.mat')
    X = _.get('X')
    Theta = _.get('Theta')
    # reduce dataset
    num_users = 4
    num_movies = 5
    num_features = 3
    X = X[:num_movies, :num_features]
    Theta = Theta[:num_users, :num_features]
    Y = Y[:num_movies, :num_users]
    R = R[:num_movies, :num_users]
    params = np.append(X.flatten(), Theta.flatten())
    cost = utils.cf_cost(params=params, Y=Y, R=R, num_features=num_features, reg=0)[0]
    # print("Expected cost = 22.22")
    # print("Computed cost = {:.2f}".format(cost))
    self.assertAlmostEqual(22.22, cost, places=2)

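# A minimal sketch (an assumption about utils.cf_cost, not the project's implementation)
# of the regularized collaborative-filtering cost that the two tests above exercise.
# Only the cost term is shown; the real cf_cost also returns a gradient, which is why
# the tests index its result with [0].
import numpy as np

def cf_cost_sketch(params, Y, R, num_features, reg):
    num_movies, num_users = Y.shape
    # Unpack the flat parameter vector back into movie features X and user parameters
    # Theta, matching np.append(X.flatten(), Theta.flatten()) in the tests.
    X = params[:num_movies * num_features].reshape(num_movies, num_features)
    Theta = params[num_movies * num_features:].reshape(num_users, num_features)
    # Squared error only over the entries that were actually rated (R == 1)
    error = (X @ Theta.T - Y) * R
    cost = 0.5 * np.sum(error ** 2)
    # L2 regularization over both parameter matrices
    cost += (reg / 2) * (np.sum(Theta ** 2) + np.sum(X ** 2))
    return cost
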
def __init__(self, feature_json_file, timeout=10, max_workers=10):
    """
    Pipeline that manages scoring with multiple custom feature scorers.

    This is the API that almost all scorers will access when training
    a Retrieve & Rank instance with custom features.

    Args:
        feature_json_file (str): Path to a feature configuration file.
            This file defines the pipeline of custom scorers used.

    Raises:
        se.ScorerConfigurationException: If any of the individual scorers
            raise during configuration, or if the file feature_json_file
            cannot be found or is not of the proper type.
    """
    scorer_dict = utils.load_from_file(feature_json_file)
    self._document_scorers = scorer_dict.get('document', [])
    self._query_scorers = scorer_dict.get('query', [])
    self._query_document_scorers = scorer_dict.get('query_document', [])
    self._timeout = timeout
    self._interval = 0.1
    self._thread_executor = futures.ThreadPoolExecutor(max_workers)

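# Hypothetical illustration of the structure load_from_file(feature_json_file) is
# expected to return, inferred only from the keys read in __init__ above; the scorer
# entries themselves are placeholders, not real configuration:
#
#     {
#         "document": [...],        # document-level scorer configurations
#         "query": [...],           # query-level scorer configurations
#         "query_document": [...]   # query/document-pair scorer configurations
#     }
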
def test_save_and_load(self):
    team_members = main.get_team_members()
    team_data = {'date': team_members}
    # save data to file
    data_file_name = '../data/test/web_data_json_test.data'
    utils.save_to_file(data_file_name, team_data)
    #
    # test loading from the file and if the name of Johanna can be found
    #
    member_data = utils.load_from_file(data_file_name)
    a_day_data = {}
    # take the first item in the dictionary; doesn't matter which one it is
    for key in member_data:
        a_day_data = member_data[key]
        break
    found_Johanna = False
    for d in a_day_data:
        if d['name'] == 'Johanna Nicoletta':
            found_Johanna = True
    self.assertEqual(found_Johanna, True, "Can not save or load from file")

def train_l2sp(self, env_name="Merging-v0"):
    """
    Directly trains on env_name
    """
    bs2model = {1: B1R, 3: B3R, 5: B5R, 7: B7R}
    model_info = bs2model[int(self.bs)]
    model_dir = os.path.join(model_info[0], model_info[1], model_info[2])
    data, params = utils.load_from_file(model_dir)
    self.model = PPO2L2SP.load(model_dir, original_params=params)
    for seed in [201, 202, 203, 204, 205]:
        self.seed = seed
        self.experiment_name = f"{model_info[1]}_B{self.bs}L_L2SP{seed}"
        print("EXPT NAME: ", self.experiment_name)
        self.experiment_dir = os.path.join(self.experiment_dir1, self.experiment_name)
        self.create_eval_dir()
        env = gym.make(env_name)
        env.barrier_size = self.bs
        env = DummyVecEnv([lambda: env])
        self.model.set_env(env)
        eval_env = gym.make(env_name)
        eval_env.barrier_size = self.bs
        self.model = train(self.model, eval_env, self.timesteps, self.experiment_dir,
                           self.is_save, self.eval_save_period, self.rets_path, 0)

def main():
    if not Path('data/cuisines_raw.dat').exists():
        execute_steps(STEPS, [i for i in range(0, len(STEPS))])
    elif not Path('data/cuisines_langs.dat').exists():
        execute_steps(STEPS, [i for i in range(1, len(STEPS))])
    elif not Path('data/cuisines_length.dat').exists():
        execute_steps(STEPS, [i for i in range(2, len(STEPS))])
    elif not Path('data/table_dataframe.dat').exists():
        execute_steps(STEPS, [i for i in range(3, len(STEPS))])
    if not Path('data/table_dataframe_full.dat').exists():
        step4_preprocess_data_frame(create_full_df=True)
    if not Path('data/wiki_languages.dat').exists():
        get_wikimedia_languages_list()
    cc1 = load_from_file('data/cuisines_raw.dat')
    cc2 = load_from_file('data/cuisines_langs.dat')
    cc3 = load_from_file('data/cuisines_length.dat')
    wl = load_from_file('data/wiki_languages.dat')
    df = load_from_file('data/table_dataframe.dat')
    df_full = load_from_file('data/table_dataframe_full.dat')
    # Plot dataframe
    step5_create_plots(df, df_full)

def load(filename):
    import utils
    print("Loading recommender model from '{}'".format(filename))
    return utils.load_from_file(filename)

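# Hedged usage sketch: the paths and hyperparameters are copied from the
# test_learn_and_save test earlier in this section, not prescribed by the library.
# Train, persist with save(), then restore with this load() helper.
#
#     model = recommender.Recommender(Y=Y, R=R, reg=10, num_features=10)
#     model.learn(maxiter=10, verbose=True)
#     model.save("models/recommender.bin")
#     restored = recommender.Recommender.load("models/recommender.bin")
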
def step4_preprocess_data_frame(create_full_df=False):
    """Create pandas DataFrames filtering out undesired data"""
    cuisines = load_from_file('data/cuisines_length.dat')
    # Set thresholds (depending on whether the full dataframe is being created)
    if create_full_df:
        threshold_min_voice_length = 0
        threshold_min_cuisines = 0
        threshold_min_languages = 0
        filename = 'data/table_dataframe_full.dat'
    else:
        threshold_min_voice_length = defs.THRESHOLD_MIN_VOICE_LENGTH
        threshold_min_cuisines = defs.THRESHOLD_MIN_CUISINES
        threshold_min_languages = defs.THRESHOLD_MIN_LANGUAGES
        filename = 'data/table_dataframe.dat'
    # Set pandas view options
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)
    # Find languages to consider
    languages = set()
    for kk, vv in cuisines.items():
        for lang in [*vv['languages'].keys()]:
            languages.add(lang)
    languages = [*languages]
    languages.sort()
    languages.insert(0, 'cuisine')
    # Create full table
    df_fulltable = pd.DataFrame(columns=languages)
    for kk, vv in tqdm(cuisines.items()):
        entry = {}
        for kk2, vv2 in vv['languages'].items():
            if 'length' in vv2 and kk2 in languages:
                entry[kk2] = vv2['length']
        # Add cuisine name (removing "cuisine")
        entry['Cuisine'] = kk.replace(" cuisine", "")
        df_fulltable = df_fulltable.append(entry, ignore_index=True)
    short_voices = []
    for (c_name, c_data) in df_fulltable.iteritems():
        if c_name != 'Cuisine':
            for entry in c_data.iteritems():
                if not pd.isna(entry[1]) and int(entry[1]) < threshold_min_voice_length:
                    short_voices.append((c_name, entry[0], entry[1]))
    for entry in short_voices:
        df_fulltable.at[entry[1], entry[0]] = np.nan
    # TODO: Fix: depending on the order, different results are obtained
    # Keep all languages that have at least THRESHOLD_MIN_CUISINES written
    df_fulltable.dropna(axis=1, thresh=threshold_min_cuisines, inplace=True)
    # Keep all cuisines that appear in at least THRESHOLD_MIN_LANGUAGES languages
    df_fulltable = df_fulltable[df_fulltable.isnull().sum(axis=1) <
                                len(df_fulltable.columns) - threshold_min_languages]
    df_fulltable.reset_index(drop=True, inplace=True)
    df_fulltable.set_index('Cuisine', inplace=True)
    df_fulltable.columns.names = ['Wikipedia language']
    save_to_file(filename, df_fulltable)

def test_rating_normalization(self):
    Y = utils.load_from_file('data/Y.bin')[:10, :10]
    R = utils.load_from_file('data/R.bin')[:10, :10]
    Ynorm, Ymean = utils.normalize_ratings(Y, R)
    Ymean_target = np.array([4.2, 3, 4, 4, 3, 5, 3.66666667, 3.33333333, 4.5, 3])
    np.testing.assert_almost_equal(Ymean, Ymean_target, decimal=2)

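# A minimal sketch (an assumption, not the project's utils.normalize_ratings) of the
# normalization the test above checks: Ymean holds the mean of each movie's *rated*
# entries only, and Ynorm is Y with that mean subtracted from the rated entries,
# consistent with Ymean_target (e.g. 4.2 for the first movie).
import numpy as np

def normalize_ratings_sketch(Y, R):
    num_movies = Y.shape[0]
    Ymean = np.zeros(num_movies)
    Ynorm = np.zeros(Y.shape)
    for i in range(num_movies):
        rated = R[i, :] == 1          # only entries that users actually rated
        Ymean[i] = Y[i, rated].mean()
        Ynorm[i, rated] = Y[i, rated] - Ymean[i]
    return Ynorm, Ymean
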
def test(self, params):
    print('\n%s: testing...' % datetime.now())
    sys.stdout.flush()
    session = Session(self._graph, self.results_dir, params['model_name'])
    if 'init_step' not in params or params['init_step'] is None:
        init_step = session.init_step
    else:
        init_step = params['init_step']
    if 'step_num' not in params or params['step_num'] is None:
        step_num = int(np.ceil(float(self.fold_size) / self._batch_size))
    else:
        step_num = params['step_num']

    results_file_name = Tester.RESULTS_FILE + '-' + str(init_step) + '-' + \
        self.fold_name + '-' + str(step_num) + '.json'
    results_file = os.path.join(self.results_dir, results_file_name)

    if not params['load_results'] or not os.path.isfile(results_file):
        session.init(self._classifier, init_step, params['restoring_file'])
        session.start()
        if init_step == 0:
            print('WARNING: testing an untrained model')
        total_step_num = step_num * params['epoch_num']
        test_num = total_step_num * self._batch_size
        print('%s: test_num=%d' % (datetime.now(), step_num * self._batch_size))
        print('%s: epoch_num=%d' % (datetime.now(), params['epoch_num']))

        results = {}
        results['losses'] = np.zeros(test_num, dtype=np.float32)
        results['probs'] = np.zeros((test_num, Reader.CLASSES_NUM), dtype=np.float32)
        results['labels'] = np.zeros(test_num, dtype=np.int64)

        start_time = time.time()
        for step in range(total_step_num):
            # print('%s: eval_iter=%d' % (datetime.now(), i))
            loss_batch, prob_batch, label_batch = session.run([
                self._cross_entropy_losses, self._probs, self._input['labels']
            ])
            begin = step * self._batch_size
            results['losses'][begin:begin + self._batch_size] = loss_batch
            results['probs'][begin:begin + self._batch_size, :] = prob_batch
            results['labels'][begin:begin + self._batch_size] = label_batch
            if (step + 1) % step_num == 0:
                print("Epoch num: %d" % ((step + 1) // step_num))
            if session.should_stop():
                break

        duration = time.time() - start_time
        print('%s: duration = %.1f sec' % (datetime.now(), float(duration)))
        sys.stdout.flush()
        if self.writer is not None:
            summary_str = session.run(self._all_summaries)
            self.writer.write_summaries(summary_str, init_step)
        session.stop()
    else:
        print('WARNING: using precomputed results')
        results = utils.load_from_file(results_file)

    results['loss'] = np.mean(results['losses']).item()
    results = self.get_all_stats(results)
    if self.writer is not None and not params['load_results']:
        self.writer.write_scalars({
            'losses/testing/cross_entropy_loss': results['loss'],
            'accuracy': results['accuracy']
        }, init_step)
    utils.dump_to_file(results, results_file)
    return init_step, results['loss']

# Import standard modules (needed below for path.insert and join)
from sys import path
from os.path import join

# Import blender modules
import bpy

# Import plastey modules
path.insert(0, '.')
from utils import load_from_file, name_of_vertex
from const import INT_PERMANENT_FOLDER, OBJ_GEOMETRY

#------------------------------------------------------------------------------#
FILE_NAME = '.bz2'
SURF_TYPE = 0  # plane=0, sphere=1

#------------------------------------------------------------------------------#
coords = load_from_file(join(INT_PERMANENT_FOLDER, FILE_NAME))
try:
    # Adjust locations of the dots
    for i, coord in enumerate(zip(*(iter(coords),) * 3)):
        bpy.data.objects[name_of_vertex(i)].location = coord
    # Deselect everything
    bpy.ops.object.select_all(action='DESELECT')
    # Get and select the surface object
    surface = bpy.data.objects[OBJ_GEOMETRY]
    surface.select = True
    bpy.context.scene.objects.active = surface
    # If surface is a plane
    if not SURF_TYPE:
        modifier = surface.modifiers.new('Solidify', 'SOLIDIFY')

import pyopencl as cl
import pyopencl.array as cl_array
import utils
import numpy as np
import math
import time

FILE_PATH = './data/data_clean.json'
COUNT_RUN = 5
mem_flags = cl.mem_flags

if __name__ == '__main__':
    clusters = utils.load_from_file(FILE_PATH)['clusters']
    allData = []
    clusterInfo = []
    countData = 0
    for c in clusters:
        countData += len(c)
        for member in c:
            allData.append(member)
        clusterInfo.append(len(c))
    data = np.array(allData, np.float32)
    data_len = len(allData)
    vec_size = len(data[0])
    clusterInfoBuff = np.array(clusterInfo, np.int32)
    # create empty matrix
    matrix = np.zeros(data_len**2, np.float32)

def step5_create_plots(df, df_full):
    """Produce and store graphs/plots"""
    figures = {}
    pd.options.plotting.backend = 'plotly'
    # Prepare data frames
    df = df.transpose()
    df_full = df_full.drop(['cuisine'], axis=1)
    # Create heatmap
    fig_hm = create_heatmap(df, defs.X_ADD_FLAGS, defs.MARKER_ON_DIAGONAL_CELLS)
    figures['correlation_heatmap'] = fig_hm
    # Create full heatmap
    if defs.PRODUCE_FULL_HEATMAP:
        fig_hm_full = create_heatmap(df_full.transpose(), False, False, True)
        figures['correlation_heatmap_full'] = fig_hm_full
    # Create statistics graphs
    fig_sum_cuisines = create_bar_sum_cuisines(df_full)
    figures['cumulative_cuisines_length'] = fig_sum_cuisines
    fig_sum_languages = create_bar_sum_languages(df_full)
    figures['cumulative_languages_length'] = fig_sum_languages
    # Create histogram
    if defs.PRODUCE_HISTOGRAM:
        fig_hist = df_full.hist()
        figures['histogram'] = fig_hist
    # Create statistics
    if defs.STORE_STATISTICS:
        pd.set_option('display.float_format', '{:.0f}'.format)
        pd.set_option('display.max_rows', None)
        pd.set_option('display.max_colwidth', None)
        # Cuisine leaderboard
        sum_data = df_full.transpose().sum().astype(int)
        leaderboard = sum_data.to_frame('length').sort_values('length', ascending=False)[0:30]
        leaderboard.index = [
            f"{flag} {cuisine}" for flag, cuisine in zip(
                get_flags_from_demonyms(leaderboard.index),
                leaderboard.index.to_list())
        ]
        with open(Path('results/cuisines_leaderboard.md'), 'w') as fp:
            fp.write(leaderboard.to_markdown())
        # Top voices
        cc2 = load_from_file('data/cuisines_langs.dat')
        df_topvoices = pd.DataFrame(columns=['cuisine', 'language', 'length', 'url'])
        # yapf: disable
        for cuisine, rw in df_full.iterrows():
            for lang, length in rw.to_frame('length').sort_values('length', ascending=False)[0:3]['length'].iteritems():
                if not np.isnan(length):
                    df_topvoices = df_topvoices.append({'cuisine': cuisine, 'language': lang, 'length': length}, ignore_index=True)
        # yapf: enable
        df_topvoices = df_topvoices.sort_values('length', ascending=False)[0:10]
        df_topvoices.reset_index(drop=True, inplace=True)
        urls = {}
        for idx, row in df_topvoices.iterrows():
            wikipage = cc2[f'{row["cuisine"]} cuisine']['languages'][row['language']]
            if row['language'] == 'en':
                wikiurl = 'en.wikipedia.org'
            else:
                wikiurl = wikipage['wiki_url']
            urls[idx] = (f'[{row["cuisine"]} cuisine ({row["language"]})]' +
                         '(https://' + wikiurl + '/wiki/' +
                         wikipage['title'].replace(' ', '_') + ')')
        for kk, vv in urls.items():
            df_topvoices['url'][kk] = vv
        df_topvoices['cuisine'] = [
            f"{flag} {cuisine}" for flag, cuisine in zip(
                get_flags_from_demonyms(df_topvoices['cuisine']),
                df_topvoices['cuisine'].to_list())
        ]
        df_topvoices['language'] = get_languages_names(df_topvoices['language'])
        with open(Path('results/cuisines_top.md'), 'w') as fp:
            fp.write(df_topvoices.to_markdown())
    # Show plots in-browser
    if defs.SHOW_RESULTS:
        for fig_name, fig in figures.items():
            fig.show()
    # Store results (html/images)
    Path('results').mkdir(parents=True, exist_ok=True)
    for fig_name, fig in figures.items():
        if defs.STORE_HTML:
            with open(Path(f'results/{fig_name}.html'), 'w+') as fp:
                fp.write(fig.to_html())
        if defs.STORE_IMAGE:
            # Remove axes titles for image
            fig.update_layout(xaxis={'title': {'text': ''}},
                              yaxis={'title': {'text': ''}})
            with open(Path(f'results/{fig_name}.jpg'), 'wb+') as fp:
                fp.write(fig.to_image(format='jpg', width=1920, height=1080, scale=2.0))

def recover_from_auto_save(self):
    self._auto_save_time = self._origo[PROP_TEXT_TIMER]
    self._surface.deserialise(load_from_file(INT_AUTO_SAVE_FILE))
    print('[OKAY] file has been recovered from:', INT_AUTO_SAVE_FILE)

def __init__(self):
    self.stop_words = set(stopwords.words('english'))
    self.NB_pipeline = utils.load_from_file("nb_pipeline.pkl")

    '''
    dates = list(member_data.keys())
    dates.sort()
    if len(dates) > 1:
        if len(member_data[dates[-1]]) > len(member_data[dates[-2]]):
            return True
    return False


if __name__ == "__main__":
    # Test sum donation
    dstr = "2017-09-01"
    print("date str of {} is {}.".format(dstr, str2date(dstr)))
    #
    fname = os.path.join(utils.get_raw_data_path(), 'member_data.txt')
    member_data = utils.load_from_file(fname)
    sum_donation = get_sum_donations(member_data[max(member_data)])
    print("sum donation is {} dollar".format(sum_donation))

    # Test donations by division
    fname = os.path.join(utils.get_raw_data_path(), 'members_divisions.txt')
    members_divisions = utils.load_from_file(fname)
    fname = os.path.join(utils.get_raw_data_path(), 'ericsson_divisions.txt')
    ericsson_divisions = utils.load_from_file(fname)
    dbd = get_donation_by_division(member_data, members_divisions)
    print(dbd)

    # Test all members divisions
    members_divs = get_all_members_division(member_data, members_divisions)
    print(members_divs)
