Exemplo n.º 1
0
def check_completion(file_list):
    """Return True iff every bookmark in file_list marks a completed run.

    Parameters:
        file_list: iterable of bookmark identifiers accepted by load_bookmark.

    Returns:
        bool: False if any bookmark reports an unfinished run, True when
        all runs are complete (vacuously True for an empty file_list,
        matching the original loop's behavior).
    """
    # load_bookmark returns (finished_posts, complete); only the completion
    # flag matters here. all() short-circuits on the first incomplete run,
    # exactly like the original early "return False".
    # (Fixes the non-idiomatic "if complete == False" comparison — PEP 8.)
    return all(load_bookmark(bookmark)[1] for bookmark in file_list)
Exemplo n.º 2
0
                                stdout=f,
                                stderr=f)

            #run corresponding baseline models in background - if don't have results already
            if run_baseline:
                for mode in baseline_modes:
                    #define output filename for baseline model
                    #NOTE(review): mode[1:] drops the leading character of the flag
                    #(presumably '-', e.g. '-avg' -> 'avg') for use in the filename - confirm
                    baseline_outfile = "sim_results/%s/run_results/%s_baseline_%s_%dtrain_%dtest_%d-%d%s%s" % (
                        subreddit, subreddit, mode[1:], arguments['-n_train'],
                        arguments['-n'], arguments['-y'], arguments['-m'],
                        size_class, "_run%d" % run if repeat_runs > 1 else "")
                    outfile_lists[mode].append(baseline_outfile)

                    #no data for this baseline configuration, run the test
                    #check the bookmark saved by the model to know if finished or not
                    #(load_bookmark returns (finished_posts, complete); a complete
                    #bookmark means results already exist, so the run is skipped)
                    finished_posts, complete = load_bookmark(baseline_outfile)
                    if complete:
                        print("skipping", baseline_outfile)

                    else:
                        #build command arguments list
                        #base first
                        baseline_command = [
                            'time', 'python3', 'baseline_model.py', '-s',
                            subreddit, '-o', baseline_outfile, mode, '-v'
                        ]
                        #add the dict args - but only the ones that make sense for the baseline model
                        #(loop body continues beyond this excerpt)
                        for arg in [
                                '-n', '-n_train', '-m', '-y', '-min', '-max',
                                '-timestamps'
                        ]:
Exemplo n.º 3
0
#keep cascades and posts in sync: drop any cascades whose post was filtered out
if len(test_posts) != len(test_cascades):
	test_cascades = functions_gen_cascade_model.filter_dict_by_list(test_cascades, list(test_posts.keys()))

all_metrics = []		#keep all metrics, separate for each post/observed time run, dump them all at the end

#for outputting comment timestamps
timestamps = {} 	#post_id -> time (or true) -> list of timestamps

#how often do we want to dump? every 20 tests or so
#20 / number of observation settings = number of posts to finish before dumping
#(ceiling division: integer quotient, plus 1 when there is a remainder)
dump_count = 20 // len(observed_list) + (20 % len(observed_list) > 0) 
#NOTE(review): the expression above is >= 1 for any len(observed_list) >= 1 and
#raises ZeroDivisionError when it is 0, so this guard looks unreachable - confirm intent
if dump_count == 0: dump_count = 20	#make sure not modding by 0 if small run

#load list of finished posts for this run, so we can skip ones that are already done
#(if no bookmark, will get back empty set and False flag)
finished_posts, complete = functions_gen_cascade_model.load_bookmark(outfile)
#if finished all posts already, exit
if complete:
	vprint("Entire post set already simulated, exiting")
	exit(0)
else: vprint("Skipping %d already simulated posts" % len(finished_posts))

#process all posts (or just one, if doing that)
post_count = 0
vprint("Processing %d post" % len(test_posts) + ("s" if len(test_posts) > 1 else ""))
#loop body continues beyond this excerpt
for sim_post_id, sim_post in test_posts.items():

	#skip this post if we've already done it
	if sim_post_id in finished_posts:
		continue