def dropsubj_run():
    """Yield the result rows of the percent-of-subjects removal experiment.

    For each value in PERCENT_REMOVE, run_experiment is invoked with a
    randomizer built on replace_percent_subjects and a single cleaner built
    on remove_percent_deviant_subjects, both using that same percentage.
    Every row run_experiment produces is passed through to the caller.
    """
    progress = ProgressBar(len(PERCENT_REMOVE))
    for percent in PERCENT_REMOVE:
        randomize = lambda d: replace_percent_subjects(d, percent)
        drop_deviants = lambda d: remove_percent_deviant_subjects(d, percent)
        settings = {'p': percent, 'cleaner': 'devsubj'}
        for row in run_experiment(randomize, [drop_deviants], [settings]):
            yield row
        # NOTE(review): the original indentation was lost; the tick is placed
        # once per percentage so the tick count matches the bar's total of
        # len(PERCENT_REMOVE) -- confirm against the original layout.
        progress.incr_and_errput()
def minrho_run():
    """Yield the result rows of the "minrho" subject-removal experiment.

    For each count in NUM_DROP, run_experiment is invoked with a randomizer
    built on replace_subjects and a single cleaner built on
    remove_most_deviant_subjects, both using that same count.  Every row
    run_experiment produces is passed through to the caller.  A start
    banner is written to stderr before the first experiment.
    """
    progress = ProgressBar(len(NUM_DROP))
    sys.stderr.write("Beginning minrho eval.\n")
    progress.errput()
    for count in NUM_DROP:
        randomize = lambda d: replace_subjects(d, count)
        drop_deviants = lambda d: remove_most_deviant_subjects(d, count)
        settings = {'n': count, 'cleaner': 'minrho'}
        for row in run_experiment(randomize, [drop_deviants], [settings]):
            yield row
        # NOTE(review): the original indentation was lost; the tick is placed
        # once per count so the tick count matches the bar's total of
        # len(NUM_DROP) -- confirm against the original layout.
        progress.incr_and_errput()
def zscore_run(randomizer, randomizer_name):
    """Yield the result rows of the z-score cleaning experiment.

    For each noise level in NOISES, one cleaner is built per entry of
    ZSCORES: RemoveDeviantRatings(zscore).scores for a truthy zscore, and
    BaselineCleaner().scores for a falsy one (e.g. None or 0).  The cleaners
    are handed to run_experiment together with a matching list of parameter
    dicts, and every row it produces is passed through to the caller.

    Args:
        randomizer: callable invoked as randomizer(data, percent_noise).
        randomizer_name: label used in the stderr banner and stored under
            the 'randomizer' key of each parameter dict.
    """
    pb = ProgressBar(len(ZSCORES) * len(NOISES))
    sys.stderr.write(
        "Beginning zscore eval with %s randomization.\n" % randomizer_name)
    pb.errput()
    for percent_noise in NOISES:
        this_rand = lambda d: randomizer(d, percent_noise)
        # A conditional expression instead of `zscore and X or Y`: the and/or
        # form would silently fall back to the baseline if X were ever falsy.
        cleaners = [
            RemoveDeviantRatings(zscore).scores if zscore
            else BaselineCleaner().scores
            for zscore in ZSCORES
        ]
        parameters = [
            dict(cleaner='zscore', p=percent_noise,
                 randomizer=randomizer_name, zscore=str(zscore))
            for zscore in ZSCORES
        ]
        for row in run_experiment(this_rand, cleaners, parameters):
            yield row
            # NOTE(review): the original indentation was lost; the tick is
            # placed per row because the bar's total is
            # len(ZSCORES) * len(NOISES) -- confirm against the original.
            pb.incr_and_errput()
def svd_run(randomizer, randomizer_name):
    """Yield the result rows of the SVD cleaning experiment.

    For each noise level in NOISES, run_experiment is given
    BaselineCleaner().scores followed by the .scores of every cleaner from
    create_svd_cleaners(K).  The parameter dicts record k as the string
    'None' for the baseline slot and '1' .. str(K) for the rest.  Every row
    run_experiment produces is passed through to the caller.

    Args:
        randomizer: callable invoked as randomizer(data, percent_noise).
        randomizer_name: label used in the stderr banner and stored under
            the 'randomizer' key of each parameter dict.
    """
    pb = ProgressBar((K + 1) * len(NOISES))
    sys.stderr.write(
        "Beginning SVD eval with %s randomization.\n" % randomizer_name)
    pb.errput()
    for percent_noise in NOISES:
        this_rand = lambda d: randomizer(d, percent_noise)
        # list(range(...)) keeps the list concatenation valid where range()
        # does not return a list (Python 3); on Python 2 it is unchanged.
        ks = [None] + list(range(1, K + 1))
        parameters = [
            {
                'cleaner': 'svd',
                'p_noise': percent_noise,
                'randomizer': randomizer_name,
                'k': str(k),
            }
            for k in ks
        ]
        cleaners = [BaselineCleaner().scores]
        cleaners += [c.scores for c in create_svd_cleaners(K)]
        for row in run_experiment(this_rand, cleaners, parameters):
            yield row
            # NOTE(review): the original indentation was lost; the tick is
            # placed per row because the bar's total is
            # (K + 1) * len(NOISES) -- confirm against the original.
            pb.incr_and_errput()
def fetch_image_urls(synset):
    """Return the image mappings listed for *synset*.

    Fetches MAPPING_URL % synset, splits the response on CRLF, drops empty
    lines, and splits each remaining line on whitespace.  The result is a
    list of token lists; the driver below unpacks each as (imgid, url).
    """
    data = fetch.fetch_data(MAPPING_URL % synset)
    return [line.split() for line in data.split("\r\n") if line]


def fetch_hypos(synset):
    """Return the entries listed by HYPO_URL for *synset*.

    All "-" characters are removed from the response before it is split on
    CRLF.  Empty entries are dropped: an empty or CRLF-terminated response
    would otherwise yield '' and trigger a pointless fetch of
    MAPPING_URL % ''.
    """
    data = fetch.fetch_data(HYPO_URL % synset)
    return [entry for entry in data.replace("-", "").split("\r\n") if entry]


# Driver: print one tab-separated "synset, imgid, url" line per image.
pb = ProgressBar(len(synsets))
pb.errput()
for synset in synsets:
    image_urls = fetch_image_urls(synset)
    if not image_urls:
        # No images listed directly: collect those of the entries returned
        # by fetch_hypos.  They are still printed under the parent synset.
        children_synsets = fetch_hypos(synset)
        children_urls = [fetch_image_urls(cs) for cs in children_synsets]
        image_urls = [y for x in children_urls for y in x]
    for imgid, url in image_urls:
        # Single-argument parenthesised form prints identically on
        # Python 2 (print statement) and Python 3 (print function).
        print("%s\t%s\t%s" % (synset, imgid, url))
    # NOTE(review): the original indentation was lost; one tick per synset
    # matches the bar's total of len(synsets) -- confirm.
    pb.incr_and_errput()