# Command-line arguments: the experiment name and the region to process.
if len(sys.argv) != 3:
    # sys.exit() with a string writes the message to stderr and exits with
    # status 1. Unlike the `exit` helper injected by the site module, it is
    # always available (e.g. under `python -S` or in frozen builds).
    sys.exit("usage: python ranked_list_generation.py <EXPERIMENT_NAME> <REGION (e.g. chicago)>")

EXPERIMENT_NAME = sys.argv[1]
REGION = sys.argv[2]

# Directory layout (relative to the current working directory)
DATA_DIR = "data"
PARTITIONED_DATA_DIR = path.join(DATA_DIR, "partitioned_data", REGION)
EXPERIMENT_DATA_DIR = path.join(DATA_DIR, "experiments", EXPERIMENT_NAME)
RECOMMENDATION_DATA_DIR = path.join(EXPERIMENT_DATA_DIR, REGION, "recommendations")

# Reading the Experiment Attributes
EXPERIMENT_ATTS = read_experiment_atts(EXPERIMENT_DATA_DIR)

# Iterate over the EXPERIMENT partitions
for part in EXPERIMENT_ATTS['partitions']:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Partition %s" % part)

    # Define the DB partition dir and the RESULT partition dir
    db_partition_dir = path.join(PARTITIONED_DATA_DIR, "partition_%s" % part)
    result_partition_dir = path.join(RECOMMENDATION_DATA_DIR, "partition_%s" % part)

    # Define the partition ranks directory
    partition_rank_dir = path.join(result_partition_dir, "ranks")

    print("\tReading the mapping files...")
# Command-line arguments: the experiment name and the region to process.
if len(sys.argv) != 3:
    # sys.exit() with a string writes the message to stderr and exits with
    # status 1. Unlike the `exit` helper injected by the site module, it is
    # always available (e.g. under `python -S` or in frozen builds).
    sys.exit("usage: python ranked_list_generation.py <EXPERIMENT_NAME> <REGION (e.g. chicago)>")

EXPERIMENT_NAME = sys.argv[1]
REGION = sys.argv[2]

# Directory layout (relative to the current working directory)
DATA_DIR = "data"
PARTITIONED_DATA_DIR = path.join(DATA_DIR, "partitioned_data", REGION)
EXPERIMENT_DATA_DIR = path.join(DATA_DIR, "experiments", EXPERIMENT_NAME)
RECOMMENDATION_DATA_DIR = path.join(EXPERIMENT_DATA_DIR, REGION, "recommendations")

# Reading the Experiment Attributes
EXPERIMENT_ATTS = read_experiment_atts(EXPERIMENT_DATA_DIR)

# Iterate over the EXPERIMENT partitions
for part in EXPERIMENT_ATTS['partitions']:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Partition %s" % part)

    # Define the DB partition dir and the RESULT partition dir
    db_partition_dir = path.join(PARTITIONED_DATA_DIR, "partition_%s" % part)
    result_partition_dir = path.join(RECOMMENDATION_DATA_DIR, "partition_%s" % part)

    # Define the partition ranks directory
    partition_rank_dir = path.join(result_partition_dir, "ranks")

    print("\tReading the mapping files...")
    # Read the mapping files (event-id map, via read_map_event_ids)
    map_event_ids = read_map_event_ids(db_partition_dir)
# Hybrid-model experiment setup and dispatch (excerpt collapsed onto one line).
# Derives the partitioned-data and recommendation directories from REGION and
# EXPERIMENT_NAME, reads the experiment's "partitions" attribute, and then
# either maps create_models_and_recommend over get_models_to_experiment(...)
# in a multiprocessing pool (when MAX_PARALLEL > 1) or iterates the same
# generator call in a plain loop.
# NOTE(review): the inline pool comment says "CPU_COUNT -1", but the pool is
# created with MAX_PARALLEL workers -- confirm which is intended.
# NOTE(review): the body of the final `for` loop is not visible in this excerpt.
REGION = ARGS.region ALGORITHMS = ARGS.algorithms MAX_PARALLEL = ARGS.max_parallel DATA_DIR = "data" PARTITIONED_REGION_DATA_DIR = os.path.join(DATA_DIR, "partitioned_data", REGION) EXPERIMENT_DIR = os.path.join(DATA_DIR, "experiments", EXPERIMENT_NAME) REC_REGION_DATA_DIR = os.path.join(EXPERIMENT_DIR, REGION, "recommendations") REC_RESULT_DIR_NAME = "hybrid_models" LOGGER.info("HYBRID Algorithms") LOGGER.info(ALGORITHMS) # Read the experiment attributes PARTITIONS = read_experiment_atts(EXPERIMENT_DIR)["partitions"] DICT_ALG_RANKFILE = get_dict_alg_files(REGION) ENSEMBLE_LIST = sorted(DICT_ALG_RANKFILE.keys()) if MAX_PARALLEL > 1: # Define the Multiprocessing Pool (with size equals to CPU_COUNT -1) EXPERIMENT_POOL = multiprocessing.Pool(MAX_PARALLEL) # Starts the multiple processes EXPERIMENT_POOL.map(create_models_and_recommend, get_models_to_experiment(PARTITIONS, ALGORITHMS, PARTITIONED_REGION_DATA_DIR, REC_REGION_DATA_DIR, REC_RESULT_DIR_NAME)) else: for experiment_data in get_models_to_experiment(PARTITIONS, ALGORITHMS, PARTITIONED_REGION_DATA_DIR, REC_REGION_DATA_DIR, REC_RESULT_DIR_NAME):
# Experiment configuration taken from the parsed command-line arguments.
EXPERIMENT_NAME = ARGS.experiment_name
REGION = ARGS.region
ALGORITHMS = ARGS.algorithms
PARALLEL_EXECUTION = not ARGS.not_parallel

# Directory layout (relative to the current working directory)
DATA_DIR = "data"
PARTITIONED_REGION_DATA_DIR = path.join(DATA_DIR, "partitioned_data", REGION)
EXPERIMENT_DIR = path.join(DATA_DIR, "experiments", EXPERIMENT_NAME)
EXPERIMENT_REGION_DATA_DIR = path.join(EXPERIMENT_DIR, REGION)

LOGGER.info("Content-Based Algorithms")
LOGGER.info(ALGORITHMS)

# Read the experiment attributes
PARTITIONS = read_experiment_atts(EXPERIMENT_DIR)["partitions"]

if PARALLEL_EXECUTION:
    # Define the Multiprocessing Pool with CPU_COUNT - 1 workers, but never
    # fewer than one: on a single-core machine cpu_count() - 1 is 0 and
    # multiprocessing.Pool(0) raises ValueError.
    EXPERIMENT_POOL = multiprocessing.Pool(max(1, multiprocessing.cpu_count() - 1))
    try:
        # Starts the multiple processes (map blocks until all items are done)
        EXPERIMENT_POOL.map(
            create_models_and_recommend,
            get_models_to_experiment(PARTITIONS, ALGORITHMS,
                                     PARTITIONED_REGION_DATA_DIR,
                                     EXPERIMENT_REGION_DATA_DIR))
    finally:
        # Always release the worker processes, even if a task raised.
        EXPERIMENT_POOL.close()
        EXPERIMENT_POOL.join()
else:
    # Sequential fallback: run every model in this process, one at a time.
    for experiment_data in get_models_to_experiment(
            PARTITIONS, ALGORITHMS,
            PARTITIONED_REGION_DATA_DIR, EXPERIMENT_REGION_DATA_DIR):
        create_models_and_recommend(experiment_data)