class SpiderMan(object):
    def __init__(self):
        self.number = 100
        self.urlManager = UrlManager()
        self.downloader = Downloader()
        self.parser = Parser()
        self.dataManager = DataManager()
        self.dbManager = MongoUtils()

    def set_crawler_number(self, num):
        if num is None or int(num) < 0:
            return
        self.number = int(num)

    def crawler(self, root_url):
        self.urlManager.add_new_url(root_url)
        while (self.urlManager.has_new_url()
               and self.urlManager.old_urls_size() < self.number):
            try:
                new_url = self.urlManager.get_new_url()
                html = self.downloader.download(new_url)
                result = self.parser.parser(new_url, html)
                self.urlManager.add_new_urls(result[0])
                self.dataManager.store_data(result[1])
            except Exception as err:
                # str(err) avoids a TypeError from concatenating str and Exception
                print("crawl failed: " + str(err))
        self.dataManager.output_data()
        datas = self.dataManager.get_data()
        self.dbManager.insert_baike_many(datas)
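# A minimal driver sketch for the crawler above, assuming UrlManager, Downloader,
# Parser, DataManager, and MongoUtils are importable from this project; the seed
# URL and the crawl cap are illustrative placeholders, not values from the source.
if __name__ == "__main__":
    spider = SpiderMan()
    spider.set_crawler_number(50)  # visit at most 50 pages
    spider.crawler("https://baike.baidu.com/item/Python")  # example seed URL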
def __init__(self, urm_train, eurm=False):
    super(HybridGenRecommender, self).__init__(urm_train)
    self.data_folder = Path(__file__).parent.parent.absolute()
    self.eurm = eurm
    self.num_users = urm_train.shape[0]

    data = DataManager()
    urm_train = check_matrix(urm_train.copy(), 'csr')

    icm_price, icm_asset, icm_sub, icm_all = data.get_icm()
    ucm_age, ucm_region, ucm_all = data.get_ucm()

    recommender_1 = ItemKNNCBFRecommender(urm_train, icm_all)
    recommender_1.fit(shrink=40, topK=20, feature_weighting='BM25')

    # recommender_2 = UserKNNCBFRecommender(urm_train, ucm_all)
    # recommender_2.fit(shrink=500, topK=1600, normalize=True)

    recommender_2 = UserKNNCBFRecommender(urm_train, ucm_all)
    recommender_2.fit(shrink=1777, topK=1998, similarity='tversky',
                      feature_weighting='BM25',
                      tversky_alpha=0.1604953616,
                      tversky_beta=0.9862348646)

    self.recommender_1 = recommender_1
    self.recommender_2 = recommender_2
def upload(request):
    if request.method == "POST":
        cfg_path = handle_uploaded_file(request.FILES['file'],
                                        request.FILES['file'].name)
        dm = DataManager()
        dm.save_data(cfg_path)
    return redirect(homepage)
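# `handle_uploaded_file` is referenced above but not shown. A common Django
# sketch for it follows; the "uploads" directory and the return-the-saved-path
# contract are assumptions inferred from the view, not the project's real helper.
import os

def handle_uploaded_file(f, filename):
    """Stream an UploadedFile to disk in chunks and return the saved path."""
    dest_path = os.path.join("uploads", filename)
    with open(dest_path, "wb") as destination:
        for chunk in f.chunks():  # chunked writes avoid loading large files into memory
            destination.write(chunk)
    return dest_path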
def post(self, request, *args, **kwargs):
    try:
        Log.debug('Create momend request: ' + str(request.POST))
        _owner = request.user
        if 'owner' in request.POST:
            if request.user.is_superuser:
                _owner = User.objects.get(pk=request.POST['owner'])
        _momend_name = request.POST['momend_name']
        _privacy = request.POST['privacy_type']
        _theme = request.POST['momend_theme']
        _theme = 1  # TODO remove after showing theme selection combo
        _send_mail = request.POST.get('mail', True)  # Send mail after create, default True
        dm = DataManager(_owner)
        try:
            _args = dict()
            if 'selected' in request.POST and request.POST['selected']:
                _args['is_date'] = False
                _args['selected'] = json.loads(request.POST['selected'])
            else:
                _args['is_date'] = True
                _args['selected'] = False
                _args['since'] = datetime.strptime(request.POST['start_date'], '%d %b, %Y').replace(tzinfo=pytz.UTC)
                _args['until'] = datetime.strptime(request.POST['finish_date'], '%d %b, %Y').replace(tzinfo=pytz.UTC)

            _create_params = request.POST.keys()
            for _param in _create_params:
                if '-active' in _param:  # Provider disable requests
                    _args[_param] = request.POST[_param]

            if 'friends' in request.POST and len(request.POST['friends']) > 0:
                _args['friends'] = request.POST['friends'].split(',')
            else:
                _args['friends'] = False

            if 'chronological' in request.POST:
                _args['chronological'] = request.POST['chronological']
            else:
                _args['chronological'] = False

            _cryptic_id = dm.create_momend(name=_momend_name, duration=60, privacy=_privacy,
                                           theme=Theme.objects.get(pk=_theme),
                                           send_mail=_send_mail, **_args)
            if _cryptic_id:  # If created momend successfully
                return _generate_json_response(True, 'Create momend request received', cid=_cryptic_id)
            _error_msg = dm.get_last_status()
            return _generate_json_response(False, 'Create momend failed with error message: ' + str(_error_msg), _error_msg)
        except Exception as e:
            _log_critical_error('Exception while creating the momend', True, request.POST, request.user,
                                traceback.format_exc())
            return _generate_json_response(False, 'Error while creating momend: ' + str(e),
                                           'An error occurred. Please try again')  # Could be any error
    except KeyError as e:
        Log.error(traceback.format_exc())
        return _generate_json_response(False, 'Error while creating momend: ' + str(e), str(e))  # One of the parameters is missing
    except Exception as e:
        _log_critical_error('Impossible exception occurred while creating momend', True, request.POST, request.user,
                            traceback.format_exc())
        return _generate_json_response(False, 'Error while creating momend (impossible): ' + str(e),
                                       'An error occurred. Please try again after a while')
def __init__(self, urm_train):
    super(HybridZeroRecommender, self).__init__(urm_train)

    urm_train = check_matrix(urm_train.copy(), 'csr')

    data = DataManager()
    ucm_age, ucm_region, ucm_all = data.get_ucm()

    recommender_1 = TopPop(urm_train)
    recommender_1.fit()

    recommender_2 = UserKNNCBFRecommender(urm_train, ucm_all)  # must be constructed before fit()
    recommender_2.fit(shrink=500, topK=1600, similarity='tversky')

    self.recommender_1 = recommender_1
    self.recommender_2 = recommender_2
def __init__(self, building, zones, lambda_val, start, end,
             forecasting_horizon, window, tstats, non_controllable_data=None):
    """
    :param building: (str) building name
    :param zones: [str] zone names
    :param lambda_val: (float) lambda value for the objective function
    :param start: datetime with timezone
    :param end: datetime with timezone
    :param forecasting_horizon: (str) the interval over which to forecast; must be a multiple of window
    :param window: (str) the interval in which to split the data
    :param tstats: {zone: simulator object} each with functions current_temperature, next_temperature(action)
    :param non_controllable_data: optional pre-fetched data passed through to the DataManager
    """
    assert xsg.get_window_in_sec(forecasting_horizon) % xsg.get_window_in_sec(window) == 0

    self.building = building
    self.zones = zones
    self.window = window
    self.lambda_val = lambda_val
    self.forecasting_horizon = forecasting_horizon
    self.delta_forecasting_horizon = datetime.timedelta(
        seconds=xsg.get_window_in_sec(forecasting_horizon))
    self.delta_window = datetime.timedelta(seconds=xsg.get_window_in_sec(window))

    # Simulation end is when current_time reaches end, and end will become the end of our data.
    self.simulation_end = end
    end += self.delta_forecasting_horizon

    self.DataManager = DataManager(building, zones, start, end, window, non_controllable_data)

    # dictionary of simulator objects keyed by zone; each has functions
    # current_temperature and next_temperature(action)
    self.tstats = tstats

    self.current_time = start
    self.current_time_step = 0

    self.actions = {iter_zone: [] for iter_zone in self.zones}  # {zone: [ints]}
    self.temperatures = {iter_zone: [self.tstats[iter_zone].temperature]
                         for iter_zone in self.zones}  # {zone: [floats]}
def get(request):
    # Check on uploading configuration file
    if glob.glob(BASE_DIR + "/uploads/*.csv") == []:
        raise Http404("Please, upload config file.")
    if request.method == "GET":
        # To manage the issuance of banners
        dm = DataManager()
        if "category" in request.GET:
            categories = request.GET["category"]
            good_pks = []  # pks are Primary Keys, actually database indexes
            # We do not show banners that have run out of paid shows
            for obj in Banner.objects.filter(prepaid_shows_amount__gt=0).order_by("-prepaid_shows_amount"):
                # If at least one category matches
                if dm.matching_values(obj.categories.split(","), categories) > 0:
                    # Save its database primary key
                    good_pks.append(obj.pk)
            # Matching banners exist
            if good_pks != []:
                sorted_psa = [Banner.objects.get(pk=good_pk).prepaid_shows_amount
                              for good_pk in good_pks]
                # Banner to be shown
                banner_pk = dm.random_pick_from_given_distribution(good_pks, sorted_psa)
                # If used
                banner_pk = dm.was_used(banner_pk, good_pks)
                banner_to_show = Banner.objects.get(pk=banner_pk)
            else:
                # Select random banner
                banner_to_show = dm.select_random_banner(Banner)
        else:
            # Select random banner
            banner_to_show = dm.select_random_banner(Banner)
        banner_to_show.prepaid_shows_amount -= 1
        banner_to_show.save()
        return render(request, 'get/get.html', {
            "banner_url": banner_to_show.url,
            "categories": banner_to_show.categories
        })
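# A plausible stand-in for DataManager.random_pick_from_given_distribution used
# above (its real body is not shown here): draw one primary key with probability
# proportional to its prepaid-shows weight. Names are illustrative only.
import random

def random_pick_from_given_distribution(values, weights):
    """Pick one element of `values`, weighted by the matching entry of `weights`."""
    return random.choices(values, weights=weights, k=1)[0]

# e.g. random_pick_from_given_distribution([3, 7], [90, 10]) returns pk 3 about 90% of the time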
from pathlib import Path

from DataManager.DataManager import DataManager
from DataManager.split_train_validation_leave_k_out import split_train_leave_k_out_user_wise
from Base.Evaluation.Evaluator import EvaluatorHoldout
from KNN.UserKNNCFRecommender import UserKNNCFRecommender
from Hybrid.HybridGenRecommender import HybridGenRecommender
from Hybrid.HybridNormRecommender import HybridNormRecommender
from GraphBased.RP3betaRecommender import RP3betaRecommender
from FeatureWeighting.CFW_D_Similarity_Linalg import CFW_D_Similarity_Linalg
from Hybrid.HybridNorm1Recommender import HybridNorm1Recommender
from Hybrid.HybridNorm2Recommender import HybridNorm2Recommender
from Hybrid.HybridGen2Recommender import HybridGen2Recommender
from MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_BPR_Cython

data_folder = Path(__file__).parent.absolute()

from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Hybrid.HybridNorm3Recommender import HybridNorm3Recommender
from MatrixFactorization.ALSRecommender import ALSRecommender
from MatrixFactorization.BPRRecommender import BPRRecommender
import similaripy as sim

data = DataManager()
urm_train = data.get_urm()

urm_train, urm_test = split_train_leave_k_out_user_wise(data.get_urm(), temperature='normal')
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train, temperature='valid2')

urm_train_warm = data.create_test_warm_users(urm_train, threshold=10)
urm_test_warm = data.create_test_warm_users(urm_test, threshold=10)

evaluator_test_warm = EvaluatorHoldout(urm_test_warm, cutoff_list=[10])

recommender = UserKNNCFRecommender(urm_train)
recommender.fit(shrink=2, topK=600, normalize=True)
class DigitalTwin:
    STANDARD_MEAN = 0
    STANDARD_VAR = 0.05
    STANDARD_UNIT = "F"

    def __init__(self, building, zones, temperatures, start, end, window,
                 last_temperatures=None, suppress_not_enough_data_error=False,
                 non_controllable_data=None):
        self.temperatures = temperatures
        self.last_temperatures = last_temperatures
        self.indoor_temperature_prediction_stub = xsg.get_indoor_temperature_prediction_stub()
        self.error = {}
        self.building = building
        self.zones = zones
        self.start = start
        self.end = end
        self.DataManager = DataManager(building, zones, start, end, window, non_controllable_data)
        self.timesteps = {iter_zone: 0 for iter_zone in zones}

        for iter_zone in zones:
            for action in [xsg.NO_ACTION, xsg.HEATING_ACTION, xsg.COOLING_ACTION]:
                try:
                    END = datetime.datetime(year=2019, month=4, day=1).replace(
                        tzinfo=pytz.utc)  # datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
                    START = END - datetime.timedelta(days=365)
                    raise Exception("ERROR: Not Implemented.")
                    # mean, var, unit = xsg.get_indoor_temperature_prediction_error(self.indoor_temperature_prediction_stub,
                    #                                                               building,
                    #                                                               iter_zone,
                    #                                                               action, START, END)
                except Exception:
                    if not suppress_not_enough_data_error:
                        raise Exception(
                            "ERROR: Tstat for building: '{0}' and zone: '{1}' did not receive error data from "
                            "indoor_temperature_prediction microservice for action: '{2}'."
                            .format(building, iter_zone, action))
                    print(
                        "WARNING: Tstat for building: '{0}' and zone: '{1}' did not receive error data from "
                        "indoor_temperature_prediction microservice for action: '{2}' and is now using STANDARD error."
                        .format(building, iter_zone, action))
                    mean, var, unit = (DigitalTwin.STANDARD_MEAN,
                                       DigitalTwin.STANDARD_VAR,
                                       DigitalTwin.STANDARD_UNIT)
                self.error[action] = {"mean": mean, "var": var}

    def next_temperature(self, action, zone):
        # data prep for updating temperatures
        curr_other_zone_temperatures = self.DataManager.all_zone_temperature_data.iloc[
            self.timesteps[zone]]
        for iter_zone_2 in self.zones:
            curr_other_zone_temperatures[iter_zone_2] = self.temperatures[iter_zone_2]

        for i in range(3):
            # new_temperature = xsg.get_indoor_temperature_prediction(self.DataManager.indoor_temperature_prediction_stub,
            #                                                         self.building,
            #                                                         zone,
            #                                                         self.start,
            #                                                         action,
            #                                                         self.temperatures[zone],
            #                                                         self.DataManager.outdoor_temperature.iloc[
            #                                                             self.timesteps[zone]],
            #                                                         self.last_temperatures[zone],
            #                                                         curr_other_zone_temperatures)[0]
            new_temperature = self.DataManager.get_indoor_temperature_prediction(
                self.building, zone, self.start, action,
                self.temperatures[zone],
                self.DataManager.outdoor_temperature.iloc[self.timesteps[zone]],
                self.last_temperatures[zone],
                curr_other_zone_temperatures)[0]
            self.last_temperatures[zone] = self.temperatures[zone]
            self.temperatures[zone] = new_temperature

        # note: np.random.normal's second argument is the standard deviation;
        # the stored "var" value is passed as-is
        self.temperatures[zone] += np.random.normal(self.error[action]["mean"],
                                                    self.error[action]["var"])
        self.timesteps[zone] += 1
        return self.temperatures[zone]

    def reset(self, temperature, zone, last_temperature=None):
        self.temperatures[zone] = temperature
        self.last_temperatures[zone] = last_temperature
        self.timesteps[zone] = 0
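# Illustrative single-zone rollout of the DigitalTwin above. The building/zone
# names, dates, window string, and temperatures are placeholders, and the XBOS
# microservices behind DataManager are assumed reachable; this is a sketch,
# not the project's actual entry point.
import datetime
import pytz

start = datetime.datetime(2019, 1, 1, tzinfo=pytz.utc)
end = start + datetime.timedelta(hours=1)

twin = DigitalTwin("ciee", ["HVAC_Zone_Eastzone"],
                   temperatures={"HVAC_Zone_Eastzone": 70.0},
                   start=start, end=end, window="15m",
                   last_temperatures={"HVAC_Zone_Eastzone": 69.5},
                   suppress_not_enough_data_error=True)  # falls back to STANDARD error

for _ in range(4):  # one hour of heating in four 15-minute steps
    print(twin.next_temperature(xsg.HEATING_ACTION, "HVAC_Zone_Eastzone"))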
from pathlib import Path

from DataManager.DataManager import DataManager
from DataManager.split_train_validation_leave_k_out import split_train_leave_k_out_user_wise
from Base.Evaluation.Evaluator import EvaluatorHoldout
import numpy as np

from GraphBased.RP3betaRecommender import RP3betaRecommender
from FeatureWeighting.CFW_D_Similarity_Linalg import CFW_D_Similarity_Linalg
from Hybrid.HybridNorm1Recommender import HybridNorm1Recommender
from Hybrid.HybridNorm2Recommender import HybridNorm2Recommender
from Hybrid.HybridGen2Recommender import HybridGen2Recommender
from MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_BPR_Cython

data_folder = Path(__file__).parent.absolute()

from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Hybrid.HybridNorm3Recommender import HybridNorm3Recommender
from MatrixFactorization.ALSRecommender import ALSRecommender
from MatrixFactorization.BPRRecommender import BPRRecommender
import similaripy as sim

test = False
threshold = 1
temperature = 'normal'

Data = DataManager()
urm_train = Data.get_urm()

valid = False
multiple_test = True
num_test = 3

if multiple_test:
    pass  # the experiment loop below is left commented out
    # res = np.zeros(num_test)
    # for i in np.arange(num_test):
    #     Data = DataManager()
    #     urm_train = Data.get_urm()
    #     urm_train, urm_test = split_train_leave_k_out_user_wise(urm_train, threshold=threshold, temperature=temperature)
    #     urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train, threshold=threshold, temperature='valid')
    #     evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])
    #     evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])
    #
from DataManager.DataManager import DataManager
from DataManager.split_train_validation_leave_k_out import split_train_leave_k_out_user_wise
from Base.Evaluation.Evaluator import EvaluatorHoldout
from ParameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from ParameterTuning.SearchAbstractClass import SearchInputRecommenderArgs
from skopt.space import Real, Integer, Categorical
from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from KNN.UserKNNCBFRecommender import UserKNNCBFRecommender
from KNN.UserKNNCFRecommender import UserKNNCFRecommender
import numpy as np
import scipy.sparse as sps
from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Hybrid.HybridGen2Recommender import HybridGen2Recommender
from Hybrid.HybridNormRecommender import HybridNormRecommender
from Hybrid.HybridNorm1Recommender import HybridNorm1Recommender
from Hybrid.HybridNorm2Recommender import HybridNorm2Recommender

Data = DataManager()
urm_train, urm_test = split_train_leave_k_out_user_wise(Data.get_urm(),
                                                        threshold=10,
                                                        temperature='normal')
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train,
                                                         threshold=10,
                                                         temperature='valid')

evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

recommender = HybridNorm1Recommender

# recommender_3 = UserKNNCFRecommender(urm_train)
# recommender_3.fit(shrink=2, topK=600, normalize=True)
# w_sparse = recommender_3.W_sparse

parameterSearch = SearchBayesianSkopt(recommender,
                                      evaluator_validation=evaluator_valid,
from DataManager.DataManager import DataManager
from DataManager.split_train_validation_leave_k_out import split_train_leave_k_out_user_wise
from Base.Evaluation.Evaluator import EvaluatorHoldout
from skopt.space import Real, Integer, Categorical
from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from KNN.UserKNNCBFRecommender import UserKNNCBFRecommender
from KNN.UserKNNCFRecommender import UserKNNCFRecommender
import numpy as np
import scipy.sparse as sps
from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Hybrid.HybridGen2Recommender import HybridGen2Recommender
from MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_BPR_Cython
from MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_AsySVD_Cython
from MatrixFactorization.Cython.MatrixFactorization_Cython import MatrixFactorization_FunkSVD_Cython
from Hybrid.HybridNormRecommender import HybridNormRecommender
from GraphBased.RP3betaRecommender import RP3betaRecommender
from SLIM_ElasticNet.SLIMElasticNetRecommender2 import MultiThreadSLIM_ElasticNet

Data = DataManager()
urm_train, urm_test = split_train_leave_k_out_user_wise(Data.get_urm(),
                                                        threshold=10,
                                                        temperature='normal')
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train,
                                                         threshold=10,
                                                         temperature='valid')

urm_valid = Data.create_test_warm_users(urm_valid, threshold=3)

evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

recommender = MultiThreadSLIM_ElasticNet
# recommender = MatrixFactorization_FunkSVD_Cython
# recommender = MatrixFactorization_AsySVD_Cython
# recommender = MatrixFactorization_BPR_Cython
from lightfm import LightFM
from DataManager.DataManager import DataManager
from lightfm.evaluation import auc_score
from DataManager.split_train_validation_leave_k_out import split_train_leave_k_out_user_wise
from Base.Evaluation.Evaluator import EvaluatorHoldout
from Hybrid.LighFm import LighFMRecommender
import matplotlib.pyplot as pyplot
from sklearn import metrics, ensemble
import numpy as np

data = DataManager()
urm = data.get_urm()

threshold = 10
temperature = 'normal'

ucm_age, ucm_region, ucm_all = data.get_ucm()
icm_price, icm_asset, icm_sub, icm_all = data.get_icm()

urm_train, urm_test = split_train_leave_k_out_user_wise(
    urm, threshold=threshold, temperature=temperature)
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train,
                                                         threshold=threshold,
                                                         temperature='valid')

evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])
evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])

# recommender = LighFMRecommender(urm_train,
#                                 no_components=150,
#                                 loss='warp',
#                                 learning_rate=0.09,
#                                 random_state=2019)
#
""" This file demonstrates writing tests using the unittest module. These will pass when you run "manage.py test". Replace this with more appropriate tests for your application. """ from django.test import TestCase from django.contrib.auth.models import User from DataManager.DataManager import DataManager from DataManager.models import Provider, RawData from DataManager.DataEnrich.DataEnrichManager import DataEnrichManager from datetime import datetime ert = User.objects.get(username='******') dm = DataManager(ert) start = datetime.strptime('2012-01-01','%Y-%m-%d') end = datetime.strptime('2012-05-01','%Y-%m-%d') dm.create_momend(start,end,50)
from DataManager.DataManager import DataManager
from KNN.UserKNNCBFRecommender import UserKNNCBFRecommender
from KNN.UserKNNCFRecommender import UserKNNCFRecommender
from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
import pandas as pd
from DataManager.split_train_validation_leave_k_out import split_train_leave_k_out_user_wise
from FeatureWeighting.CFW_D_Similarity_Linalg import CFW_D_Similarity_Linalg
from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Utils.s_plus import dot_product
import scipy.sparse as sps
from Base.Evaluation.Evaluator import EvaluatorHoldout
from GraphBased.RP3betaRecommender import RP3betaRecommender
from ParameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from skopt.space import Real, Integer, Categorical
from ParameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

data = DataManager()
ucm_age, ucm_region, ucm_all = data.get_ucm()
icm_price, icm_asset, icm_sub, icm_all = data.get_icm()

recommender_4 = UserKNNCFRecommender(data.get_urm())
recommender_4.fit(shrink=2, topK=600, normalize=True)
W_sparse_CF = recommender_4.W_sparse

cfw = User_CFW_D_Similarity_Linalg(URM_train=data.get_urm(),
                                   UCM=ucm_all.copy(),
                                   S_matrix_target=W_sparse_CF)
from DataManager.DataManager import DataManager
from KNN.UserKNNCBFRecommender import UserKNNCBFRecommender
from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
import pandas as pd
from DataManager.split_train_validation_leave_k_out import split_train_leave_k_out_user_wise
from FeatureWeighting.CFW_D_Similarity_Linalg import CFW_D_Similarity_Linalg
from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Utils.s_plus import dot_product
import scipy.sparse as sps
from Base.Evaluation.Evaluator import EvaluatorHoldout
from GraphBased.RP3betaRecommender import RP3betaRecommender
from ParameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from skopt.space import Real, Integer, Categorical
from ParameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

Data = DataManager()
ucm_age, ucm_region, ucm_all = Data.get_ucm()
icm_price, icm_asset, icm_sub, icm_all = Data.get_icm()

urm_train, urm_test = split_train_leave_k_out_user_wise(Data.get_urm(), temperature='normal')
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train, temperature='valid')

evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])
evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])

recommender_4 = RP3betaRecommender(urm_train)
recommender_4.fit(topK=16, alpha=0.03374950051351756,
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    # dataReader = Movielens10MReader()
    #
    # URM_train = dataReader.get_URM_train()
    # URM_validation = dataReader.get_URM_validation()
    # URM_test = dataReader.get_URM_test()

    Data = DataManager()
    urm_temp, urm_test = split_train_leave_k_out_user_wise(
        Data.get_urm(), use_validation_set=False, leave_random_out=True)
    urm_train, urm_valid = split_train_leave_k_out_user_wise(
        urm_temp, use_validation_set=False, leave_random_out=True)

    output_folder_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(urm_valid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=urm_train,
        metric_to_optimize="MAP",
        n_cases=10,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        similarity_type_list=["cosine"],
        parallelizeKNN=False)

    # pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)

    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(recommender_class, str(e)))
            traceback.print_exc()
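# A minimal entry point for the search above; assumes the module-level imports
# (os, partial, traceback, the recommenders) are present in the full file.
if __name__ == '__main__':
    read_data_split_and_search()  # results land in result_experiments/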
import numpy as np
from DataManager.DataManager import DataManager
import scipy.sparse as sps

data = DataManager()
# get_ucm() returns (ucm_age, ucm_region, ucm_all); keep the combined matrix
_, _, ucm_all = data.get_ucm()

features_per_user = np.ediff1d(ucm_all.indptr)

ucm_all = sps.csc_matrix(ucm_all)
users_per_feature = np.ediff1d(ucm_all.indptr)

features_per_users = np.sort(features_per_user)
# users_per_feature = np.sort(users_per_feature)

print(features_per_users.shape)
print(users_per_feature.shape)

import matplotlib.pyplot as pyplot

pyplot.plot(users_per_feature, 'ro')
pyplot.ylabel('Users per feature')
pyplot.xlabel('Feature index')
pyplot.show()
class MPC:
    """MPC Optimizer. No Demand Charges and Two Stage actions implemented."""

    def __init__(self, building, zones, start, end, window, lambda_val,
                 non_controllable_data=None, debug=False):
        """
        initialize instance variables

        :param building: (str) building name
        :param zones: [str] zone names
        :param start: (datetime timezone aware)
        :param end: (datetime timezone aware)
        :param window: (str) the interval in which to split the data.
        :param lambda_val: (float) lambda value for objective function
        """
        self.DataManager = DataManager(building, zones, start, end, window,
                                       non_controllable_data)
        self.start = start
        self.unix_start = start.timestamp() * 1e9
        self.end = end
        self.unix_end = end.timestamp() * 1e9
        self.window = window  # timedelta string

        self.building = building
        self.zones = zones

        self.lambda_val = lambda_val
        self.debug = debug

        self.g = nx.DiGraph()  # [TODO:Changed to MultiDiGraph... FIX print]

    def safety_check(self, node):
        for iter_zone in self.zones:
            curr_temperature = node.temperatures[iter_zone]
            curr_safety = self.DataManager.do_not_exceed[iter_zone].iloc[node.timestep]
            if not (curr_safety["t_low"] <= curr_temperature <= curr_safety["t_high"]):
                return False
        return True

    def timestep_to_datetime(self, timestep):
        return self.start + timestep * datetime.timedelta(
            seconds=xsg.get_window_in_sec(self.window))

    # the shortest path algorithm
    def shortest_path(self, root):
        """
        Creates the graph using DFS and calculates the shortest path

        :param root: node being examined right now and needs to be added to graph.
        :return: root Node if root added else return None.
        """
        if root is None:
            return None

        if root in self.g:
            return root

        # stop if node is past predictive horizon
        if self.timestep_to_datetime(root.timestep) >= self.end:
            self.g.add_node(root, objective_cost=0, best_action=None,
                            best_successor=None)  # no cost as leaf node
            return root

        # check if valid node
        if not self.safety_check(root):
            return None

        self.g.add_node(root, objective_cost=np.inf, best_action=None,
                        best_successor=None)

        # creating children, adding corresponding edge and updating root's objective cost
        for action in itertools.product(
                [xsg.NO_ACTION, xsg.HEATING_ACTION, xsg.COOLING_ACTION],
                repeat=len(self.zones)):
            # TODO Compute temperatures properly
            temperatures = {}
            zone_actions = {}
            for i in range(len(self.zones)):
                zone_actions[self.zones[i]] = action[i]
                temperatures[self.zones[i]] = root.temperatures[self.zones[i]] + \
                    1 * (action[i] == 1) - 1 * (action[i] == 2)

            # Create child node and call the shortest_path recursively on it
            child_node = Node(temperatures=temperatures, timestep=root.timestep + 1)
            child_node = self.shortest_path(child_node)
            if child_node is None:
                continue

            # get discomfort across edge
            discomfort = {}
            for iter_zone in self.zones:
                curr_comfortband = self.DataManager.comfortband[iter_zone].iloc[root.timestep]
                curr_occupancy = self.DataManager.occupancy[iter_zone].iloc[root.timestep]
                average_edge_temperature = (root.temperatures[iter_zone] +
                                            child_node.temperatures[iter_zone]) / 2.

                discomfort[iter_zone] = self.DataManager.get_discomfort(
                    self.building, average_edge_temperature,
                    curr_comfortband["t_low"], curr_comfortband["t_high"],
                    curr_occupancy)

            # Get consumption across edge
            price = 1  # self.prices.iloc[root.timestep] TODO also add right unit conversion, and duration
            consumption_cost = {
                self.zones[i]: price * self.DataManager.hvac_consumption[self.zones[i]][action[i]]
                for i in range(len(self.zones))
            }

            # add edge
            self.g.add_edge(root, child_node, action=zone_actions,
                            discomfort=discomfort,
                            consumption_cost=consumption_cost)

            # update root node to contain the best child.
            total_edge_cost = ((1 - self.lambda_val) * (sum(consumption_cost.values()))) + (
                self.lambda_val * (sum(discomfort.values())))
            objective_cost = self.g.node[child_node]["objective_cost"] + total_edge_cost
            if objective_cost < self.g.node[root]["objective_cost"]:
                self.g.node[root]["objective_cost"] = objective_cost
                self.g.node[root]["best_action"] = zone_actions
                self.g.node[root]["best_successor"] = child_node

        return root

    def reconstruct_path(self, root):
        """
        Util function that reconstructs the best action path

        :param root: the root Node of the MPC graph
        :return: [Node] the best path through the graph
        """
        graph = self.g
        if root not in self.g:
            raise Exception("Root does not exist in MPC graph.")

        path = [root]
        while graph.node[root]['best_successor'] is not None:
            root = graph.node[root]['best_successor']
            path.append(root)
        return path

    # def g_plot(self, zone):
    #     try:
    #         os.remove('mpc_graph_' + zone + '.html')
    #     except OSError:
    #         pass
    #     fig = plotly_figure(self.advise_unit.g, path=self.path)
    #     py.plot(fig, filename='mpc_graph_' + zone + '.html', auto_open=False)

    def advise(self, starting_temperatures):
        """Call this function to get best action.

        :param starting_temperatures: dict {zone: float temperature}
        :return: action, err
        """
        root = Node(starting_temperatures, 0)
        root = self.shortest_path(root)
        if root is None:
            return None, "Could not find feasible action."
        return self.g.node[root]["best_action"], None
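# Hypothetical invocation of the MPC above. The building/zone names, dates, and
# window string are placeholders; Node and the services behind DataManager are
# assumed available. lambda_val trades consumption cost against discomfort.
import datetime
import pytz

start = datetime.datetime(2019, 1, 1, tzinfo=pytz.utc)
end = start + datetime.timedelta(hours=1)

mpc = MPC("ciee", ["HVAC_Zone_Eastzone"], start, end,
          window="15m", lambda_val=0.5)
action, err = mpc.advise({"HVAC_Zone_Eastzone": 70.0})
if err is not None:
    print("MPC failed:", err)
else:
    print("Best first action per zone:", action)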
def __init__(self, urm_train, eurm=False):
    super(HybridGen2Recommender, self).__init__(urm_train)
    self.eurm = eurm
    self.data_folder = Path(__file__).parent.parent.absolute()
    self.num_users = urm_train.shape[0]

    urm_train = check_matrix(urm_train.copy(), 'csr')
    data = DataManager()

    if Path(self.data_folder / 'Data/icm_sparse.npz').is_file() and self.eurm:
        print("Previous icm_sparse found")
    else:
        _, _, _, icm_all = data.get_icm()
        args = {
            'topK': 6,
            'shrink': 5,
            'feature_weighting': 'TF-IDF',
            'similarity': 'cosine',
            'normalize': False
        }
        recommender_1 = ItemKNNCBFRecommender(urm_train, icm_all)
        recommender_1.fit(**args)
        self.recommender_1 = recommender_1

    if Path(self.data_folder / 'Data/ucm_sparse.npz').is_file() and self.eurm:
        print("Previous ucm_sparse found")
    else:
        ucm_age, ucm_region, ucm_all = data.get_ucm()
        recommender_2 = UserKNNCBFRecommender(urm_train, ucm_all)
        recommender_2.fit(shrink=1777, topK=1998, similarity='tversky',
                          feature_weighting='BM25',
                          tversky_alpha=0.1604953616,
                          tversky_beta=0.9862348646)
        self.recommender_2 = recommender_2

    if self.eurm:
        beta = 0.6

        if Path(self.data_folder / 'Data/icm_sparse.npz').is_file():
            self.score_matrix_1 = sps.load_npz(self.data_folder / 'Data/icm_sparse.npz')
        else:
            print("icm_sparse not found, create new one...")
            self.score_matrix_1 = self.recommender_1._compute_item_matrix_score(
                np.arange(self.num_users))
            user_score_matrix_1 = normalize(self.score_matrix_1, norm='max', axis=1)
            item_score_matrix_1 = normalize(self.score_matrix_1.tocsc(), norm='max', axis=0)
            self.score_matrix_1 = (item_score_matrix_1 * beta +
                                   user_score_matrix_1.tocsr() * (1 - beta))
            sps.save_npz(self.data_folder / 'Data/icm_sparse.npz', self.score_matrix_1)

        if Path(self.data_folder / 'Data/ucm_sparse.npz').is_file():
            self.score_matrix_2 = sps.load_npz(self.data_folder / 'Data/ucm_sparse.npz')
        else:
            print("ucm_sparse not found, create new one...")
            self.score_matrix_2 = self.recommender_2._compute_item_matrix_score(
                np.arange(self.num_users))
            user_score_matrix_2 = normalize(self.score_matrix_2, norm='max', axis=1)
            item_score_matrix_2 = normalize(self.score_matrix_2.tocsc(), norm='max', axis=0)
            self.score_matrix_2 = (item_score_matrix_2 * beta +
                                   user_score_matrix_2.tocsr() * (1 - beta))
            sps.save_npz(self.data_folder / 'Data/ucm_sparse.npz', self.score_matrix_2)
import numpy as np
import h5py
from DataManager.DataManager import DataManager
from Utilities.utilities import extract_NIFTI, extract_FigShare
import matplotlib
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
from DataManager.FeatureExtractor import *

dataManager = DataManager(
    r'C:/Users/eee/workspace_python/Image Reconstruction/data/', ['ADNI'])

'''
# Example to extract FigShare Dataset
dataManager = DataManager(r'C:/Users/eee/workspace_python/Image Reconstruction/data/', ['FigShare'])
params = {'database_name': 'fig_share_data',
          'dataset': 'FigShare',
          'feature_option': 'image_and_k_space',
          'img_shape': 128,
          'num_subjects': 'all'}

print(len(dataManager.dataCollection['FigShare']))
print(len(dataManager.data_splits['FigShare'][0]))
print(len(dataManager.data_splits['FigShare'][1]))
print(len(dataManager.data_splits['FigShare'][2]))

dataManager.compile_dataset(params)
'''

# Example to extract BRATS Dataset
# dataManager = DataManager(r'C:/Users/eee/workspace_python/Image Reconstruction/data/', ['BRATS'])
    verbose=False)

    MAP_per_k_valid = []
    recommender = HybridNorm3Recommender(urm_train)
    for alpha in tqdm(x_tick):
        recommender.fit(beta=alpha)
        result_dict, res_str = evaluator_valid.evaluateRecommender(recommender)
        MAP_per_k_valid.append(result_dict[10]["MAP"])
    return MAP_per_k_valid


data = DataManager()
my_input = []
for i in np.arange(num_test):
    urm_train, urm_test = split_train_leave_k_out_user_wise(
        data.get_urm(), temperature='normal')
    urm_train, urm_valid = split_train_leave_k_out_user_wise(
        urm_train, temperature='valid')
    # urm_test = data.create_test_warm_users(urm_test, threshold=5)
    urm_valid = data.create_test_warm_users(urm_valid, threshold=3)
    my_input.append([urm_train, urm_test, urm_valid, x_tick])

from multiprocessing import Pool
def __init__(self, urm_train, eurm=False):
    super(HybridNormOrigRecommender, self).__init__(urm_train)
    self.data_folder = Path(__file__).parent.parent.absolute()
    self.eurm = eurm
    self.num_users = urm_train.shape[0]

    data = DataManager()
    urm_train = check_matrix(urm_train.copy(), 'csr')

    icm_price, icm_asset, icm_sub, icm_all = data.get_icm()
    ucm_age, ucm_region, ucm_all = data.get_ucm()

    recommender_1 = ItemKNNCBFRecommender(urm_train, icm_all)
    recommender_1.fit(shrink=40, topK=20, feature_weighting='BM25')

    recommender_7 = UserKNNCBFRecommender(urm_train, ucm_all)
    recommender_7.fit(shrink=1777, topK=1998, similarity='tversky',
                      feature_weighting='BM25',
                      tversky_alpha=0.1604953616,
                      tversky_beta=0.9862348646)

    # recommender_1 = HybridGenRecommender(urm_train, eurm=self.eurm)
    # recommender_1.fit()

    # recommender_2 = ItemKNNCFRecommender(urm_train)
    # recommender_2.fit(shrink=30, topK=20)

    recommender_2 = ItemKNNCFRecommender(urm_train)
    recommender_2.fit(topK=5, shrink=500, feature_weighting='BM25',
                      similarity='tversky', normalize=False,
                      tversky_alpha=0.0, tversky_beta=1.0)

    recommender_3 = UserKNNCFRecommender(urm_train)
    recommender_3.fit(shrink=2, topK=600, normalize=True)

    # recommender_3 = UserKNNCFRecommender(urm_train)
    # recommender_3.fit(topK=697, shrink=1000, feature_weighting='TF-IDF', similarity='tversky', normalize=False,
    #                   tversky_alpha=1.0, tversky_beta=1.0)

    recommender_4 = RP3betaRecommender(urm_train)
    recommender_4.fit(topK=16, alpha=0.03374950051351756,
                      beta=0.24087176329409027, normalize_similarity=False)

    recommender_5 = SLIM_BPR_Cython(urm_train)
    recommender_5.fit(lambda_i=0.0926694015, lambda_j=0.001697250,
                      learning_rate=0.002391, epochs=65, topK=200)

    recommender_6 = ALSRecommender(urm_train)
    recommender_6.fit(alpha=5, iterations=40, reg=0.3)

    self.recommender_1 = recommender_1
    self.recommender_2 = recommender_2
    self.recommender_3 = recommender_3
    self.recommender_4 = recommender_4
    self.recommender_5 = recommender_5
    self.recommender_6 = recommender_6
    self.recommender_7 = recommender_7

    if self.eurm:
        if Path(self.data_folder / 'Data/uicm_orig_sparse.npz').is_file():
            print("Previous uicm_sparse found")
            # load the same file checked above
            self.score_matrix_1 = sps.load_npz(self.data_folder / 'Data/uicm_orig_sparse.npz')
        else:
            print("uicm_sparse not found, create new one...")
            self.score_matrix_1 = self.recommender_1._compute_item_matrix_score(
                np.arange(self.num_users))
            sps.save_npz(self.data_folder / 'Data/uicm_orig_sparse.npz', self.score_matrix_1)

        self.score_matrix_2 = self.recommender_2._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_3 = self.recommender_3._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_4 = self.recommender_4._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_5 = self.recommender_5._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_6 = self.recommender_6._compute_item_score(np.arange(self.num_users))

        # normalize each score matrix row-wise so they are comparable when mixed
        self.score_matrix_1 = normalize(self.score_matrix_1, norm='max', axis=1)
        self.score_matrix_2 = normalize(self.score_matrix_2, norm='max', axis=1)
        self.score_matrix_3 = normalize(self.score_matrix_3, norm='max', axis=1)
        self.score_matrix_4 = normalize(self.score_matrix_4, norm='max', axis=1)
        self.score_matrix_5 = normalize(self.score_matrix_5, norm='max', axis=1)
        self.score_matrix_6 = normalize(self.score_matrix_6, norm='max', axis=1)
class BuildingEnv(gym.Env):
    def __init__(self, env_config):
        self.DataManager = DataManager(env_config["building"], env_config["zones"],
                                       env_config["start"], env_config["end"],
                                       env_config["window"])
        self.start = env_config["start"]
        self.unix_start = self.start.timestamp() * 1e9
        self.end = env_config["end"]
        self.unix_end = self.end.timestamp() * 1e9
        self.window = env_config["window"]  # timedelta string

        self.building = env_config["building"]
        self.zones = env_config["zones"]
        self.lambda_val = env_config["lambda_val"]

        # assert self.zones == all zones in building. this is because of the thermal model needing other zone temperatures.
        self.curr_timestep = 0

        self.indoor_starting_temperatures = env_config[
            "indoor_starting_temperatures"]  # to get starting temperatures [last, current]
        self.outdoor_starting_temperature = env_config["outdoor_starting_temperature"]

        self.tstats = {}
        for iter_zone in self.zones:
            self.tstats[iter_zone] = Tstat(
                self.building, iter_zone,
                self.indoor_starting_temperatures[iter_zone]["current"],
                last_temperature=self.indoor_starting_temperatures[iter_zone]["last"])

        assert 60 * 60 % xsg.get_window_in_sec(self.window) == 0  # window divides an hour
        assert (self.end - self.start).total_seconds() % xsg.get_window_in_sec(
            self.window) == 0  # window divides the timeframe

        # the number of timesteps
        self.num_timesteps = int((self.end - self.start).total_seconds() /
                                 xsg.get_window_in_sec(self.window))

        self.unit = env_config["unit"]
        assert self.unit == "F"

        # all zones current and last temperature = 2*num_zones
        # building outside temperature -> make a class for how this behaves = 1
        # timestep -> do one hot encoding of week, day, hour, window \approx 4 + 7 + 24 + 60*60 / window
        low_bound = [32] * 2 * len(
            self.zones)  # we could use parametric temperature bounds... for now we will give negative infinite reward
        low_bound += [-100]  # for outside temperature we cannot guarantee much
        high_bound = [100] * 2 * len(self.zones)
        high_bound += [200]  # for outside temperature we cannot guarantee much
        low_bound += [0] * (
            self.num_timesteps + 1)  # total timesteps plus the final timestep which won't be executed
        high_bound += [1] * (
            self.num_timesteps + 1)  # total timesteps plus the final timestep which won't be executed

        self.observation_space = Box(low=np.array(low_bound),
                                     high=np.array(high_bound),
                                     dtype=np.float32)
        self.action_space = Tuple((Discrete(3), ) * len(self.zones))

        self.reset()

    def reset(self):
        self.curr_timestep = 0
        for iter_zone in self.zones:
            self.tstats[iter_zone].reset(
                self.indoor_starting_temperatures[iter_zone]["current"],
                last_temperature=self.indoor_starting_temperatures[iter_zone]["last"])
        self.outdoor_temperature = self.outdoor_starting_temperature
        return self.create_curr_obs()  # obs

    def step(self, action):
        self.curr_timestep += 1

        # if we reach the end time.
        if self.curr_timestep == self.num_timesteps:
            return self.create_curr_obs(), 0, True, {}

        # find what new temperature would be. use thermal model with uncertainty. use reset if exceeding
        # do_not_exceed. can't force it to take a different action anymore.

        # update temperatures
        for i, iter_zone in enumerate(self.zones):
            self.tstats[iter_zone].next_temperature(action[i])
        self.outdoor_temperature += np.random.normal(
        )  # TODO we should make a thermostat for the outdoor temperature.

        # check that we are in the safety temperature band
        for iter_zone in self.zones:
            curr_safety = self.DataManager.do_not_exceed[iter_zone].iloc[self.curr_timestep]
            if not (curr_safety["t_low"] <= self.tstats[iter_zone].temperature <= curr_safety["t_high"]):
                return self.create_curr_obs(), -INF_REWARD, True, {}  # TODO do we want to add info?

        # get reward by calling discomfort and consumption model
        reward = self.get_reward(action)

        return self.create_curr_obs(), reward, False, {}  # obs, reward, done, info

    def get_reward(self, action):
        """Get the reward for the given action with the current observation parameters."""
        # get discomfort across edge
        discomfort = {}
        for iter_zone in self.zones:
            # TODO Check this again since we are a timestep ahead and we want average comfortband and average occupancy over the edge.
            curr_comfortband = self.DataManager.comfortband[iter_zone].iloc[self.curr_timestep]
            curr_occupancy = self.DataManager.occupancy[iter_zone].iloc[self.curr_timestep]
            curr_tstat = self.tstats[iter_zone]
            average_edge_temperature = (curr_tstat.temperature + curr_tstat.last_temperature) / 2.

            discomfort[iter_zone] = self.DataManager.get_discomfort(
                self.building, average_edge_temperature,
                curr_comfortband["t_low"], curr_comfortband["t_high"],
                curr_occupancy)

        # Get consumption across edge
        price = 1  # self.prices.iloc[root.timestep] TODO also add right unit conversion, and duration
        consumption_cost = {
            self.zones[i]: price * self.DataManager.hvac_consumption[self.zones[i]][action[i]]
            for i in range(len(self.zones))
        }

        cost = ((1 - self.lambda_val) * (sum(consumption_cost.values()))) + \
               (self.lambda_val * (sum(discomfort.values())))
        return -cost

    def create_curr_obs(self):
        return self._create_obs(self.tstats, self.outdoor_temperature, self.curr_timestep)

    def _create_obs(self, tstats, outdoor_temperature, curr_timestep):
        obs = np.zeros(self.observation_space.low.shape)
        idx = 0
        for iter_zone in self.zones:
            obs[idx] = tstats[iter_zone].last_temperature
            idx += 1
            obs[idx] = tstats[iter_zone].temperature
            idx += 1
        obs[idx] = outdoor_temperature
        idx += 1
        obs[idx + curr_timestep] = 1  # one-hot encoding of the current timestep
        return obs
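# Sketch of driving BuildingEnv with a random policy. Every value in env_config
# is a placeholder, and Tstat, DataManager, and INF_REWARD are assumed to come
# from this project; this is illustrative, not the project's training loop.
import datetime
import pytz

env_config = {
    "building": "ciee",
    "zones": ["HVAC_Zone_Eastzone"],
    "start": datetime.datetime(2019, 1, 1, tzinfo=pytz.utc),
    "end": datetime.datetime(2019, 1, 2, tzinfo=pytz.utc),
    "window": "15m",
    "lambda_val": 0.5,
    "indoor_starting_temperatures": {
        "HVAC_Zone_Eastzone": {"current": 70.0, "last": 69.5}},
    "outdoor_starting_temperature": 60.0,
    "unit": "F",
}

env = BuildingEnv(env_config)
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random policy for illustration
    obs, reward, done, info = env.step(action)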