def checkDerivatives(theFunction, x, names=None, logg=False):
    """Compare analytical derivatives with finite difference approximations.

    The function is evaluated once at ``x`` to obtain its value, gradient
    and hessian. The gradient and the hessian are then each compared with
    a finite difference approximation, and the differences are returned.

    :param theFunction: A function object that takes a vector as an
        argument, and returns a tuple.

        - The first element of the tuple is the value of the
          function :math:`f`,
        - the second is the gradient of the function,
        - the third is the hessian.
    :type theFunction: function

    :param x: arguments of the function
    :type x: numpy.array

    :param names: the names of the entries of x (for reporting). If
        None and logg is True, the names x[0], x[1], ... are used.
    :type names: list(string)

    :param logg: if True, messages will be displayed.
    :type logg: bool

    :return: tuple f, g, h, gdiff, hdiff where

        - f is the value of the function at x,
        - g is the analytical gradient,
        - h is the analytical hessian,
        - gdiff is the difference between the analytical gradient
          and the finite difference approximation
        - hdiff is the difference between the analytical hessian
          and the finite difference approximation
    :rtype: float, numpy.array,numpy.array, numpy.array,numpy.array
    """
    value, gradient, hessian = theFunction(x)

    # --- Gradient: analytical versus finite differences ---
    gradient_fd = findiff_g(theFunction, x)
    gdiff = gradient - gradient_fd
    if logg:
        logger = msg.bioMessage()
        if names is None:
            names = [f'x[{i}]' for i in range(len(x))]
        logger.detailed('x\t\tGradient\tFinDiff\t\tDifference')
        for k, delta in enumerate(gdiff):
            label = names[k]
            logger.detailed(
                f'{label:15}\t{gradient[k]:+E}\t{gradient_fd[k]:+E}\t{delta:+E}'
            )

    # --- Hessian: analytical versus finite differences ---
    hessian_fd = findiff_H(theFunction, x)
    hdiff = hessian - hessian_fd
    outcome = (value, gradient, hessian, gdiff, hdiff)
    if not logg:
        return outcome

    # The logger and the names were prepared in the gradient report above.
    logger.detailed('Row\t\tCol\t\tHessian\tFinDiff\t\tDifference')
    for row in range(len(hdiff)):
        row_label = names[row]
        for col in range(len(hdiff)):
            logger.detailed(
                f'{row_label:15}\t{names[col]:15}\t{hessian[row,col]:+E}\t'
                f'{hessian_fd[row,col]:+E}\t{hdiff[row,col]:+E}'
            )
    return outcome
B_COST * SM_COST_SCALED  # NOTE(review): tail of a utility expression that starts before this chunk
# Utility of alternative 3: constant, Box-Cox transformed travel time
# (transformation parameter LAMBDA) and linear cost term.
V3 = ASC_CAR + \
     B_TIME * models.boxcox(CAR_TT_SCALED, LAMBDA) + \
     B_COST * CAR_CO_SCALED

# Associate utility functions with the numbering of alternatives
V = {1: V1, 2: V2, 3: V3}

# Associate the availability conditions with the alternatives
av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

# Definition of the model. This is the contribution of each
# observation to the log likelihood function.
logprob = models.loglogit(V, av, CHOICE)

# Define level of verbosity. The silent level is selected here; the
# commented calls below are the alternative levels offered by the logger.
logger = msg.bioMessage()
logger.setSilent()
#logger.setWarning()
#logger.setGeneral()
#logger.setDetailed()

# Create the Biogeme object from the database and the log likelihood
# expression, and name the model.
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = '08boxcox'

# Estimate the parameters and print the table of estimated parameters.
results = biogeme.estimate()
pandasResults = results.getEstimatedParameters()
print(pandasResults)
def __init__(self, name, pandasDatabase):
    """Constructor

    Stores the data frame, initializes the bookkeeping attributes
    (panel structure, random number generators, draws, expressions to
    check), and audits the data.

    :param name: name of the database.
    :type name: string

    :param pandasDatabase: data stored in a pandas data frame.
    :type pandasDatabase: pandas.DataFrame

    :raise biogemeError: if the audit of the data reports errors.
    """
    self.logger = msg.bioMessage()

    ## Name of the database. Used mainly for the file name when dumping data.
    self.name = name

    ## Pandas data frame containing the data.
    self.data = pandasDatabase
    ## Second reference to the same data frame object (no copy is made here).
    self.fullData = pandasDatabase

    ## self.variables is initialized by _generateHeaders()
    self.variables = None
    self._generateHeaders()

    ## Number of observations removed by the function Database.remove
    self.excludedData = 0

    ## Name of the column identifying the individuals in a panel
    ## data context. None if data is not panel.
    self.panelColumn = None

    ## map identifying the range of observations for each
    ## individual in a panel data context. None if data is not
    ## panel.
    self.individualMap = None
    self.fullIndividualMap = None

    ## Initialize the dictionary containing random number
    ## generators with a series of native generators.
    self._initNativeRandomNumberGenerators()

    ## Dictionary containing user defined random number
    ## generators. Defined by the function
    ## Database.setRandomNumberGenerators that checks that
    ## reserved keywords are not used. The element of the
    ## dictionary is a tuple with two elements: (0) the function
    ## generating the draws, and (1) a string describing the type of draws
    self.userRandomNumberGenerators = {}

    ## Number of draws generated by the function Database.generateDraws.
    ## Value 0 if this function is not called.
    self.numberOfDraws = 0

    ## Types of draws for Monte Carlo integration
    self.typesOfDraws = {}

    self._auditDone = False

    ## Draws for Monte-Carlo integration
    self.theDraws = None

    ## Availability expression to check
    self._avail = None

    ## Choice expression to check
    self._choice = None

    ## Expression to check
    self._expression = None

    # The audit runs last, once every attribute above exists.
    listOfErrors, listOfWarnings = self._audit()
    if listOfWarnings:
        self.logger.warning('\n'.join(listOfWarnings))
    if listOfErrors:
        # Errors are reported through the logger before the exception
        # is raised, so that they appear in the log as well.
        self.logger.warning('\n'.join(listOfErrors))
        raise excep.biogemeError('\n'.join(listOfErrors))
def run_simulation(data_file_directory_for_simulation, data_file_name_for_simulation,
                   output_directory_for_simulation, betas, household_income_limit):
    """Simulate the probability of telecommuting with a binary logit model.

    :author: Antonin Danalet, based on the example '01logit_simul.py'
        by Michel Bierlaire, EPFL, on biogeme.epfl.ch

    Two alternatives: work from home at least some times, or not.
    The simulated probabilities are appended to the person data, forced
    to "no telecommuting" for some groups, a random realisation is
    drawn, and the result is written to
    'persons_from_SynPop_with_probability_telecommuting.csv' in the
    output directory.

    :param data_file_directory_for_simulation: directory containing the
        input file. It is combined with the file name using the ``/``
        operator, so it must support it (e.g. pathlib.Path).
    :param data_file_name_for_simulation: name of the semicolon-separated
        input file describing the persons.
    :param output_directory_for_simulation: directory where Biogeme and
        this function write their output. Must support the ``/``
        operator (e.g. pathlib.Path).
    :param betas: values of the parameters, passed to
        ``biogeme.simulate`` as ``theBetaValues``.
    :param household_income_limit: threshold; the dummy
        ``hh_income_8000_or_less`` is 1 when ``hh_income`` is strictly
        below this value.
    """
    # Read the data. The separator must be passed by keyword: recent
    # versions of pandas no longer accept it positionally.
    df_persons = pd.read_csv(
        data_file_directory_for_simulation / data_file_name_for_simulation,
        sep=';',
    )
    database = db.Database('persons', df_persons)

    # The following statement allows you to use the names of the variable as Python variable.
    # The column names used below (education, sex, type_1, ...) are
    # resolved as module-level globals at run time.
    globals().update(database.variables)

    # Parameters of the model
    alternative_specific_constant = Beta('alternative_specific_constant', 0, None, None, 0)
    b_no_post_school_education = Beta('b_no_post_school_education', 0, None, None, 0)
    b_secondary_education = Beta('b_secondary_education', 0, None, None, 0)
    b_tertiary_education = Beta('b_tertiary_education', 0, None, None, 0)
    b_university = Beta('b_university', 0, None, None, 1)
    b_male = Beta('b_male', 0, None, None, 0)
    b_public_transport_connection_quality_na_home = Beta(
        'b_public_transport_connection_quality_na_home', 0, None, None, 0)
    b_public_transport_connection_quality_a_work = Beta(
        'b_public_transport_connection_quality_are_a_work', 0, None, None, 1)
    b_rural_work = Beta('b_rural_work', 0, None, None, 0)
    b_home_work_distance = Beta('b_home_work_distance', 0, None, None, 0)
    b_business_sector_agriculture = Beta('b_business_sector_agriculture', 0, None, None, 0)
    b_business_sector_production = Beta('b_business_sector_production', 0, None, None, 0)
    b_business_sector_wholesale = Beta('b_business_sector_wholesale', 0, None, None, 1)
    b_business_sector_retail = Beta('b_business_sector_retail', 0, None, None, 0)
    b_business_sector_gastronomy = Beta('b_business_sector_gastronomy', 0, None, None, 0)
    b_business_sector_finance = Beta('b_business_sector_finance', 0, None, None, 1)
    b_business_sector_services_fc = Beta('b_business_sector_services_fc', 0, None, None, 0)
    b_business_sector_other_services = Beta('b_business_sector_other_services', 0, None, None, 1)
    b_business_sector_others = Beta('b_business_sector_others', 0, None, None, 1)
    b_business_sector_non_movers = Beta('b_business_sector_non_movers', 0, None, None, 0)
    b_executives = Beta('b_executives', 0, None, None, 0)
    b_german = Beta('b_german', 0, None, None, 0)
    # This parameter is used in the utility below but was never defined
    # in this function, which raises a NameError unless a global of the
    # same name exists.
    # NOTE(review): declared as a free parameter (status 0) like the other
    # estimated coefficients; confirm that `betas` provides its value.
    b_nationality_ch_germany_france_italy_nw_e = Beta(
        'b_nationality_ch_germany_france_italy_nw_e', 0, None, None, 0)
    b_hh_income_8000_or_less = Beta('b_hh_income_8000_or_less', 0, None, None, 0)

    # Definition of new variables
    no_post_school_educ = education == 1
    secondary_education = education == 2
    tertiary_education = education == 3
    university = education == 4

    male = (sex == 1)

    public_transport_quality_NA_home = (public_transport_connection_quality_ARE_home == 5)
    public_transport_quality_A_work = (public_transport_connection_quality_ARE_work == 1)

    # Negative distances contribute 0; the distance is divided by 100000.
    home_work_distance = (home_work_crow_fly_distance * (home_work_crow_fly_distance >= 0.0) / 100000.0)

    business_sector_agriculture = type_1 == 1
    business_sector_retail = type_1 == 4
    business_sector_gastronomy = type_1 == 5
    business_sector_finance = type_1 == 6
    business_sector_production = type_1 == 2
    business_sector_wholesale = type_1 == 3
    business_sector_services_fC = type_1 == 7
    business_sector_other_services = type_1 == 8
    business_sector_others = type_1 == 9
    business_sector_non_movers = type_1 == 10

    german = language == 1

    nationality_switzerland = nation == 0
    nationality_germany_austria = nation == 1
    nationality_italy_vatican = nation == 2
    nationality_france_monaco_s_marino = nation == 3
    nationality_northwestern_europe = nation == 4
    nationality_eastern_europe = nation == 7

    hh_income_8000_or_less = hh_income < household_income_limit

    executives = (0 < position_in_bus) * (position_in_bus < 19)

    rural_work = urban_rural_typology_work == 3

    # Utility
    utility_function_telecommuting = (
        alternative_specific_constant
        + b_executives * executives
        + b_no_post_school_education * no_post_school_educ
        + b_secondary_education * secondary_education
        + b_tertiary_education * tertiary_education
        + b_university * university
        + b_male * male
        + b_public_transport_connection_quality_na_home * public_transport_quality_NA_home
        + b_public_transport_connection_quality_a_work * public_transport_quality_A_work
        + b_rural_work * rural_work
        + b_home_work_distance * home_work_distance
        + models.piecewiseFormula(age, [0, 20, 35, 75, 200])
        + b_business_sector_agriculture * business_sector_agriculture
        + b_business_sector_retail * business_sector_retail
        + b_business_sector_gastronomy * business_sector_gastronomy
        + b_business_sector_finance * business_sector_finance
        + b_business_sector_production * business_sector_production
        + b_business_sector_wholesale * business_sector_wholesale
        + b_business_sector_services_fc * business_sector_services_fC
        + b_business_sector_other_services * business_sector_other_services
        + b_business_sector_others * business_sector_others
        + b_business_sector_non_movers * business_sector_non_movers
        + b_german * german
        # The six nationality groups below share a single coefficient.
        + b_nationality_ch_germany_france_italy_nw_e * nationality_switzerland
        + b_nationality_ch_germany_france_italy_nw_e * nationality_germany_austria
        + b_nationality_ch_germany_france_italy_nw_e * nationality_italy_vatican
        + b_nationality_ch_germany_france_italy_nw_e * nationality_france_monaco_s_marino
        + b_nationality_ch_germany_france_italy_nw_e * nationality_northwestern_europe
        + b_nationality_ch_germany_france_italy_nw_e * nationality_eastern_europe
        + models.piecewiseFormula(work_percentage, [0, 90, 101])
        + b_hh_income_8000_or_less * hh_income_8000_or_less
    )
    utility_function_no_telecommuting = 0

    # Associate utility functions with the numbering of alternatives
    utility_functions_with_numbering_of_alternatives = {
        1: utility_function_telecommuting,  # Yes or sometimes
        3: utility_function_no_telecommuting,  # No
    }

    availability_conditions = {
        1: 1,  # Always available
        3: 1,  # Always available
    }

    # The choice model is a logit, with availability conditions
    prob_telecommuting = models.logit(utility_functions_with_numbering_of_alternatives,
                                      availability_conditions, 1)
    prob_no_telecommuting = models.logit(utility_functions_with_numbering_of_alternatives,
                                         availability_conditions, 3)

    simulate = {'Prob. telecommuting': prob_telecommuting,
                'Prob. no telecommuting': prob_no_telecommuting}

    # Create the Biogeme object
    biogeme = bio.BIOGEME(database, simulate)
    biogeme.modelName = 'logit_telecommuting_simul'

    # Define level of verbosity
    logger = msg.bioMessage()
    logger.setWarning()

    # The values of the parameters are supplied by the caller through
    # `betas`; they are not read from an estimation pickle file here.

    # Change the working directory, so that biogeme writes in the output
    # folder, and always go back to the previous working directory, even
    # if the simulation fails.
    standard_directory = os.getcwd()
    os.chdir(output_directory_for_simulation)
    try:
        results = biogeme.simulate(theBetaValues=betas)
    finally:
        os.chdir(standard_directory)
    df_persons = pd.concat([df_persons, results], axis=1)

    # Fix the probability of doing some home office to 0 (and the
    # probability of not doing it to 1) for:
    # - unemployed people (employed == 0),
    # - other people (employed == -99),
    # - apprentices (position_in_bus == 3): by definition, they don't work
    #   from home (because they were not asked in the MTMC).
    never_telecommuting = (
        (df_persons.employed == 0)
        | (df_persons.employed == -99)
        | (df_persons.position_in_bus == 3)
    )
    df_persons.loc[never_telecommuting, 'Prob. telecommuting'] = 0.0
    df_persons.loc[never_telecommuting, 'Prob. no telecommuting'] = 1.0

    # Add a realisation of the probability: one uniform draw per person,
    # compared with the probability of telecommuting.
    random_draws = np.random.rand(len(df_persons))
    df_persons['telecommuting_model'] = np.where(
        random_draws < df_persons['Prob. telecommuting'], 1, 0)

    # Save the file
    data_file_name = 'persons_from_SynPop_with_probability_telecommuting.csv'
    df_persons.to_csv(output_directory_for_simulation / data_file_name, sep=',', index=False)