def run_similarity_search(include_amen, amenities_list, regress_df, scaler, dict2, cat_pref_dict, preference_dict):
    """Build a preference-weighted feature space and fit a radius-NN model
    around a synthetic "query" listing described by ``dict2``.

    Parameters
    ----------
    include_amen : str
        ``'Y'`` to fold the query's amenities into the one-hot amenity columns.
    amenities_list : iterable of str
        Names of the one-hot amenity columns present in ``regress_df``.
    regress_df : pandas.DataFrame
        Listing feature matrix (numeric + one-hot columns plus the text
        columns ``ListingTitle``, ``Url``, ``Description``, ``Amenities``).
    scaler : sklearn scaler
        Refit in place on the full matrix (query row included) — NOTE(review):
        this refits the caller's scaler object; confirm that is intended.
    dict2 : dict
        Query listing attributes (``ListingTitle``, ``Area_Sqft``,
        ``Price_psf``, ``RentalRate``, ``NumBeds``, ``District``,
        ``Furnishing``, ``PropertyType``, ``Leasedur``, ``Amenities``).
    cat_pref_dict, preference_dict : dict
        Per-feature weights; each feature is rescaled into ``(0, weight)`` so
        higher-weight features dominate the nearest-neighbour distance.

    Returns
    -------
    tuple
        ``(fitted NearestNeighbors, weighted scaled DataFrame, query row as a
        Series with an added 'Summary' field, property_type column name)``.
    """
    QUERY_IDX = 9999  # sentinel index for the injected query row

    df = regress_df.copy()
    df.loc[QUERY_IDX] = [0] * len(df.columns)

    # Single .loc[row, col] assignments replace the original chained
    # indexing (df.Col.loc[9999] = ...), which is SettingWithCopy-prone
    # and not guaranteed to write back into df.
    for col in ('ListingTitle', 'Area_Sqft', 'Price_psf', 'RentalRate', 'NumBeds'):
        df.loc[QUERY_IDX, col] = dict2[col]

    # Names of the one-hot columns matching the query's categorical choices.
    district = 'District_D' + dict2['District']
    furnishing = 'Furnishing_' + dict2['Furnishing']
    property_type = 'PropertyType_' + dict2['PropertyType']
    lease_dur = 'Lease_' + dict2['Leasedur']

    # Flip each matching dummy column when it exists; a category absent from
    # the training frame is silently skipped (this replaces four bare
    # `except: pass` clauses that would also have swallowed unrelated errors).
    for dummy_col in (district, furnishing, property_type, lease_dur):
        if dummy_col in df.columns:
            df.loc[QUERY_IDX, dummy_col] = 1

    if include_amen == 'Y':
        df.loc[QUERY_IDX, 'Amenities'] = dict2['Amenities']
        for amenity in amenities_list:
            # Substring membership against the query's amenities text/list.
            if amenity in df.loc[QUERY_IDX, 'Amenities']:
                df.loc[QUERY_IDX, amenity] = 1

    # Snapshot the query row (pre-scaling) and attach a readable summary.
    instance = df.loc[QUERY_IDX].copy()
    instance['Summary'] = ','.join([property_type.split('_')[1],
                                    district.split('_')[1],
                                    furnishing.split('_')[1],
                                    lease_dur.split('_')[1]])

    # Text columns cannot be scaled; drop them before fitting.
    df.drop(['ListingTitle', 'Url', 'Description', 'Amenities'], axis=1, inplace=True)
    scaled = scaler.fit_transform(df)
    df_scaled = pd.DataFrame(data=scaled, columns=df.columns, index=df.index)

    # Re-scale each preferred feature into (0, weight) so the user's
    # preferences weight the distance metric.
    scaler_second = MinMaxScaler()
    for feature in preference_dict:
        scaler_second.feature_range = (0, preference_dict[feature])
        df_scaled[feature] = scaler_second.fit_transform(
            np.array(df_scaled[feature]).reshape(-1, 1))
    for feature in cat_pref_dict:
        scaler_second.feature_range = (0, cat_pref_dict[feature])
        # Categorical preferences apply to every dummy column whose name
        # contains the category prefix (e.g. 'District_').
        for column in df.columns:
            if feature in column:
                df_scaled[column] = scaler_second.fit_transform(
                    np.array(df_scaled[column]).reshape(-1, 1))

    knn_filtering = NearestNeighbors(radius=2)
    knn_filtering.fit(df_scaled)
    return knn_filtering, df_scaled, instance, property_type
Example #2
0
File: plot.py  Project: bmorris3/HELA
def _min_max_scaler(ranges, feature_range=(0, 100)):
    """Build a pre-fitted ``MinMaxScaler`` from explicit per-feature ranges.

    Parameters
    ----------
    ranges : ndarray of shape (n_features, 2)
        Column 0 holds each feature's minimum, column 1 its maximum.
    feature_range : tuple of (min, max), default (0, 100)
        Target interval for the scaled output.

    Returns
    -------
    MinMaxScaler
        A scaler whose fitted attributes are set directly, so ``transform``
        works without ever calling ``fit``.
    """
    res = MinMaxScaler()
    res.feature_range = feature_range
    res.data_min_ = ranges[:, 0]
    res.data_max_ = ranges[:, 1]
    res.data_range_ = res.data_max_ - res.data_min_
    res.scale_ = (feature_range[1] - feature_range[0]) / res.data_range_
    # sklearn's transform computes X * scale_ + min_ with
    # min_ = feature_range[0] - data_min_ * scale_.  The original
    # "-scale_ * data_min_" dropped feature_range[0] and was therefore only
    # correct when the target interval starts at 0.
    res.min_ = feature_range[0] - res.data_min_ * res.scale_
    res.n_samples_seen_ = 1
    return res
def genetic_programming(pidata):
    """Fit a symbolic-regression model expressing PI1 from PI2..PI7.

    Runs only when the module-level flag ``plot_gp`` is truthy.  The data in
    ``pidata`` (presumably a DataFrame with columns PI1..PI7 — confirm against
    the caller) is min-max scaled into (0.1, 1), split into train/test sets,
    and fed to gplearn's ``SymbolicRegressor``.  The evolved program is
    converted to a SymPy expression, printed with its test score, evaluated
    row-by-row on the (re-normalised) original data, and plotted on log-log
    axes against the normalised PI1.

    Relies on module-level names not visible in this block: ``plot_gp``,
    ``np``, ``plt``, ``make_function`` and ``SymbolicRegressor`` (gplearn).
    """
    if plot_gp:
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import MinMaxScaler
        from sympy import init_printing

        init_printing(use_latex=True, forecolor="White")

        #    Y = np.array(pidata['PI1'])
        #    X = np.array(pidata[['PI2','PI3','PI4','PI5','PI6','PI7']])

        # Scale all columns jointly into (0.1, 1); column 0 (PI1) becomes the
        # target, the remaining columns the features.
        XY = np.array(pidata)
        min_max_scaler = MinMaxScaler()
        min_max_scaler.feature_range = (0.1, 1)
        XY_train_minmax = min_max_scaler.fit_transform(XY)

        Y = XY_train_minmax[:, 0]
        X = XY_train_minmax[:, 1:]

        X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                            Y,
                                                            test_size=0.1)

        #    xm = np.min(X_train[:,2])
        #    xM = np.max(X_train[:,2])
        ##    X_train[:,2] = (X_train[:,2] - xm) / (xM-xm)
        ##    X_train[:,2] *= 1e-12
        ##    X_train[:,4] *= 1e12
        #
        #    xm = np.min(X_train[:,4])
        #    xM = np.max(X_train[:,4])
        ##    X_train[:,4] = (X_train[:,4] - xm) / (xM-xm)

        # Overflow-safe power primitive: returns 0 wherever |x1**x2| >= 100.
        def _powf(x1, x2):
            with np.errstate(over='ignore'):
                return np.where(np.abs(x1**x2) < 100, x1**x2, 0.)

        powf = make_function(function=_powf, name='powf', arity=2)

        # NOTE(review): powf is built above but never added to function_set,
        # so the regressor cannot actually use it — confirm whether that is
        # intentional.
        function_set = ['add', 'sub', 'mul', 'div', 'neg', 'inv']

        est_gp = SymbolicRegressor(
            population_size=5000,
            generations=20,
            #                               tournament_size=50,
            stopping_criteria=0.01,
            const_range=(-10, 10),
            init_depth=(2, 10),
            function_set=function_set,
            #                               p_crossover=0.7,
            #                               p_subtree_mutation=0.05,
            #                               p_hoist_mutation=0.1,
            #                               p_point_mutation=0.1,
            #                               max_samples=0.9,
            verbose=1,
            #                               feature_names = ('\Pi_2','\Pi_3','\Pi_4','\Pi_5','\Pi_6','\Pi_7'),
            #                               parsimony_coefficient=0.01,
            random_state=0,
            n_jobs=1)
        est_gp.fit(X_train, Y_train)

        from sympy import symbols, Add, Mul, Lambda, sympify
        import sympy as sp

        # Map gplearn's primitive names to SymPy callables so the evolved
        # program string can be parsed into a symbolic expression.
        x, y = symbols('x y')
        loc = {
            "add": Add,
            "mul": Mul,
            "sub": Lambda((x, y), x - y),
            "div": Lambda((x, y), x / y),
            "sqrt": Lambda(x, sp.sqrt(sp.Abs(x))),
            "neg": Lambda(x, -x),
            "inv": Lambda(x, 1 / x),
            "powf": Lambda((x, y), x**y),
            "log": Lambda(x, sp.log(x)),
            "abs": Lambda(x, sp.Abs(x))
        }
        exp = sympify(est_gp._program, locals=loc)

        score_gp = est_gp.score(X_test, Y_test)
        print(exp)
        print(score_gp)

        # Evaluate the symbolic expression on every row, re-normalising each
        # PI column to [0, 1] (NOTE(review): this differs from the (0.1, 1)
        # scaling used for training — confirm the mismatch is intended).
        fres = []
        for row in pidata.iterrows():
            drow = row[1]
            X0 = (drow['PI2'] - np.min(pidata[['PI2']])) / (
                np.max(pidata[['PI2']]) - np.min(pidata[['PI2']]))
            X1 = (drow['PI3'] - np.min(pidata[['PI3']])) / (
                np.max(pidata[['PI3']]) - np.min(pidata[['PI3']]))
            X2 = (drow['PI4'] - np.min(pidata[['PI4']])) / (
                np.max(pidata[['PI4']]) - np.min(pidata[['PI4']]))
            X3 = (drow['PI5'] - np.min(pidata[['PI5']])) / (
                np.max(pidata[['PI5']]) - np.min(pidata[['PI5']]))
            X4 = (drow['PI6'] - np.min(pidata[['PI6']])) / (
                np.max(pidata[['PI6']]) - np.min(pidata[['PI6']]))
            X5 = (drow['PI7'] - np.min(pidata[['PI7']])) / (
                np.max(pidata[['PI7']]) - np.min(pidata[['PI7']]))
            tmp = exp.subs({
                'X0': X0,
                'X1': X1,
                'X2': X2,
                'X3': X3,
                'X4': X4,
                'X5': X5
            })
            fres.append(tmp)
        fres = np.array(fres)
        # Plot predicted vs. normalised observed PI1, with a y = x reference.
        dPi = np.max(pidata[['PI1']]) - np.min(pidata[['PI1']])
        plt.loglog(fres, (pidata[['PI1']] - np.min(pidata[['PI1']])) / dPi,
                   'ko')
        #    plt.xlim([0.5,1e5])
        #    plt.ylim([0.5,1e5])
        plt.loglog(np.logspace(0, 5), np.logspace(0, 5), 'k--')
Example #4
0
File: functions.py  Project: derigod/ab
def scale(dataframe, scale=(0, 1)):
    """Min-max scale every column of *dataframe* into the interval *scale*.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Numeric frame to rescale.
    scale : tuple of (min, max), default (0, 1)
        Target range passed to ``MinMaxScaler``.

    Returns
    -------
    pandas.DataFrame
        Scaled frame with the original column names, a fresh RangeIndex
        (the input index is not preserved), and any rows containing NaN
        dropped.
    """
    columns = dataframe.columns
    # Pass feature_range through the constructor (the documented API) rather
    # than mutating the attribute after construction.
    scaler = MinMaxScaler(feature_range=scale)
    scaled = scaler.fit_transform(dataframe)
    return pd.DataFrame(scaled, columns=columns).dropna()