Пример #1
0
def distance(long1, lat1, long2, lat2):
    """
    Haversine great-circle distance in kilometres between two points.

    Args:
        long1, lat1: longitude/latitude of the first point, in degrees.
        long2, lat2: longitude/latitude of the second point, in degrees.

    Returns:
        Distance in kilometres (mean Earth radius 6371 km).
    """
    radius = 6371
    diff_lat = radians(lat2 - lat1)
    diff_long = radians(long2 - long1)
    # Bug fix: lat1/lat2 must be converted to radians before cos();
    # the original passed raw degrees, skewing the longitude term.
    a = sin(diff_lat / 2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(diff_long / 2)**2
    c = 2 * atan2(a**0.5, (1 - a)**0.5)
    return radius * c
def km(lat1, lon1, lat2, lon2):
    """Great-circle (haversine) distance in kilometres between two
    latitude/longitude points given in degrees."""
    earth_radius_km = 6371
    # deg2rad is expected to be defined elsewhere in this module.
    delta_lat = deg2rad(lat2 - lat1)
    delta_lon = deg2rad(lon2 - lon1)
    haversine = (sin(delta_lat / 2) * sin(delta_lat / 2)
                 + cos(deg2rad(lat1)) * cos(deg2rad(lat2))
                 * sin(delta_lon / 2) * sin(delta_lon / 2))
    central_angle = 2 * atan2(sqrt(haversine), sqrt(1 - haversine))
    return earth_radius_km * central_angle
Пример #3
0
def complicated_arithmetic_operation(df):
    """Evaluate a haversine-like arithmetic expression over the trip
    coordinate columns and collect its mean (benchmark-style workload;
    nothing is returned)."""
    lon_pickup = df['pickup_longitude']
    lat_pickup = df['pickup_latitude']
    lon_dropoff = df['dropoff_longitude']
    lat_dropoff = df['dropoff_latitude']

    # Operation order kept exactly as the benchmark defines it.
    inner = ((f.cos(lon_pickup) * np.pi / 180)
             * (f.cos(lon_dropoff) * np.pi / 180)
             * (f.sin(lat_dropoff - lat_pickup) / 2 * np.pi / 180)**2)
    expression = 2 * f.atan2(f.sqrt(inner), f.sqrt(1 - inner))
    df.select(f.mean(expression)).collect()
Пример #4
0
def complementary_filter(ds, freq: int = 16, accelerometer_x: str = "accelerometer_x",
                         accelerometer_y: str = "accelerometer_y", accelerometer_z: str = "accelerometer_z",
                         gyroscope_x: str = "gyroscope_x", gyroscope_y: str = "gyroscope_y",
                         gyroscope_z: str = "gyroscope_z"):
    """
    Compute a complementary filter over gyroscope and accelerometer data.

    Args:
        ds (DataStream): Non-windowed/grouped dataframe
        freq (int): sampling frequency of accel/gyro. Assumed equal for both sensors.
        accelerometer_x (str): name of the column
        accelerometer_y (str): name of the column
        accelerometer_z (str): name of the column
        gyroscope_x (str): name of the column
        gyroscope_y (str): name of the column
        gyroscope_z (str): name of the column
    """
    dt = 1.0 / freq  # sample period in seconds
    M_PI = math.pi
    hpf = 0.90  # weight of the gyro-integrated (high-pass) term
    lpf = 0.10  # weight of the accelerometer-derived (low-pass) term

    window = Window.partitionBy(ds._data['user']).orderBy(ds._data['timestamp'])

    # One pass per output axis:
    # (temp accel-angle column, output column, gyro column,
    #  atan2 numerator accel column (negated), atan2 denominator accel column)
    axes = [
        ("thetaX_accel", "roll", gyroscope_x, accelerometer_z, accelerometer_y),
        ("thetaY_accel", "pitch", gyroscope_y, accelerometer_x, accelerometer_z),
        ("thetaZ_accel", "yaw", gyroscope_z, accelerometer_y, accelerometer_x),
    ]

    data = ds._data
    for theta_col, out_col, gyro_col, num_col, den_col in axes:
        # Accelerometer angle in degrees, low-pass weighted.
        data = data.withColumn(
            theta_col,
            ((F.atan2(-F.col(num_col), F.col(den_col)) * 180 / M_PI)) * lpf)
        # Blend previous-sample angle plus integrated gyro rate with the
        # accelerometer estimate, then drop the temporary column.
        data = data.withColumn(
            out_col,
            (F.lag(theta_col).over(window) + F.col(gyro_col) * dt) * hpf + F.col(theta_col)
        ).drop(theta_col)

    return DataStream(data=data.dropna(), metadata=Metadata())
Пример #5
0
def coords2elem(df):
    """Derive orbital-element columns (eccentricity e, mean anomaly M,
    phase wt) from coordinate/velocity columns and return a trimmed
    selection of element columns."""
    df = df.withColumn('Omega', Omega_udf(df['a']))
    df = df.withColumn('Kappa', Kappa_udf(df['a']))
    # Eccentricity components from radial velocity and radius.
    df = df.withColumn('e_sin_M', df['vr'] / (df['a'] * df['Kappa']))
    df = df.withColumn('e_cos_M', 1.0 - df['r'] / df['a'])
    ecc_squared = df['e_sin_M']**2 + df['e_cos_M']**2
    df = df.withColumn('e', sqrt(ecc_squared))
    df = df.withColumn('M', atan2(df['e_sin_M'], df['e_cos_M']))
    phase = df['t'] - (df['Omega'] / df['Kappa']) * (df['M'] +
                                                     2.0 * df['e_sin_M'])
    df = df.withColumn('wt', adjust_angle_udf(phase))
    keep = ['id', 'timestep', 'streamline', 'a', 'e', 'M', 'wt']
    return df.select(keep)
def calculate_bearing_degrees(latitude_1, longitude_1, latitude_2,
                              longitude_2):
    """
    Initial great-circle bearing (forward azimuth) from point 1 to point 2
    as a Spark Column expression.

    Args:
        latitude_1, longitude_1: coordinates of the start point, in degrees.
        latitude_2, longitude_2: coordinates of the end point, in degrees.

    Returns:
        Column with the bearing in degrees, in (-180, 180].
    """
    diff_longitude = F.radians(longitude_2 - longitude_1)

    r_latitude_1 = F.radians(latitude_1)
    r_latitude_2 = F.radians(latitude_2)

    # Standard forward-azimuth formula:
    #   theta = atan2(sin(dlon)*cos(lat2),
    #                 cos(lat1)*sin(lat2) - sin(lat1)*cos(lat2)*cos(dlon))
    # Bug fixes vs. the original: y used cos(longitude_2) instead of
    # cos(latitude_2), and the atan2 arguments were swapped.
    y = F.sin(diff_longitude) * F.cos(r_latitude_2)
    x = (F.cos(r_latitude_1) * F.sin(r_latitude_2) -
         F.sin(r_latitude_1) * F.cos(r_latitude_2) * F.cos(diff_longitude))

    return F.degrees(F.atan2(y, x))
    def distance(lat, lon, lat2, lon2):
        '''
        Haversine great-circle distance between two coordinates.

        Parameters
        ----------
        lat: latitude of the first location, signed decimal degrees
        lon: longitude of the first location, signed decimal degrees
        lat2: latitude of the second location, signed decimal degrees
        lon2: longitude of the second location, signed decimal degrees

        Returns
        -------
        Distance between the two points (Earth radius R = 6371 km)

        Notes
        -----
        Haversine formula
        a = sin²(Δφ/2) + cos φ1 ⋅ cos φ2 ⋅ sin²(Δλ/2)
        c = 2 ⋅ atan2( √a, √(1−a) )
        d = R ⋅ c
        where φ is latitude, λ is longitude.
        '''

        earth_radius = 6371
        dlat = lat - lat2
        dlon = lon - lon2
        hav = (pow(sin(toRadians(dlat / 2)), 2)
               + cos(toRadians(lat)) * cos(toRadians(lat2))
               * pow(sin(toRadians(dlon / 2)), 2))
        central_angle = 2 * atan2(pow(hav, 0.5), pow(1 - hav, 0.5))
        return earth_radius * central_angle
Пример #8
0
def join_and_analyze(df_poi, df_sample):
    """ Joins the Requests data and POI list data, calculates the haversine
    distance to each POI center, and retains, per request, the record with
    the minimum distance to a POI center.

    Parameters: df_poi: POI List dataframe
                df_sample: Requests dataframe

    """
    # No shared join key exists, so pair every request with every POI
    # via a cartesian product.
    paired = df_sample.crossJoin(df_poi)
    # Caching to memory
    paired.cache()

    # Haversine terms: latitude half-angle term plus the longitude term.
    lat_term = F.pow(F.sin(F.radians(F.col("POI_Latitude") - F.col("Latitude")) / 2), 2)
    lon_term = (F.cos(F.radians(F.col("Latitude")))
                * F.cos(F.radians(F.col("POI_Latitude")))
                * F.pow(F.sin(F.radians(F.col("POI_Longitude") - F.col("Longitude")) / 2), 2))
    paired = paired.withColumn("a", (lat_term + lon_term))
    paired = paired.withColumn(
        "distance",
        F.atan2(F.sqrt(F.col("a")), F.sqrt(-F.col("a") + 1)) * 2 * 6371)

    # Window over each request id: keep only the closest POI row(s).
    w = Window.partitionBy('_ID')
    paired = (paired.withColumn('min', F.min('distance').over(w))
              .where(F.col('distance') == F.col('min'))
              .drop('min').drop('a'))

    return paired
Пример #9
0
def distance_measure():
    """Haversine central-angle column: atan2(sqrt(a), sqrt(1 - a)) computed
    over the precomputed 'distance_inter' column."""
    inter = col("distance_inter")
    return atan2(sqrt(inter), sqrt(1 - inter))
                          	              (col('Start_Longitude') > -80) &\
                                 	      (col('Start_Longitude') < -70) &\
                                	      (col('Start_Latitude') > 40) &\
                               		      (col('Start_Latitude') < 46) &\
                             		      (col('End_Longitude') > -80) &\
                             		      (col('End_Longitude') < -70) &\
                             		      (col('End_Latitude') > 40) &\
                               		      (col('End_Latitude') < 46) &\
                                              (col('Cost') > 0))

# Derive trip duration (minutes) and haversine distance, then drop all
# intermediate and source coordinate/time columns.
yellow_tripdata_1m = (
    yellow_tripdata_1m
    .withColumn("Duration", ((unix_timestamp(col("End_Datetime")) - unix_timestamp(col("Start_Datetime"))) / 60))
    .withColumn("Diff_Longitude", col("End_Longitude") - col("Start_Longitude"))
    .withColumn("Diff_Latitude", col("End_Latitude") - col("Start_Latitude"))
    .withColumn("a", F.pow(F.sin(col("Diff_Latitude") / 2), 2)
                + F.cos(col("Start_Latitude")) * F.cos(col("End_Latitude")) * F.pow(F.sin(col("Diff_Longitude") / 2), 2))
    .withColumn("Distance", 2 * 6371 * F.atan2(F.sqrt(col("a")), F.sqrt(1.0 - col("a"))))
    .drop("Diff_Longitude").drop("Diff_Latitude").drop("Start_Datetime")
    .drop("End_Datetime").drop("Start_Longitude").drop("Start_Latitude")
    .drop("End_Longitude").drop("End_Latitude").drop("a").drop("Cost")
)

# Attach vendor information and expose the result to Spark SQL.
yellow_trip_joined = yellow_tripdata_1m.join(yellow_tripvendors_1m, "ID", "inner").drop("ID")
yellow_trip_joined.createOrReplaceTempView("yellow_trip_joined")

# For every vendor keep the trip with the maximum distance.
window = Window.partitionBy("Vendor")
res = (yellow_trip_joined
       .withColumn("Max_Distance", F.max("Distance").over(window))
       .where(col("Distance") == col("Max_Distance"))
       .drop("Max_Distance")
       .select(["Vendor", "Distance", "Duration"]))

res.show()
print("Time of Q2 using SQL with parquet is: %s seconds" % (time.time() - start_time_parquet))
Пример #11
0
def tocolumns(df, expr):
    """
    Recursively translate a histbook expression tree into a PySpark Column.

    Args:
        df: Spark DataFrame whose columns back the names in the expression.
        expr: a histbook.expr node (Const, Name, Predicate, or Call).

    Returns:
        A pyspark.sql.Column equivalent to the expression.

    Raises:
        NotImplementedError: for functions with no Spark counterpart (FIXMEs).
        AssertionError: for unrecognized expression node types.
    """
    import pyspark.sql.functions as fcns

    if isinstance(expr, histbook.expr.Const):
        return fcns.lit(expr.value)

    elif isinstance(expr, (histbook.expr.Name, histbook.expr.Predicate)):
        return df[expr.value]

    elif isinstance(expr, histbook.expr.Call):
        if expr.fcn == "abs" or expr.fcn == "fabs":
            return fcns.abs(tocolumns(df, expr.args[0]))
        elif expr.fcn == "max" or expr.fcn == "fmax":
            return fcns.greatest(*[tocolumns(df, x) for x in expr.args])
        elif expr.fcn == "min" or expr.fcn == "fmin":
            return fcns.least(*[tocolumns(df, x) for x in expr.args])
        elif expr.fcn == "arccos":
            return fcns.acos(tocolumns(df, expr.args[0]))
        elif expr.fcn == "arccosh":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "arcsin":
            return fcns.asin(tocolumns(df, expr.args[0]))
        elif expr.fcn == "arcsinh":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "arctan2":
            return fcns.atan2(tocolumns(df, expr.args[0]),
                              tocolumns(df, expr.args[1]))
        elif expr.fcn == "arctan":
            return fcns.atan(tocolumns(df, expr.args[0]))
        elif expr.fcn == "arctanh":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "ceil":
            return fcns.ceil(tocolumns(df, expr.args[0]))
        elif expr.fcn == "copysign":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "cos":
            return fcns.cos(tocolumns(df, expr.args[0]))
        elif expr.fcn == "cosh":
            return fcns.cosh(tocolumns(df, expr.args[0]))
        elif expr.fcn == "rad2deg":
            # Scale by 180/pi directly rather than relying on fcns.degrees.
            return tocolumns(df, expr.args[0]) * (180.0 / math.pi)
        elif expr.fcn == "erfc":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "erf":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "exp":
            return fcns.exp(tocolumns(df, expr.args[0]))
        elif expr.fcn == "expm1":
            return fcns.expm1(tocolumns(df, expr.args[0]))
        elif expr.fcn == "factorial":
            return fcns.factorial(tocolumns(df, expr.args[0]))
        elif expr.fcn == "floor":
            return fcns.floor(tocolumns(df, expr.args[0]))
        elif expr.fcn == "fmod":
            # numpy.fmod keeps the dividend's sign, unlike Spark's %, so it
            # is rejected rather than mistranslated.
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "gamma":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "hypot":
            return fcns.hypot(tocolumns(df, expr.args[0]),
                              tocolumns(df, expr.args[1]))
        elif expr.fcn == "isinf":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "isnan":
            return fcns.isnan(tocolumns(df, expr.args[0]))
        elif expr.fcn == "lgamma":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "log10":
            return fcns.log10(tocolumns(df, expr.args[0]))
        elif expr.fcn == "log1p":
            return fcns.log1p(tocolumns(df, expr.args[0]))
        elif expr.fcn == "log":
            return fcns.log(tocolumns(df, expr.args[0]))
        elif expr.fcn == "pow":
            return fcns.pow(tocolumns(df, expr.args[0]),
                            tocolumns(df, expr.args[1]))
        elif expr.fcn == "deg2rad":
            return tocolumns(df, expr.args[0]) * (math.pi / 180.0)
        elif expr.fcn == "sinh":
            return fcns.sinh(tocolumns(df, expr.args[0]))
        elif expr.fcn == "sin":
            return fcns.sin(tocolumns(df, expr.args[0]))
        elif expr.fcn == "sqrt":
            return fcns.sqrt(tocolumns(df, expr.args[0]))
        elif expr.fcn == "tanh":
            return fcns.tanh(tocolumns(df, expr.args[0]))
        elif expr.fcn == "tan":
            return fcns.tan(tocolumns(df, expr.args[0]))
        elif expr.fcn == "trunc":
            raise NotImplementedError(
                expr.fcn)  # FIXME (fcns.trunc is for dates)
        elif expr.fcn == "xor":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "conjugate":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "exp2":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "heaviside":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "isfinite":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "left_shift" and isinstance(expr.args[1],
                                                     histbook.expr.Const):
            return fcns.shiftLeft(tocolumns(df, expr.args[0]),
                                  expr.args[1].value)
        elif expr.fcn == "log2":
            return fcns.log2(tocolumns(df, expr.args[0]))
        elif expr.fcn == "logaddexp2":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "logaddexp":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "mod":
            # Bug fix: the original also tested "fmod" here, but that case
            # was unreachable — "fmod" is already rejected above.
            return tocolumns(df, expr.args[0]) % tocolumns(df, expr.args[1])
        elif expr.fcn == "right_shift" and isinstance(expr.args[1],
                                                      histbook.expr.Const):
            return fcns.shiftRight(tocolumns(df, expr.args[0]),
                                   expr.args[1].value)
        elif expr.fcn == "rint":
            return fcns.rint(tocolumns(df, expr.args[0]))
        elif expr.fcn == "sign":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "where":
            return fcns.when(tocolumns(df, expr.args[0]),
                             tocolumns(df, expr.args[1])).otherwise(
                                 tocolumns(df, expr.args[2]))
        elif expr.fcn == "numpy.equal":
            return tocolumns(df, expr.args[0]) == tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.not_equal":
            return tocolumns(df, expr.args[0]) != tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.less":
            return tocolumns(df, expr.args[0]) < tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.less_equal":
            return tocolumns(df, expr.args[0]) <= tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.isin":
            # FIXME: Python `in` on a Column does not build an isin()
            # expression; this likely needs Column.isin with literal values.
            return tocolumns(df, expr.args[0]) in tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.logical_not":
            return ~tocolumns(df, expr.args[0])
        elif expr.fcn == "numpy.add":
            return tocolumns(df, expr.args[0]) + tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.subtract":
            return tocolumns(df, expr.args[0]) - tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.multiply":
            return tocolumns(df, expr.args[0]) * tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.true_divide":
            return tocolumns(df, expr.args[0]) / tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.logical_or":
            return tocolumns(df, expr.args[0]) | tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.logical_and":
            return tocolumns(df, expr.args[0]) & tocolumns(df, expr.args[1])
        else:
            raise NotImplementedError(expr.fcn)

    else:
        raise AssertionError(expr)
Пример #12
0
from pyspark.sql import functions as F

# PSF second-moment column names (i-band) used to build the ellipticity.
Q11 = "IxxPSF_i"
Q22 = "IyyPSF_i"
Q12 = "IxyPSF_i"

# Pre-compute the denominator Q11 + Q22 (trace of the moment matrix).
df_shear = df.withColumn("denom", F.col(Q11) + F.col(Q22))
# Real and imaginary parts of the ellipticity/shear estimator.
df_shear = df_shear.withColumn("R_E", (F.col(Q11) - F.col(Q22)) /
                               F.col('denom')).withColumn(
                                   "I_E", (2 * F.col(Q12)) / F.col('denom'))
# Convert to amplitude and phase.
# NOTE(review): phase_E is atan2(R_E, I_E); the usual complex-phase
# convention is atan2(imag, real) — confirm the intended argument order.
df_shear = df_shear.withColumn("amp_E",
                               F.hypot(F.col("R_E"), F.col("I_E"))).withColumn(
                                   "phase_E",
                                   F.atan2(F.col("R_E"), F.col("I_E")))
df_shear.select("R_E", "I_E", "amp_E", "phase_E").show(5)

# In[63]:

# Average amp_E per HEALPix pixel and render a gnomonic sky map.
var = "amp_E"
var_sys = "avg(" + var + ")"
df_map = df_shear.groupBy("ipix").mean(var)
df_map.describe([var_sys]).show()
dfp = df_map.toPandas()
# Fill a full-sky HEALPix array; pixels with no data stay at 0.
map_e = np.zeros(hp.nside2npix(nside))
map_e[dfp['ipix'].values] = dfp[var_sys].values
hp.gnomview(map_e,
            rot=[55, -29.8],
            reso=hp.nside2resol(nside, arcmin=True),
            title=var_sys)
Пример #13
0
# Haversine setup: longitude difference in radians between the candidate
# "near" station and the start station.
# NOTE(review): 'latitude_distance' is consumed below but computed outside
# this excerpt — verify it is defined upstream.
df_join = df_join.withColumn(
    'longitude_distance',
    functions.radians(over_station_coord['near_longitude']) -
    functions.radians(short_station_coord['start_longitude']))

# Haversine intermediate: a = sin^2(dlat) + cos(lat1)*cos(lat2)*sin^2(dlon).
df_join = df_join.withColumn(
    'a',
    (pow(functions.sin('latitude_distance'), 2) +
     functions.cos(functions.radians(short_station_coord['start_latitude'])) *
     functions.cos(functions.radians(over_station_coord['near_latitude'])) *
     (pow(functions.sin('longitude_distance'), 2))))

# Great-circle distance in km (Earth radius 6373 km).
df_join = df_join.withColumn(
    'distance',
    6373 * 2 * functions.atan2(sqrt(df_join['a']), sqrt(1 - df_join['a'])))

# distance less than 3 km
#df_join = df_join.filter(df_join['distance'] < 3)

df_join = df_join.select('date', 'hour', 'start_station_name',
                         'near_station_name', 'distance')

# Keep one row per (date, hour, station pair).
df_join = df_join.dropDuplicates(
    ['date', 'hour', 'start_station_name', 'near_station_name'])

# Nearest stations first; the distance column itself is dropped from the
# final projection.
df_join = df_join.orderBy('date', 'hour',
                          'distance').select('date', 'hour',
                                             'start_station_name',
                                             'near_station_name')
Пример #14
0
    if showHolding == True:
        # option to show only planes in a holding pattern, note that the below is NOT foolproof depending on flight
        # route
        #
        # first we calculate the bearing change from the previous coordinate to the current coordinate, on how to
        # calculate bearing see:
        #
        # https://www.mrexcel.com/forum/excel-questions/626081-calculate-bearing-direction-between-2-coordinates.html

        # Convert longitude/latitude from degrees to radians.
        # NOTE(review): lat is read from adsbDf4 while chaining on adsbDf5 —
        # same source column, but confirm this is intentional.
        adsbDf5 = adsbDf4.withColumn("lon_rad",
                                     adsbDf4.lon * 3.14159265358979 / 180)
        adsbDf6 = adsbDf5.withColumn("lat_rad",
                                     adsbDf4.lat * 3.14159265358979 / 180)
        # Bearing from the previous point per flight (window ordered by UTC);
        # the first row of each flight (no lag value) gets bearing 0. The
        # atan2 result is divided by (pi/180) to convert back to degrees.
        adsbDf7 = adsbDf6.withColumn("bearing", psf.when(psf.isnull(psf.lag('lat_rad').over(window_flightNum_UTC)),0). \
            otherwise(psf.atan2(psf.sin(adsbDf6.lon_rad-psf.lag('lon_rad').over(window_flightNum_UTC))* \
            psf.cos('lat_rad'),psf.cos(psf.lag('lat_rad').over(window_flightNum_UTC))*psf.sin('lat_rad')- \
            psf.sin(psf.lag('lat_rad').over(window_flightNum_UTC))*psf.cos('lat_rad')*psf.cos(adsbDf6.lon_rad- \
            psf.lag('lon_rad').over(window_flightNum_UTC)))/( 3.14159265358979/180)))
        # Normalize negative bearings into [0, 360).
        adsbDf8 = adsbDf7.withColumn("bearing_final",psf.when(adsbDf7.bearing < 0, adsbDf7.bearing+360). \
            otherwise(adsbDf7.bearing))

        # calculate bearing change from the previous coordinate to the current;
        # rows where either bearing is exactly 0 yield a change of 0
        adsbDf9 = adsbDf8.withColumn("bearing_change", psf.when(psf.lag('bearing_final'). \
            over(window_flightNum_UTC)==0,0).otherwise(psf.when(adsbDf8.bearing_final==0,0). \
            otherwise(psf.lag('bearing_final').over(window_flightNum_UTC)-adsbDf8.bearing_final)))

        # crude way of ignoring changes when the bearing crosses zero degrees, such as 350 to 10 degrees, because it's
        # not really a 340 degree change but rather 20 degrees; anything with a
        # bearing change over 200 degrees is zeroed out
        adsbDf10 = adsbDf9.withColumn("bearing_change_final", psf.when(psf.abs(adsbDf9.bearing_change)>200,0). \
            otherwise(adsbDf9.bearing_change))