def randomize_speed(df, contain_zero):
    """Spread recorded speeds uniformly over a one-unit interval.

    Works on a copy of ``df``; the caller's frame is left untouched.
    Also draws overlaid histograms of the speeds before and after the
    redistribution and prints the description of the method used.

    Args:
        df: DataFrame with a numeric ``speed`` column.
        contain_zero: If truthy, every speed ``x`` is moved upward into
            ``[x, x + 1)``. Otherwise positive speeds are moved downward
            into ``[x - 1, x)`` and non-positive speeds are kept as is.

    Returns:
        Tuple ``(df, speed_redistribution_info)``: the copied frame whose
        ``speed`` column holds the randomized values, and the text
        describing which redistribution was applied.
    """
    df = df.copy()
    speed = df['speed']
    # One vectorized draw replaces a Python-level call per row.
    if contain_zero:
        speed_redistribution_info = 'Redistribute upward, e.g. 0 -> [0,1]'
        df['speed_ran'] = speed + np.random.uniform(0, 1, size=len(speed))
    else:
        speed_redistribution_info = 'Redistribute downward, e.g. 1 -> [0,1]'
        noise = np.random.uniform(-1, 0, size=len(speed))
        # Only positive speeds are shifted, so zeros never turn negative.
        df['speed_ran'] = speed + np.where(speed > 0, noise, 0.0)

    max_speed = df.speed.max()
    df['speed'].hist(bins=arange(0, max_speed),
                     alpha=0.5,
                     label='Original Data')
    df['speed_ran'].hist(bins=arange(0, max_speed, 0.5),
                         alpha=0.5,
                         label='Redistributed Data')
    # Function-call form works on both Python 2 and Python 3.
    print(speed_redistribution_info)
    plt_configure(xlabel="Speed",
                  ylabel="Frequency",
                  legend=True,
                  figsize=(8, 3))

    df['speed'] = df['speed_ran']
    # axis must be passed by keyword: pandas 2.0 rejects it positionally.
    df.drop(['speed_ran'], axis=1, inplace=True)
    return df, speed_redistribution_info
def is_with_too_many_zero(df, threshold=1.5):
    """Tell whether the first speed bin is over-populated versus the second.

    Speeds are histogrammed with bin edges ``arange(0, df.speed.max())``.
    When the count of the first bin divided by the count of the second
    bin reaches ``threshold``, the original distribution is plotted and
    a notice is printed.

    Note: a maximum speed below 2 yields fewer than two bins, in which
    case the count lookups below cannot succeed.

    Args:
        df: DataFrame with a numeric ``speed`` column.
        threshold: Minimum first-bin / second-bin ratio to flag.

    Returns:
        Tuple ``(too_many_zero, null_wind_frequency)``: the boolean flag
        and the fraction of all rows that fall into the first bin.
    """
    bins = arange(0, df.speed.max())
    count, _ = np.histogram(df['speed'], bins=bins)
    # float() forces true division on Python 2 as well, where dividing
    # two integers with "/" would floor the result.
    null_wind_frequency = count[0] / float(len(df))
    too_many_zero = bool(count[0] / float(count[1]) >= threshold)
    if too_many_zero:
        df['speed'].plot(kind='hist', bins=bins, alpha=0.5)
        plt_configure(figsize=(4, 3), title='Original speed distribution')
        print(' Too many zeros')
    return too_many_zero, null_wind_frequency
def is_with_too_many_zero(df, threshold=1.5):
    """Report whether zero-bin speeds dominate the neighbouring bin.

    The ``speed`` column is binned on the edges
    ``arange(0, df.speed.max())``. If the first bin holds at least
    ``threshold`` times as many records as the second bin, a histogram of
    the original speeds is drawn and a notice is printed.

    Note: when the maximum speed is below 2 there are fewer than two
    bins and the count lookups below cannot succeed.

    Args:
        df: DataFrame with a numeric ``speed`` column.
        threshold: Ratio of first-bin count to second-bin count at or
            above which the data is flagged.

    Returns:
        Tuple ``(too_many_zero, null_wind_frequency)`` where the second
        item is the share of all rows that land in the first bin.
    """
    bins = arange(0, df.speed.max())
    count, _ = np.histogram(df['speed'], bins=bins)
    first_bin, second_bin = count[0], count[1]
    # Explicit float conversion keeps this a true division on Python 2.
    null_wind_frequency = first_bin / float(len(df))
    too_many_zero = bool(first_bin / float(second_bin) >= threshold)
    if too_many_zero:
        df['speed'].plot(kind='hist', bins=bins, alpha=0.5)
        plt_configure(figsize=(4, 3), title='Original speed distribution')
        print(' Too many zeros')
    return too_many_zero, null_wind_frequency
def plot_sectoral_comparison(gmm, weibull, direction, datasize):
    """Plot the GMM and Weibull series per direction with their averages.

    Each series is drawn against ``direction`` as a marked line. A dashed
    horizontal line in the same colour marks the second value returned by
    ``nominal_avg_and_weight_avg(datasize, series)``.
    """
    from plot_print_helper import plt_configure

    series = (
        (gmm, 'GMM', 'GMM weighted average'),
        (weibull, 'Weibull', 'Weibull weighted average'),
    )

    # Both averages are computed up front, before anything is drawn.
    weighted_means = []
    for values, _curve_label, _mean_label in series:
        _nominal, weighted = nominal_avg_and_weight_avg(datasize, values)
        weighted_means.append(weighted)

    for (values, curve_label, mean_label), mean in zip(series, weighted_means):
        (curve,) = plt.plot(direction, values, '-',
                            label=curve_label, marker='o')
        # Reuse the curve's colour so each average is visually paired with it.
        plt.axhline(mean, linestyle='--',
                    color=curve.get_color(), label=mean_label)

    plt_configure(xlabel='Direction', legend={'loc': 'best'}, figsize=(5, 3))
    plt.locator_params(axis='y', nbins=5)
def plot_sectoral_comparison(gmm, weibull, direction, datasize):
    """Draw GMM and Weibull values over direction, plus their averages.

    For each of the two series a marked line is plotted against
    ``direction``, followed by a dashed horizontal line of the same
    colour at the second value returned by
    ``nominal_avg_and_weight_avg(datasize, series)``.
    """
    from plot_print_helper import plt_configure

    # Averages for both series are obtained before any drawing happens.
    _gmm_nominal, gmm_level = nominal_avg_and_weight_avg(datasize, gmm)
    _wb_nominal, weibull_level = nominal_avg_and_weight_avg(datasize, weibull)

    curves = (
        (gmm, gmm_level, 'GMM', 'GMM weighted average'),
        (weibull, weibull_level, 'Weibull', 'Weibull weighted average'),
    )
    for values, level, curve_label, level_label in curves:
        (drawn,) = plt.plot(direction, values, '-',
                            label=curve_label, marker='o')
        # The dashed average line takes the colour of the curve just drawn.
        plt.axhline(level, linestyle='--',
                    color=drawn.get_color(), label=level_label)

    plt_configure(xlabel='Direction',
                  legend={'loc': 'best'},
                  figsize=(4.5, 2.5))
    plt.locator_params(axis='y', nbins=5)
def fill_direction_999(df, SECTOR_LENGTH):
    """Replace direction value 999 by interpolation and snap to sectors.

    ``df`` is modified in place (its ``dir`` column is overwritten) and
    also returned. Two figures are drawn: a bar chart of the
    ``wind_type`` value counts, and overlaid histograms of ``dir`` before
    and after the replacement.

    Args:
        df: DataFrame with ``wind_type`` and numeric ``dir`` columns.
        SECTOR_LENGTH: Sector width; interpolated directions are floored
            to a multiple of this value.

    Returns:
        The same ``df`` object with the updated ``dir`` column.
    """
    plt.figure()
    df['wind_type'].value_counts().plot(
        kind='bar', title='Wind Types Comprisement', figsize=(4, 3))

    plt.figure()
    bins = arange(0, df.dir.max() + 100, 10)
    df['dir'].plot(kind='hist', alpha=0.5, bins=bins, label='before interpolation')
    # Vectorized replacement of 999 by NaN; avoids a Python call per row
    # and also works when the frame has no rows.
    df['dir'] = df['dir'].mask(df['dir'] == 999)
    df['dir'] = df['dir'].interpolate() // SECTOR_LENGTH * SECTOR_LENGTH
    df['dir'].plot(kind='hist', alpha=0.5, bins=bins, label='after interpolation')
    plt_configure(title='Dir 999 record handling comparison', figsize=(8, 3), legend={'loc': 'best'})
    return df
# Example #7
# 0
def fill_direction_999(df, SECTOR_LENGTH):
    """Interpolate over direction entries equal to 999, then bin to sectors.

    The ``dir`` column of ``df`` is overwritten in place and ``df`` is
    returned. A bar chart of ``wind_type`` counts is drawn, followed by
    overlaid histograms of ``dir`` before and after the replacement.

    Args:
        df: DataFrame with ``wind_type`` and numeric ``dir`` columns.
        SECTOR_LENGTH: Sector width; each interpolated direction is
            floored to a multiple of this value.

    Returns:
        The same ``df`` object with the updated ``dir`` column.
    """
    plt.figure()
    df['wind_type'].value_counts().plot(
        kind='bar', title='Wind Types Comprisement', figsize=(4, 3))

    plt.figure()
    bins = arange(0, df.dir.max() + 100, 10)
    df['dir'].plot(kind='hist', alpha=0.5, bins=bins, label='before interpolation')
    # Column-wise masking instead of a row-wise apply: same result, no
    # per-row Python call, and it does not break on an empty frame.
    is_999 = df['dir'] == 999
    df['dir'] = df['dir'].mask(is_999)
    df['dir'] = df['dir'].interpolate() // SECTOR_LENGTH * SECTOR_LENGTH
    df['dir'].plot(kind='hist', alpha=0.5, bins=bins, label='after interpolation')
    plt_configure(title='Dir 999 record handling comparison', figsize=(8, 3), legend={'loc': 'best'})
    return df
def randomize_angle(df, DIR_REDISTRIBUTE, sector_span = 10):
    """Spread sector-rounded directions uniformly over the sector width.

    Works on a copy of ``df``. Also draws overlaid histograms of the
    directions before and after the redistribution.

    Args:
        df: DataFrame with a numeric ``dir`` column.
        DIR_REDISTRIBUTE: ``'even'`` shifts each direction ``x`` into
            ``[x - sector_span/2, x + sector_span/2)``; any other value
            shifts it into ``[x, x + sector_span)``.
        sector_span: Width of the interval the noise is drawn from.

    Returns:
        The copied frame whose ``dir`` column holds the randomized
        values. Results are not wrapped, so they may fall below 0 or
        above 360.
    """
    df = df.copy()
    direction = df['dir']
    if DIR_REDISTRIBUTE == 'even':
        # 2.0 keeps the half-span exact for odd integer spans on Python 2.
        half_span = sector_span / 2.0
        noise = np.random.uniform(-half_span, half_span, size=len(direction))
    else:
        noise = np.random.uniform(0, sector_span, size=len(direction))
    # One vectorized draw instead of a Python-level call per row.
    df['dir_ran'] = direction + noise

    bins=arange(0, 360+10, 5)
    df['dir'].hist(bins=bins, alpha=0.5, label='Original Data')
    bins=arange(0, 360+10, 1)
    df['dir_ran'].hist(bins=bins, alpha=0.5, label='Redistributed Data')
    plt_configure(xlabel="Direction", ylabel="Frequency", tight='x',
                  legend={'loc':'best'}, figsize=(8, 3))

    df['dir']=df['dir_ran']
    # axis must be passed by keyword: pandas 2.0 rejects it positionally.
    df.drop(['dir_ran'], axis=1, inplace=True)
    return df
def knot_unit_detect(df):
    """Guess from the fractional parts whether speeds derive from knots.

    ``df`` is modified in place: a ``decimal`` column holding
    ``speed % 1`` is added, and when detection succeeds ``speed`` is
    multiplied by 1.943845 and rounded to integers. Histograms of the
    fractional parts are drawn along the way.

    Args:
        df: DataFrame with a numeric ``speed`` column.

    Returns:
        Tuple ``(knot_unit, df)``: ``knot_unit`` is True when more than
        30% of the rows have a fractional part of at least 0.2; ``df`` is
        the same (mutated) frame that was passed in.
    """
    # df = df.copy()
    # 1. Determine whether using knot unit
    df['decimal'] = df.speed % 1
    df.decimal.hist(alpha=0.5, label='m/s', figsize=(4, 3))
    # float() forces true division; with two ints Python 2 would floor
    # the share to 0 and the knot unit would never be detected.
    fractional_share = len(df.query('decimal >= 0.2')) / float(len(df))
    knot_unit = fractional_share > 0.3

    # 2. Convert into knot unit
    if knot_unit:
        df['speed'] = df['speed'] * 1.943845
        df['decimal'] = df.speed % 1
        df.decimal.hist(alpha=0.5, label='knot')
        # need more elaboration, some is not near an integer
        df['speed'] = df['speed'].apply(lambda x: int(round(x)))
    plt_configure(xlabel='Decimal', ylabel='Frequency', legend={'loc': 'best'}, title='Decimal Distribution')

    return knot_unit, df
# Example #10
# 0
def randomize_angle(df, DIR_REDISTRIBUTE, sector_span = 10):
    """Add uniform noise to the ``dir`` column to undo sector rounding.

    A copy of ``df`` is modified and returned; the input frame is left
    untouched. Overlaid histograms of the original and redistributed
    directions are drawn.

    Args:
        df: DataFrame with a numeric ``dir`` column.
        DIR_REDISTRIBUTE: With ``'even'`` the noise is drawn from
            ``[-sector_span/2, sector_span/2)``; with any other value it
            is drawn from ``[0, sector_span)``.
        sector_span: Width of the noise interval.

    Returns:
        The copied frame with the randomized ``dir`` column. Values are
        not wrapped, so they may end up below 0 or above 360.
    """
    df = df.copy()
    n_rows = len(df)
    if DIR_REDISTRIBUTE == 'even':
        # Dividing by 2.0 avoids floor division of integer spans on Python 2.
        low, high = -sector_span / 2.0, sector_span / 2.0
    else:
        low, high = 0, sector_span
    # A single vectorized draw replaces one Python call per row.
    df['dir_ran'] = df['dir'] + np.random.uniform(low, high, size=n_rows)

    bins=arange(0, 360+10, 5)
    df['dir'].hist(bins=bins, alpha=0.5, label='Original Data')
    bins=arange(0, 360+10, 1)
    df['dir_ran'].hist(bins=bins, alpha=0.5, label='Redistributed Data')
    plt_configure(xlabel="Direction", ylabel="Frequency", tight='x',
                  legend={'loc':'best'}, figsize=(8, 3))

    df['dir']=df['dir_ran']
    # Keyword axis: the positional form is rejected by pandas 2.0.
    df.drop(['dir_ran'], axis=1, inplace=True)
    return df
def randomize_speed(df, contain_zero):
    """Add uniform noise to the ``speed`` column to undo rounding.

    A copy of ``df`` is modified and returned; the input frame is left
    untouched. Overlaid histograms of the original and redistributed
    speeds are drawn and the method description is printed.

    Args:
        df: DataFrame with a numeric ``speed`` column.
        contain_zero: If truthy, noise from ``[0, 1)`` is added to every
            speed. Otherwise noise from ``[-1, 0)`` is added to positive
            speeds only; non-positive speeds are kept unchanged.

    Returns:
        Tuple ``(df, speed_redistribution_info)``: the copied frame with
        the randomized ``speed`` column and the description text.
    """
    df = df.copy()
    n_rows = len(df)
    # Noise is drawn for the whole column at once rather than row by row.
    if contain_zero:
        speed_redistribution_info = 'Redistribute upward, e.g. 0 -> [0,1]'
        shift = np.random.uniform(0, 1, size=n_rows)
    else:
        speed_redistribution_info = 'Redistribute downward, e.g. 1 -> [0,1]'
        # Speeds that are not positive get no shift, so 0 stays 0.
        shift = np.where(df['speed'] > 0,
                         np.random.uniform(-1, 0, size=n_rows),
                         0.0)
    df['speed_ran'] = df['speed'] + shift

    max_speed = df.speed.max()
    df['speed'].hist(bins=arange(0, max_speed), alpha=0.5, label='Original Data')
    df['speed_ran'].hist(bins=arange(0, max_speed, 0.5), alpha=0.5, label='Redistributed Data')
    # Function-call form is valid on both Python 2 and Python 3.
    print(speed_redistribution_info)
    plt_configure(xlabel="Speed", ylabel="Frequency", legend=True, figsize=(8, 3))

    df['speed']=df['speed_ran']
    # Keyword axis: the positional form is rejected by pandas 2.0.
    df.drop(['speed_ran'], axis=1, inplace=True)
    return df, speed_redistribution_info
def knot_unit_detect(df):
    """Detect knot-derived speeds from their fractional parts and convert.

    ``df`` is modified in place: a ``decimal`` column with ``speed % 1``
    is added, and when detection succeeds ``speed`` is multiplied by
    1.943845 and rounded to integers. Histograms of the fractional parts
    are drawn before and (if converted) after the scaling.

    Args:
        df: DataFrame with a numeric ``speed`` column.

    Returns:
        Tuple ``(knot_unit, df)``: ``knot_unit`` is True when more than
        30% of the rows have a fractional part of at least 0.2; ``df`` is
        the same (mutated) frame that was passed in.
    """
    # df = df.copy()
    # 1. Determine whether using knot unit
    df['decimal'] = df.speed % 1
    df.decimal.hist(alpha=0.5, label='m/s', figsize=(4, 3))
    n_fractional = len(df.query('decimal >= 0.2'))
    # Explicit float keeps this a true division on Python 2, where
    # int / int would floor to 0 and detection would always fail.
    knot_unit = n_fractional / float(len(df)) > 0.3

    # 2. Convert into knot unit
    if knot_unit:
        df['speed'] = df['speed'] * 1.943845
        df['decimal'] = df.speed % 1
        df.decimal.hist(alpha=0.5, label='knot')
        # need more elaboration, some is not near an integer
        df['speed'] = df['speed'].apply(lambda x: int(round(x)))
    plt_configure(xlabel='Decimal',
                  ylabel='Frequency',
                  legend={'loc': 'best'},
                  title='Decimal Distribution')

    return knot_unit, df
# Example #13
# 0
def randomize_speed(df, redistribute_method='round_up'):
    """Spread recorded speeds uniformly according to the chosen method.

    Works on a copy of ``df``; the caller's frame is left untouched.
    Overlaid histograms of the speeds before and after are drawn and the
    method description is printed.

    Args:
        df: DataFrame with a numeric ``speed`` column.
        redistribute_method: One of
            ``'round_up'``   - every speed ``x`` moves into ``[x, x + 1)``;
            ``'round_down'`` - positive speeds move into ``[x - 1, x)``,
            non-positive speeds are kept unchanged;
            ``'even'``       - positive speeds move into
            ``[x - 0.5, x + 0.5)``, non-positive ones into ``[x, x + 0.5)``.

    Returns:
        Tuple ``(df, speed_redistribution_info)``: the copied frame with
        the randomized ``speed`` column and the description text.

    Raises:
        ValueError: If ``redistribute_method`` is not a supported name.
    """
    # Fail early with a clear message; previously an unknown method fell
    # through every branch and only failed later with an unrelated error.
    if redistribute_method not in ('round_up', 'round_down', 'even'):
        raise ValueError(
            "Unknown redistribute_method %r; expected 'round_up', "
            "'round_down' or 'even'" % (redistribute_method,))

    df = df.copy()
    speed = df['speed']
    n_rows = len(speed)
    positive = speed > 0
    # Noise is drawn for the whole column at once rather than row by row.
    if redistribute_method == 'round_up':
        speed_redistribution_info = 'Redistribute upward, e.g. 0 -> [0,1]'
        noise = np.random.uniform(0, 1, size=n_rows)
    elif redistribute_method == 'round_down':
        speed_redistribution_info = 'Redistribute downward, e.g. 1 -> [0,1]'
        # Non-positive speeds are left as they are.
        noise = np.where(positive, np.random.uniform(-1, 0, size=n_rows), 0.0)
    else:
        speed_redistribution_info = 'Redistribute evenly, e.g. 0 -> [0, 0.5]; 1 -> [0.5,1.5]'
        # Non-positive speeds only move upward so they do not go below x.
        noise = np.where(positive,
                         np.random.uniform(-0.5, 0.5, size=n_rows),
                         np.random.uniform(0, 0.5, size=n_rows))
    df['speed_ran'] = speed + noise

    max_speed = df.speed.max()
    df['speed'].hist(bins=arange(0, max_speed), alpha=0.5, label='Original Data')
    df['speed_ran'].hist(bins=arange(0, max_speed, 0.5), alpha=0.5, label='Redistributed Data')
    print(speed_redistribution_info)
    plt_configure(xlabel="Speed", ylabel="Frequency", legend=True, figsize=(8, 3))

    df['speed']=df['speed_ran']
    # Keyword axis: the positional form is rejected by pandas 2.0.
    df.drop(['speed_ran'], axis=1, inplace=True)
    return df, speed_redistribution_info