def gen_new_fake_data(n_rain=50, n_samples=10,
                      rel_noise=2, mean_slope=5):
    """Generate synthetic rainfall and sample series and write them to CSV.

    Files written through ``arr2csv``:

    * ``dummy_rainfalldata.csv`` -- ``n_rain`` non-negative rainfall values,
      one per row.
    * ``dummy_sampledata.csv`` -- the transposed list of sample series.
    * ``dummy_params.csv`` -- ``[shifts, slopes, constants]``; only written
      when ``opt.TEST_FALSE_CASE`` is falsy.

    When ``opt.TEST_FALSE_CASE`` is truthy every sample is noise only.
    Otherwise each sample is noise plus ``slope * rain + constant`` laid over
    the sample at a random offset ``shift``; the parts of the rainfall series
    that fall outside the sample are dropped.

    Args:
        n_rain: Length of the rainfall series.  Sample lengths are drawn
            from ``[n_rain // 4, n_rain * 2)``.
        n_samples: Number of sample series to generate.
        rel_noise: Factor the Gaussian noise is multiplied by.
        mean_slope: Offset added to ``|gauss|`` to form each sample's slope
            (so it is the smallest possible slope).

    NOTE(review): ``gauss(n)`` is assumed to return ``n`` Gaussian draws as a
    NumPy array -- confirm against its definition.
    """
    # Use n_rain here (was a hard-coded 50) so the series length matches the
    # bounds used when overlaying it on the samples below.
    dummy_rain = np.array([[abs(x)] for x in gauss(n_rain)])
    arr2csv(dummy_rain, 'dummy_rainfalldata.csv')

    sample_sizes = np.random.randint(n_rain//4, n_rain*2, n_samples)

    if opt.TEST_FALSE_CASE:
        dummy_samples = [abs(gauss(sz)) * rel_noise for sz in sample_sizes]

    else:
        max_shift = opt.MAX_FAKE_DATA_SHIFT
        # randint's upper bound is exclusive: shifts are in
        # [-max_shift, max_shift).
        shifts = np.random.randint(-max_shift, max_shift, n_samples)
        slopes = np.array([abs(x) for x in gauss(n_samples)]) + mean_slope

        constants = [abs(x) for x in gauss(n_samples)]

        # The CSV keeps the (n_rain, 1) column layout; the arithmetic below
        # works on a flat view so scalars are added to scalars.
        rain_flat = dummy_rain[:, 0]

        dummy_samples = []
        for idx, shift in enumerate(shifts):
            sample_size = sample_sizes[idx]
            sample = np.asarray(gauss(sample_size), dtype=float) * rel_noise
            transformed_rain = slopes[idx] * rain_flat + constants[idx]

            # shift > 0:  sample[shift] <-- rain[0]
            # shift <= 0: sample[0]     <-- rain[-shift]
            sample_start = max(shift, 0)
            rain_start = max(-shift, 0)
            # Clip the overlap to whichever series ends first (replaces the
            # per-element try/except IndexError).
            overlap = min(sample_size - sample_start, n_rain - rain_start)
            if overlap > 0:
                sample[sample_start:sample_start + overlap] += (
                    transformed_rain[rain_start:rain_start + overlap])

            dummy_samples.append([abs(x) for x in sample])

        arr2csv([shifts, slopes, constants], 'dummy_params.csv')
    arr2csv(transpose(dummy_samples), 'dummy_sampledata.csv')
def gen_new_fake_data(n_rain=50, n_samples=10, rel_noise=2, mean_slope=5):
    """Write synthetic rainfall, sample and parameter CSV files.

    The rainfall series (``n_rain`` non-negative values, one per row) goes to
    ``dummy_rainfalldata.csv``.  ``n_samples`` sample series, each with a
    random length in ``[n_rain // 4, n_rain * 2)``, go (transposed) to
    ``dummy_sampledata.csv``.

    If ``opt.TEST_FALSE_CASE`` is truthy the samples are pure noise.
    Otherwise each sample is noise plus a linear transform of the rainfall
    series (``slope * rain + constant``) placed at a random offset, and the
    generating ``[shifts, slopes, constants]`` are written to
    ``dummy_params.csv``.

    Args:
        n_rain: Number of rainfall values to generate.
        n_samples: Number of sample series.
        rel_noise: Multiplier applied to the Gaussian noise.
        mean_slope: Constant added to ``|gauss|`` when drawing each slope.

    NOTE(review): assumes ``gauss(n)`` yields ``n`` Gaussian draws as a NumPy
    array -- confirm against its definition.
    """
    # Length now follows n_rain instead of a hard-coded 50, keeping it
    # consistent with the overlay bounds further down.
    dummy_rain = np.array([[abs(x)] for x in gauss(n_rain)])
    arr2csv(dummy_rain, 'dummy_rainfalldata.csv')

    sample_sizes = np.random.randint(n_rain // 4, n_rain * 2, n_samples)

    if opt.TEST_FALSE_CASE:
        dummy_samples = [abs(gauss(sz)) * rel_noise for sz in sample_sizes]

    else:
        max_shift = opt.MAX_FAKE_DATA_SHIFT
        # Upper bound of randint is exclusive, so max_shift itself is never
        # drawn.
        shifts = np.random.randint(-max_shift, max_shift, n_samples)
        slopes = np.array([abs(x) for x in gauss(n_samples)]) + mean_slope

        constants = [abs(x) for x in gauss(n_samples)]

        # Flat 1-D view of the (n_rain, 1) column for element-wise arithmetic.
        rain_flat = dummy_rain[:, 0]

        dummy_samples = []
        for idx, shift in enumerate(shifts):
            sample_size = sample_sizes[idx]
            # Noise baseline for this sample.
            sample = np.asarray(gauss(sample_size), dtype=float) * rel_noise
            transformed_rain = slopes[idx] * rain_flat + constants[idx]

            # Positive shift: rain[0] lands on sample[shift].
            # Non-positive shift: rain[-shift] lands on sample[0].
            sample_start = max(shift, 0)
            rain_start = max(-shift, 0)
            # Number of positions where both series have data; anything past
            # either end is dropped (previously done by catching IndexError).
            overlap = min(sample_size - sample_start, n_rain - rain_start)
            if overlap > 0:
                sample[sample_start:sample_start + overlap] += (
                    transformed_rain[rain_start:rain_start + overlap])

            dummy_samples.append([abs(x) for x in sample])

        arr2csv([shifts, slopes, constants], 'dummy_params.csv')
    arr2csv(transpose(dummy_samples), 'dummy_sampledata.csv')
# Output CSV path; if None, defaults to 'summary_data.csv' (a relative path,
# so where it lands depends on how arr2csv resolves it).
outf = None

#####################################################
# set defaults
# rows2grab, trans2grab and summary_dir are expected to be set earlier in the
# script; falsy values are replaced by the defaults below.
if not rows2grab:
    rows2grab = [2, 3]
if not trans2grab:
    trans2grab = range(10)
if not summary_dir:
    summary_dir = os.path.join(os.getcwd(), 'summaries')
if not outf:
    outf = 'summary_data.csv'

# Collect the selected rows from every summary file into columns of one
# output table, with a header row of labels.
data = []
data_guide = []
# os.listdir returns entries in arbitrary order; sort so the output column
# order is reproducible.
for fn in sorted(os.listdir(summary_dir)):
    path2summary = os.path.join(summary_dir, fn)
    if not os.path.isfile(path2summary):
        continue  # sub-directories cannot be read as summaries
    summary = csv2arr(path2summary)[4:]  # throw out first 4 rows
    # Strip the extension whatever its length (was fn[:-4]).
    label_stem = os.path.splitext(fn)[0]
    for m in trans2grab:
        for k in rows2grab:
            # Row 11*m + k of the trimmed summary, without its first 3 cells.
            data.append(summary[11 * m + k][3:])
            x = 10 * m + k + 4  # in original csv (not counting blank rows)
            # Label is "<file>_(m-k-x)"; the previous format string had a
            # stray extra ')'.
            data_guide.append('{}_({}-{}-{})'.format(label_stem, m, k, x))

data = transpose(data)
data.insert(0, data_guide)
arr2csv(data, filename=outf)
# Example #4
# 0
# Destination CSV; when left as None it falls back to 'summary_data.csv'
# (a relative path -- its final location depends on arr2csv).
outf = None

#####################################################
# set defaults
# rows2grab, trans2grab and summary_dir come from earlier in the script;
# any falsy value is replaced here.
if not rows2grab:
    rows2grab = [2, 3]
if not trans2grab:
    trans2grab = range(10)
if not summary_dir:
    summary_dir = os.path.join(os.getcwd(), 'summaries')
if not outf:
    outf = 'summary_data.csv'

# Gather the requested rows of each summary file; after the transpose each
# gathered row becomes one column, headed by its label.
data = []
data_guide = []
# Sorted for a stable column order (os.listdir order is arbitrary).
for fn in sorted(os.listdir(summary_dir)):
    path2summary = os.path.join(summary_dir, fn)
    if not os.path.isfile(path2summary):
        continue  # ignore anything that is not a regular file
    summary = csv2arr(path2summary)[4:]  # throw out first 4 rows
    # File name without extension, independent of extension length.
    label_stem = os.path.splitext(fn)[0]
    for m in trans2grab:
        for k in rows2grab:
            # Take row 11*m + k of the trimmed summary, minus its first 3
            # cells.
            data.append(summary[11 * m + k][3:])
            x = 10 * m + k + 4  # in original csv (not counting blank rows)
            # "<file>_(m-k-x)" -- the unbalanced trailing ')' was removed.
            data_guide.append('{}_({}-{}-{})'.format(label_stem, m, k, x))

data = transpose(data)
data.insert(0, data_guide)
arr2csv(data, filename=outf)