def gen_new_fake_data(n_rain=50, n_samples=10,
                      rel_noise=2, mean_slope=5):
    """Generate fake rainfall and sample data and write them to CSV files.

    A rainfall series of ``n_rain`` absolute-valued ``gauss`` draws is
    written to 'dummy_rainfalldata.csv' as a single column.  ``n_samples``
    samples, each with a random length drawn by
    ``np.random.randint(n_rain // 4, n_rain * 2)``, are written (after
    ``transpose``) to 'dummy_sampledata.csv'.

    If ``opt.TEST_FALSE_CASE`` is truthy the samples are noise only.
    Otherwise every sample additionally receives a linearly transformed
    copy of the rainfall (``slope * rain + constant``) placed at a random
    shift, and the shifts, slopes and constants that were used are written
    to 'dummy_params.csv'.

    NOTE(review): ``gauss``, ``arr2csv``, ``transpose`` and ``opt`` are
    defined outside this block; ``gauss(n)`` is assumed to return ``n``
    random draws as a numpy array -- confirm against its definition.

    Args:
        n_rain: Number of rainfall values to generate.
        n_samples: Number of samples to generate.
        rel_noise: Factor applied to the ``gauss`` noise of every sample.
        mean_slope: Offset added to the absolute ``gauss`` draws that form
            the per-sample slopes.

    Returns:
        None.  All results are written through ``arr2csv``.
    """
    # Fix: the series length used to be hard-coded to 50, which silently
    # ignored ``n_rain`` for the rainfall itself.
    dummy_rain = np.array([[abs(x)] for x in gauss(n_rain)])
    arr2csv(dummy_rain, 'dummy_rainfalldata.csv')

    sample_sizes = np.random.randint(n_rain//4, n_rain*2, n_samples)

    if opt.TEST_FALSE_CASE:
        # Pure noise: no rainfall signal is embedded in the samples.
        dummy_samples = [abs(gauss(sz)) * rel_noise for sz in sample_sizes]

    else:
        max_shift = opt.MAX_FAKE_DATA_SHIFT
        shifts = np.random.randint(-max_shift, max_shift, n_samples)
        slopes = np.array([abs(x) for x in gauss(n_samples)]) + mean_slope

        constants = [abs(x) for x in gauss(n_samples)]
        dummy_samples = []
        for idx, shift in enumerate(shifts):
            sample = gauss(sample_sizes[idx]) * rel_noise  # add noise
            transformed_rain = slopes[idx] * dummy_rain + constants[idx]

            if shift > 0:
                # sample[shift] <-- rain[0]
                sample_start, rain_start = int(shift), 0
            else:
                # sample[0] <-- rain[abs(shift)]
                sample_start, rain_start = 0, int(abs(shift))

            # Only the years present in both series can be combined; the
            # explicit bound replaces the former try/except IndexError.
            n_overlap = min(len(sample) - sample_start,
                            len(transformed_rain) - rain_start)
            for step in range(max(n_overlap, 0)):
                sample[sample_start + step] += \
                    transformed_rain[rain_start + step]

            dummy_samples.append([abs(x) for x in sample])

        arr2csv([shifts, slopes, constants], 'dummy_params.csv')
    arr2csv(transpose(dummy_samples), 'dummy_sampledata.csv')
def gen_new_fake_data(n_rain=50, n_samples=10, rel_noise=2, mean_slope=5):
    """Write synthetic rainfall / sample CSV files for testing.

    Files written through ``arr2csv``:

    * 'dummy_rainfalldata.csv' -- one column of ``n_rain`` absolute-valued
      ``gauss`` draws.
    * 'dummy_sampledata.csv' -- ``n_samples`` samples (transposed before
      writing) whose lengths are drawn by
      ``np.random.randint(n_rain // 4, n_rain * 2)``.
    * 'dummy_params.csv' -- the shifts, slopes and constants used; written
      only when ``opt.TEST_FALSE_CASE`` is falsy.

    When ``opt.TEST_FALSE_CASE`` is truthy the samples are noise only.
    Otherwise ``slope * rain + constant`` is added to each noisy sample at
    a random shift.

    NOTE(review): ``gauss``, ``arr2csv``, ``transpose`` and ``opt`` come
    from outside this block; ``gauss(n)`` is assumed to return ``n`` random
    draws as a numpy array -- confirm against its definition.

    Args:
        n_rain: Number of rainfall values to generate.
        n_samples: Number of samples to generate.
        rel_noise: Factor applied to the ``gauss`` noise of every sample.
        mean_slope: Offset added to the absolute ``gauss`` draws that form
            the per-sample slopes.

    Returns:
        None.
    """
    # Fix: honour ``n_rain`` here (the length was hard-coded to 50 before,
    # so any other ``n_rain`` produced a mismatched rainfall series).
    dummy_rain = np.array([[abs(x)] for x in gauss(n_rain)])
    arr2csv(dummy_rain, 'dummy_rainfalldata.csv')

    sample_sizes = np.random.randint(n_rain // 4, n_rain * 2, n_samples)

    if opt.TEST_FALSE_CASE:
        # Noise-only samples: nothing correlated with the rainfall.
        dummy_samples = [abs(gauss(sz)) * rel_noise for sz in sample_sizes]

    else:
        max_shift = opt.MAX_FAKE_DATA_SHIFT
        shifts = np.random.randint(-max_shift, max_shift, n_samples)
        slopes = np.array([abs(x) for x in gauss(n_samples)]) + mean_slope

        constants = [abs(x) for x in gauss(n_samples)]
        dummy_samples = []
        for idx, shift in enumerate(shifts):
            sample = gauss(sample_sizes[idx]) * rel_noise  # add noise
            transformed_rain = slopes[idx] * dummy_rain + constants[idx]

            offset = int(abs(shift))
            if shift > 0:
                # sample[shift] <-- rain[0]
                sample_years = range(offset, len(sample))
                rain_years = range(len(transformed_rain))
            else:
                # sample[0] <-- rain[abs(shift)]
                sample_years = range(len(sample))
                rain_years = range(offset, len(transformed_rain))

            # zip stops at the shorter range, so only years present in both
            # series are touched (no IndexError-driven control flow needed).
            for yr, rain_yr in zip(sample_years, rain_years):
                sample[yr] += transformed_rain[rain_yr]

            dummy_samples.append([abs(x) for x in sample])

        arr2csv([shifts, slopes, constants], 'dummy_params.csv')
    arr2csv(transpose(dummy_samples), 'dummy_sampledata.csv')
示例#3
0
# Data Pre-Processing
############################################################
# Sanity check: MIN_OVERLAP must lie between 0 and the number of rows that
# csv2arr returns for the rainfall file (read here without `hasheaders`).
assert 0 <= opt.MIN_OVERLAP <= len(csv2arr(opt.RAINFALLDATA_FN))

# load and format rainfall data
# NOTE(review): no normalization is performed in the visible code.
rain = csv2arr(opt.RAINFALLDATA_FN, hasheaders=opt.RAIN_DATA_HAS_HEADERS)
# keep only the first column of the table
rain = list(np.array(rain).T[0])  # convert Nx1 array to list

# convert every rainfall entry to float
rain = [float(x) for x in rain]

# read the sample data table
sampledata = csv2arr(opt.SAMPLEDATA_FN, hasheaders=opt.SAMPLE_DATA_HAS_HEADERS)

# take the transpose (so now each sample is a row)
sampledata = transpose(sampledata)

# convert any strings to floats; falsy entries are dropped by `if x`
# (presumably empty cells padding the shorter samples -- TODO confirm)
sampledata = [[float(x) for x in sample if x] for sample in sampledata]

############################################################
# Analysis
############################################################
# Runs when debug mode is off, or when the dummy results should be treated
# like real ones.
if not opt.DEBUG_MODE_ON or opt.TEST_DUMMY_RESULTS_LIKE_THEY_ARE_REAL:
    # Test single sample results if using generated fake data
    single_sample_results, probs = align(rain, sampledata)
    print "\nSingle Sample Results:"
    # NOTE(review): `q` is not used in the visible part of this script.
    q = 1
    print "# : (p < {}, p, r, shift)".format(opt.ALPHA)
    # ceil(log10(number of results)); presumably the index width used when
    # printing -- its use is not visible here.
    digs = int(ceil(log10(len(single_sample_results))))
    # NOTE(review): the body of this loop is missing from this excerpt.
    for idx, res in enumerate(single_sample_results):
# if None, defaults to 'summary_data.csv' (in the folder containing this
# script)
outf = None

#####################################################
# set defaults for every setting that was left empty / None
if not rows2grab:
    rows2grab = [2, 3]
if not trans2grab:
    trans2grab = range(10)
if not summary_dir:
    summary_dir = os.path.join(os.getcwd(), 'summaries')
if not outf:
    outf = 'summary_data.csv'

# Collect the selected rows of every summary file.  Each selected row is
# appended to `data`; `data_guide` receives the matching label.
data = []
data_guide = []
# os.listdir returns names in arbitrary order; sorting makes the column
# order of the output reproducible between runs and machines.
for fn in sorted(os.listdir(summary_dir)):
    path2summary = os.path.join(summary_dir, fn)
    summary = csv2arr(path2summary)[4:]  # throw out first 4 rows
    # Strip the extension whatever its length (the former fn[:-4] was only
    # correct for three-character extensions such as '.csv').
    label_stem = os.path.splitext(fn)[0]
    for m in trans2grab:
        for k in rows2grab:
            # rows are addressed with a stride of 11 per `m`; the first
            # three columns of the row are dropped
            data.append(summary[11*m + k][3:])
            x = 10*m + k + 4  # in original csv (not counting blank rows)
            # NOTE(review): the label format ends in an unbalanced ')';
            # kept unchanged in case something downstream relies on it.
            data_guide.append(label_stem + '_({}-{}-{}))'.format(m, k, x))

# transpose the collected rows, put the labels first, and write the result
data = transpose(data)
data.insert(0, data_guide)
arr2csv(data, filename=outf)
示例#5
0
# if None, defaults to 'summary_data.csv' (in the folder containing this
# script)
outf = None

#####################################################
# set defaults for every setting that was left empty / None
if not rows2grab:
    rows2grab = [2, 3]
if not trans2grab:
    trans2grab = range(10)
if not summary_dir:
    summary_dir = os.path.join(os.getcwd(), 'summaries')
if not outf:
    outf = 'summary_data.csv'

# Gather the requested rows from each summary file: the row contents go
# into `data`, a label describing each row goes into `data_guide`.
data = []
data_guide = []
# Sort the directory listing: os.listdir gives no ordering guarantee, and
# an unsorted listing makes the output column order vary between runs.
for fn in sorted(os.listdir(summary_dir)):
    path2summary = os.path.join(summary_dir, fn)
    summary = csv2arr(path2summary)[4:]  # throw out first 4 rows
    # Use splitext instead of fn[:-4] so extensions of any length are
    # removed correctly from the label.
    base_name = os.path.splitext(fn)[0]
    for m in trans2grab:
        for k in rows2grab:
            # stride of 11 rows per `m`; first three columns are skipped
            data.append(summary[11 * m + k][3:])
            x = 10 * m + k + 4  # in original csv (not counting blank rows)
            # NOTE(review): the trailing '))' in the label is unbalanced;
            # left as is because the label text is program output.
            data_guide.append(base_name + '_({}-{}-{}))'.format(m, k, x))

# transpose the gathered rows, prepend the label row, and write the CSV
data = transpose(data)
data.insert(0, data_guide)
arr2csv(data, filename=outf)
示例#6
0
############################################################
# Sanity check: MIN_OVERLAP must lie between 0 and the number of rows that
# csv2arr returns for the rainfall file (read here without `hasheaders`).
assert 0 <= opt.MIN_OVERLAP <= len(csv2arr(opt.RAINFALLDATA_FN))

# load and format rainfall data
# NOTE(review): no normalization is performed in the visible code.
rain = csv2arr(opt.RAINFALLDATA_FN, hasheaders=opt.RAIN_DATA_HAS_HEADERS)
# keep only the first column of the table
rain = list(np.array(rain).T[0])  # convert Nx1 array to list


# convert every rainfall entry to float
rain = [float(x) for x in rain]

# read the sample data table
sampledata = csv2arr(opt.SAMPLEDATA_FN, hasheaders=opt.SAMPLE_DATA_HAS_HEADERS)

# take the transpose (so now each sample is a row)
sampledata = transpose(sampledata)

# convert any strings to floats; falsy entries are dropped by `if x`
# (presumably empty cells padding the shorter samples -- TODO confirm)
sampledata = [[float(x) for x in sample if x] for sample in sampledata]

############################################################
# Analysis
############################################################
# Runs when debug mode is off, or when the dummy results should be treated
# like real ones.
if not opt.DEBUG_MODE_ON or opt.TEST_DUMMY_RESULTS_LIKE_THEY_ARE_REAL:
    # Test single sample results if using generated fake data
    single_sample_results, probs = align(rain, sampledata)
    print "\nSingle Sample Results:"
    # NOTE(review): `q` is not used in the visible part of this script.
    q = 1
    print "# : (p < {}, p, r, shift)".format(opt.ALPHA)
    # ceil(log10(number of results)); presumably the index width used when
    # printing -- its use is not visible here.
    digs = int(ceil(log10(len(single_sample_results))))
    # NOTE(review): the body of this loop is not visible in this excerpt.
    for idx, res in enumerate(single_sample_results):