Exemplo n.º 1
0
 def testDataset3(self):
     """Dataset with only missing values.

     Every entry of the fixture is expected to be the missing-value
     placeholder (-9999), so both the mean and the standard deviation
     should come back as that placeholder.
     NOTE(review): self.dataset3 is presumably built in setUp -- confirm.
     """
     mean = compute_mean(self.dataset3, -9999)
     self.assertEqual(mean, -9999)

     # The standard deviation must come from compute_std; calling
     # compute_mean here would only repeat the mean check above.
     std = compute_std(self.dataset3, -9999)
     self.assertAlmostEqual(std, -9999, delta=1e-3)
Exemplo n.º 2
0
 def testDataset1(self):
     """Dataset with one missing value.

     Checks that the mean and standard deviation are computed with -9999
     passed as the missing-value placeholder.
     NOTE(review): self.dataset1 is presumably built in setUp -- confirm.
     """
     mean = compute_mean(self.dataset1, -9999)
     # assertEqual / assertAlmostEqual replace the deprecated
     # assertEquals / assertAlmostEquals aliases (removed in Python 3.12).
     self.assertEqual(mean, 40.0)

     std = compute_std(self.dataset1, -9999)
     self.assertAlmostEqual(std, 18.2574, delta=1e-3)
Exemplo n.º 3
0
 def testDataset2(self):
     """Dataset with no missing values.

     Checks the mean and standard deviation of a fully populated dataset;
     -9999 is still passed as the missing-value placeholder.
     NOTE(review): self.dataset2 is presumably built in setUp -- confirm.
     """
     mean = compute_mean(self.dataset2, -9999)
     # assertEqual / assertAlmostEqual replace the deprecated
     # assertEquals / assertAlmostEquals aliases (removed in Python 3.12).
     self.assertEqual(mean, 45.0)

     std = compute_std(self.dataset2, -9999)
     self.assertAlmostEqual(std, 18.7082, delta=1e-3)
Exemplo n.º 4
0
def standardize(data, missing_val=-9999):
    """Convert a dataset to normal scores, leaving missing entries in place.

    The normal scores are those used by the standard normal homogeneity
    test, defined as

    Z_i = (Q_i - Qbar) / sigma_Q,

    where Qbar is the mean of the valid (non-missing) values and sigma_Q
    is computed here as sqrt(sum((Q_i - Qbar)**2) / (n - 2)), with n the
    number of valid values.

    For more information, please refer to Alexandersson and Moberg, 1997,
    Int'l Journal of Climatology Vol. 17, pp 25-34.

    This is a direct port of splitmerge.v21f.f > subroutine 'standard'.

    :Param data:
        The dataset to standardize.
    :Param missing_val:
        The placeholder for missing data.
    :Return:
        A list with one entry per element of `data`: the normal score for
        each valid value, and `missing_val` wherever the input held the
        missing-data placeholder.

    """
    ## Only the non-missing values contribute to the mean and the spread.
    usable = get_valid_data(data, missing_val)
    count = len(usable)
    centre = compute_mean(usable, valid=True)

    ## Accumulate the squared deviations from the mean.
    squared_error = 0.0
    for value in usable:
        squared_error += (value - centre) ** 2

    ## The n-2 denominator follows the routine this function was ported from.
    spread = sqrt(squared_error / (count - 2))

    ## Scale every valid entry; missing placeholders pass through untouched.
    return [
        (value - centre) / spread if value != missing_val else missing_val
        for value in data
    ]