def areSimilarLst(A, B, level=0.05):
    """Compare two equal-length lists with Pearson's chi-squared test.

    ``B`` is passed to ``stats.lchisquare`` as the observed values and ``A``
    as the expected values. The statistic and p-value are also printed.

    Args:
        A: expected values.
        B: observed values; must have the same length as ``A``.
        level: significance level the p-value is compared against
            (default 0.05).

    Returns:
        ``None`` when the two lists differ in length. Otherwise a tuple
        ``(similar, chiSquared, p_val)`` where ``similar`` is True when the
        lists are similar, i.e. the test does not reject the hypothesis
        that the observed values follow the expected ones
        (``p_val > level``).
    """
    # Only defined for lists of the same length.
    if len(A) != len(B):
        return None

    # Pearson chi-squared test: observed values first, expected second.
    chiSquared, p_val = stats.lchisquare(B, A)

    # Single pre-formatted argument so the output is the same text under
    # both the Python 2 print statement and the Python 3 print function.
    print("chiSquared, p_val:  %s %s" % (chiSquared, p_val))

    # A p-value at or below the significance level means the lists differ
    # significantly. The previous code returned True in exactly that case,
    # contradicting the documented "True if they are similar" contract.
    return p_val > level, chiSquared, p_val


# Example usage:
# W1 = numpy.random.rand(50).astype(numpy.float32)
# W2 = numpy.random.rand(50).astype(numpy.float32)
# W3 = numpy.random.poisson(1.0, 50)
# print(areSimilarLst(W1.tolist(), W2.tolist(), 0.05))
# print(areSimilarLst(W1.tolist(), W3.tolist(), 0.05))
def data_table(self):
    """Build the HTML fragments of the data summary table.

    The length of ``self.sequence`` decides what is produced:

    * If the length cannot be taken, a "No data!" warning is appended.
    * With more than two points, a chi-squared test
      (``stats.lchisquare(self.freq_bin(), self.normal())``) selects a
      comment by p-value (thresholds 0.98 / 0.80 / 0.50 / 0.20). The
      comment is followed by a table holding the histogram image, the
      units, the observed and expected frequencies, the chi-squared sum,
      the p-value as a percentage and the skewness.

    Returns:
        list: ``None`` as the first element, followed by the HTML string
        fragments in output order.
    """
    # The leading None placeholder is part of the returned list.
    fragments = [None]

    try:
        number_of_points = len(self.sequence)
    except (AttributeError, TypeError):
        # No usable sequence: emit the warning and skip the table below.
        number_of_points = 1
        fragments.append('<br><div class="warning">No data! ')
        fragments.append('Enter data or select from sample tab.</div>')

    # NOTE(review): the source reached review with its indentation lost.
    # Everything up to the closing table tag is treated as guarded by
    # "more than two points"; confirm against the original layout.
    if number_of_points <= 2:
        return fragments

    # One chi-squared run feeds both the comment and the figures shown.
    # NOTE(review): assumes freq_bin() and normal() are side-effect free,
    # since the test used to be computed twice with the same arguments.
    fit = stats.lchisquare(self.freq_bin(), self.normal())
    p_value = fit[1]

    if p_value >= .98:
        comment = (
            ' Your data is an excellent fit to a "log normal distribution" '
            'so you may be confident in the output of this and any other '
            'statistical analysis on this data. '
        )
    elif p_value >= .80:
        comment = (
            'Your data is a reasonable fit to a "log normal distribution" '
            'but you should investigate for potential reasons for the '
            'discrepancy. <br> <strong>Any local DRL or other statistic '
            'claculated from this data should be treated with some '
            'caution.</strong> '
        )
    elif p_value >= .50:
        comment = (
            '<div class="warning">Your data is a very poor fit to a '
            '"log normal distribution" so you should investgate why, as '
            'any attempt to calculate a local DRL or other statistic will '
            'be invalid.</div> '
        )
    elif p_value >= .20:
        comment = (
            '<div class="warning">Your data very random and not behaving '
            'like a set of radiation total doses. You should investgate '
            'why, as any attempt to calculate a local DRL or other '
            'statistic will be invalid. The data comprises of mixed '
            'procedures or mixed units.</div> '
        )
    else:
        comment = (
            '<div class="warning">Your data is very, very random and not '
            'behaving like a set of radiation total doses so you should '
            'investgate why, as any attempt to calculate a local DRL or '
            'other statistic will be invalid. \n'
            'Perhaps the data comprises of mixed procedures or mixed '
            'units.</div> '
        )

    # NOTE(review): '<div = "information">' is malformed markup (probably
    # meant class="information"); kept unchanged pending confirmation.
    intro = (
        ' <div = "information"><p>This is a summary of your data that '
        'includes so statistical tests for validity. A good quality set '
        'of data will fit a "log normal distribution". When plotted '
        'correctly, a log normal distribution looks like a smooth bell '
        'shaped curve. If your data does not fit such a curve, then you '
        'should seek a reason for the discrepancy.</p> <p>You may cut '
        'and paste data from this page into a spreadsheet or other '
        'document. (Formatting is usually preserved better when pasting '
        'into a spreadsheet).</p> </div>'
    )
    table_open = (
        '<table style="text-align: left; width: 100%;" border="0" '
        'cellpadding="1" cellspacing="2"> <tbody> '
    )
    table_close = '</tbody> </table>'
    # Rows and label cells are opened but never closed by this method;
    # that output is preserved as-is.
    row_open = "<TR>"
    cell_open = '<TD align="right">'
    cell_close = "</TD>"

    fragments.extend([intro, comment, table_open])

    # Histogram image spanning the full table width.
    chart_src = self.standard_normal_histo_chart()
    bins = self.bin_limits()
    cols_span = len(bins)
    wide_cell = '<TD colspan="%s" align="left" >' % cols_span
    fragments.extend([
        row_open,
        wide_cell,
        ' <img alt="Plot of input data" src="%s"/> <br> ' % chart_src,
    ])

    # Units: one cell for every bin limit but one.
    ndrl = My_Global_NDRL
    fragments.extend([row_open, cell_open, "Units"])
    for _ in range(len(bins) - 1):
        fragments.extend([cell_open, "%s" % ndrl[3], cell_close])

    # Observed data
    fragments.extend([row_open, cell_open, "Observed Frequency"])
    for count in self.standard_normal_freq_bin()[0]:
        fragments.extend([cell_open, "%d" % count, cell_close])

    # Expected data
    fragments.extend([row_open, cell_open, "Expected Frequency"])
    for count in self.standard_z():
        fragments.extend([cell_open, "%d" % count, cell_close])

    # Explanation of the standard normal histogram.
    fragments.extend([
        row_open,
        wide_cell,
        ' <p>The standard normal histogram plots the data as bell shaped '
        'curve of with a mean of 0 and a standard deviation of 1. This '
        'view allows for easier interpretation of anomalies in the data '
        'set. </p><br> ',
    ])

    # Chi-squared statistic.
    fragments.extend([
        row_open,
        cell_open,
        "Sum of Chi Squared ",
        wide_cell,
        ' (The closer this value is to zero the better fit your data is '
        'to a normal distribution ) = %.2f ' % fit[0],
    ])

    # P value, shown as a percentage.
    fragments.extend([
        row_open,
        cell_open,
        "P value ",
        wide_cell,
        ' (The closer this is to 100%% higher the probability that you '
        'have a normal distribution) = %.2f %% ' % (p_value * 100),
    ])

    # Skewness. The line break inside this tag is kept as found in the
    # source string.
    skewness = self.skewness()
    fragments.extend([
        row_open,
        cell_open,
        "Skewness ",
        '<TD colspan="%s" \nalign="left" >' % cols_span,
        ' left = %s, right = %s ' % (skewness[0], skewness[1]),
    ])

    fragments.append(table_close)
    return fragments
def data_table(self):
    """Build the HTML fragments of the data summary table.

    The length of ``self.sequence`` decides what is produced:

    * If the length cannot be taken, a "No data!" warning is appended.
    * With more than two points, a chi-squared test
      (``stats.lchisquare(self.freq_bin(), self.normal())``) selects a
      comment by p-value (thresholds 0.98 / 0.80 / 0.50 / 0.20). The
      comment is followed by a table holding the histogram image, the
      units, the observed and expected frequencies, the chi-squared sum,
      the p-value as a percentage and the skewness.

    Returns:
        list: ``None`` as the first element, followed by the HTML string
        fragments in output order.
    """
    # The leading None placeholder is part of the returned list.
    fragments = [None]

    try:
        number_of_points = len(self.sequence)
    except (AttributeError, TypeError):
        # No usable sequence: emit the warning and skip the table below.
        number_of_points = 1
        fragments.append('<br><div class="warning">No data! ')
        fragments.append('Enter data or select from sample tab.</div>')

    # NOTE(review): the source reached review with its indentation lost.
    # Everything up to the closing table tag is treated as guarded by
    # "more than two points"; confirm against the original layout.
    if number_of_points <= 2:
        return fragments

    # One chi-squared run feeds both the comment and the figures shown.
    # NOTE(review): assumes freq_bin() and normal() are side-effect free,
    # since the test used to be computed twice with the same arguments.
    fit = stats.lchisquare(self.freq_bin(), self.normal())
    p_value = fit[1]

    if p_value >= .98:
        comment = (
            ' Your data is an excellent fit to a "log normal distribution" '
            'so you may be confident in the output of this and any other '
            'statistical analysis on this data. '
        )
    elif p_value >= .80:
        comment = (
            'Your data is a reasonable fit to a "log normal distribution" '
            'but you should investigate for potential reasons for the '
            'discrepancy. <br> <strong>Any local DRL or other statistic '
            'claculated from this data should be treated with some '
            'caution.</strong> '
        )
    elif p_value >= .50:
        comment = (
            '<div class="warning">Your data is a very poor fit to a '
            '"log normal distribution" so you should investgate why, as '
            'any attempt to calculate a local DRL or other statistic will '
            'be invalid.</div> '
        )
    elif p_value >= .20:
        comment = (
            '<div class="warning">Your data very random and not behaving '
            'like a set of radiation total doses. You should investgate '
            'why, as any attempt to calculate a local DRL or other '
            'statistic will be invalid. The data comprises of mixed '
            'procedures or mixed units.</div> '
        )
    else:
        comment = (
            '<div class="warning">Your data is very, very random and not '
            'behaving like a set of radiation total doses so you should '
            'investgate why, as any attempt to calculate a local DRL or '
            'other statistic will be invalid. \n'
            'Perhaps the data comprises of mixed procedures or mixed '
            'units.</div> '
        )

    # NOTE(review): '<div = "information">' is malformed markup (probably
    # meant class="information"); kept unchanged pending confirmation.
    intro = (
        ' <div = "information"><p>This is a summary of your data that '
        'includes so statistical tests for validity. A good quality set '
        'of data will fit a "log normal distribution". When plotted '
        'correctly, a log normal distribution looks like a smooth bell '
        'shaped curve. If your data does not fit such a curve, then you '
        'should seek a reason for the discrepancy.</p> <p>You may cut '
        'and paste data from this page into a spreadsheet or other '
        'document. (Formatting is usually preserved better when pasting '
        'into a spreadsheet).</p> </div>'
    )
    table_open = (
        '<table style="text-align: left; width: 100%;" border="0" '
        'cellpadding="1" cellspacing="2"> <tbody> '
    )
    table_close = '</tbody> </table>'
    # Rows and label cells are opened but never closed by this method;
    # that output is preserved as-is.
    row_open = "<TR>"
    cell_open = '<TD align="right">'
    cell_close = "</TD>"

    fragments.extend([intro, comment, table_open])

    # Histogram image spanning the full table width.
    chart_src = self.standard_normal_histo_chart()
    bins = self.bin_limits()
    cols_span = len(bins)
    wide_cell = '<TD colspan="%s" align="left" >' % cols_span
    fragments.extend([
        row_open,
        wide_cell,
        ' <img alt="Plot of input data" src="%s"/> <br> ' % chart_src,
    ])

    # Units: one cell for every bin limit but one.
    ndrl = My_Global_NDRL
    fragments.extend([row_open, cell_open, "Units"])
    for _ in range(len(bins) - 1):
        fragments.extend([cell_open, "%s" % ndrl[3], cell_close])

    # Observed data
    fragments.extend([row_open, cell_open, "Observed Frequency"])
    for count in self.standard_normal_freq_bin()[0]:
        fragments.extend([cell_open, "%d" % count, cell_close])

    # Expected data
    fragments.extend([row_open, cell_open, "Expected Frequency"])
    for count in self.standard_z():
        fragments.extend([cell_open, "%d" % count, cell_close])

    # Explanation of the standard normal histogram.
    fragments.extend([
        row_open,
        wide_cell,
        ' <p>The standard normal histogram plots the data as bell shaped '
        'curve of with a mean of 0 and a standard deviation of 1. This '
        'view allows for easier interpretation of anomalies in the data '
        'set. </p><br> ',
    ])

    # Chi-squared statistic.
    fragments.extend([
        row_open,
        cell_open,
        "Sum of Chi Squared ",
        wide_cell,
        ' (The closer this value is to zero the better fit your data is '
        'to a normal distribution ) = %.2f ' % fit[0],
    ])

    # P value, shown as a percentage.
    fragments.extend([
        row_open,
        cell_open,
        "P value ",
        wide_cell,
        ' (The closer this is to 100%% higher the probability that you '
        'have a normal distribution) = %.2f %% ' % (p_value * 100),
    ])

    # Skewness. The line break inside this tag is kept as found in the
    # source string.
    skewness = self.skewness()
    fragments.extend([
        row_open,
        cell_open,
        "Skewness ",
        '<TD colspan="%s" \nalign="left" >' % cols_span,
        ' left = %s, right = %s ' % (skewness[0], skewness[1]),
    ])

    fragments.append(table_close)
    return fragments