Пример #1
0
def main():
    """The application entry point"""
    print 'EXERCISE 1A'
    print '==========='
    print 'This program takes a CSV file, asks you to select a row from that'
    print 'file, and then computes the mean and standard deviation of the'
    print 'values in that row.'
    print
    file_path = io.get_and_confirm_input('Enter csv file with values: ')
    data = io.read_csv_file(file_path)

    if not data:
        raise RuntimeError('No data found in file {}'.format(file_path))

    column = io.choose_from_list(
        'Which column would you like to use:', data[0].keys())

    if column not in data[0]:
        raise RuntimeError('Invalid column {}'.format(column))

    values = linked_list.LinkedList()
    for each in data:
        values.insert(each[column])

    for each in values:
        print each

    print 'Mean: ', statistics.mean(values)
    print 'Std Dev: ', statistics.standard_deviation(values)
Пример #2
0
    def execute(self):
        print '8A: Load CSV file and sort by selected column'
        print

        file_name = io.prompt_existant_file_name('CSV file to sort: ')
        data = io.read_csv_file(file_name)

        if not data:
            print 'ERROR: File contains no data.'
            sys.exit(1)

        column_names = data[0].keys()
        sort_column = io.choose_from_list('Column to sort on', column_names)

        try:
            for each in data:
                each[sort_column] = float(each[sort_column])
        except ValueError:
            print 'ERROR: Column {} contains non-integer value.'.format(
                sort_column)
            sys.exit(1)

        sorted_data = sorted(data, key=lambda item: item[sort_column])

        table = display_table.DisplayTable(column_names)
        for each in sorted_data:
            table.add_row(each.values())
        table.display()
Пример #3
0
    def execute(self):
        """Fit a linear regression to two CSV columns and print a projection.

        Command-line arguments:
            CSVFILE: path to the CSV file with historical data.
            ESTVAL: the estimated X value to project from (parsed as float).

        The user picks the X and Y columns interactively. Empty cells are
        dropped per column and the two series are then trimmed to equal
        length with ``probe.trim_to_equal_length``. The method prints the
        regression coefficients, the standard deviation around the
        regression, the projection ``beta_0 + beta_1 * ESTVAL``, the t
        values, and the 70/90 percent prediction ranges with their upper
        (UPI) and lower (LPI) prediction interval bounds.

        Exits the process with status 1 when the CSV file yields no data.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            'CSVFILE', help='path to csv file with historical data.')
        parser.add_argument(
            'ESTVAL', help='an estimated value')
        args = parser.parse_args()
        file_path = args.CSVFILE
        estimated_value = float(args.ESTVAL)
        csv_data = io.read_csv_file(file_path)

        if not csv_data:
            print 'ERROR: No data'
            sys.exit(1)

        columns = csv_data[0].keys()
        x_column = io.choose_from_list('X Column:', columns)
        y_column = io.choose_from_list('Y Column:', columns)
        # NOTE(review): each column is filtered on its own, so a gap in the
        # middle of one column shifts its values against the other before
        # trimming -- confirm the input only ever has trailing gaps.
        x_data = [float(each[x_column]) for each in csv_data if each[x_column]]
        y_data = [float(each[y_column]) for each in csv_data if each[y_column]]
        print

        x_data, y_data = probe.trim_to_equal_length(x_data, y_data)
        print 'X DATA: {}'.format(x_data)
        print 'Y DATA: {}'.format(y_data)
        print

        beta_0 = statistics.beta_0(x_data, y_data)
        print u'\u03B20: {}'.format(beta_0)

        beta_1 = statistics.beta_1(x_data, y_data)
        print u'\u03B21: {}'.format(beta_1)

        # t distribution with n - 2 degrees of freedom, integrated from
        # minus infinity so it can be inverted numerically below.
        integ = integration.Integrator(20, 0.000001)
        tdist = statistics.make_t_distribution(len(x_data) - 2)
        itdist = lambda x: integ.integrate_minus_infinity_to(tdist, x)

        std_dev = (
            statistics.standard_deviation_around_regression(x_data, y_data)
        )
        print "StdDev: ", std_dev

        projection = beta_0 + beta_1 * estimated_value
        print 'Projection: ', projection

        # 0.85 / 0.95 are the probabilities used for the 70 / 90 percent
        # figures -- presumably the one-sided equivalents of the two-sided
        # intervals (TODO confirm against statistics.prediction_range).
        print 't(70 percent): ', integration.approximate_inverse(itdist, 0.85)
        print 't(90 percent): ', integration.approximate_inverse(itdist, 0.95)

        range70 = statistics.prediction_range(
            estimated_value, 0.85, x_data, y_data
        )
        range90 = statistics.prediction_range(
            estimated_value, 0.95, x_data, y_data
        )
        # Range is the half-width itself; the upper bound adds it to the
        # projection and the lower bound subtracts it.
        print 'Range(70 percent) =', range70, \
            'UPI =', projection + range70, 'LPI =', projection - range70
        print 'Range(90 percent) =', range90, \
            'UPI =', projection + range90, 'LPI =', projection - range90
Пример #4
0
 def execute(self):
     """Prompt the user for a CSV file and column selection. Then, perform
     chi-squared test on the data given.
     """
     print 'PSP Exercise 9A'
     print 'This program performs the chi-squared test on data given.'
     print
     file_path = self.get_file_name()
     data = io.read_csv_file(file_path)
     test_data = self.get_test_column(data)
     q_val, p_val = chi_squared.ChiSquaredTest().execute(test_data)
     print 'Q: ', q_val
     print 'P: ', 1.0 - p_val
Пример #5
0
    def execute(self):
        """Print correlation, t value and significance for two CSV columns.

        Command-line arguments:
            CSVFILE: path to the CSV file with data.

        The user picks the X and Y columns interactively. Only rows where
        both selected cells are non-empty are used, so the two series stay
        paired row by row. The results of ``statistics.correlation``,
        ``statistics.t_value`` and ``statistics.significance`` are printed.

        Exits the process with status 1 when the CSV file yields no data.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('CSVFILE', help='path to csv file with data.')
        args = parser.parse_args()
        csv_data = io.read_csv_file(args.CSVFILE)

        if not csv_data:
            print 'ERROR: Invalid csv data file.'
            sys.exit(1)

        columns = csv_data[0].keys()
        x_column = io.choose_from_list('X Column:', columns)
        y_column = io.choose_from_list('Y Column:', columns)

        # Require both cells: filtering on X alone let a row with an empty Y
        # reach float() and abort the whole run.
        complete_rows = [
            each for each in csv_data if each[x_column] and each[y_column]
        ]
        x_data = [float(each[x_column]) for each in complete_rows]
        y_data = [float(each[y_column]) for each in complete_rows]

        print 'R:', statistics.correlation(x_data, y_data)
        print 'T:', statistics.t_value(x_data, y_data)
        print 'Significance:', statistics.significance(x_data, y_data)
Пример #6
0
    def execute(self):
        """Calculate and display the relative size table report"""
        data = io.read_csv_file(self.file_path)

        if not data:
            print "NO DATA"
            return

        if not all([self.are_valid_keys(each.keys()) for each in data]):
            raise RuntimeError(
                'Invalid data columns {}'.format(data[0].keys()))

        normalized_data = self.get_normalized_data(data)
        normalized_by_category = self.group_by_category(normalized_data)
        results = {}
        for category, items in normalized_by_category.iteritems():
            if len(items) < 2:
                results[category] = [items[0]] * 5
            else:
                results[category] = statistics.size_ranges(items)
        self.print_table(results)
Пример #7
0
    def from_csv_file(cls, filename):
        """Construct an instance from the historical-data columns of a CSV.

        The file is read once with ``io.read_csv_file``; each constructor
        keyword is then filled by ``collect`` from the column of the same
        meaning.

        Arguments:
            filename(str): A file name

        Returns:
            HistoricalData: Historical data read from CSV file.
        """
        rows = io.read_csv_file(filename)

        # (constructor keyword, CSV column heading), in collection order.
        keyword_to_column = (
            ('planned_sizes', "Planned A+M Size"),
            ('proxy_sizes', "Proxy Size Estimate"),
            ('actual_sizes', "Actual A+M Size"),
            ('planned_times', "Planned Time"),
            ('actual_times', "Actual Time"),
        )

        fields = {}
        for keyword, heading in keyword_to_column:
            fields[keyword] = collect(heading, rows)
        return cls(**fields)
Пример #8
0
    def execute(self):
        parser = argparse.ArgumentParser()
        parser.add_argument(
            'CSVFILE', help='path to csv file with historical data.')
        args = parser.parse_args()
        file_path = args.CSVFILE
        csv_data = io.read_csv_file(file_path)

        if not csv_data:
            print 'ERROR: No data'
            sys.exit(1)

        columns = csv_data[0].keys()
        x_column = io.choose_from_list('X Column:', columns)
        y_column = io.choose_from_list('Y Column:', columns)
        x_data = [float(each[x_column]) for each in csv_data]
        y_data = [float(each[y_column]) for each in csv_data]
        #x_data, y_data = statistics.remove_outliers(x_data, y_data)
        print

        print 'X DATA: {}'.format(x_data)
        print 'Y DATA: {}'.format(y_data)
        print

        beta_0 = statistics.beta_0(x_data, y_data)        
        print u'\u03B20: {}'.format(beta_0)
        warnings = statistics.beta_0_warnings(beta_0)
        if warnings:
            print 'WARNINGS:'
            print '\n'.join(warnings)
        print

        beta_1 = statistics.beta_1(x_data, y_data)
        print u'\u03B21: {}'.format(beta_1)
        warnings = statistics.beta_1_warnings(beta_1)
        if warnings:
            print 'WARNINGS:'
            print '\n'.join(warnings)
        print