Exemplo n.º 1
0
def apply_join_condition(condition_dict, op, current_name, current_main,
                         current_metadata, source_main, source_metadata):
    """Check one pair of rows against every join condition for operator ``op``.

    Args:
        condition_dict: Maps a ``"table.column"`` reference to the list of
            ``"table.column"`` references it is compared against.
        op: Key into the module-level ``ops`` table selecting the comparison
            callable.
        current_name: Name of the table ``current_main`` and
            ``current_metadata`` belong to.
        current_main: Row object of the current table; its ``value`` list is
            indexed by column position.
        current_metadata: Column names of the current table.
        source_main: Row object of the other table.
        source_metadata: Column names of the other table.

    Returns:
        ``False`` as soon as the comparison callable returns a truthy value
        for any referenced column pair, ``True`` otherwise.
    """
    # NOTE(review): a truthy comparison rejects the pair, so ``ops``
    # presumably maps each operator to its negated test -- confirm.
    op_func = ops[op]
    for left_ref, right_refs in condition_dict.items():
        left_parts = left_ref.split('.')
        left_table = left_parts[0]
        left_column = left_parts[1]

        # Pair each side of the condition with the row AND the metadata of
        # the table it names. Previously, when the left reference named the
        # source table, its index was looked up in source_metadata but then
        # applied to current_main (and vice versa for the right side).
        if left_table == current_name:
            left_row, left_metadata = current_main, current_metadata
            right_row, right_metadata = source_main, source_metadata
        else:
            left_row, left_metadata = source_main, source_metadata
            right_row, right_metadata = current_main, current_metadata

        for right_ref in right_refs:
            right_column = right_ref.split('.')[1]
            left_index = GeneralFunction.get_index_of_metadata(
                left_metadata, [left_column])
            right_index = GeneralFunction.get_index_of_metadata(
                right_metadata, [right_column])

            if op_func(left_row.value[left_index[0]],
                       right_row.value[right_index[0]]):
                return False

    return True
Exemplo n.º 2
0
    def sum_group(self, sum_header, group_header):
        """Aggregate ``sum_header`` per combination of ``group_header`` columns.

        The per-row accumulation is delegated to
        ``self.update_sum_group_dict``, which is called once per key of
        ``self.main_table`` and returns a ``(key, dict)`` pair; only the
        dictionary is kept.

        Args:
            sum_header: Name of the column being summed.
            group_header: Names of the grouping columns.

        Returns:
            A tuple ``(header, data_type, rows)`` where ``header`` is the
            grouping column names followed by ``'sum_' + sum_header``.
        """
        # Resolve column positions: the summed column first, then the groups
        summed_positions = GeneralFunction.get_index_of_metadata(self.metadata, [sum_header])
        grouping_positions = GeneralFunction.get_index_of_metadata(self.metadata, group_header)

        result_types = [self.data_type[pos] for pos in grouping_positions]
        result_types += [self.data_type[pos] for pos in summed_positions]

        grouped = {}
        for row_key in self.main_table:
            _, grouped = self.update_sum_group_dict(grouped, row_key, grouping_positions, summed_positions)

        result_header = list(group_header) + ['sum_' + sum_header]
        return result_header, result_types, grouped
Exemplo n.º 3
0
    def sort(self, require_metadata):
        """Return the table's rows ordered by the given columns.

        Args:
            require_metadata: Column names to sort by, in priority order.

        Returns:
            A tuple ``(metadata, data_type, rows)``; metadata and data types
            are the table's own, ``rows`` is built from the sorted
            ``(key, row)`` pairs by ``GeneralFunction.convert_tuples_into_dic``.
        """
        # Positions of the sort columns inside each row's value list
        key_positions = GeneralFunction.get_index_of_metadata(self.metadata, require_metadata)

        def sort_key(entry):
            # entry is a (key, row) pair coming from main_table.items()
            row = entry[1]
            return operator.itemgetter(*key_positions)(row.value)

        ordered_pairs = list(self.main_table.items())
        ordered_pairs.sort(key=sort_key)

        # Turn the ordered list of pairs back into a dictionary
        ordered_rows = GeneralFunction.convert_tuples_into_dic(ordered_pairs)

        return self.metadata, self.data_type, ordered_rows
Exemplo n.º 4
0
    def mov_sum(self, variables):
        """Append a trailing moving sum of one column to every row.

        For each row, the sum covers that row and up to ``window_size - 1``
        rows before it, in ``self.main_table`` iteration order.

        Args:
            variables: Sequence whose first item is the column name and whose
                second item is the window size (converted with ``int``).

        Returns:
            A tuple ``(metadata, data_type, rows)``: the table's metadata plus
            ``'mov_sum'``, its data types plus the summed column's type, and a
            new dict mapping each original key to a row with the moving sum
            appended.
        """
        require_header = variables[0]
        window_size = int(variables[1])

        new_metadata = list(self.metadata)
        new_metadata.append('mov_sum')
        # Position of the summed column inside each row's value list
        require_index = GeneralFunction.get_index_of_metadata(self.metadata, [require_header])[0]
        new_data_type = list(self.data_type)
        new_data_type.append(self.data_type[require_index])

        # Materialise the rows once so each window lookup is O(1); rebuilding
        # the key and value lists for every row made the loop quadratic.
        rows = list(self.main_table.values())

        new_dict = {}
        for position, key in enumerate(self.main_table):
            moving_sum = 0
            # Walk backwards from the current row; stop at the table start
            for offset in range(window_size):
                index = position - offset
                if index < 0:
                    break
                moving_sum += rows[index].value[require_index]

            new_value = list(self.main_table[key].value)
            new_value.append(moving_sum)
            new_dict[key] = DbObject.DbObject(new_value)

        return new_metadata, new_data_type, new_dict
Exemplo n.º 5
0
def read_test():
    """Read commands line by line via ``fileinput`` and execute each one."""
    for raw_line in fileinput.input():
        # Trim surrounding whitespace (newline included) and drop all spaces
        command = raw_line.strip().replace(" ", "")
        # Split the command into target name, action and action parameters
        parsed = GeneralFunction.get_input_action(command)
        assign_name, action_name, action_parameters = parsed
        # Run the database action for this command
        perform_input_action(assign_name, action_name, action_parameters, command)
Exemplo n.º 6
0
    def count_group(self, count_header, group_header):
        """Count ``count_header`` per combination of ``group_header`` columns.

        The per-row bookkeeping is delegated to
        ``self.update_count_group_dict``, which is called once per key of
        ``self.main_table`` and returns a ``(key, dict)`` pair; only the
        dictionary is kept.

        Args:
            count_header: Name of the column being counted.
            group_header: Names of the grouping columns.

        Returns:
            A tuple ``(header, data_type, rows)`` where ``header`` is the
            grouping column names followed by ``'count_' + count_header``.
        """
        count_index = GeneralFunction.get_index_of_metadata(self.metadata, [count_header])
        # get the index of parameters in metadata
        group_index = GeneralFunction.get_index_of_metadata(self.metadata, group_header)

        new_data_type = [self.data_type[i] for i in group_index] + [self.data_type[i] for i in count_index]

        group_dict = {}

        # A separate per-group tally used to be kept here as well, but it was
        # never read or returned, so it has been dropped.
        for key in self.main_table:
            _, group_dict = self.update_count_group_dict(group_dict, key, group_index, count_index)

        return list(group_header) + ['count_' + count_header], new_data_type, group_dict
Exemplo n.º 7
0
    def sum(self, require_metadata):
        """Total the first requested column over every row of the table.

        Args:
            require_metadata: Column names; only the first one is summed.

        Returns:
            A tuple ``(header, data_type, rows)``: ``['sum_' + column]``, a
            one-element list with that column's data type, and a dict holding
            a single row (stored under the key ``None``) with the total.
        """
        # Positions of the requested columns inside each row's value list
        column_positions = GeneralFunction.get_index_of_metadata(self.metadata, require_metadata)

        running_total = 0
        for row in self.main_table.values():
            # NOTE(review): int() drops any fractional part of each value
            # before it is added -- confirm this truncation is intended.
            running_total += int(row.value[column_positions[0]])

        header = ['sum_' + require_metadata[0]]
        column_type = [self.data_type[column_positions[0]]]
        return header, column_type, {None: DbObject.DbObject([running_total])}
Exemplo n.º 8
0
def append_to_dict(target_dict, op, item):
    """Split ``item`` on ``op`` and file the right-hand side under the left.

    The text before ``op`` is the dictionary key and the text after it is the
    value. The value is converted to ``float`` when
    ``GeneralFunction.check_is_float`` accepts it and kept as a string
    otherwise. Values for the same key are collected in a list.

    Args:
        target_dict: Dictionary updated in place.
        op: Separator string used to split ``item``.
        item: Text of the form ``<key><op><value>``.

    Raises:
        IndexError: If ``op`` does not occur in ``item``.
    """
    item_split = item.split(op)
    key = item_split[0]
    raw_value = item_split[1]

    value = float(raw_value) if GeneralFunction.check_is_float(raw_value) else raw_value

    if target_dict.get(key):
        # Append in place. The old code indexed the dict with the whole split
        # list (unhashable) and stored append()'s None result back.
        target_dict[key].append(value)
    else:
        target_dict[key] = [value]
Exemplo n.º 9
0
    def average(self, require_metadata):
        """Average the first requested column over every row of the table.

        Args:
            require_metadata: Column names; only the first one is averaged.

        Returns:
            A tuple ``(header, data_type, rows)``: ``['avg_' + column]``, a
            one-element list with that column's data type, and a dict holding
            a single row (stored under the key ``None``) with the average.

        Raises:
            ZeroDivisionError: If the table has no rows.
        """
        # get the index of parameters in metadata
        require_index = GeneralFunction.get_index_of_metadata(self.metadata, require_metadata)
        # Report the type of the averaged column, wrapped in a list like every
        # other data_type result (previously this was the bare type string of
        # the table's first column).
        new_data_type = [self.data_type[require_index[0]]]

        total_sum = 0
        for row in self.main_table.values():
            total_sum += row.value[require_index[0]]
        total_average = total_sum / len(self.main_table)

        return ['avg_' + require_metadata[0]], new_data_type, {None: DbObject.DbObject([total_average])}
Exemplo n.º 10
0
    def join_through_single_index(new_dict, index, main_table, source_table, source_metadata, table1_column, table2_column):
        """Equi-join ``source_table`` against ``main_table`` via an index.

        For every source row, the value of ``table2_column`` is looked up in
        the index built on ``table1_column``. Each matching main-table row is
        concatenated with the source row and stored in ``new_dict`` under the
        key ``<main key>_<source key>``.

        Args:
            new_dict: Output dictionary, filled in place.
            index: Mapping from column name to an index, which itself maps a
                column value to the list of main-table keys holding it.
            main_table: Rows of the indexed table, keyed by row key.
            source_table: Rows of the table being joined in.
            source_metadata: Column names of ``source_table``.
            table1_column: Indexed column of ``main_table``.
            table2_column: Column of ``source_table`` to match on.
        """
        source_require_index = GeneralFunction.get_index_of_metadata(source_metadata,
                                                                     [table2_column])
        table1_index = index[table1_column]

        for source_key in source_table:
            source_row = source_table[source_key].value
            source_v = source_row[source_require_index[0]]
            if source_v not in table1_index:
                continue
            for position in table1_index[source_v]:
                # Build a fresh list. Extending main_table[position].value in
                # place corrupted the stored row and made later matches for
                # the same row accumulate earlier source columns.
                new_value = list(main_table[position].value)
                new_value.extend(source_row)
                new_dict[position + '_' + source_key] = DbObject.DbObject(new_value)
Exemplo n.º 11
0
    def avg_group(self, avg_header, group_header):
        """Average ``avg_header`` per combination of ``group_header`` columns.

        Group totals are accumulated by ``self.update_sum_group_dict`` (called
        once per key of ``self.main_table``). The number of rows per group is
        tallied here, and the last entry of every group row is then divided by
        that tally.

        Args:
            avg_header: Name of the column being averaged.
            group_header: Names of the grouping columns.

        Returns:
            A tuple ``(header, data_type, rows)`` where ``header`` is the
            grouping column names followed by ``'avg_' + avg_header``.
        """
        # Resolve column positions: the averaged column first, then the groups
        averaged_positions = GeneralFunction.get_index_of_metadata(self.metadata, [avg_header])
        grouping_positions = GeneralFunction.get_index_of_metadata(self.metadata, group_header)

        result_types = [self.data_type[pos] for pos in grouping_positions]
        result_types += [self.data_type[pos] for pos in averaged_positions]

        grouped = {}
        members_per_group = {}

        for row_key in self.main_table:
            group_key, grouped = self.update_sum_group_dict(grouped, row_key, grouping_positions, averaged_positions)
            # One more row seen for this group
            members_per_group[group_key] = members_per_group.get(group_key, 0) + 1

        # Convert each group's running total (last entry) into its mean
        for group_key, group_row in grouped.items():
            totals = group_row.value
            totals[-1] = totals[-1] / members_per_group[group_key]

        result_header = list(group_header) + ['avg_' + avg_header]
        return result_header, result_types, grouped
Exemplo n.º 12
0
    def project(self, require_metadata):
        """Keep only the requested columns, in the requested order.

        Args:
            require_metadata: Column names to keep.

        Returns:
            A tuple ``(metadata, data_type, rows)``: ``require_metadata``
            itself, the matching data types, and a new dict mapping each
            original key to a row holding just the requested values.
        """
        require_index = GeneralFunction.get_index_of_metadata(self.metadata, require_metadata)

        # Take the data types in the same order as the projected values.
        # Filtering self.data_type in its original column order mislabelled
        # columns whenever the projection reordered or repeated them.
        data_type = [self.data_type[i] for i in require_index]

        new_dict = {}

        for key in self.main_table:
            current_value = self.main_table[key].value
            new_dict[key] = DbObject.DbObject([current_value[i] for i in require_index])

        return require_metadata, data_type, new_dict
Exemplo n.º 13
0
    def create_index(self, index_data_type, metadata):
        """Build an index on one column and store it in ``self.index``.

        The index maps each distinct column value to the list of row keys
        holding that value. It is a plain ``dict`` when ``index_data_type``
        is ``'hash'`` and an ``OOBTree`` for any other value.

        Args:
            index_data_type: ``'hash'`` selects the dict-backed index.
            metadata: Name of the column to index; also the key under which
                the index is stored in ``self.index``.
        """
        # Position of the indexed column inside each row's value list
        column_positions = GeneralFunction.get_index_of_metadata(self.metadata, [metadata])

        # Choose the backing container: hash table or B-tree
        lookup = {} if index_data_type == 'hash' else OOBTree()

        for row_key, row in self.main_table.items():
            indexed_value = row.value[column_positions[0]]
            existing_keys = lookup.get(indexed_value)
            if existing_keys:
                existing_keys.append(row_key)
            else:
                lookup[indexed_value] = [row_key]

        self.index[metadata] = lookup
Exemplo n.º 14
0
    def input_from_file(self, file_name, create_metadata_flag):
        """Load ``'|'``-separated rows from ``rowData/<name>.txt`` files.

        Every line is stripped and has all spaces removed before it is split
        on ``'|'``. Column types are inferred from the first data line only:
        a field accepted by ``GeneralFunction.check_is_float`` makes the
        column ``'float'``, anything else ``'string'``. Rows are stored in
        ``self.main_table`` under the keys ``'0'``, ``'1'``, ... and reading
        stops at the first line that is empty after cleaning.

        Args:
            file_name: Iterable of file base names (without directory or
                extension).
            create_metadata_flag: When truthy, the first line of each file is
                lower-cased and passed to ``self.assign_metadata`` as the
                column names.
        """
        for name in file_name:
            # The context manager closes the file even if parsing raises
            with open('rowData/' + name + '.txt') as file:
                line = file.readline().strip().replace(" ", "")
                # Create metadata
                if create_metadata_flag:
                    metadata_array = line.lower().split('|')
                    self.assign_metadata(metadata_array)
                    line = file.readline().strip().replace(" ", "")

                # Infer each column's data type from the first data line
                data_type = []
                for item in line.split('|'):
                    if GeneralFunction.check_is_float(item):
                        data_type.append('float')
                    else:
                        data_type.append('string')

                self.data_type = data_type

                # Input data into dictionary
                iterator = 0
                while line:
                    line_item = line.split('|')
                    object_array = []
                    for index, d_type in enumerate(data_type):
                        matching_value = line_item[index]
                        if d_type == 'float':
                            object_array.append(float(matching_value))
                        else:
                            object_array.append(matching_value)

                    self.main_table[str(iterator)] = DbObject.DbObject(object_array)
                    line = file.readline().strip().replace(" ", "")
                    iterator += 1
Exemplo n.º 15
0
# Actions called as table.<method>(variables[1:]); maps action -> method name
_REST_ARGUMENT_ACTIONS = {
    'project': 'project',
    'sort': 'sort',
    'movavg': 'mov_avg',
    'avg': 'average',
    'movsum': 'mov_sum',
    'sum': 'sum',
    'count': 'count',
}

# Actions called as table.<method>(variables[1], variables[2:])
_GROUP_ACTIONS = {
    'sumgroup': 'sum_group',
    'avggroup': 'avg_group',
    'countgroup': 'count_group',
}


def _write_table_to_file(table, file_path):
    """Write ``table`` to ``file_path`` as '|'-separated text, header first."""
    with open(file_path, 'w') as file:
        # write metadata at first line
        file.write('|'.join(table.metadata) + '\n')

        # write values; float-like values are rounded to 4 decimal places
        for key in table.main_table:
            cells = []
            for value in table.main_table[key].value:
                if GeneralFunction.check_is_float(str(value)):
                    cells.append(str(round(value, 4)))
                else:
                    cells.append(str(value))
            file.write('|'.join(cells) + '\n')


def perform_input_action(assign_name, function_name, variables, input_string):
    """Execute one parsed command against the parameter assignment table.

    The action name is matched case-insensitively. Actions that produce a
    table store their ``(metadata, data_type, rows)`` result under
    ``assign_name``. ``hash`` / ``btree`` build an index on an existing table
    and ``outputtofile`` writes a table to ``output/<name>.txt``. Unknown
    action names are ignored. The elapsed time is reported through
    ``GeneralFunction.print_time`` in every case.

    Args:
        assign_name: Name the result table is stored under.
        function_name: Action name, e.g. ``'select'`` or ``'sumgroup'``.
        variables: Action arguments; for most actions the first item names
            the input table.
        input_string: The original command text, passed on to the timing
            report.
    """
    function_name = function_name.lower()

    start = time.time()

    # Create the target table unless the action only builds an index. The
    # previous test compared against 'hash' twice joined by `or`, which is
    # always true, so index actions created a table as well.
    if function_name not in ('hash', 'btree'):
        parameter_assignment_table.create_parameter_assignment_table(
            assign_name)

    get_table = parameter_assignment_table.get_parameter_assignment_table
    # (metadata, data_type, rows) for actions that yield a new table
    result = None

    if function_name == 'inputfromfile':
        temp_table = get_table(assign_name)
        temp_table.input_from_file(variables, True)
        temp_table.assign_name(assign_name)

    elif function_name == 'select':
        result = get_table(variables[0]).select(variables[1])

    elif function_name in _REST_ARGUMENT_ACTIONS:
        # The first value names the table; the rest are the action arguments
        action = getattr(get_table(variables[0]),
                         _REST_ARGUMENT_ACTIONS[function_name])
        result = action(variables[1::])

    elif function_name in _GROUP_ACTIONS:
        # Table name, aggregated column, then the grouping columns
        action = getattr(get_table(variables[0]),
                         _GROUP_ACTIONS[function_name])
        result = action(variables[1], variables[2::])

    elif function_name in ('hash', 'btree'):
        get_table(variables[0]).create_index(function_name, variables[1])

    elif function_name == 'join':
        table_parameter1 = variables[0]
        temp_old_table = get_table(table_parameter1)
        table_parameter2 = variables[1]
        temp_old_table2 = get_table(table_parameter2)
        result = temp_old_table.join(
            table_parameter1, table_parameter2, temp_old_table2, variables[2])

    elif function_name == 'concat':
        temp_old_table = get_table(variables[0])
        temp_old_table2 = get_table(variables[1])
        result = temp_old_table.concat(temp_old_table2)

    elif function_name == 'outputtofile':
        _write_table_to_file(get_table(variables[0]),
                             'output/' + variables[1] + '.txt')

    if result is not None:
        meta_data, data_type, new_dict = result
        parameter_assignment_table.insert_parameter_assignment_table(
            assign_name, meta_data, data_type, new_dict)

    GeneralFunction.print_time(start, time.time(), function_name, input_string)