示例#1
0
 def _operation(self, df):
     """
     Keeps the rows of a DataFrame that satisfy every stored criterion.

     For each variable in ``self.__criteria`` the expression
     ``"<variable> <criterion>"`` is evaluated with the dict returned by
     ``df.row_as_dict(i)`` as local names. A row is kept only when every
     expression evaluates to ``True``.

     Parameters
     ----------
     df : DataFrame
         The DataFrame to filter

     Returns
     -------
     DataFrame
         A new DataFrame with the columns of ``df`` and the rows that
         satisfy all criteria

     Raises
     ------
     KeyError
         If a variable with a criterion is not a column of ``df``
     TypeError
         If a criterion evaluates to something that is not a bool
     """
     criteria = self.__criteria
     missing_vars = [var for var in criteria if var not in df.vars]
     if missing_vars:
         raise KeyError(
             "No such variable(s): " + ", ".join(map(str, missing_vars)))
     # The expressions do not depend on the row: build them once.
     expressions = [
         str(var) + " " + str(criterion)
         for var, criterion in criteria.items()
     ]
     result = DataFrame()
     for var in df.vars:
         result.add_column(var)
     for i in range(len(df)):
         row_dict = df.row_as_dict(i)
         add_row = True
         # Every criterion is evaluated, even once the row is rejected, so
         # an invalid criterion is always reported.
         for expression in expressions:
             try:
                 # NOTE(security): eval executes the criterion text as
                 # Python code; emptying __builtins__ is not a sandbox.
                 # Criteria must never come from untrusted input.
                 test_result = eval(
                     expression, {"__builtins__": {}}, row_dict)
             except TypeError:
                 # The expression could not be computed for this row's
                 # values: the row simply does not match.
                 test_result = False
             if not isinstance(test_result, bool):
                 raise TypeError(
                     "Criterion '" + expression
                     + "' does not evaluate to a bool")
             add_row = add_row and test_result
         if add_row:
             result.add_row(df[None, i])
     return result
示例#2
0
 def _operation(self, group_df):
     """
     Sorts the rows of a group according to ``self.vars``.

     Each name in ``self.vars`` is turned into a 1-based column position
     of ``group_df``; a name prefixed with ``"desc_"`` refers to the
     column without the prefix and yields a negated position. The rows
     and these positions are handed to ``Sort.__merge_sort``.

     Parameters
     ----------
     group_df : DataFrame
         The group whose rows are sorted

     Returns
     -------
     DataFrame
         A new DataFrame with the columns of ``group_df`` and its rows in
         the order returned by the merge sort

     Raises
     ------
     ValueError
         Presumably, when a sort variable is not a column of ``group_df``
         (raised by ``group_df.vars.index`` if ``vars`` is a list)
     """
     nested_group_list = list(group_df)
     index_criteria = []
     for var in self.vars:
         is_desc = var.startswith("desc_")
         if is_desc:
             var = var[5:]  # strip the "desc_" prefix
         index_criterion = group_df.vars.index(var) + 1
         index_criteria.append(
             -index_criterion if is_desc else index_criterion)
     default_recursion_limit = getrecursionlimit()
     # Only ever raise the limit: for a small group, len(group_df) + 10
     # would be lower than the current limit and could itself trigger a
     # RecursionError.
     setrecursionlimit(max(default_recursion_limit, len(group_df) + 10))
     try:
         sorted_group_list = Sort.__merge_sort(nested_group_list,
                                               index_criteria)
     finally:
         # Restore the interpreter-wide limit even if the sort fails.
         setrecursionlimit(default_recursion_limit)
     result = DataFrame()
     for var in group_df.vars:
         result.add_column(var)
     for row in sorted_group_list:
         result.add_row(row)
     return result
示例#3
0
    def import_json(path, root=None):
        """
        Imports a JSON file as DataFrame.

        The file must contain an object whose values are tables; a table is
        a list of objects, one per row. The columns are the keys of the
        first row, and every row's values are matched to the columns by key.

        Parameters
        ----------
        path : str
            Absolute or relative path to the JSON file to import
        root : str = None
            Name of the root node to import; if None, or if the file has a
            single root node, the first root node of the file is imported

        Returns
        -------
        DataFrame
            A DataFrame with the contents of the JSON file; it has no
            column when the selected table is empty

        Raises
        ------
        KeyError
            If the file has several root nodes and ``root`` is not one of
            them
        IndexError
            If the file contains an empty object (no root node)
        """
        with open(path) as jsonfile:
            data = json.load(jsonfile)
        roots = list(data.keys())
        if len(roots) == 1 or root is None:
            root = roots[0]
        elif root not in roots:
            raise KeyError(root)
        table = data[root]
        df = DataFrame()
        if not table:
            # Nothing to infer the columns from.
            return df
        table_vars = list(table[0].keys())
        for var in table_vars:
            df.add_column(var)
        for row in table:
            # Align by key rather than by position: JSON objects carry no
            # guaranteed key order, and a missing key becomes None.
            df.add_row([row.get(var) for var in table_vars])
        return df
示例#4
0
 def apply(self, df):
     """
     Computes one row per group by applying ``_operation`` to each column
     listed in ``self.vars``.

     Parameters
     ----------
     df : DataFrame
         The (grouped) DataFrame to process

     Returns
     -------
     DataFrame
         One row per group, holding the group's values for the grouping
         variables followed by the results; the result is grouped again by
         all grouping variables except the last one
     """
     kept_vars = [*df.groups_vars, *self.vars]
     df = Select(*kept_vars).apply(df)
     result = DataFrame()
     for name in kept_vars:
         result.add_column(name)
     for group_df in df.groups_df:
         # The row starts with the grouping values, read from the group's
         # first row.
         row = [group_df[group_var, 0] for group_var in df.groups_vars]
         for var in self.vars:
             values = group_df[var]
             if self.__del_na:
                 values = [item for item in values if item is not None]
             if self.__del_nan:
                 values = [item for item in values
                           if isinstance(item, Number)]
             outcome = self._operation(values)
             if not isinstance(outcome, dict):
                 row.append(outcome)
                 continue
             # A dict result is spread over one column per key, named
             # "<var>_<key>", which replace the original column.
             keys = list(outcome.keys())
             first_expanded = var + "_" + keys[0]
             if first_expanded not in result.vars:
                 previous = var
                 for key in keys:
                     expanded = var + "_" + key
                     result.add_column(expanded, after=previous)
                     previous = expanded
                 result.del_column(var)
             row.extend(outcome.values())
         result.add_row(row)
     return GroupBy(*df.groups_vars[:-1]).apply(result)
 def apply(self, df):
     """
     Applies ``_operation`` to every group of ``df`` and stacks the
     non-empty results.

     Parameters
     ----------
     df : DataFrame
         The (grouped) DataFrame to process

     Returns
     -------
     DataFrame
         The rows of all transformed groups, grouped again by the grouping
         variables of ``df``
     """
     result = DataFrame()
     for group_df in df.groups_df:
         transformed = self._operation(group_df)
         if len(transformed) == 0:
             continue
         # The columns come from the first non-empty transformed group.
         # NOTE(review): if every group is empty, result keeps no column
         # at all - confirm this is intended.
         if len(result.vars) == 0:
             for name in transformed.vars:
                 result.add_column(name)
         for row in transformed:
             result.add_row(row)
     return GroupBy(*df.groups_vars).apply(result)
示例#6
0
 def apply(self, df):
     """
     Joins ``df`` with the other DataFrame held by this object.

     ``self.__matches`` maps a column of the other DataFrame to the column
     of ``df`` it must equal. Values are compared as text (``str`` of the
     value in ``df``). A row of ``df`` without any match is kept once,
     completed with None; otherwise it is repeated once per matching row.

     Parameters
     ----------
     df : DataFrame
         The left DataFrame of the join

     Returns
     -------
     DataFrame
         The columns of ``df`` followed by the columns of the other
         DataFrame that are not matching keys; a column whose name already
         exists in ``df`` is prefixed with ``"Y_"``
     """
     matches_by_key = self.__matches
     other = self.__other
     other_vars = [var for var in other.vars if var not in matches_by_key]
     result = DataFrame()
     for var in df.vars:
         result.add_column(var)
     for var in other_vars:
         if var in df.vars:
             result.add_column("Y_" + str(var))
         else:
             result.add_column(var)
     # Cache of lookups already done, keyed by the tuple of target values.
     # A tuple cannot collide the way concatenated text can.
     known_matches = {}
     for i in range(len(df)):
         base_row = df[None, i]
         filter_kw = {}
         target_values = []
         for key, df_var in matches_by_key.items():
             target_text = str(df[df_var, i])
             # repr() yields a correctly escaped string literal, so values
             # containing quotes or backslashes still compare properly.
             # Assumes Filter evaluates the criterion as a Python
             # expression - TODO confirm.
             filter_kw[key] = "==" + repr(target_text)
             target_values.append(target_text)
         cache_key = tuple(target_values)
         if cache_key not in known_matches:
             matches = Filter(**filter_kw).apply(other)
             if len(matches) == 0:
                 known_matches[cache_key] = [None] * len(other_vars)
             else:
                 known_matches[cache_key] = Select(
                     *other_vars).apply(matches)
         other_content = known_matches[cache_key]
         if isinstance(other_content, DataFrame):
             for row in other_content:
                 new_row = deepcopy(base_row)
                 new_row.extend(row)
                 result.add_row(new_row)
         else:
             new_row = deepcopy(base_row)
             new_row.extend(other_content)
             result.add_row(new_row)
     return result
示例#7
0
    def import_csv(path, headers=True, delimiter=";", encoding='ISO-8859-1'):
        """
        Imports a CSV file as DataFrame.

        Parameters
        ----------
        path : str
            Absolute or relative path to the CSV file to import
        headers : bool = True
            Whether the first line of the file holds the column names; if
            False, the columns are named "Var0", "Var1", ... and the first
            line is imported as data
        delimiter : str = ";"
            The file's delimiter
        encoding : str = 'ISO-8859-1'
            The file's encoding

        Returns
        -------
        DataFrame
            A DataFrame with the contents of the CSV file (values are kept
            as the strings produced by ``csv.reader``); it has no column
            when the file is empty
        """
        df = DataFrame()
        with open(path, newline='', encoding=encoding) as csv_file:
            reader = csv.reader(csv_file, delimiter=delimiter)
            first_row = next(reader, None)
            if first_row is None:
                return df
            if headers:
                for var in first_row:
                    df.add_column(var)
            else:
                for i in range(len(first_row)):
                    df.add_column("Var" + str(i))
                # Without headers the first line is data and must not be
                # dropped.
                df.add_row(first_row)
            for row in reader:
                df.add_row(row)
        return df