Пример #1
0
    def _extract_models(self, criterion):
        """Add hidden models for every table referenced by *criterion*.

        Collects the table names the criterion mentions (through the foreign
        keys of its two sides and through ``_from_objects``) and, for each
        table not yet represented in ``self._models``, appends a hidden
        ``Selection`` on its "id" column so the query can join against it.

        :param criterion: a SQLAlchemy criterion/binary expression.
        :return: None; ``self._models`` is extended in place.
        """
        tables = []

        # A ":" in the textual form means the criterion compares against a
        # bound constant value: it carries no information about a join
        # between tables.
        if ":" in str(criterion):
            return
        else:
            # Collect table objects referenced through the foreign keys of
            # both sides of the expression.
            expressions = [criterion.expression.left, criterion.expression.right] if hasattr(criterion, "expression") else []
            for expression in expressions:
                if str(expression) == "NULL":
                    return
                if hasattr(expression, "foreign_keys"):
                    for foreign_key in getattr(expression, "foreign_keys"):
                        if hasattr(foreign_key, "column"):
                            tables += [foreign_key.column.table]
        tables_objects = getattr(criterion, "_from_objects", [])
        tables += [str(table) for table in tables_objects]
        tables = list(set(tables))  # remove duplicate names

        # Table names already covered by the models of the current query.
        # Materialized as lists on purpose: the original lazy map/filter
        # objects would be exhausted after the first membership test under
        # Python 3, silently dropping tables.
        current_entities = [model._model for model in self._models]
        current_entities = [entity for entity in current_entities if entity is not None]
        current_entities_tablenames = [entity.__tablename__ for entity in current_entities]
        missing_tables = [table for table in tables if table not in current_entities_tablenames]
        missing_entities_objects = [
            get_model_class_from_name(get_model_classname_from_tablename(str(table)))
            for table in missing_tables
        ]

        # Register each missing entity model as a hidden selection so the
        # query can join against it.
        self._models += [Selection(entity, "id", is_hidden=True)
                         for entity in missing_entities_objects]
Пример #2
0
 def _extract_fields(self, tablename):
     """Return the sorted list of field names of the model mapped to *tablename*.

     Resolves the model class for the table, reads its attribute names from
     SQLAlchemy's instrumentation and appends Rome's internal bookkeeping
     columns.

     :param tablename: name of the table to inspect.
     :return: sorted, duplicate-free list of field names.
     """
     from lib.rome.core.models import get_model_class_from_name, get_model_classname_from_tablename
     modelclass_name = get_model_classname_from_tablename(tablename)
     klass = get_model_class_from_name(modelclass_name)
     # List comprehensions replace the original lazy map objects: under
     # Python 3 a map object does not support "+=" with a list.
     try:
         # Preferred path: read attribute keys from a live instance's
         # SQLAlchemy state.
         fields = ["%s" % (attr.key) for attr in klass()._sa_instance_state.attrs]
     except Exception:
         # Fallback when instantiation fails: iterate the class manager,
         # which yields the attribute names directly. (The original bare
         # "except:" would also have swallowed KeyboardInterrupt/SystemExit.)
         fields = ["%s" % (attr) for attr in klass._sa_class_manager]
     # Rome's internal bookkeeping columns are always exposed.
     fields += ["_pid", "_metadata_novabase_classname", "_rid", "_nova_classname", "_rome_version_number"]
     fields = sorted(set(fields))
     print("fields@%s => %s" % (tablename, fields))
     return fields
Пример #3
0
    def _extract_table_metadata(self, tablename):
        """Collect column/type metadata for *tablename* and cache it.

        Resolves the model class mapped to the table, normalizes the field
        names, drops the internal "_rome_version_number" column and stores
        the per-column description produced by ``process_column`` (keyed by
        column name) into ``self.table_columns_metadata``.
        """
        from lib.rome.core.models import get_model_class_from_name, get_model_classname_from_tablename
        klass = get_model_class_from_name(get_model_classname_from_tablename(tablename))

        fields = self._extract_fields(tablename)
        # Normalize names, then drop the internal version column.
        columns = [self._correct_badname(field) for field in fields]
        columns = [column for column in columns if column != "_rome_version_number"]
        metadata = {}
        for column in columns:
            described = process_column(column, klass)
            metadata[described[0]] = described
        self.table_columns_metadata[tablename] = metadata
Пример #4
0
    def _extract_table_metadata(self, tablename):
        """Build and cache column metadata for *tablename*.

        Resolves the model class mapped to the table, normalizes the field
        names, removes the internal "_rome_version_number" column and stores
        the per-column value produced by ``process_column`` (keyed by column
        name) into ``self.table_columns_metadata``.
        """
        from lib.rome.core.models import get_model_class_from_name, get_model_classname_from_tablename
        modelclass_name = get_model_classname_from_tablename(tablename)
        klass = get_model_class_from_name(modelclass_name)

        fields = self._extract_fields(tablename)
        # Normalize field names before deriving their column descriptions.
        corrected_columns = map(lambda x: self._correct_badname(x), fields)
        corrected_columns = filter(lambda x: x != "_rome_version_number",
                                   corrected_columns)
        # columns_name_str = ", ".join(map(lambda x: "%s varchar" % (x), corrected_columns))
        # NOTE(review): each entry appears to be a sequence whose first item
        # is the column name — confirm against process_column.
        columns_associated_with_types_list = map(
            lambda x: process_column(x, klass), corrected_columns)
        columns_associated_with_types = {}
        for each in columns_associated_with_types_list:
            columns_associated_with_types[each[0]] = each
        self.table_columns_metadata[tablename] = columns_associated_with_types
Пример #5
0
 def _extract_fields(self, tablename):
     """Return the sorted field names of the model mapped to *tablename*.

     Resolves the model class for the table, reads its attribute names from
     SQLAlchemy's instrumentation and appends Rome's internal bookkeeping
     columns.

     :param tablename: name of the table to inspect.
     :return: sorted, duplicate-free list of field names.
     """
     from lib.rome.core.models import get_model_class_from_name, get_model_classname_from_tablename
     modelclass_name = get_model_classname_from_tablename(tablename)
     klass = get_model_class_from_name(modelclass_name)
     # List comprehensions replace the original lazy map objects: under
     # Python 3 a map object does not support "+=" with a list.
     try:
         # Preferred path: attribute keys from a live instance's SQLAlchemy
         # state.
         fields = ["%s" % (attr.key)
                   for attr in klass()._sa_instance_state.attrs]
     except Exception:
         # Fallback when instantiation fails: the class manager iterates
         # over the attribute names. (The original bare "except:" would
         # also have swallowed KeyboardInterrupt/SystemExit.)
         fields = ["%s" % (attr) for attr in klass._sa_class_manager]
     # Rome's internal bookkeeping columns are always exposed.
     fields += [
         "_pid", "_metadata_novabase_classname", "_rid", "_nova_classname",
         "_rome_version_number"
     ]
     fields = sorted(set(fields))
     print("fields@%s => %s" % (tablename, fields))
     return fields
Пример #6
0
 def _extract_models(self, criterion):
     """Add hidden models for every table referenced by *criterion*.

     Collects the table names the criterion mentions (through the foreign
     keys of its two sides and through ``_from_objects``) and, for each
     table not yet represented in ``self._models``, appends a hidden
     ``Selection`` on its "id" column so the query can join against it.

     :param criterion: a SQLAlchemy criterion/binary expression.
     :return: None; ``self._models`` is extended in place.
     """
     tables = []
     # A ":" in the textual form means the criterion compares against a
     # bound constant: it carries no join information.
     if ":" in str(criterion):
         return
     else:
         # Collect table objects referenced by the foreign keys of both
         # sides of the expression.
         expressions = [
             criterion.expression.left, criterion.expression.right
         ] if hasattr(criterion, "expression") else []
         for expression in expressions:
             if str(expression) == "NULL":
                 return
             if hasattr(expression, "foreign_keys"):
                 for foreign_key in getattr(expression, "foreign_keys"):
                     if hasattr(foreign_key, "column"):
                         tables += [foreign_key.column.table]
     tables_objects = getattr(criterion, "_from_objects", [])
     tables += [str(table) for table in tables_objects]
     tables = list(set(tables))  # remove duplicate names
     # Table names already covered by the current models. Materialized as
     # lists: the original lazy map/filter objects would be exhausted after
     # the first membership test under Python 3, silently dropping tables.
     current_entities = [model._model for model in self._models]
     current_entities = [entity for entity in current_entities
                         if entity is not None]
     current_entities_tablenames = [entity.__tablename__
                                    for entity in current_entities]
     missing_tables = [table for table in tables
                       if table not in current_entities_tablenames]
     missing_entities_objects = [
         get_model_class_from_name(get_model_classname_from_tablename(str(table)))
         for table in missing_tables
     ]
     # Register each missing entity model as a hidden selection.
     self._models += [Selection(entity, "id", is_hidden=True)
                      for entity in missing_entities_objects]
Пример #7
0
def building_tuples(list_results, labels, criterions, hints=[]):
    """Join per-table result lists into tuples (relational-algebra join).

    In "experimental" mode, collects joining criterions from the given
    criterion objects and from the models' relationships, indexes candidate
    objects by their join-column values, then progressively extends partial
    result rows one table at a time.

    NOTE(review): this is Python 2 code — it indexes the result of ``zip()``
    (``steps[0]``, ``steps[1:]``), uses ``dict.has_key()`` and relies on
    ``map``/``filter`` returning lists. It also uses a mutable default for
    *hints* (harmless here: hints is only read).

    :param list_results: list of row lists, one per table.
    :param labels: table names, aligned with *list_results*.
    :param criterions: criterion objects exposing an ``exps`` attribute.
    :param hints: hint objects with ``table_name``/``attribute``/``value``,
        used to skip non-matching candidate objects.
    :return: list of joined rows (lists of objects).
    """
    from lib.rome.core.rows.rows import get_attribute, set_attribute, has_attribute
    mode = "experimental"
    # NOTE(review): "is" compares identity, not equality; these string
    # comparisons only work thanks to CPython interning — "==" is the
    # correct operator. The cartesian_product branch is dead as written.
    if mode is "cartesian_product":
        cartesian_product = []
        for element in itertools.product(*list_results):
            cartesian_product += [element]
        return cartesian_product
    elif mode is "experimental":
        steps = zip(list_results, labels)
        candidates_values = {}
        candidates_per_table = {}
        joining_criterions = []
        non_joining_criterions = {}
        # Initialising candidates per table
        for each in labels:
            candidates_per_table[each] = {}
        # Collecting joining expressions
        for criterion in criterions:
            # if criterion.operator in  "NORMAL":
            for exp in criterion.exps:
                for joining_criterion in extract_joining_criterion(exp):
                    foo = [x for x in joining_criterion if x is not None]
                    if len(foo) > 1:
                        joining_criterions += [foo]
                    else:
                        # Extract here non joining criterions, and use it to filter objects
                        # that are located in list_results
                        exp_criterions = ([x for x in flatten(joining_criterion) if x is not None])
                        for non_joining_criterion in exp_criterions:
                            tablename = non_joining_criterion["table"]
                            column = non_joining_criterion["column"]
                            if not tablename in non_joining_criterions:
                                non_joining_criterions[tablename] = []
                            non_joining_criterions[tablename] += [{
                                "tablename": tablename,
                                "column": column,
                                "exp": exp,
                                "criterion": criterion
                            }]
        # # Filtering list_of_results with non_joining_criterions
        # corrected_list_results = []
        # for results in list_results:
        #     cresults = []
        #     for each in results:
        #         tablename = each["nova_classname"]
        #         if tablename in non_joining_criterions:
        #             do_add = True
        #             for criterion in non_joining_criterions[tablename]:
        #                 if not criterion["criterion"].evaluate(KeyedTuple([each], labels=[tablename])):
        #                     do_add = False
        #                     break
        #             if do_add:
        #                 cresults += [each]
        #     corrected_list_results += [cresults]
        # list_results = corrected_list_results
        # Consolidating joining criterions with data stored in relationships
        done_index = {}
        for step in steps:
            tablename = step[1]
            model_classname = get_model_classname_from_tablename(tablename)
            fake_instance = get_model_class_from_name(model_classname)()
            relationships = fake_instance.get_relationships()
            for r in relationships:
                criterion = extract_joining_criterion_from_relationship(r, tablename)
                key1 = criterion[0]["table"]+"__"+criterion[1]["table"]
                key2 = criterion[1]["table"]+"__"+criterion[0]["table"]
                # NOTE(review): this condition looks garbled — the middle
                # part parses as the chained comparison
                # (key2 not in criterion[0]["table"]) and
                # (criterion[0]["table"] in labels). The intent was almost
                # certainly "key2 not in done_index and
                # criterion[0]['table'] in labels". Confirm before fixing.
                if key1 not in done_index and key2 not in criterion[0]["table"] in labels and criterion[1]["table"] in labels:
                    joining_criterions += [criterion]
                    done_index[key1] = True
                    done_index[key2] = True
                pass
        # Collecting for each of the aforementioned expressions, its values <-> objects
        if len(joining_criterions) > 0:
            for criterion in joining_criterions:
                for each in criterion:
                    key = "%s.%s" % (each["table"], each["column"])
                    index_list_results = labels.index(each["table"])
                    objects = list_results[index_list_results]
                    # dict.has_key() is Python 2 only.
                    if not candidates_values.has_key(key):
                        candidates_values[key] = {}
                    # NOTE: "object" shadows the builtin of the same name.
                    for object in objects:
                        value_key = get_attribute(object, each["column"])
                        skip = False
                        # Hints discard objects whose hinted attribute does
                        # not carry the hinted value.
                        for hint in hints:
                            if each["table"] == hint.table_name and hint.attribute in object and object[hint.attribute] != hint.value:
                                skip = True
                                break
                        if not skip:
                            if not candidates_values[key].has_key(value_key):
                                candidates_values[key][value_key] = {}
                            object_hash = str(object).__hash__()
                            object_table = object["nova_classname"]
                            candidates_values[key][value_key][object_hash] = {"value": value_key, "object": object}
                            candidates_per_table[object_table][object_hash] = object
        else:
            # No join criterion: just index every object by table.
            for each in steps:
                for each_object in each[0]:
                    object_hash = str(each_object).__hash__()
                    object_table = each_object["nova_classname"]
                    candidates_per_table[object_table][object_hash] = each_object
        # Progressively reduce the list of results
        results = []
        processed_models = []
        if len(steps) > 0:
            # Seed the partial rows with the first table's candidates.
            step = steps[0]
            results = map(lambda  x: [candidates_per_table[step[1]][x]], candidates_per_table[step[1]])
            processed_models += [step[1]]
        remaining_models = map(lambda x:x[1], steps[1:])
        for step in steps[1:]:
            for criterion in joining_criterions:
                criterion_models = map(lambda x: x["table"], criterion)
                candidate_models = [step[1]] + processed_models
                if len(intersect(candidate_models, criterion_models)) > 1:
                    processed_models += [step[1]]
                    # NOTE(review): this filter KEEPS only step[1]; to drop
                    # the processed model "!=" would be expected. The value
                    # is never read afterwards, so it has no effect either
                    # way.
                    remaining_models = filter(lambda x: x ==step[1], remaining_models)
                    # try:
                    current_criterion_option = filter(lambda x:x["table"]==step[1], criterion)
                    remote_criterion_option = filter(lambda x:x["table"]!=step[1], criterion)
                    if not (len(current_criterion_option) > 0 and len(remote_criterion_option) > 0):
                        continue
                    current_criterion_part = current_criterion_option[0]
                    remote_criterion_part = remote_criterion_option[0]
                    new_results = []
                    # Extend each partial row with every candidate of the
                    # new table whose join-column value matches.
                    for each in results:
                        existing_tuple_index = processed_models.index(remote_criterion_part["table"])
                        existing_value = get_attribute(each[existing_tuple_index], remote_criterion_part["column"])
                        if existing_value is not None:
                            key = "%s.%s" % (current_criterion_part["table"], current_criterion_part["column"])
                            candidates_value_index = candidates_values[key]
                            candidates = candidates_value_index[existing_value] if existing_value in candidates_value_index else {}
                            for candidate_key in candidates:
                                new_results += [each + [candidates[candidate_key]["object"]]]
                    results = new_results
                    break
                continue
        return results
Пример #8
0
def default_panda_building_tuples(lists_results, labels, criterions, hints=[]):
    """Build tuples (join operator in relational algebra) with pandas.

    Loads each table's rows into a DataFrame with columns renamed to
    "<table>___<column>", outer-merges the DataFrames along the joining
    pairs extracted from *criterions*, applies the non-joining criterions
    via DataFrame.query, and finally rebuilds one dict per table per row.

    NOTE(review): this is Python 2 code — it concatenates the result of
    map() with a list, takes len() of a filter() result and index-assigns
    into a map() result, none of which work with Python 3's lazy
    map/filter. *hints* is unused and has a mutable default.

    :param lists_results: list of row lists, one per table.
    :param labels: table names, aligned with *lists_results*.
    :param criterions: objects exposing extract_joining_pairs() and
        extract_nonjoining_criterions().
    :return: list of rows; each row is a list of per-table dicts ordered
        as *labels*.
    """
    """ Create the Dataframe indexes. """
    dataframes = []
    dataindex = {}
    substitution_index = {}
    normal_keys_index = {}
    refactored_keys_index = {}
    normal_keys_to_key_index = {}
    refactored_keys_to_key_index = {}
    refactored_keys_to_table_index = {}
    index = 0

    classname_index = {}
    for each in labels:
        classname_index[each] = get_model_classname_from_tablename(each)
    # if len(lists_results) == 1:
    #     return map(lambda x: [x], lists_results[0])

    for list_results in lists_results:
        label = labels[index]
        # NOTE(review): skipping an empty list leaves "index" unchanged, so
        # subsequent tables would read the wrong label. Harmless today only
        # because the len(labels) > 1 check below returns [] in that case —
        # confirm before relying on it.
        if len(list_results) == 0:
            continue
        # Column names are taken from the first row's keys (Py2: map
        # returns a list, so "+" works).
        keys = map(lambda x: x, list_results[0]) + ["created_at", "updated_at"]

        dataframe = pd.DataFrame(data=list_results, columns=keys)

        # Record the "table.column" -> "table___column" renaming and the
        # reverse lookups used when rebuilding rows at the end.
        for value in keys:
            normal_key = "%s.%s" % (label, value)
            refactored_keys = "%s___%s" % (label, value)
            refactored_keys_to_table_index[refactored_keys] = label
            normal_keys_to_key_index[normal_key] = value
            refactored_keys_to_key_index[refactored_keys] = value
        normal_keys = map(lambda x: "%s.%s" % (label, x), keys)
        normal_keys_index[label] = normal_keys
        refactored_keys = map(lambda x: "%s___%s" % (label, x), keys)
        refactored_keys_index[label] = refactored_keys
        for (a, b) in zip(normal_keys, refactored_keys):
            substitution_index[a] = b
        dataframe.columns = refactored_keys
        dataframes += [dataframe]
        """ Index the dataframe and create a reverse index. """
        dataindex[label] = index
        index += 1
    """ Collecting joining expressions. """
    joining_pairs = []
    non_joining_criterions = []
    _joining_pairs_str_index = {}
    _nonjoining_criterions_str_index = {}
    # Deduplicate joining pairs and non-joining criterions via their string
    # representations.
    for criterion in criterions:
        _joining_pairs = criterion.extract_joining_pairs()
        _nonjoining_criterions = criterion.extract_nonjoining_criterions()

        _nonjoining_criterions_str = str(_nonjoining_criterions)

        if len(_joining_pairs) > 0:
            _joining_pairs_str = str(sorted(_joining_pairs[0]))
            if not _joining_pairs_str in _joining_pairs_str_index:
                _joining_pairs_str_index[_joining_pairs_str] = 1
                joining_pairs += _joining_pairs
        if not _nonjoining_criterions_str in _nonjoining_criterions_str_index:
            _nonjoining_criterions_str_index[_nonjoining_criterions_str] = 1
            non_joining_criterions += _nonjoining_criterions
    """ Construct the resulting rows. """
    # A multi-table join with any empty input can produce no rows.
    # (Py2 only: len() of a filter() result.)
    if len(labels) > 1 and len(filter(lambda x: len(x) == 0,
                                      lists_results)) > 0:
        return []

    result = None

    if len(lists_results) > 1:
        processed_tables = []
        for joining_pair in joining_pairs:
            """ Preparing the tables that will be joined. """

            # Each pair is ("table.column", "table.column").
            attribute_1 = joining_pair[0].strip()
            attribute_2 = joining_pair[1].strip()
            tablename_1 = attribute_1.split(".")[0]
            tablename_2 = attribute_2.split(".")[0]

            if tablename_1 not in dataindex or tablename_2 not in dataindex:
                return []
            index_1 = dataindex[tablename_1]
            index_2 = dataindex[tablename_2]
            # A table already merged contributes the accumulated result
            # instead of its original frame.
            dataframe_1 = dataframes[
                index_1] if not tablename_1 in processed_tables else result
            dataframe_2 = dataframes[
                index_2] if not tablename_2 in processed_tables else result

            refactored_attribute_1 = attribute_1.split(
                ".")[0] + "___" + attribute_1.split(".")[1]
            refactored_attribute_2 = attribute_2.split(
                ".")[0] + "___" + attribute_2.split(".")[1]
            """ Join the tables. """
            try:
                result = pd.merge(dataframe_1,
                                  dataframe_2,
                                  left_on=refactored_attribute_1,
                                  right_on=refactored_attribute_2,
                                  how="outer")
                # NOTE(review): presumably these helpers drop the merge's
                # duplicated "*_y" columns and strip the "_x" suffix —
                # confirm against their definitions.
                drop_y(result)
                rename_x(result)
            except KeyError:
                return []
            """ Update the history of processed tables. """
            processed_tables += [tablename_1, tablename_2]
            processed_tables = list(set(processed_tables))
    """ Filtering rows. """
    if result is None:
        if len(dataframes) == 0:
            return []
        result = dataframes[0]

    # Rewrite each raw expression to the "table___column" naming, then let
    # pandas evaluate it.
    for non_joining_criterion in non_joining_criterions:
        expression_str = non_joining_criterion.raw_expression
        for value in substitution_index:
            if value in expression_str:
                corresponding_key = substitution_index[value]
                expression_str = expression_str.replace(
                    value, corresponding_key)
        try:
            corrected_expression = correct_boolean_int(expression_str)
            corrected_expression = correct_expression_containing_none(
                corrected_expression)
            result = result.query(corrected_expression)
        # NOTE(review): bare except silently drops any criterion whose
        # expression fails to evaluate — best-effort by design, but it also
        # hides real errors.
        except:
            pass
    """ Building the rows. """
    rows = []
    columns_indexes = {}
    label_indexes = {}
    i = 0
    for refactored_key in result.columns.values:
        columns_indexes[refactored_key] = i
        i += 1
    i = 0
    for label in labels:
        label_indexes[label] = i
        i += 1

    transposed_result = result.transpose()
    dict_values = transposed_result.to_dict()
    for value in dict_values.values():
        # One dict per label (Py2 only: map returns an indexable list).
        row = map(lambda x: {}, labels)
        for ci in value:
            table = refactored_keys_to_table_index[ci]
            table_index = label_indexes[table]
            key = refactored_keys_to_key_index[ci]
            v = value[ci]
            # Outer-merge gaps come back as NaN; normalize them to 0.
            if type(v) is float and math.isnan(v):
                v = 0
            if key == "_metadata_novabase_classname":
                v = classname_index[table]
            row[table_index][key] = v
        rows += [row]
    return rows
Пример #9
0
def building_tuples(list_results, labels, criterions, hints=[]):
    """Join per-table result lists into ordered tuples.

    Starting from the rows of the first table, iteratively attaches rows of
    the remaining tables by following the relationship model (foreign-key
    fields resolved against per-table "id"/"uuid" indexes), keeping after
    each step only the largest partially-joined tuples. Only complete
    tuples (one element per label) are returned, ordered as *labels*.

    :param list_results: list of row lists, one per table.
    :param labels: table names, aligned with *list_results*.
    :param criterions: unused in the non-cartesian mode kept here.
    :param hints: unused; kept for interface compatibility.
    :return: list of tuples ordered as *labels*.
    """
    mode = "not_cartesian_product"
    # Fixed: the original compared strings with "is" (identity), which only
    # works by virtue of CPython string interning.
    if mode == "cartesian_product":
        cartesian_product = []
        for element in itertools.product(*list_results):
            cartesian_product += [element]
        return cartesian_product
    else:
        # Index each table's rows by their "id" and "uuid" fields so the
        # relationship lookups below are O(1).
        indexed_results = {}
        for (results, label) in zip(list_results, labels):
            dict_result = {"id": {}, "uuid": {}}
            for row in results:
                if has_attribute(row, "id"):
                    dict_result["id"][get_attribute(row, "id")] = row
                if has_attribute(row, "uuid"):
                    dict_result["uuid"][get_attribute(row, "uuid")] = row
            indexed_results[label] = dict_result
        # Find iteratively pairs that match according to the relationship
        # modelisation.
        tuples = []
        tuples_labels = []
        # Initialise the tuples with the rows of the first table only.
        for (results, label) in zip(list_results, labels):
            tuples_labels += [label]
            for row in results:
                tuples += [{label: row}]
            break
        # Grow the existing tuples with the remaining tables.
        # Fixed: the original had "count == 0" (a no-op comparison) where an
        # assignment was intended; it only worked because count was still 0.
        count = 0
        for (results, label) in zip(list_results, labels):
            if count == 0:
                count += 1
                continue
            tuples_labels += [label]
            for t in tuples:
                # Iterate over a snapshot of the keys: t is mutated below,
                # and mutating a dict while iterating its keys() view raises
                # RuntimeError under Python 3.
                for tablename in list(t.keys()):
                    model_classname = get_model_classname_from_tablename(tablename)
                    fake_instance = get_model_class_from_name(model_classname)()
                    relationships = fake_instance.get_relationships()
                    for r in relationships:
                        if r.local_fk_field in ["id", "uuid"]:
                            continue
                        remote_label_name = r.remote_object_tablename
                        if remote_label_name in indexed_results:
                            local_value = get_attribute(t[tablename], r.local_fk_field)
                            if local_value is not None:
                                try:
                                    remote_candidate = indexed_results[remote_label_name][r.remote_object_field][local_value]
                                    t[remote_label_name] = remote_candidate
                                except Exception as error:
                                    # Fixed: the exception variable no longer
                                    # shadows the key loop variable.
                                    logging.error(error)
                                    traceback.print_exc()
            # Keep only the largest partially-joined tuples.
            tuple_groupby_size = {}
            for t in tuples:
                tuple_size = len(t)
                if tuple_size not in tuple_groupby_size:
                    tuple_groupby_size[tuple_size] = []
                tuple_groupby_size[tuple_size] += [t]
            if len(tuple_groupby_size.keys()) > 0:
                max_size = max(tuple_groupby_size.keys())
                tuples = tuple_groupby_size[max_size]
            else:
                tuples = []
        # Reorder each complete tuple to follow the order of *labels*.
        results = []
        for t in tuples:
            if len(t) == len(labels):
                ordered_t = [t[i] for i in labels]
                results += [tuple(ordered_t)]
        return results