Exemplo n.º 1
0
def is_property_present_for_df(df, name):
    """
    Tell whether a named property is recorded in the catalog for a dataframe.

    Args:
        df (pandas dataframe): Dataframe whose catalog entry is inspected
        name (str): Name of the property to look for

    Returns:
        The result of the catalog's ``is_property_present_for_df`` lookup
        (documented as a bool: True if the property is present).

    Raises:
        AssertionError: If `df` is not a pandas DataFrame
        KeyError: If the catalog reports that `df` is not registered

    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        message = 'Input object is not of type pandas data frame'
        logger.error(message)
        raise AssertionError(message)

    # Only an explicit False from the catalog counts as "not registered".
    df_is_known = registry.is_df_info_present_in_catalog(df)
    if df_is_known is False:
        message = 'Dataframe information is not present in the catalog'
        logger.error(message)
        raise KeyError(message)

    return registry.is_property_present_for_df(df, name)
Exemplo n.º 2
0
def get_all_properties(df):
    """
    Fetch every property the catalog holds for a dataframe.

    Args:
        df (pandas dataframe): Dataframe whose properties are requested

    Returns:
        The value returned by the catalog's ``get_all_properties`` for `df`
        (documented as a dict mapping property names to property values).

    Raises:
        AssertionError: If `df` is not a pandas DataFrame
        KeyError: If `df` has no information in the catalog

    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        message = 'Input object is not of type pandas data frame'
        logger.error(message)
        raise AssertionError(message)

    df_is_known = registry.is_df_info_present_in_catalog(df)
    if not df_is_known:
        message = 'Dataframe information is not present in the catalog'
        logger.error(message)
        raise KeyError(message)

    return registry.get_all_properties(df)
Exemplo n.º 3
0
def set_property(df, name, value):
    """
    Set a single property for a dataframe in the catalog.

    If the catalog has no information for the dataframe yet, its properties
    are initialized first and the property is then stored.

    Args:
        df (pandas dataframe): Dataframe for which the property has to be set
        name (str): Property name
        value (pandas object): Property value

    Returns:
        status (bool). Returns True if the property was set successfully

    Raises:
        AssertionError: If `df` is not a pandas DataFrame, or `name` is not
            a string

    """
    catalog = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        logger.error('Input object is not of type pandas data frame')
        raise AssertionError('Input object is not of type pandas data frame')

    if not isinstance(name, six.string_types):
        logger.error('Property name is not of type string')
        raise AssertionError('Property name is not of type string')

    # A dataframe that is not registered yet gets its entry created on demand.
    if not catalog.is_df_info_present_in_catalog(df):
        catalog.init_properties(df)

    catalog.set_property(df, name, value)
    # No exception was raised, so report success as the docstring promises
    # (previously the function fell off the end and returned None).
    return True
Exemplo n.º 4
0
def show_properties_for_id(obj_id):
    """
    Print the properties the catalog holds for the given object id.

    The id is printed first, followed by one line per property. String
    values are printed as they are; for any other value only the ``id()``
    of the value object is printed.

    Args:
        obj_id: Identifier handed to the catalog's
            ``get_all_properties_for_id``.

    """
    properties = Catalog.Instance().get_all_properties_for_id(obj_id)
    print('id: ' + str(obj_id))
    for prop_name, prop_value in six.iteritems(properties):
        if not isinstance(prop_value, six.string_types):
            # Non-string values are summarized by their object identity.
            print(prop_name + "(obj.id): " + str(id(prop_value)))
            continue
        print(prop_name + ": " + prop_value)
Exemplo n.º 5
0
def del_catalog():
    """
    Remove the information stored in the catalog.

    Returns:
        The value returned by the catalog's ``del_catalog``
        (documented as a bool status: True if the deletion was successful).
    """
    return Catalog.Instance().del_catalog()
Exemplo n.º 6
0
def get_catalog_len():
    """
    Report how many entries the catalog currently holds.

    Returns:
        The value returned by the catalog's ``get_catalog_len``
        (documented as the int length of the catalog).

    """
    return Catalog.Instance().get_catalog_len()
Exemplo n.º 7
0
def is_catalog_empty():
    """
    Tell whether the catalog has no entries.

    Returns:
        The value returned by the catalog's ``is_catalog_empty``
        (documented as a bool: True if the catalog is empty, else False).

    """
    return Catalog.Instance().is_catalog_empty()
Exemplo n.º 8
0
def get_catalog():
    """
    Retrieve the catalog information.

    Returns:
        The value returned by the catalog's ``get_catalog``
        (documented as the catalog information in dictionary format).

    """
    return Catalog.Instance().get_catalog()
Exemplo n.º 9
0
def has_property(df, prop):
    """
    Check whether a dataframe has the given property in the catalog.

    Args:
        df (pandas dataframe): Input dataframe
        prop (str): Property name

    Returns:
        result (bool). True if `prop` is among the properties returned by
        get_all_properties for `df`, else False.

    Raises:
        AssertionError: If `df` is not a pandas DataFrame, or `prop` is not
            a string
        KeyError: If the dataframe is not in the catalog

    """
    if not isinstance(df, pd.DataFrame):
        logger.error('Input object is not of type pandas data frame')
        raise AssertionError('Input object is not of type pandas data frame')

    if not isinstance(prop, six.string_types):
        logger.error('Property name is not of type string')
        raise AssertionError('Property name is not of type string')

    if not is_dfinfo_present(df):
        logger.error('Dataframe is not in the catalog')
        raise KeyError('Dataframe is not in the catalog')

    # Membership test on the property mapping.
    return prop in get_all_properties(df)
Exemplo n.º 10
0
def get_property(df, name):
    """
    Look up one property of a dataframe in the catalog.

    Args:
        df (pandas dataframe): Dataframe whose property is requested
        name (str): Name of the property to retrieve

    Returns:
        The value returned by the catalog's ``get_property`` for the given
        dataframe and property name.

    Raises:
        AssertionError: If `df` is not a pandas DataFrame, or `name` is not
            a string
        KeyError: If the dataframe is not present in the catalog, or the
            requested property is not present for that dataframe

    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        message = 'Input object is not of type pandas data frame'
        logger.error(message)
        raise AssertionError(message)

    if not isinstance(name, six.string_types):
        message = 'Property name is not of type string'
        logger.error(message)
        raise AssertionError(message)

    df_is_known = registry.is_df_info_present_in_catalog(df)
    if not df_is_known:
        message = 'Dataframe information is not present in the catalog'
        logger.error(message)
        raise KeyError(message)

    property_is_known = registry.is_property_present_for_df(df, name)
    if not property_is_known:
        # The same text is both logged and carried by the exception.
        message = (
            'Requested metadata ( %s ) for the given dataframe is not present in the catalog'
            % name)
        logger.error(message)
        raise KeyError(message)

    return registry.get_property(df, name)
Exemplo n.º 11
0
def set_properties(df, prop_dict, replace=True):
    """
    Store a dictionary of properties for a dataframe in the catalog.

    Args:
        df (pandas dataframe): Input dataframe
        prop_dict (dict): Maps property names to the python objects to store
        replace (bool): When False and the dataframe already has information
            in the catalog, nothing is written

    Returns:
        status (bool). True once every entry of `prop_dict` has been set;
        False if the dataframe was already in the catalog and `replace`
        is False.

    Raises:
        AssertionError: If `df` is not a pandas DataFrame, or `prop_dict`
            is not a dict

    Notes:
        A dataframe that has no information in the catalog yet is
        initialized before its properties are set.

    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        message = 'Input object is not of type pandas data frame'
        logger.error(message)
        raise AssertionError(message)

    if not isinstance(prop_dict, dict):
        message = 'The properties should be of type python dictionary'
        logger.error(message)
        raise AssertionError(message)

    # Existing information is left untouched when replacing is switched off.
    if registry.is_df_info_present_in_catalog(df) and replace is False:
        df_identity = str(id(df))
        logger.warning(
            'Properties already exists for df ( %s ). Not replacing it' %
            df_identity)
        return False

    df_is_known = registry.is_df_info_present_in_catalog(df)
    if not df_is_known:
        registry.init_properties(df)

    for prop_name, prop_value in six.iteritems(prop_dict):
        registry.set_property(df, prop_name, prop_value)
    return True
Exemplo n.º 12
0
def is_dfinfo_present(df):
    """
    Tell whether the catalog holds information about a dataframe.

    Args:
        df (pandas dataframe): Input dataframe

    Returns:
        The value returned by the catalog's ``is_df_info_present_in_catalog``
        (documented as a bool: True if the information is present,
        else False).

    Raises:
        AssertionError: If `df` is not a pandas DataFrame

    """
    registry = Catalog.Instance()

    if isinstance(df, pd.DataFrame):
        return registry.is_df_info_present_in_catalog(df)

    message = 'Input object is not of type pandas data frame'
    logger.error(message)
    raise AssertionError(message)
Exemplo n.º 13
0
def copy_properties(src, tar, update=True):
    """
    Copy the catalog properties of one dataframe onto another.

    Args:
        src (pandas dataframe): Dataframe the properties are read from
        tar (pandas dataframe): Dataframe the properties are written to
        update (bool): Passed to set_properties as its `replace` flag, i.e.
            whether the source properties may replace the target properties

    Returns:
        status (bool). The value returned by set_properties for the target.

    Raises:
        AssertionError: If `src` or `tar` is not a pandas DataFrame
        KeyError: If the catalog reports that `src` is not registered

    Notes:
        The source properties are read from the catalog and handed to
        set_properties, which also initializes the target in the catalog
        when it has no information there yet.

    """
    registry = Catalog.Instance()

    if not isinstance(src, pd.DataFrame):
        message = 'Input object (src) is not of type pandas data frame'
        logger.error(message)
        raise AssertionError(message)

    if not isinstance(tar, pd.DataFrame):
        message = 'Input object (tar) is not of type pandas data frame'
        logger.error(message)
        raise AssertionError(message)

    # Only an explicit False from the catalog counts as "not registered".
    src_is_known = registry.is_df_info_present_in_catalog(src)
    if src_is_known is False:
        message = 'Dataframe information (src) is not present in the catalog'
        logger.error(message)
        raise KeyError(message)

    src_properties = registry.get_all_properties(src)
    return set_properties(tar, src_properties, update)
Exemplo n.º 14
0
def del_all_properties(df):
    """
    Remove every property the catalog holds for a dataframe.

    Args:
        df (pandas dataframe): Dataframe whose properties must be deleted

    Returns:
        The value returned by the catalog's ``del_all_properties``
        (documented as a bool status: True if the deletion was successful).

    Raises:
        AssertionError: If `df` is not a pandas DataFrame
        KeyError: If the dataframe information is not present in the catalog
    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        message = 'Input object is not of type pandas data frame'
        logger.error(message)
        raise AssertionError(message)

    df_is_known = registry.is_df_info_present_in_catalog(df)
    if not df_is_known:
        message = 'Dataframe information is not present in the catalog'
        logger.error(message)
        raise KeyError(message)

    return registry.del_all_properties(df)
Exemplo n.º 15
0
def del_property(df, name):
    """
    Delete a property from the catalog

    Args:
        df (pandas dataframe): Input dataframe for which a property must be deleted
        name (str): Property name

    Returns:
        The value returned by the catalog's ``del_property``
        (documented as a bool status: True if the deletion was successful).

    Raises:
        AssertionError: If `df` is not a pandas DataFrame, or `name` is not
            a string
        KeyError: If the Dataframe info. is not present or the given property
            is not present for that dataframe in the catalog
    """
    catalog = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        logger.error('Input object is not of type pandas data frame')
        raise AssertionError('Input object is not of type pandas data frame')

    if not isinstance(name, six.string_types):
        logger.error('Property name is not of type string')
        raise AssertionError('Property name is not of type string')

    if not catalog.is_df_info_present_in_catalog(df):
        logger.error('Dataframe information is not present in the catalog')
        raise KeyError('Dataframe information is not present in the catalog')

    if not catalog.is_property_present_for_df(df, name):
        # Log the same message that is raised; the log used to claim the
        # dataframe itself was missing, which is the previous check's error.
        message = ('Requested metadata ( %s ) for the given dataframe is '
                   'not present in the catalog' % name)
        logger.error(message)
        raise KeyError(message)

    return catalog.del_property(df, name)
Exemplo n.º 16
0
# NOTE(review): QtGui is not referenced in the visible part of this module --
# confirm whether this import is still required.
from PyQt4 import QtGui

from magellan.catalog.catalog import Catalog

# Version string of this module/package.
__version__ = '0.1.0'

# Obtain the Catalog instance once, at import time.
_catalog = Catalog.Instance()

# Catalog-related functions, imported here so they are available from this
# module's namespace.
from magellan.catalog.catalog_manager import get_property, get_all_properties, \
    set_property, del_property, del_all_properties
from magellan.catalog.catalog_manager import get_catalog, del_catalog, \
    get_catalog_len, show_properties, show_properties_for_id
from magellan.catalog.catalog_manager import is_property_present_for_df, \
    is_dfinfo_present, is_catalog_empty
from magellan.catalog.catalog_manager import get_key, set_key

# I/O-related functions: CSV with metadata, and pickled objects/tables.
from magellan.io.parsers import read_csv_metadata, to_csv_metadata
from magellan.io.pickles import load_object, load_table, save_object, save_table

# Blocker classes.
from magellan.blocker.attr_equiv_blocker import AttrEquivalenceBlocker
from magellan.blocker.black_box_blocker import BlackBoxBlocker
from magellan.blocker.overlap_blocker import OverlapBlocker
from magellan.blocker.rule_based_blocker import RuleBasedBlocker

# Blocker combiner.
from magellan.blockercombiner.blockercombiner import combine_blocker_outputs_via_union
Exemplo n.º 17
0
def init_properties(df):
    """
    Initialize the catalog properties for a dataframe.

    Args:
        df (pandas dataframe): Dataframe handed to the catalog's
            ``init_properties``

    Returns:
        None

    Raises:
        AssertionError: If `df` is not a pandas DataFrame

    """
    registry = Catalog.Instance()

    if not isinstance(df, pd.DataFrame):
        message = 'Input object is not of type pandas data frame'
        logger.error(message)
        raise AssertionError(message)

    registry.init_properties(df)