Пример #1
0
def insert_dynamo(table_name, dict_data, key_name, force=True):
    """Put an item into a DynamoDB table, optionally overwriting an existing key.

    A conditional ``put_item`` guarded by ``attribute_not_exists(key_name)``
    is attempted first. When that condition fails the item is written
    again without the condition if ``force`` is true; otherwise only a
    warning is printed. Any other client error is reported through
    ``pm.print_error`` with ``exit_code=1``.

    :param table_name: name of the target table
    :param dict_data: item passed as ``Item`` to ``put_item``
    :param key_name: attribute used in the condition and in the messages
    :param force: overwrite the item when the key is already present
    """
    client = getSession().client('dynamodb')
    only_if_absent = 'attribute_not_exists({0})'.format(key_name)

    try:
        client.put_item(TableName=table_name,
                        Item=dict_data,
                        ReturnConsumedCapacity='TOTAL',
                        ConditionExpression=only_if_absent)
    except botocore.exceptions.ClientError as err:
        error_code = err.response['Error']['Code']

        # Anything other than a failed condition is unexpected.
        if error_code != 'ConditionalCheckFailedException':
            pm.print_error('Dynamo problem unknown')
            pm.print_error(str(err), exit_code=1)
            return

        if not force:
            pm.print_warning('Key already exists [{0}:{1}]'.format(
                key_name, dict_data[key_name]))
            return

        # The key is present and the caller asked for an overwrite.
        pm.print_info('Forcing to rewrite [{0}:{1}]'.format(
            key_name, dict_data[key_name]))
        pm.print_dict(dict_data)
        client.put_item(TableName=table_name,
                        Item=dict_data,
                        ReturnConsumedCapacity='TOTAL')
Пример #2
0
def athena_query(query,
                 athena_database,
                 s3_bucketname,
                 file_remote_path,
                 verbose=True,
                 poll_interval=5):
    """Run a query on Athena and wait for its result.

    The query is started with its output written (SSE_S3 encrypted) under
    ``s3://<s3_bucketname>/<file_remote_path>/``. The execution state is then
    polled until it is ``SUCCEEDED``, ``FAILED`` or ``CANCELLED``.

    :param query: SQL text to execute
    :param athena_database: database used as the query execution context
    :param s3_bucketname: bucket that receives the query output
    :param file_remote_path: key prefix inside the bucket for the output
    :param verbose: print the query state while polling
    :param poll_interval: seconds to sleep between two state checks
    :return: the ``get_query_results`` response on success, ``None`` when the
        query failed, was cancelled, or a client error was reported
    """
    # Local import so this function does not rely on a module-level import.
    import time

    athena = athena_resource()
    output_location = 's3://' + '/'.join([s3_bucketname, file_remote_path
                                          ]) + '/'

    try:
        response = athena.start_query_execution(
            QueryString=query,
            QueryExecutionContext={'Database': athena_database},
            ResultConfiguration={
                'OutputLocation': output_location,
                'EncryptionConfiguration': {
                    'EncryptionOption': 'SSE_S3'
                }
            })
    except ClientError as err:
        pm.print_warning('Athena [{0}] error'.format(athena_database))
        pm.print_error(err.response['Error']['Message'], exit_code=1)
        # Only reached if print_error returns instead of exiting: there is
        # no execution id to poll, so stop here rather than index None.
        return None

    query_id = response['QueryExecutionId']
    query_result = None

    try:
        while True:
            status = athena.get_query_execution(QueryExecutionId=query_id)
            current_status = status['QueryExecution']['Status']['State']

            if current_status == 'SUCCEEDED':
                if verbose:
                    pm.print_info_flush(
                        msg='Query Status: {0}'.format(current_status),
                        wait=False)
                query_result = athena.get_query_results(
                    QueryExecutionId=query_id)
                break

            if current_status in ('FAILED', 'CANCELLED'):
                if verbose:
                    pm.print_error('Query {0}'.format(current_status))
                return None

            # Still queued or running.
            if verbose:
                pm.print_info_flush(
                    msg='Query Status: {0}'.format(current_status),
                    wait=True)
            # Pause between polls instead of hammering the API in a tight loop.
            time.sleep(poll_interval)

    except ClientError as err:
        pm.print_warning('Athena [{0}] error'.format(athena_database))
        pm.print_error(err.response['Error']['Message'], exit_code=1)

    return query_result
Пример #3
0
def sns_publish(topic, message):
    """Publish a message to an SNS topic.

    :param topic: ARN of the topic, passed as ``TopicArn``
    :param message: message body, passed as ``Message``
    :return: the ``publish`` response, or ``None`` when a client error was
        reported and ``pm.print_error`` returned instead of exiting
    """
    sns = sns_resource()

    # Pre-bind so the final return cannot hit an unbound name on the error path.
    response = None

    try:
        response = sns.publish(TopicArn=topic, Message=message)
    except ClientError as err:
        # The placeholder was previously never filled in.
        pm.print_warning('SNS [{0}] error'.format(topic))
        pm.print_error(err.response['Error']['Message'], exit_code=1)

    return response
Пример #4
0
def deletes3(s3_bucketname, file_remote_path):
    """Delete an object from S3 if it exists.

    :param s3_bucketname: bucket that holds the object
    :param file_remote_path: key of the object inside the bucket
    :return: ``True`` when the delete call succeeded; ``None`` when the
        object does not exist or a client error was reported
    """
    # Guard clause: nothing to delete.
    if not isfiles3(s3_bucketname, file_remote_path):
        pm.print_warning('File [s3://{0}/{1}] does not exist'.format(
            s3_bucketname, file_remote_path))
        return None

    resource = s3_resource()

    try:
        resource.Object(s3_bucketname, file_remote_path).delete()
    except botocore.exceptions.ClientError as err:
        # Something else has gone wrong.
        pm.print_error('[AWS][S3] Unknown error')
        pm.print_error(str(err))
        pm.print_error('', exit_code=1)
        return None

    return True
Пример #5
0
def merge_dicts(a, b, path=None, replacement=True):
    """Recursively merge ``b`` into ``a`` in place and return ``a``.

    Keys missing from ``a`` are copied from ``b``. When both sides hold a
    dict for the same key the merge descends into them. For any other
    conflicting value, ``a`` is overwritten when ``replacement`` is true and
    left untouched otherwise; a warning is printed in both cases.

    :param a: dictionary that receives the values (mutated)
    :param b: dictionary whose values are merged into ``a``
    :param path: list of parent keys visited so far (used by the recursion)
    :param replacement: overwrite conflicting values of ``a`` with those of ``b``
    :return: ``a``
    """
    if path is None:
        path = []

    for key, incoming in b.items():
        if key not in a:
            a[key] = incoming
            continue

        current = a[key]

        if isinstance(current, dict) and isinstance(incoming, dict):
            # Propagate ``replacement`` so nested conflicts follow the same
            # policy as top-level ones (it was previously reset to True).
            merge_dicts(current, incoming, path + [str(key)], replacement)
        elif current != incoming:
            if replacement:
                msg = 'Update [{1}] -> [{2}]'.format(key, current, incoming)
                pm.print_warning(msg)
                a[key] = incoming  # Child step over
            else:
                msg = 'Not updated [{1}] -/> [{2}]'.format(
                    key, current, incoming)
                pm.print_warning(msg)

    return a
Пример #6
0
def applymodifier(var_value, modifiers=None):
    """Apply one or more named text modifiers to a value, in order.

    Modifier names are resolved against the functions defined in
    ``pymake.utils.common.text_modifiers``. Unknown names are reported with
    a warning and leave the value unchanged.

    :param var_value: value to transform
    :param modifiers: a modifier name, a list of names, or ``None``
    :return: the value after every known modifier has been applied
    """
    if modifiers is None:
        return var_value

    if not isinstance(modifiers, list):
        modifiers = [modifiers]

    # Nothing to apply: skip the module import altogether.
    if not modifiers:
        return var_value

    import pymake.utils.common.text_modifiers as tm

    # Resolve the available modifier functions once, by name.
    available = {
        name: func for name, func in getmembers(tm) if isfunction(func)
    }

    # The loop runs for every input shape; it used to be nested under the
    # "not a list" branch, so a list of modifiers was silently ignored.
    for name in modifiers:
        func = available.get(name)
        if func is None:
            pm.print_warning(
                'Unknown modifier [{0}] - Unchanged'.format(name))
        else:
            var_value = func(var_value)

    return var_value
Пример #7
0
    def connect(self):
        """Open the ODBC connection to SQL Server unless already connected.

        Reads ``self._host``, ``self._dbname``, ``self._user`` and
        ``self._pwd`` to build the connection string, stores the connection
        in ``self._connection`` and records the outcome in
        ``self._connected``. Failures are reported through ``pm`` and leave
        ``self._connected`` set to ``False``.
        """
        if self._connected:
            return

        try:
            import pyodbc
        except ImportError:
            pm.print_error('Package pyodbc is not installed')
            try:
                # Imported only to find out whether the helper package exists.
                import pydockerutils  # noqa: F401
            except ImportError:
                pm.print_error('You have installation recipes in package pydockerutils @')
                pm.print_error('  - [https://github.com/nenetto/pydockerutils]')
                pm.print_error('Exiting', exit_code=1)

            pm.print_warning('Please, run the command install_pyodb from pydockerutils in the shell')

            # Without the driver there is nothing to connect with; stop here
            # instead of falling through to an unbound ``pyodbc`` name.
            self._connected = False
            return

        connection_string = 'DRIVER={ODBC Driver 13 for SQL Server};'
        connection_string += 'SERVER={0};DATABASE={1};UID={2};PWD={3};'.format(self._host,
                                                                               self._dbname,
                                                                               self._user,
                                                                               self._pwd)

        try:
            self._connection = pyodbc.connect(connection_string)
        except Exception as e:
            pm.print_error('Error connecting to database')
            pm.print_error(str(e))
            pm.print_separator()
            self._connected = False
            return

        self._connected = True

        if self._verbose:
            pm.print_info('Connection Success')
Пример #8
0
def load_env_var_from_dict(envar_dict, prefix='', update=True):
    """Export a (possibly nested) dictionary as environment variables.

    Nested dictionaries are flattened by joining the keys with ``_``; the
    optional ``prefix`` is prepended the same way. Values are stored as
    ``str(value)``.

    :param envar_dict: mapping of names to values or to nested mappings
    :param prefix: prefix prepended (with ``_``) to every variable name
    :param update: overwrite variables that already exist in the environment
    """
    newprefix = prefix + '_' if prefix != '' else ''

    for k, v in envar_dict.items():
        varname = newprefix + k

        if isinstance(v, dict):
            # Forward ``update`` so nested values obey the caller's policy
            # (the recursion used to fall back to the default of True).
            load_env_var_from_dict(v, varname, update)
            continue

        value = str(v)

        if varname not in os.environ:
            pm.print_info(
                'Setting environment variable [{0}]:[{1}]'.format(
                    varname, value))
            os.environ[varname] = value
        elif update:
            pm.print_warning(
                'Updating environment variable [{0}]:[{1}]->[{2}]'.format(
                    varname, os.environ[varname], value))
            os.environ[varname] = value
        else:
            # Existing variable kept. This branch used to test the bare key
            # ``k`` instead of the full name, so the warning was skipped
            # whenever a prefix was in use.
            pm.print_warning(
                'Found enviroment variable [{0}]:[{1}]- no replaced by new value [{2}]'
                .format(varname, os.environ[varname], value))
Пример #9
0
def separate_numeric_column(df, column_name, verbose=True):
    """
    Try to convert a column to numeric, moving non-numeric values to a companion column.

    Every value of ``df[column_name]`` is passed through ``float()`` on a
    working copy. Values that raise ``ValueError`` are recorded in a
    ``<column_name>_type`` column and replaced by NaN; values that raise
    ``TypeError`` are only counted. The counts are printed, and ``df`` itself
    is modified (in place) only when the column is classified as mixed.
    Nothing is returned.

    :param df: dataframe for input
    :type df: pandas.DataFrame
    :param column_name: name of the column to be fixed
    :type column_name: str
    :param verbose: print the row count and the per-type classification
    :type verbose: bool
    """
    pm.print_info('Fixing column {0}'.format(column_name))
    if verbose:
        pm.print_info('Number of rows {0}'.format(df.shape[0]))

    # Copy column of interest
    dfx = df[[column_name]].copy()

    # Create type variable; every row starts labelled as 'num'
    dfx[column_name + '_type'] = 'num'

    # Counters: convertible values (NaN included), ValueError rows,
    # NaN values and TypeError rows respectively.
    n_num = 0
    n_str = 0
    n_nans = 0
    n_others = 0

    total = dfx.shape[0]
    for i, row in dfx.iterrows():
        # NOTE(review): ``i`` is the index label, so this percentage
        # presumably assumes a 0..n-1 integer index -- confirm with callers.
        pm.print_info_percentage(100 * i / total,
                                 'Processing column',
                                 padding=1)

        try:  # Try conversion to number
            x = float(row[column_name])

            if np.isnan(x):
                n_nans += 1

            # NaN values are counted here as well; they are subtracted below.
            n_num += 1

        except ValueError:
            # Not parseable as a float: keep the raw value as the row's
            # type label and blank the numeric value.
            dfx.loc[i, column_name + '_type'] = row[column_name]
            dfx.loc[i, column_name] = np.nan
            n_str += 1
        except TypeError:
            # float() rejected the object type; the row keeps its 'num' label.
            n_others += 1

    pm.print_info_percentage(100, 'Processed  column', padding=1)

    n_nums_no_nans = n_num - n_nans

    pm.print_info('Nums: {0}'.format(n_nums_no_nans))
    pm.print_info('Nans: {0}'.format(n_nans))
    pm.print_info('Strs: {0}'.format(n_str))
    pm.print_info('Unkn: {0}'.format(n_others))

    if verbose:
        pm.print_info(
            'Number of different types reduces from {0} to {1}'.format(
                len(df[column_name].unique()),
                len(dfx[column_name + '_type'].unique())))

        # Row count for each distinct label of the type column.
        pm.print_info('Classification:')
        for e in dfx[column_name + '_type'].unique():

            n = dfx[dfx[column_name + '_type'] == e].shape[0]

            pm.print_info_2('{0} # {1}'.format(e, n), padding=1)

    # check the number of non numeric values

    total_num = dfx[column_name].shape[0]

    # The branches are tested in order; only the final ``else`` writes to ``df``.
    if total_num == (n_nums_no_nans + n_nans):
        # Numeric variable - leave as it
        pm.print_warning('Seems to be numeric, please revise')
    elif total_num == n_str:
        # Categorical variable - leave as it
        pm.print_warning('Seems to be categorical, please revise')
        #df[column_name] = df[column_name].astype('str')
    elif total_num == n_others:
        # Unknown type or date - leave as it
        pm.print_warning('Unknown or date, please revise')
    elif n_str > (n_nums_no_nans + n_nans):
        # Categorical variable - leave as it
        pm.print_warning('Seems to be categorical, please revise')
        #df[column_name] = df[column_name].astype('str')
    elif (n_nums_no_nans == 0) and (n_str > 0):
        # Categorical variable - leave as it
        pm.print_warning('Seems to be categorical, please revise')
        #df[column_name] = df[column_name].astype('str')
    else:
        # Mixed variable, do the split in two: the original column becomes
        # float and the labels go to a new '<column_name>_type' string column.
        df[column_name] = dfx[column_name].copy().astype('float')
        df[column_name + '_type'] = dfx[column_name +
                                        '_type'].copy().astype('str')
Пример #10
0
def reload_partitions_in_table(athena_database,
                               athena_table,
                               s3_bucketname,
                               file_remote_path,
                               verbose=True):
    """Refresh the partitions of an Athena table with ``MSCK REPAIR TABLE``.

    The statement is started with its output written (SSE_S3 encrypted)
    under ``s3://<s3_bucketname>/<file_remote_path>/`` and its state is polled
    every 5 seconds until it succeeds, fails or is cancelled.

    :param athena_database: database that contains the table
    :param athena_table: table whose partitions are reloaded
    :param s3_bucketname: bucket that receives the query output
    :param file_remote_path: key prefix inside the bucket for the output
    :param verbose: print the query state while polling
    :return: ``None`` in every case
    """
    database_found = athena_exist(athena_database, s3_bucketname,
                                  file_remote_path, False)
    if not database_found:
        pm.print_error('Database does not exist', exit_code=1)

    athena = athena_resource()

    output_location = 's3://{0}/'.format(
        '/'.join([s3_bucketname, file_remote_path]))

    # Assemble the request up front so the try block holds only the API call.
    request = {
        'QueryString': 'MSCK REPAIR TABLE {0};'.format(athena_table),
        'QueryExecutionContext': {'Database': athena_database},
        'ResultConfiguration': {
            'OutputLocation': output_location,
            'EncryptionConfiguration': {'EncryptionOption': 'SSE_S3'},
        },
    }

    response = None

    try:
        response = athena.start_query_execution(**request)
    except ClientError as err:
        pm.print_error('Reload partitions failed on table [{0}.{1}]'.format(
            athena_database, athena_table))
        pm.print_error(err.response['Error']['Message'], exit_code=1)

    query_id = response['QueryExecutionId']

    try:
        while True:
            execution = athena.get_query_execution(QueryExecutionId=query_id)
            state = execution['QueryExecution']['Status']['State']

            if state == 'SUCCEEDED':
                if verbose:
                    pm.print_info_flush(
                        msg='Query Status: {0}'.format(state), wait=False)
                athena.get_query_results(QueryExecutionId=query_id)
                break

            if state in ('FAILED', 'CANCELLED'):
                if verbose:
                    pm.print_error('Query {0}'.format(state))
                return None

            # Any other state means the query is still in progress.
            if verbose:
                pm.print_info_flush(
                    msg='Query Status: {0}'.format(state), wait=True)
            time.sleep(5)

    except ClientError as err:
        pm.print_warning('Athena [{0}] error'.format(athena_database))
        pm.print_error(err.response['Error']['Message'], exit_code=1)
    else:
        # Reached only when the loop ended through ``break`` (success).
        pm.print_info('Reload partitions succeed on table [{0}.{1}]'.format(
            athena_database, athena_table))