Exemplo n.º 1
0
def get_delta(source_table, target_table, key='id'):
    """Build a petl table of rows to load: added rows plus changed rows.

    Both inputs must be petl-compatible tables with identical headers.

    The result is assembled in a pandas DataFrame from two parts:

    * every row of ``target_table`` whose ``key`` value appears in the first
      table returned by ``etl.diff(source_ids, target_ids)``;
    * one row for each merged record (``etl.merge`` on ``key``) that holds a
      ``Conflict`` value.  Only the first conflicting column of such a row is
      resolved; any further conflicting columns are left as they are.

    A message is printed for every conflict found, and the intermediate
    frame / row dictionaries are printed as well.

    :param source_table: petl table describing the current state.
    :param target_table: petl table describing the desired state.
    :param key: name of the field that identifies a row in both tables.
    :returns: a petl table view built from the assembled DataFrame.
    :raises ValueError: if the two table headers differ.
    """
    source_table_headers = etl.header(source_table)
    target_table_headers = etl.header(target_table)

    if source_table_headers != target_table_headers:
        # ValueError is still caught by callers using ``except Exception``.
        raise ValueError(
            'Source table columns do not match target table columns')

    source_ids = etl.cut(source_table, key)
    target_ids = etl.cut(target_table, key)
    added_ids_table, _ = etl.diff(source_ids, target_ids)

    merged_table = etl.merge(source_table, target_table, key=key)

    load_frame = etl.todataframe(
        etl.selectin(target_table, key, etl.values(added_ids_table, key)))
    print(load_frame)

    for row in etl.data(merged_table):
        for i, col in enumerate(row):
            if not isinstance(col, etl.transform.reductions.Conflict):
                continue

            # NOTE(review): petl's Conflict appears to be a set-like
            # container, so which element ends up at index 0 / 1 may not be
            # tied to source vs. target -- confirm before trusting the
            # "from/to" wording and the value chosen below.
            changes = tuple(col)
            column = source_table_headers[i]
            # NOTE(review): row[0] is printed as the identifier, which
            # assumes the key is the first column -- confirm.
            print('For car {}, {} changed from {} to {}'.format(
                row[0], column, changes[1], changes[0]))

            row_dict = dict(zip(source_table_headers, row))
            row_dict[column] = changes[0]
            # Wrap every value in a list so the dict becomes a 1-row frame.
            row_dict = {name: [val] for name, val in row_dict.items()}
            print(row_dict)

            # DataFrame.append was removed in pandas 2.0; concat is the
            # supported equivalent.
            load_frame = pd.concat([load_frame, pd.DataFrame(row_dict)],
                                   ignore_index=True)
            break

    return etl.fromdataframe(load_frame)
Exemplo n.º 2
0
def dataPreProcessing(fileName):
    """Split a loan CSV into a feature file and a label file.

    Reads ``fileName`` with petl, drops a fixed set of columns, keeps only
    rows whose ``term`` is ``' 36 months'`` and whose ``loan_status`` is not
    the empty string, and then writes two CSV files into the current working
    directory:

    * ``featureFileCsv.csv`` -- the recoded categorical columns merged (on
      ``id``) with the remaining columns, sorted by ``id``, ``id`` removed;
    * ``labelsFileCsv.csv`` -- the ``loan_status`` column, sorted by ``id``,
      ``id`` removed.

    :param fileName: path of the CSV file to read.
    :returns: a 2-tuple with whatever ``tocsv`` returned for the feature file
        and the label file respectively.
        NOTE(review): petl's ``tocsv`` is normally used for its side effect;
        confirm callers do not rely on these return values.
    """
    inputData = fromcsv(fileName)
    trimmed = cutout(inputData, 'member_id', 'grade', 'sub_grade',
                     'emp_title', 'url', 'desc', 'title', 'accept_d', 'exp_d',
                     'list_d', 'issue_d', 'purpose', 'addr_city',
                     'addr_state', 'earliest_cr_line', 'last_pymnt_d',
                     'next_pymnt_d', 'last_credit_pull_d')

    # Compare by value: ``is not ""`` tested object identity, which only
    # works by accident of string interning.
    filtered = select(
        trimmed,
        lambda rec: rec['term'] == ' 36 months' and rec['loan_status'] != "")

    # --- labels: loan_status ordered by id ---
    labelMapping = OrderedDict()
    labelMapping['loan_status'] = 'loan_status'
    labelMapping['id'] = 'id'
    labels = cutout(sort(fieldmap(filtered, labelMapping), 'id'), 'id')

    # --- features: recoded categorical columns + untouched columns ---
    # NOTE(review): in petl's fieldmap the dict key is the OUTPUT field and
    # the first tuple element is the SOURCE field.  The source names used
    # here ('ownership', 'empLength', ...) are not otherwise referenced in
    # this function -- confirm they exist in the input file.
    mappings = OrderedDict()
    mappings['id'] = 'id'
    mappings['home_ownership'] = 'ownership', {
        'MORTGAGE': '-1',
        'RENT': '0',
        'OWN': '1'
    }
    mappings['emp_length'] = 'empLength', {'n/a': 0}
    mappings['is_inc_v'] = 'verificationStatus', {
        'Source Verified': 1,
        'Verified': 0,
        'Not Verified': -1
    }
    mappings['pymnt_plan'] = 'paymentPlan', {'n': 0, 'y': 1}
    mappings['initial_list_status'] = 'listStatus', {'f': 0, 'w': 1}
    recoded = fieldmap(filtered, mappings)

    # Columns that are either recoded above or used only for filtering and
    # labels are removed before merging the recoded values back in.
    passthrough = cutout(filtered, 'home_ownership', 'is_inc_v', 'pymnt_plan',
                         'initial_list_status', 'term', 'loan_status')
    features = cutout(
        sort(merge(recoded, passthrough, key='id'), 'id'), 'id')

    featureFileCsv = tocsv(features, 'featureFileCsv.csv')
    labelsFileCsv = tocsv(labels, 'labelsFileCsv.csv')
    return featureFileCsv, labelsFileCsv
Exemplo n.º 3
0
# Script fragment: relies on a_locs, b_locs, a_conv, b_conv, a_only and
# locs_only_in_a, all of which are defined before this excerpt.

# magic command for IPython display
# locs_only_in_a.displayall(caption='a only')

# Rows of b_locs with no matching row in a_locs, and how many there are.
locs_only_in_b = b_locs.complement(a_locs)
b_only = locs_only_in_b.nrows()

print("B only rows: {}".format(b_only))

# Export missing locations to csv
# NOTE(review): the B-only file is written only when a_only is 0; when both
# sides have unmatched rows the B-only rows are never exported, and when
# neither side has any, an (empty) B file is still written -- confirm intent.
if a_only > 0:
    locs_only_in_a.tocsv('missing_locations_a.csv')
else:
    locs_only_in_b.tocsv('missing_locations_b.csv')

# find conflicts between A/B on Chr and Pos columns
ab_merge = etl.merge(a_conv, b_conv, key=('Chr', 'Pos'))
# magic command for IPython display
# ab_merge.display(caption='ab_merge',
#                  td_styles=lambda v: highlight if isinstance(v, etl.Conflict) else '')

# Create a new list of all conflicting values
# Both tables are tagged with a leading 'source' column ('a' / 'b') and
# concatenated; 'source' is excluded from the conflict check because it
# differs between the two inputs by construction.
ab = etl.cat(a_conv.addfield('source', 'a', index=0),
             b_conv.addfield('source', 'b', index=0))
ab_conflicts = ab.conflicts(key=('Chr', 'Pos'), exclude='source')

# magic command for IPython display
# ab_conflicts.display(10)

# Highlight specific conflicts
# Same check, restricted to the 'Mut' column only.
ab_conflicts_mut = ab.conflicts(key=('Chr', 'Pos'), include='Mut')
Exemplo n.º 4
0
# merge
# Demo: combine two tables that share the 'bar' and 'baz' columns, keyed on
# 'bar'.  table1 and table2 disagree on 'baz' for keys 'B' and 'C'.

table1 = [['foo', 'bar', 'baz'],
          [1, 'A', True],
          [2, 'B', None],
          [4, 'C', True]]
table2 = [['bar', 'baz', 'quux'],
          ['A', True, 42.0],
          ['B', False, 79.3],
          ['C', False, 12.4]]

from petl import look, merge
look(table1)
look(table2)
table3 = merge(table1, table2, key='bar')
look(table3)


# aggregate
# Demo: group rows by 'foo'; table1 is rebound to new sample data here.
table1 = [['foo', 'bar', 'baz'],
          ['a', 3, True],
          ['a', 7, False],
          ['b', 2, True],
          ['b', 2, False],
          ['b', 9, False],
          ['c', 4, True]]
from petl import aggregate, look
look(table1)
# aggregate whole rows
# len is applied to the rows of each 'foo' group.
table2 = aggregate(table1, 'foo', len)
Exemplo n.º 5
0
# merge
# Demo: combine two tables that share the 'bar' and 'baz' columns, keyed on
# 'bar'.  table1 and table2 disagree on 'baz' for keys 'B' and 'C'.

table1 = [['foo', 'bar', 'baz'],
          [1, 'A', True],
          [2, 'B', None],
          [4, 'C', True]]
table2 = [['bar', 'baz', 'quux'],
          ['A', True, 42.0],
          ['B', False, 79.3],
          ['C', False, 12.4]]

from petl import look, merge
look(table1)
look(table2)
table3 = merge(table1, table2, key='bar')
look(table3)


# aggregate
# Sample data for an aggregation demo; table1 is rebound here.  The rows
# ['d'] and ['e'] are shorter than the header and carry no 'bar' value.
# The code that consumes this table is not part of this excerpt.

table1 = [['foo', 'bar'],
          ['a', 3],
          ['a', 7],
          ['b', 2],
          ['b', 1],
          ['b', 9],
          ['c', 4],
          ['d', 3],
          ['d'],
          ['e']]
Exemplo n.º 6
0
def transform(mmj_menu_items, mmj_categories, prices, organization_id,
              source_db, debug):
    """
    Transform MMJ menu items into the list of dicts returned for loading.

    Parameters:
        mmj_menu_items: menu item source; converted with
            utils.view_to_list before use.
        mmj_categories: petl-compatible table with at least the fields
            'name', 'id' and 'measurement'.
        prices: petl-compatible table containing the price fields cut below.
        organization_id: not used by this function.
        source_db: passed through to the _wm_integration, _map_uom and
            _restock_level helpers.
        debug: when truthy, image URLs point at the development bucket and
            the final result is printed as JSON.

    Returns:
        A list with one dict per menu item whose
        locationProductDetails['active'] is not False.
    """
    # source data table
    source_dt = utils.view_to_list(mmj_menu_items)

    # 'product_type' used to be listed twice here; once is enough.
    cut_menu_data = [
        'id', 'vendor_id', 'menu_id', 'dispensary_id', 'strain_id',
        'created_at', 'updated_at', 'category_id', 'name', 'sativa', 'indica',
        'on_hold', 'product_type', 'image_file_name', 'medicine_amount'
    ]

    # 'price_eigth' (sic) is the column name in the source data.
    cut_prices = [
        'menu_item_id', 'dispensary_id', 'price_half_gram', 'price_gram',
        'price_two_gram', 'price_eigth', 'price_quarter', 'price_half',
        'price_ounce'
    ]

    # Cut out all the fields we don't need to load
    menu_items = etl.cut(source_dt, cut_menu_data)
    prices_data = etl.cut(prices, cut_prices)

    menu_items = (etl.addfield(menu_items, 'createdAtEpoch')
                  .addfield('unitOfMeasure')
                  .addfield('locationProductDetails')
                  .addfield('keys')
                  .addfield('restockLevel'))

    # Two-step transform and cut. First we need to cut the name
    # and id from the source data to map to.
    cut_source_cats = etl.cut(mmj_categories, 'name', 'id', 'measurement')
    source_values = etl.values(cut_source_cats, 'name', 'id')

    # Then we need a dict of categories to compare against.
    # id is stored to match against when transforming and mapping categories
    category_ids = {name: cat_id for (name, cat_id) in source_values}

    mappings = OrderedDict()
    mappings['id'] = 'id'
    mappings['createdAt'] = 'created_at'
    mappings['updatedAt'] = 'updated_at'
    mappings['createdAtEpoch'] = lambda x: utils.create_epoch(x.created_at)
    mappings['name'] = 'name'
    mappings['shareOnWM'] = lambda x: _wm_integration(x.id, source_db)
    # unitOfMeasure values:
    #   1 = Units
    #   2 = Grams (weight)
    mappings['unitOfMeasure'] = \
        lambda x: _map_uom(x.category_id, source_db)

    fields = etl.fieldmap(menu_items, mappings)
    data = etl.merge(menu_items, fields, key='id')

    cents = utils.dollars_to_cents

    # Source columns that have been mapped and are dropped from the output.
    mapped_fields = (
        'vendor_id', 'indica', 'dispensary_id', 'id', 'strain_id', 'on_hold',
        'menu_id', 'sativa', 'category_id', 'updated_at', 'created_at',
        'product_type'
    )

    items = []
    for item in etl.dicts(data):

        # NOTE(review): prices are matched on dispensary_id only;
        # menu_item_id is cut out without being compared to the item id, so
        # the last price row of the dispensary wins below -- confirm.
        breakpoint_pricing = (etl.select(
            prices_data,
            lambda x, wanted=item['dispensary_id']:
                x.dispensary_id == wanted
        ).rename({'price_eigth': 'price_eighth'}).cutout('menu_item_id'))

        # Set image url for load to download
        url = None
        if item['image_file_name'] is not None:
            bucket = 'development' if debug else 'production'
            url = ("https://wm-mmjmenu-images-{0}.s3."
                   "amazonaws.com/menu_items/images/{1}/large/"
                   "{2}").format(bucket, item['id'], item['image_file_name'])
        item['image_file_name'] = url

        item['categoryId'] = _map_categories(item['category_id'],
                                             item['sativa'], item['indica'],
                                             category_ids, menu_items)

        # NOTE(review): the original comment called this the "unit product"
        # case, but 2 is documented above as grams -- confirm which is meant.
        # Compared by value; ``is 2`` tested object identity.
        if item['unitOfMeasure'] == 2:
            item['netMarijuana'] = int(item['medicine_amount'])

        # Foreign keys of the source record; falsy values are left out.
        # Built with a comprehension because deleting from a dict while
        # iterating over it raises RuntimeError on Python 3.
        source_keys = {
            'dispensary_id': item['dispensary_id'],
            'id': item['id'],
            'menu_id': item['menu_id'],
            'vendor_id': item['vendor_id'],
            'strain_id': item['strain_id'],
            'category_id': item['category_id']
        }
        item['keys'] = {k: v for (k, v) in source_keys.items() if v}

        item['locationProductDetails'] = {
            'id': item['id'],
            'active': _active(item['on_hold'])
        }

        item['restockLevel'] = _restock_level(item['dispensary_id'],
                                              item['product_type'], source_db)

        if item['shareOnWM'] is None:
            item['shareOnWM'] = False

        # Each matching price row overwrites the previous one.
        for price in etl.dicts(breakpoint_pricing):
            item['locationProductDetails']['weightPricing'] = {
                'price_half_gram': cents(price['price_half_gram']),
                'price_two_gram': cents(price.get('price_two_gram', 0.0)),
                'price_gram': cents(price['price_gram']),
                'price_eighth': cents(price['price_eighth']),
                'price_quarter': cents(price['price_quarter']),
                'price_half': cents(price['price_half']),
                'price_ounce': cents(price['price_ounce'])
            }

        for field in mapped_fields:
            del item[field]

        if item['image_file_name'] is None:
            del item['image_file_name']

        # set up final structure for API
        items.append(item)

    # Remove inactive items.  A new list is built because removing from a
    # list while iterating over it skips the element after each removal.
    items = [
        item for item in items
        if item['locationProductDetails']['active'] is not False
    ]

    if debug:
        result = json.dumps(items,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)

    return items
Exemplo n.º 7
0
def transform(source_data, organization_id, debug):
    """
    Transform vendor source rows into the list of dicts returned for loading.

    Parameters:
        source_data: vendor source; converted with utils.view_to_list.
        organization_id: not used by this function.
        debug: when truthy, the result is printed as JSON.

    Returns:
        A list with one dict per vendor row.
    """
    # source data table
    source_dt = utils.view_to_list(source_data)
    # 'liscense_no' (sic) is the column name in the source data.
    cut_data = [
        'id', 'dispensary_id', 'mmjvenu_id', 'name', 'phone_number', 'email',
        'country', 'state', 'city', 'address', 'zip_code', 'liscense_no',
        'confirmed', 'website'
    ]
    vendor_data = etl.cut(source_dt, cut_data)

    vendor_mappings = OrderedDict()
    vendor_mappings['id'] = 'id'
    vendor_mappings['dispensary_id'] = 'dispensary_id'
    vendor_mappings['address'] = 'address'

    # field renames
    vendor_mappings['accountStatus'] = \
        lambda x: "ACTIVE" if x.confirmed == 1 else "INACTIVE"
    vendor_mappings['phone'] = 'phone_number'
    vendor_mappings['licenceNumber'] = 'liscense_no'
    vendor_mappings['zip'] = 'zip_code'

    vendors_fields = etl.fieldmap(vendor_data, vendor_mappings)
    merged_vendors = etl.merge(vendor_data, vendors_fields, key='id')

    # Fields that are mapped (or are foreign keys already copied into
    # item['keys']) and are dropped from the output.
    mapped_fields = (
        'zip', 'state', 'country', 'city', 'zip_code', 'phone_number',
        'confirmed', 'liscense_no', 'mmjvenu_id', 'id', 'dispensary_id'
    )

    vendors = []
    for item in etl.dicts(merged_vendors):
        # Nest the flat address columns, or drop the address entirely.
        if item['address'] is not None:
            item['address'] = {
                'line1': item['address'],
                'line2': None,
                'city': item['city'],
                'state': item['state'],
                'zip': item['zip'],
                'country': item['country'],
            }
        else:
            del item['address']

        # NOTE(review): all three fields are removed as soon as ANY one of
        # them is None, which also discards the ones that have a value --
        # confirm this is intended.
        if (item['licenceNumber'] is None or item['email'] is None
                or item['website'] is None):
            del item['licenceNumber']
            del item['email']
            del item['website']

        if item['phone'] is not None:
            item['phone'] = [{
                'name': 'business',
                'number': item['phone'],
                'default': True
            }]
        else:
            del item['phone']

        # Foreign keys of the source record; falsy values are left out.
        # Built with a comprehension because deleting from a dict while
        # iterating over it raises RuntimeError on Python 3.
        source_keys = {
            'dispensary_id': item['dispensary_id'],
            'id': item['id'],
            'mmjvenu_id': item['mmjvenu_id']
        }
        item['keys'] = {k: v for (k, v) in source_keys.items() if v}

        # mutate dict and remove fields that are mapped and no longer required
        for field in mapped_fields:
            del item[field]

        # set up final structure for API
        vendors.append(item)

    if debug:
        result = json.dumps(vendors,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)

    return vendors
Exemplo n.º 8
0
def transform(mmj_employees, organization_id, debug, fake_email, source_db):
    """
    Transform employee source rows into the list of dicts returned for loading.

    Parameters:
        mmj_employees: employee source; converted with utils.view_to_list.
        organization_id: not used by this function (the organization id of
            each source row is kept under item['keys'] instead).
        debug: passed to _set_email; when truthy the result is also printed
            as JSON.
        fake_email: passed to _set_email.
        source_db: passed through to the _assign_role and _active helpers.

    Returns:
        A list with one dict per employee row.
    """
    # source data table
    source_dt = utils.view_to_list(mmj_employees)
    cut_data = [
        'id', 'email', 'first_name', 'organization_id', 'last_name',
        'created_at', 'updated_at', 'login'
    ]

    employee_data = etl.cut(source_dt, cut_data)

    employees = (etl.addfield(employee_data, 'keys')
                 .addfield('name')
                 .addfield('role')
                 .addfield('dateOfBirth'))

    mappings = OrderedDict()
    mappings['id'] = 'id'
    mappings['name'] = \
        lambda name: _set_name(name.first_name, name.last_name, name.login)
    # Roles:
    #     1 = site-admin
    #     2 = site-admin
    #     3 = store-manager
    #     4 = budtender
    mappings['role'] = lambda x: _assign_role(x.id, source_db)

    mappings['createdAt'] = 'created_at'
    mappings['updatedAt'] = 'updated_at'
    # Every employee gets the same placeholder date of birth.  Written
    # without leading zeros: 01 / 02 are a syntax error on Python 3 and mean
    # the same values (1 / 2) on Python 2.
    mappings['dateOfBirth'] = \
        lambda _: datetime.datetime(year=1970, month=1,
                                    day=1, hour=2, minute=30)
    mappings['organization_id'] = 'organization_id'  # keep mmj org
    mappings['accountStatus'] = lambda x: _active(x.id, source_db)

    fields = etl.fieldmap(employees, mappings)
    merged_employees = etl.merge(employees, fields, key='id')

    # Source columns that have been mapped and are dropped from the output.
    mapped_fields = (
        'login', 'first_name', 'last_name', 'created_at', 'id',
        'organization_id'
    )

    mapped_employees = []
    for item in etl.dicts(merged_employees):
        # Foreign keys of the source record; falsy values are left out.
        # Built with a comprehension because deleting from a dict while
        # iterating over it raises RuntimeError on Python 3.
        source_keys = {
            'id': item['id'],
            'organization_id': item['organization_id']
        }
        item['keys'] = {k: v for (k, v) in source_keys.items() if v}

        item['email'] = _set_email(item['email'], fake_email, debug)

        for field in mapped_fields:
            del item[field]

        # set up final structure for API
        mapped_employees.append(item)

    if debug:
        result = json.dumps(mapped_employees,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)

    return mapped_employees
Exemplo n.º 9
0
# mergeduplicates demo: table1 has several rows per 'foo' value, some with
# None and some with differing values in 'bar' / 'baz'.
import petl as etl

table1 = [['foo', 'bar', 'baz'], ['A', 1, 2.7], ['B', 2, None], ['D', 3, 9.4],
          ['B', None, 7.8], ['E', None, 42.], ['D', 3, 12.3], ['A', 2, None]]
table2 = etl.mergeduplicates(table1, 'foo')
# Bare expression: displays the table when run in an interactive session.
table2

# merge()
#########
# Combine two tables that share the 'bar' and 'baz' columns, keyed on 'bar'.
# The tables disagree on 'baz' for keys 'B' and 'C'.

import petl as etl

table1 = [['foo', 'bar', 'baz'], [1, 'A', True], [2, 'B', None],
          [4, 'C', True]]
table2 = [['bar', 'baz', 'quux'], ['A', True, 42.0], ['B', False, 79.3],
          ['C', False, 12.4]]
table3 = etl.merge(table1, table2, key='bar')
table3

# fold()
########
# Reduce the 'count' values of each 'id' group with operator.add.  The input
# is already ordered by 'id', hence presorted=True.

import petl as etl

table1 = [['id', 'count'], [1, 3], [1, 5], [2, 4], [2, 8]]
import operator

table2 = etl.fold(table1, 'id', operator.add, 'count', presorted=True)
table2
Exemplo n.º 10
0
def transform(dispensary_details, pricing, organization_id, debug, source_db):
    """
    Transform dispensary settings rows into a single settings dict.

    Parameters:
        dispensary_details: settings source; converted with
            utils.view_to_list.
        pricing: default pricing source; converted with utils.view_to_list.
        organization_id: used as the fieldmap mapping for 'organizationId'.
        debug: when truthy, logo URLs point at the development bucket and
            the result is printed as JSON.
        source_db: passed through to the _get_taxes and _medical_limits
            helpers.

    Returns:
        A dict.  Every transformed row is folded into it with dict.update,
        so for keys present in several rows the last row wins.
    """
    # source data table
    general_settings = utils.view_to_list(dispensary_details)
    pricing_detail = utils.view_to_list(pricing)

    dispensary_cut_data = [
        'id', 'dispensary_id', 'menu_show_tax', 'logo_file_name',
        'inactivity_logout', 'calculate_even_totals',
        'require_customer_referrer', 'membership_fee_enabled', 'pp_enabled',
        'pp_global_dollars_to_points', 'pp_global_points_to_dollars',
        'pp_points_per_referral', 'allow_unpaid_visits', 'red_flags_enabled',
        'mmjrevu_api_key'
    ]

    # 'price_eigth' (sic) is the column name in the source data.
    pricing_cut_data = [
        'id', 'price_half_gram', 'price_gram', 'price_two_gram', 'price_eigth',
        'price_quarter', 'price_half', 'price_ounce'
    ]

    dispensary_settings_data = etl.cut(general_settings, dispensary_cut_data)
    pricing_data = etl.cut(pricing_detail, pricing_cut_data)

    settings_table = etl.addfield(dispensary_settings_data, 'organizationId')

    mappings = OrderedDict()
    mappings['id'] = 'id'

    # field renames
    # NOTE(review): fieldmap treats a plain value as a source field
    # reference or expression, not as a constant; if a constant is wanted
    # this probably needs ``lambda _: organization_id`` -- confirm.
    mappings['organizationId'] = organization_id

    settings_fields = etl.fieldmap(settings_table, mappings)
    merged_settings = (
        etl.merge(settings_table, settings_fields, key='id').rename({
            # Global -> General -> SESSION TIMEOUT DURATION
            'inactivity_logout': 'sessionTimeoutDuration',
            # Global -> Logo
            'logo_file_name': 'image',
            # Global -> Members -> Membership Level
            'membership_fee_enabled': 'membershipLevelsEnabled',
            'pp_global_dollars_to_points': 'dollarsPerPoint',
            'pp_global_points_to_dollars': 'pointsPerDollar',
            'pp_points_per_referral': 'referralPoints',

            # <Location> -> Sales -> TAXES IN
            'menu_show_tax': 'enableTaxesIn',
            # <Location> -> Sales -> PRICE ROUNDING
            'calculate_even_totals': 'hasPriceRounding',
            # <Location> -> Members -> REFERRER REQUIRED
            'require_customer_referrer': 'mandatoryReferral',
            # <Location> -> Members -> PAID VISITS
            'allow_unpaid_visits': 'paidVisitsEnabled',
            # <Location> -> Members -> MEDICAL MEMBERS
            'red_flags_enabled': 'hasLimits',
            # <Location> -> General -> STORE LOCATIONS
            'mmjrevu_api_key': 'apiKey'
        }))

    cents = utils.dollars_to_cents
    true_or_false = utils.true_or_false

    # Flat fields that have been nested into sub-dicts (or are foreign keys
    # copied into item['keys']) and are dropped from the output.
    mapped_fields = (
        'id', 'dispensary_id', 'membershipLevelsEnabled', 'enableTaxesIn',
        'hasLimits', 'hasPriceRounding', 'dollarsPerPoint',
        'mandatoryReferral', 'paidVisitsEnabled', 'pointsPerDollar',
        'pp_enabled', 'referralPoints'
    )

    settings = {}
    for item in etl.dicts(merged_settings):
        # Foreign keys of the source record; falsy values are left out.
        # Built with a comprehension because deleting from a dict while
        # iterating over it raises RuntimeError on Python 3.
        source_keys = {
            'dispensary_id': item['dispensary_id'],
            'id': item['id']
        }
        item['keys'] = {k: v for (k, v) in source_keys.items() if v}

        # if not item['sessionTimeoutDuration'] >= 30:
        #     del item['sessionTimeoutDuration']

        url = None
        if item['image'] is not None:
            bucket = 'development' if debug else 'production'
            url = ("https://wm-mmjmenu-images-{0}.s3."
                   "amazonaws.com/logos/{1}/original/"
                   "{2}").format(bucket, item['id'], item['image'])
        item['image'] = url

        # Member settings nested - crm.member.settings
        if item['pp_enabled']:
            item['crm_member_settings'] = {
                'membershipLevel': {
                    'membershipLevelsEnabled':
                    true_or_false(item['membershipLevelsEnabled']),
                    'levelName': 'Unnamed',
                    'dollarsPerPoint': item['dollarsPerPoint'],
                    'pointsPerDollar': item['pointsPerDollar'],
                    'referralPoints': item['referralPoints']
                }
            }

        # Location settings nested.
        if item['apiKey']:
            item['location_specific'] = {'apiKey': item['apiKey']}
        else:
            item['location_specific'] = {}

        item['location_specific']['members'] = {
            'paidVisitsEnabled': true_or_false(item['paidVisitsEnabled']),
            'mandatoryReferral': true_or_false(item['mandatoryReferral'])
        }
        item['location_specific']['sales'] = {
            'enableTaxesIn': true_or_false(item['enableTaxesIn']),
            'hasPriceRounding': true_or_false(item['hasPriceRounding'])
        }

        # sales.settings.taxes
        # NOTE(review): every tax overwrites the same 'taxes' entry, so only
        # the last one is kept -- confirm.  Also confirm the type of
        # tax['amount']: on Python 2 an int here would floor-divide.
        item['sales_settings_taxes'] = {}
        for tax in _get_taxes(item['dispensary_id'], source_db):
            item['sales_settings_taxes']['taxes'] = {
                'code': tax['name'],
                'percent': tax['amount'] / 100,
                'type': 'sales'
            }

        # Default weight pricing.  Every pricing row rebuilds the whole
        # 'inventory' entry, so the last row wins; with no pricing rows the
        # entry is absent.  The loop variable no longer shadows the
        # ``pricing`` parameter.
        for price_row in etl.dicts(pricing_data):
            item['location_specific']['inventory'] = {
                'weightPricing': {
                    'name': 'Default',
                    'defaultTier': True,
                    'breakpoints': {
                        'price_half_gram':
                        cents(price_row['price_half_gram']),
                        'price_gram': cents(price_row['price_gram']),
                        'price_two_gram': cents(price_row['price_two_gram']),
                        'price_eighth': cents(price_row['price_eigth']),
                        'price_quarter': cents(price_row['price_quarter']),
                        'price_half': cents(price_row['price_half']),
                        'price_ounce': cents(price_row['price_ounce']),
                    }
                }
            }

        # monthly purchase limit is two week limit x2
        if item['hasLimits'] == 1:
            for limits in _medical_limits(item['dispensary_id'], source_db):
                item['location_specific']['members']['medicalLimits'] = {
                    'hasLimits': True,
                    'dailyPurchaseLimit': int(limits['daily_purchase_limit']),
                    'visitPurchaseLimit': int(limits['visit_purchase_limit']),
                    'dailyVisitLimit': int(limits['daily_visit_limit']),
                    'monthlyPurchaseLimit':
                    int(limits['two_week_purchase_limit'] * 2)
                }

        # NOTE(review): both fields are removed as soon as EITHER is None,
        # which also discards the one that has a value -- confirm.
        if item['image'] is None or item['apiKey'] is None:
            del item['image']
            del item['apiKey']

        # delete fk's and the flat fields nested above
        for field in mapped_fields:
            del item[field]

        # set up final structure for API
        settings.update(item)

    if debug:
        result = json.dumps(settings,
                            sort_keys=True,
                            indent=4,
                            default=utils.json_serial)
        print(result)
    return settings
Exemplo n.º 11
0
# Tail of a preceding example whose definition of table2 lies above this
# excerpt.
table2


# merge()
#########
# Combine two tables that share the 'bar' and 'baz' columns, keyed on 'bar'.
# The tables disagree on 'baz' for keys 'B' and 'C'.

import petl as etl
table1 = [['foo', 'bar', 'baz'],
          [1, 'A', True],
          [2, 'B', None],
          [4, 'C', True]]
table2 = [['bar', 'baz', 'quux'],
          ['A', True, 42.0],
          ['B', False, 79.3],
          ['C', False, 12.4]]
table3 = etl.merge(table1, table2, key='bar')
# Bare expression: displays the table when run in an interactive session.
table3


# fold()
########
# Reduce the 'count' values of each 'id' group with operator.add.  The input
# is already ordered by 'id', hence presorted=True.

import petl as etl
table1 = [['id', 'count'], 
          [1, 3], 
          [1, 5],
          [2, 4], 
          [2, 8]]        
import operator
table2 = etl.fold(table1, 'id', operator.add, 'count',
                  presorted=True)