Example #1
# Opening of this snippet is cut off; the variable and table names below
# are assumed from the 'zgv_' attribute prefix.
DIM_ZORGVRAAG = CachedDimension(
    name='DIM.ZORGVRAAG',
    key='zgv_id',
    attributes=['zgv_dbc_specialisme_code', 'zgv_dbc_zorgvraag_code'],
    size=0,
    prefill=True
)

FCT_SUBTRAJECT = BulkFactTable(
    name='FCT.SUBTRAJECT',
    keyrefs=['beh_id', 'dag_id_begindatum_zorgtraject',
             'dag_id_einddatum_zorgtraject', 'dag_id_begindatum_subtraject',
             'dag_id_einddatum_subtraject', 'dag_id_declaratiedatum',
             'dia_id', 'stn_id', 'zgt_id', 'zgv_id', 'zpr_id',
             'zvs_id_behandelend', 'zvs_id_verwijzend'],
    measures=['geslacht', 'heeft_oranje_zorgactiviteit',
              'heeft_zorgactiviteit_met_machtiging',
              'is_hoofdtraject', 'is_aanspraak_zvw',
              'is_aanspraak_zvw_toegepast',
              'is_zorgactiviteitvertaling_toegepast', 'fct_omzet_ziekenhuis',
              'fct_omzet_honorarium_totaal'],
    nullsubst='',
    fieldsep='\t',
    rowsep='\r\n',
    usefilename=True,
    bulkloader=mssql_bulkloader
)
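
# The mssql_bulkloader referenced above is defined elsewhere in the
# original script. Below is a minimal sketch following pygrametl's
# bulkloader protocol; the global pyodbc connection is an assumption.
# With usefilename=True, pygrametl passes the name of the temporary
# file instead of an open file handle.
def mssql_bulkloader(name, atts, fieldsep, rowsep, nullval, filename):
    global connection  # assumed: an open pyodbc connection
    cursor = connection.cursor()
    # BULK INSERT maps fields to table columns by position; the
    # terminators match fieldsep='\t' and rowsep='\r\n' above.
    cursor.execute("BULK INSERT {} FROM '{}' WITH "
                   "(FIELDTERMINATOR = '\\t', ROWTERMINATOR = '\\r\\n')"
                   .format(name, filename))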

def load_str_dot(file, config):
    """Method for loading one subtraject file of WOB ZZ DOT

    Main ETL method for WOB ZZ subtrajecten.
    Requires an active pygrametl connection as a global.
Example #2
# Opening of this snippet is cut off; the variable and table names below
# are assumed.
productdim = CachedDimension(
    name='Product',
    key='product_skey',
    attributes=['product_key', 'product_name', 'category', 'energy',
        'carbohydrates', 'fat', 'protein', 'product_year'],
    lookupatts=['product_key'],
    rowexpander=producthandling)

datedim = CachedDimension(
    name='Date',
    key='date_key',
    attributes=['date', 'day_of_week', 'week_in_year', 'month', 'year', 'weekend'],
    lookupatts=['date'])

facttbl = BulkFactTable(
    name='ProductPrice',
    keyrefs=['date_key', 'pp_key', 'product_key', 'location_key'],
    measures=['price'], # TODO: determine if any other measures should be listed
    bulkloader=pgcopybulkloader,
    bulksize=5000000,
    nullsubst='N/A') # TODO: replace this with a better NULL substitution value
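
# The pgcopybulkloader referenced above is defined elsewhere in the
# original script. A minimal version following pygrametl's documented
# bulkloader protocol, assuming a global psycopg2 connection:
def pgcopybulkloader(name, atts, fieldsep, rowsep, nullval, filehandle):
    global connection  # assumed: an open psycopg2 connection
    curs = connection.cursor()
    # COPY streams the temporary file that pygrametl has filled
    curs.copy_from(file=filehandle, table=name, sep=fieldsep,
                   null=str(nullval), columns=atts)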

# Data sources

# The buffer size is set to 16384 B, because this performs best, according to
# the pygrametl developers.
data_set = CSVSource(open(DATA_FILE, 'r', 16384),
                     delimiter=',')

gdp_data_set = CSVSource(open(GDP_FILE, 'r', 16384),
                         delimiter=',')
pop_data_set = CSVSource(open(POPULATION_FILE, 'r', 16384),
                         delimiter=',')
Example #3
# Opening of this snippet is cut off; the variable and table names below
# are assumed (the fact tables reference the 'typeid' key).
type_dimension = CachedDimension(name='smas_water_readingtype',
                                 key='typeid',
                                 attributes=['type', 'subtype'])

customer_dim = CachedDimension(
    name='smas_water_customer',
    key='custid',
    lookupatts=['accountno'],
    attributes=['accountno', 'street', 'city', 'province', 'postcode'])

meter_dim = CachedDimension(name='smas_water_meter',
                            key='meterid',
                            lookupatts=['meterno'],
                            attributes=['meterno', 'latitude', 'longitude'])

hourlyreading_fact = BulkFactTable(
    name='smas_water_hourlyreading',
    keyrefs=['typeid', 'custid', 'meterid'],
    measures=['readtime', 'reading', 'temperature'],
    bulkloader=pgcopybulkloader,
    bulksize=500000)

dailyreading_fact = BulkFactTable(
    name='smas_water_dailyreading',
    keyrefs=['typeid', 'custid', 'meterid'],
    measures=['readtime', 'reading', 'temperature'],
    bulkloader=pgcopybulkloader,
    bulksize=500000)

# Data sources - change the path if you have your files somewhere else
#reading = SQLSource(connection,
#                    'SELECT meterid, datavalue, endtime FROM water_meterreading_tmp',
#                    names=['meterno', 'reading', 'readtimestamp'])
#
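
# The rest of the script is not shown. A hypothetical loading loop for
# the hourly facts could look as follows; the source and its field
# names are assumptions, not part of the original.
def load_hourly(source):
    for row in source:
        # Translate business keys into surrogate keys via the dimensions
        row['typeid'] = type_dimension.lookup(row)   # by 'type'/'subtype'
        row['custid'] = customer_dim.lookup(row)     # by 'accountno'
        row['meterid'] = meter_dim.lookup(row)       # by 'meterno'
        hourlyreading_fact.insert(row)
    connection.commit()  # assumed global DB connection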
Example #4
# Opening of this snippet is cut off; the variable name, table name,
# and surrogate key below are assumed.
member_dimension = SlowlyChangingDimension(name="dim.member",
                                           key="member_sk",
                                           attributes=[
                                               "gender", "is_active",
                                               "start_year", "version",
                                               "valid_from", "valid_to"
                                           ],
                                           lookupatts=["member_id"],
                                           versionatt="version",
                                           fromatt="valid_from",
                                           toatt="valid_to",
                                           srcdateatt="lastmoddate",
                                           cachesize=-1)

# TODO: Use BulkFactTable or BatchFactTable
fact_table = BulkFactTable(
    name="fct.sale",
    keyrefs=["fk_product_id", "fk_time_id", "fk_store_id", "fk_member_id"],
    measures=["price"],
    bulkloader=pgcopybulkloader)


### Dimension Filling ###
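# Sketch of how the type-2 member dimension above would be maintained:
# rows from a member source (a hypothetical name) are passed to
# scdensure(), which inserts a new version whenever a tracked attribute
# has changed. Each row must carry 'member_id' (the lookup attribute),
# the tracked attributes, and 'lastmoddate' (srcdateatt).
def fill_member_dimension():
    for srcrow in member_source:
        member_dimension.scdensure(srcrow)
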
def fill_product_dimension():
    #id;name;price;active;deactivate_date;quantity;alcohol_content_ml;start_date
    for srcrow in product_source:
        try:
            dimrow = {
                'product_id': srcrow['id'],
                # Remove HTML tags from the product name
                'name': re.sub('<[^<]+?>', '', srcrow['name']).strip()
Example #5
DIM_ZORGTYPE = CachedDimension(name='DIM.ZORGTYPE',
                               key='zgt_id',
                               attributes=['zgt_zorgtype_code'],
                               size=0,
                               prefill=True)

FCT_SUBTRAJECT = BulkFactTable(
    name='FCT.SUBTRAJECT',

    # NB: order matters for MS SQL bulk insert, which maps fields to the
    # target table's columns by position!
    keyrefs=[
        'stn_id', 'dag_id_begindatum_zorgtraject',
        'dag_id_einddatum_zorgtraject', 'dag_id_begindatum_subtraject',
        'dag_id_einddatum_subtraject', 'dag_id_diagnose', 'zgt_id',
        'dia_id_primair', 'prg_id', 'psc_id', 'cct_id', 'afs_id', 'lnd_id'
    ],
    measures=[
        'geslacht', 'diagnose_trekken_van', 'fct_verkoopprijs', 'fct_tarief',
        'fct_verrekenbedrag'
    ],
    nullsubst='',
    fieldsep='\t',
    rowsep='\r\n',
    usefilename=True,
    bulkloader=mssql_bulkloader)


def load_subtraject(file, config):
    """Method for loading one subtraject file of WOB ZZ DOT

    Main ETL method for WOB GGZ subtrajecten.
Example #6
# Opening of this snippet is cut off; the definition below follows the
# test dimension in pygrametl's example ETL program.
testdim = CachedDimension(name='test',
                          key='testid',
                          attributes=['testname', 'testauthor'],
                          lookupatts=['testname'],
                          prefill=True,
                          defaultidvalue=-1)

datedim = CachedDimension(
    name='date',
    key='dateid',
    attributes=['date', 'day', 'month', 'year', 'week', 'weekyear'],
    lookupatts=['date'],
    rowexpander=datehandling)
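
# The datehandling rowexpander referenced above is defined earlier in
# the original script. A minimal sketch, with the input date format
# assumed; pygrametl calls it as rowexpander(row, namemapping) when a
# date is not yet in the dimension, so it can derive the remaining
# attributes from the 'date' business key.
from datetime import datetime

def datehandling(row, namemapping):
    date = datetime.strptime(row['date'], '%Y-%m-%d')  # format assumed
    row['day'] = date.day
    row['month'] = date.month
    row['year'] = date.year
    isoyear, isoweek, _ = date.isocalendar()
    row['week'] = isoweek
    row['weekyear'] = isoyear
    return row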

facttbl = BulkFactTable(name='testresults',
                        keyrefs=['pageid', 'testid', 'dateid'],
                        measures=['errors'],
                        bulkloader=pgcopybulkloader,
                        bulksize=5000000)

# Data sources - change the path if you have your files somewhere else
downloadlog = CSVSource(open('../generator/DownloadLog.csv', 'r', 16384),
                        delimiter='\t')

testresults = CSVSource(open('../generator/TestResults.csv', 'r', 16384),
                        delimiter='\t')

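# MergeJoiningSource equijoins its two inputs and expects both of them
# to be delivered sorted on the join attribute ('localfile' here).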
inputdata = MergeJoiningSource(downloadlog, 'localfile', testresults,
                               'localfile')


def main():

Example #7
ssbsect_algo = CachedDimension(
    name='SSBSECT',
    key='crn',
    attributes=['cvemat', 'grupo', 'levl_code', 'coll_code', 'dept_code']
)

# Merge of the tables joined via foreign keys
# (scbcrse is a dimension defined earlier, outside this snippet)
ssbsect = SnowflakedDimension(
    [(ssbsect_algo, scbcrse)]
)

facttabl = BulkFactTable(
    name='aaa',
    keyrefs=['matricula', 'cvemat', 'crn'],
    measures=['errors'],
    bulkloader=loader
)


# Where the data comes from - change the paths to your database exports
resSGBSTDN = CSVSource(open('./vistaSGBSTDN.csv', 'r', 16384), delimiter='\t')
resSCBCRSE = CSVSource(open('./vistaSCBCRSE.csv', 'r', 16384), delimiter='\t')
resSSBSECT = CSVSource(open('./vistaSSBSECT.csv', 'r', 16384), delimiter='\t')

inputdata = MergeJoiningSource(resSCBCRSE, 'cvemat', resSSBSECT, 'cvemat')

def main():
    for row in inputdata:
        print(row)
        # namemapping: the dimension attribute 'nommat' is found under
        # the row key 'hola'
        row['cvemat'] = scbcrse.ensure(row, {'nommat': 'hola'})