def main_loop():
    # requested query
    Service = request.query.Service
    # Converting the Service value to String
    Service = str(Service)
    csv = petl.fromcsv(file)
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    for i in csv:
        if Service == i[1]:
            # select the data according to the given requested query
            dataSelect = petl.select(csv, "{Service} == '" + Service + "'")
            # cutting out the required column names
            jsonData = petl.cut(dataSelect, 'ClinicID', 'Suburb', 'Lat', 'Lon')
            # convert the dictionary data into json data
            jsonData = json.JSONEncoder().encode(list(petl.dicts(jsonData)))
            # return the json data
            return jsonData
    # handles the request for the distinct list of clinics offering any service
    if Service == "0":
        anyServices = petl.unique(csv, key='Name')
        jsonData = petl.cut(anyServices, 'ClinicID', 'Suburb', 'Lat', 'Lon')
        jsonData = json.JSONEncoder().encode(list(petl.dicts(jsonData)))
        return jsonData
    else:
        jsonData = json.JSONEncoder().encode('Please Enter a Service.')
        return jsonData
def main_loop():
    # requested query
    inputServiceID = request.query.serviceid
    csv = pt.fromcsv('clinicservicelocations.csv')
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    for i in csv:
        if inputServiceID == i[0]:
            # select the data according to the given requested query
            dataSelect = pt.select(
                csv, "{ServiceID} == '" + str(inputServiceID) + "'")
            # cutting out the required column names
            jsonData = pt.cut(dataSelect, 'Name', 'Service', 'Suburb', 'State',
                              'Email', 'Lat', 'Lon')
            # convert the dictionary data into json data
            jsonData = json.JSONEncoder().encode(list(pt.dicts(jsonData)))
            # return the json data
            return jsonData
    # handles the request for the distinct list of clinics offering any service
    if inputServiceID == "0":
        anyServices = pt.unique(csv, key='Name')
        jsonData = pt.cut(anyServices, 'Name', 'Service', 'Suburb', 'State',
                          'Email', 'Lat', 'Lon')
        jsonData = json.JSONEncoder().encode(list(pt.dicts(jsonData)))
        return jsonData
    else:
        jsonData = json.JSONEncoder().encode('Unable to find this id.')
        return jsonData
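# Both handlers above follow the same Bottle-style pattern (request.query,
# response.headers). A condensed, hedged sketch of that pattern, kept
# self-contained: the route path, CSV file name, and column names below are
# assumptions for illustration, not taken from the original apps.
import json
import petl as pt
from bottle import request, response, route


@route('/clinics')
def clinics_by_service():
    service_id = str(request.query.serviceid)
    table = pt.fromcsv('clinicservicelocations.csv')  # assumed file/columns
    response.headers['Content-Type'] = 'application/json'
    selected = pt.select(table, "{ServiceID} == '" + service_id + "'")
    rows = list(pt.dicts(pt.cut(selected, 'Name', 'Suburb', 'Lat', 'Lon')))
    return json.dumps(rows)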
def analyze(season_bins, game_bins, season_param, game_param, position):
    # extract bucketed season / game performance
    gbs = game_buckets(game_param, game_bins, position)
    sbs = season_buckets(season_param, season_bins, position)

    # create probability distribution tables for seasons / games
    s_bucket_probs = bucket_dist(sbs)
    sg_bucket_probs = season_game_bucket_combo_probs(sbs, gbs)

    # generate likelihoods and prior
    s_bucket_dicts = list(petl.dicts(s_bucket_probs))
    sg_bucket_dicts = list(petl.dicts(sg_bucket_probs))
    s_buckets = sorted(set(s['bucket'] for s in s_bucket_dicts))
    likelihoods = {
        sb: bayes.Pdf({
            gb['g_bucket']: gb['prob']
            for gb in sg_bucket_dicts if gb['s_bucket'] == sb
        })
        for sb in s_buckets
    }
    prior = {s['bucket']: s['prob'] for s in s_bucket_dicts}

    # get appropriately-formed data
    data = data_dicts(sbs, gbs, game_param)

    # run analysis on data
    runs = conduct_runs(data, likelihoods, prior)

    # conduct rmse analysis overall and by bucket
    rmse_results = dict(
        overall=rmse_analysis(runs),
        by_bucket=dict(),
    )
    for sb in range(season_bins):
        tmp_runs = [r for r in runs if r['data']['season']['bucket'] == sb]
        rmse_results['by_bucket'][sb] = rmse_analysis(tmp_runs)

    # return all data!
    return dict(
        runs=runs,
        likelihoods={k: v.probs for k, v in likelihoods.items()},
        prior=prior,
        rmse_analysis=rmse_results,
    )
def dicts(self, sql):
    try:
        if isinstance(sql, str):
            sql = execute(sql, self)
        return etl.dicts(sql)
    except SQLError as e:
        print(e)
def drained_entries(ctx: typer.Context, issues, entries, project):
    config = ctx.meta['config']
    empty_entries, unset_entries = petl.biselect(
        entries, lambda row: row['issue_id'] is None)
    drain_issues = list(
        petl.dicts(
            transform.select_drain_issues(
                issues,
                assignee_id=ctx.meta['rdm_user']['id'],
                drain_cf_id=get_proj_attr(config, project,
                                          'rdm_drain_cf_id'))))
    if not len(drain_issues):
        log.error('No drain issues found')
        return petl.head(unset_entries, 0), entries
    if len(drain_issues) > 1:
        log.warning(
            f'Found {len(drain_issues)} drain issues. Will use only first one')
    drain_issue = drain_issues[0]
    drained = petl.addfield(petl.cutout(empty_entries, 'issue_id'),
                            'issue_id', drain_issue['id'])
    return drained, unset_entries
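# For context on the helper above: petl.biselect() splits a table into the
# rows that match a predicate and the rows that do not, in a single pass.
# A minimal sketch with made-up time entries (field names are illustrative):
import petl

entries = petl.wrap([
    ('entry_id', 'issue_id'),
    (1, None),
    (2, 42),
])

empty, unset = petl.biselect(entries, lambda row: row['issue_id'] is None)
print(list(petl.dicts(empty)))  # [{'entry_id': 1, 'issue_id': None}]
print(list(petl.dicts(unset)))  # [{'entry_id': 2, 'issue_id': 42}]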
def anyServices():
    # requested query
    Postcode = request.query.Postcode
    # Converting the Postcode value to String
    Postcode = str(Postcode)
    # reading the csv file
    csv = petl.fromcsv(file)
    # json content type declaration
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    for i in csv:
        if Postcode == i[4]:
            # select the data according to the given requested query
            dataSelect = petl.select(csv, "{Postcode} == '" + Postcode + "'")
            # cutting out the required column names
            jsonData = petl.cut(dataSelect, 'Service', 'Suburb')
            # convert the dictionary data into json data
            jsonData = json.JSONEncoder().encode(list(petl.dicts(jsonData)))
            # return the json data
            return jsonData
    # no row matched the requested postcode
    jsonData = json.JSONEncoder().encode('Unable to find this Service.')
    return jsonData
def _get_record_identifiers(self, cfg):
    p = cfg.get('record_identifiers')
    if not (p and os.path.isfile(p)):
        p = os.path.join(cfg['root'], 'record_ids.json')
    table = petl.fromjson(p)
    return petl.dicts(table)
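# A small runnable sketch of the petl.fromjson() -> petl.dicts() round trip
# used in _get_record_identifiers; the temporary file and its contents are
# illustrative stand-ins for record_ids.json.
import json
import tempfile

import petl

with tempfile.NamedTemporaryFile('w', suffix='.json', delete=False) as fh:
    json.dump([{'record_id': 'A1'}, {'record_id': 'B2'}], fh)

table = petl.fromjson(fh.name)
print(list(petl.dicts(table)))  # [{'record_id': 'A1'}, {'record_id': 'B2'}]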
def sustain_summary(self):
    success, result = self.clip_and_dissolve_esri_feature_layer(
        feature_layer_query_url=RAINWAYS_RESOURCES['sustain'],
        feature_layer_fields=['GI_Type'],
        clipping_mask_gdf=self.aoi_gdf,
        clipping_mask_bbox=self.aoi_bbox,
        in_epsg_code=self.aoi_gdf_epsg)
    if success:
        # post-process the dataframe; add additional field
        t = etl\
            .fromdataframe(result)\
            .cutout('geometry')\
            .addfield('area_acres', lambda r: r['area'] * 0.00002295682)
        # convert to list of dictionaries
        results = list(etl.dicts(t))
        self.results.sustain.extend(results)
        return results
    else:
        self.status = 'failed'
        messages = result['error']['details'] + result['message']
        self.messages.extend(messages)
        return None
def upsert_many(table, rows, keys, ensure=None, types=None):
    if isinstance(rows, Table):
        rows = etl.dicts(rows)
    elif isinstance(rows, DataFrame):
        rows = rows.to_dict(orient='records')
    for row in rows:
        table.upsert(row, keys, ensure, types)
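# A minimal usage sketch for upsert_many() above, assuming it is in scope,
# that `table` is a table from the `dataset` library (whose
# upsert(row, keys, ensure, types) signature matches the call inside the
# loop), and that Table/DataFrame refer to petl's Table and pandas'
# DataFrame. The database URL and rows are illustrative.
import dataset
import petl as etl

db = dataset.connect('sqlite:///:memory:')
people = db['people']

rows = etl.wrap([('id', 'name'), (1, 'a'), (2, 'b')])
upsert_many(people, rows, keys=['id'])
print(list(people.all()))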
def update_user_logins(data, url, api_key, source, account_id_field,
                       login_id_field, unique_id_field, password_field,
                       sis_user_id_field, integration_id_field, results_table):
    # 'https' also starts with 'http', so a single prefix check is sufficient
    if not url.startswith('http'):
        url = 'https://' + url
    client = LoginsAPI(url, api_key)
    source = data.get(source)
    debug = data.config.debug
    results = []
    for row in petl.dicts(source):
        account_id = row[account_id_field]
        login_id = row[login_id_field]
        kwargs = {}
        if unique_id_field is not None and row[unique_id_field] is not None:
            kwargs['login_unique_id'] = row[unique_id_field]
        if password_field is not None and row[password_field] is not None:
            kwargs['login_password'] = row[password_field]
        if sis_user_id_field is not None and row[sis_user_id_field] is not None:
            kwargs['login_sis_user_id'] = row[sis_user_id_field]
        if integration_id_field is not None and row[integration_id_field] is not None:
            kwargs['login_integration_id'] = row[integration_id_field]
        try:
            r = client.edit_user_login(login_id, account_id, **kwargs)
            unsync.secho('Successfully updated login: {} with data: {}'.format(
                login_id, str(kwargs)), fg='green')
            if results_table:
                row['_data'] = str(kwargs)
                row['_response_status'] = r
                row['_response_content'] = r
                results.append(row)
            if debug:
                unsync.secho(str(r), fg='yellow')
        except CanvasAPIError as e:
            unsync.secho('Failed updating login: {} with data: {}'.format(
                login_id, str(kwargs)), fg='red')
            unsync.secho('Response Status: {} Response Reason: {}'.format(
                e.response.status_code, e.response.content), fg='red')
            if results_table:
                row['_data'] = str(kwargs)
                row['_response_status'] = e.response.status_code
                row['_response_content'] = e.response.content
                results.append(row)
    results = petl.fromdicts(results)
    data.cat(results_table, results)
def precip_table_etl_noaa( precip_table, rainfall_adjustment=1, frequency_min=1, frequency_max=1000, conversion_factor=2.54, desc_field="by duration for ARI (years):", duration_val="24-hr:" ): """ Extract, Transform, and Load data from a NOAA PRECIPITATION FREQUENCY ESTIMATES matrix (in a csv) into an array used by the runoff calculator. Required Inputs: - precip_table: NOAA PRECIPITATION FREQUENCY ESTIMATES csv, in inches. Optional Inputs: - rainfall_adjustment: multipler to adjust for future rainfall conditions. defaults to 1. - frequency_min: the min. annual frequency to be returned. Default: 1 - frequency_max: the max. annual frequency to be returned. Default: 1000 - conversion_factor: apply to rainfall values. Default: 2.54 (convert inches to centimeters). - desc_field: exact field name from NOAA table in first column. Defaults to "by duration for ARI (years):". Used for selecting data. - duration_val: exact row value in the desc_field from NOAA table that contains the duration of interest. Defaults to "24-hr:". Used for selecting data. Outputs: - precip_array: 1D array containing 24-hour duration estimate for frequencies 1,2,5,10,25,50,100,200,500,and 1000 year storm events """ # load the csv table, skip the file header information, extract rows we need t1 = etl\ .fromcsv(precip_table)\ .skip(13)\ .rowslice(0,19) # grab raw data from the row containing the x-hour duration event info t2 = etl\ .select(t1, desc_field, lambda v: v == duration_val)\ .cutout(desc_field) # generate a new header with only columns within frequency min/max h = tuple( i for i in list(etl.header(t2)) if (int(i) >= frequency_min and int(i) <= frequency_max) ) # for events within freq range, convert to cm, adjust for future rainfall t3 = etl\ .cut(t2, h)\ .convertall(lambda v: round(float(v) * conversion_factor * rainfall_adjustment, 2)) # convert to a 1D array (values cast to floats) precips = list(etl.data(t3)[0]) # also convert to a dictionary, for lookup by event precips_lookup = list(etl.dicts(t3))[0] # return 1D array and dictionary return precips, precips_lookup
def to_dicts(self):
    """
    Output table as a list of dicts.

    `Returns:`
        list
    """
    return list(petl.dicts(self.table))
def _medical_limits(id, source_db):
    """
    get the member limits
    """
    sql = ("SELECT dispensary_id, daily_purchase_limit, visit_purchase_limit, "
           "daily_visit_limit, two_week_purchase_limit "
           "FROM red_flags "
           "WHERE dispensary_id={0}").format(id)

    data = etl.fromdb(source_db, sql)
    limits = etl.select(data, lambda rec: rec.dispensary_id == id)
    return etl.dicts(limits)
def anyServices():
    # reading the csv file
    csv = pt.fromcsv('services.csv')
    # json content type declaration
    response.headers['Content-type'] = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    # cutting out the required column names
    jsonData = pt.cut(csv, 'ServiceID', 'Service')
    # convert the dictionary data into json data
    jsonData = json.JSONEncoder().encode(list(pt.dicts(jsonData)))
    # returning the json data
    return jsonData
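# For reference, the json.JSONEncoder().encode(...) idiom used throughout
# these handlers produces the same output as json.dumps(...). A standalone
# sketch; the in-memory CSV below is an assumed stand-in for services.csv.
import json

import petl as pt

csv_source = pt.MemorySource(b'ServiceID,Service\n1,Dental\n2,Optometry\n')
table = pt.fromcsv(csv_source)

rows = list(pt.dicts(pt.cut(table, 'ServiceID', 'Service')))
assert json.dumps(rows) == json.JSONEncoder().encode(rows)
print(json.dumps(rows))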
def __getitem__(self, index):
    self._index_count += 1
    if self._index_count >= DIRECT_INDEX_WARNING_COUNT:
        logger.warning("""
            You have indexed directly into this Table multiple times.
            This can be inefficient, as data transformations you've made
            will be computed _each time_ you index into the Table.
            If you are accessing many rows of data, consider switching to
            this style of iteration, which is much more efficient:
            `for row in table:`
        """)
    return petl.dicts(self.table)[index]
def _get_taxes(id, source_db):
    """
    get the dispensary taxes settings for each dispensary_id
    """
    sql = ("SELECT DISTINCT dispensary_id, amount, name "
           "FROM taxes "
           "WHERE dispensary_id={0}").format(id)

    data = etl.fromdb(source_db, sql)
    try:
        lookup_taxes = etl.select(data, lambda rec: rec.dispensary_id == id)
        return etl.dicts(lookup_taxes)
    except KeyError:
        return 0
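# _medical_limits and _get_taxes rely on the same petl.fromdb() -> dicts()
# pattern. A self-contained sketch against an in-memory SQLite database;
# the table and values are illustrative, and fromdb() accepts any DB-API 2.0
# connection in the same way.
import sqlite3

import petl as etl

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE taxes (dispensary_id INTEGER, amount REAL, name TEXT)')
conn.execute("INSERT INTO taxes VALUES (1, 7.5, 'city')")
conn.commit()

data = etl.fromdb(conn, 'SELECT DISTINCT dispensary_id, amount, name FROM taxes')
print(list(etl.dicts(data)))  # [{'dispensary_id': 1, 'amount': 7.5, 'name': 'city'}]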
def data_dicts(sbs, gbs, game_param):
    season_dicts = petl.dicts(sbs)
    game_dicts = petl.dicts(gbs)
    data = []
    for s in season_dicts:
        gd = [
            {k: v for k, v in x.items() if k in [game_param, 'week', 'bucket']}
            for x in game_dicts
            if x['year'] == s['year'] and x['name'] == s['name']
        ]
        data.append(dict(
            season=s,
            games=sorted(gd, key=lambda r: r['week']),
        ))
    return data
def etl(self, record_id):
    for model in self.__models__:
        location_table = self.extract(model, record_id)
        nrows = petl.nrows(location_table)
        if nrows == 1:
            record = petl.dicts(location_table)[0]
            if self._has_observations(record):
                self._added = True
                location_id = self._post_location(record, model)
                thing_id = self._post_thing(record, model, location_id)
                self.add_package(record)
                self.observation.etl(tids=self._make_tids(thing_id, record),
                                     models=(model,))
        else:
            print(f'multiple records found for given record_id. Skipping {record_id}')
def dictFromTable(): insights = {} #number of applications filed insights['Building Permits Filed'] = 0 #number of commercial in the works insights['Commercial Project Count'] = 0 #number of residential in the works insights['Residential Project Count'] = 0 listings = [] # print "in here " years = ['2016'] quarters = ['Q1', 'Q2', 'Q3', 'Q4'] tableList = openFileTable(years, quarters) for table in tableList: #transforms into dictionary d = etl.dicts(table) # print etl.look(table) #list of dictionaries b = list(d) for i in range(0, len(b)): #print b[i] if 'BP FILED' == b[i]['BESTSTAT']: insights['Building Permits Filed'] = insights[ 'Building Permits Filed'] + 1 if 'Resident' == b[i]['PROJECT_TYPE']: insights['Residential Project Count'] = insights[ 'Residential Project Count'] + 1 if 'Mixed' == b[i]['PROJECT_TYPE']: insights['Commercial Project Count'] = insights[ 'Commercial Project Count'] + 1 listings.append(b[i]) listings.insert(0, insights) return listings
def process_animal_extended(shelter_id, session, input_directory): table = petl.fromxls(os.path.join(input_directory, 'AnimalIntakeExtended.xls'), sheet='AnimalIntakeExtended') ## Because an animal can appear in the intake report more than once, ## we must sort the table in order to upsert the latest value table_sorted = petl.sort(table, key='Intake Date/Time') for row in petl.dicts(table_sorted): id = row['Animal ID'] set_values = { 'arn': normalize_string(row['ARN']), 'name': normalize_string(row['Animal Name']), 'species': normalize_string(row['Species']), 'primary_breed': normalize_string(row['Primary Breed']), 'secondary_bred': normalize_string(row['Secondary Breed']), 'gender': normalize_string(row['Gender']), 'pre_altered': to_bool(row['Pre Altered']), 'altered': to_bool(row['Altered']), 'primary_color': normalize_string(row['Primary Colour']), 'secondary_color': normalize_string(row['Secondary Colour']), 'third_color': normalize_string(row['Third Colour']), 'color_pattern': normalize_string(row['Colour Pattern']), 'second_color_pattern': normalize_string(row['Second Colour Pattern']), 'size': normalize_string(row['Size']) } insert_stmt = insert(Animal)\ .values( id=id, shelter_id=shelter_id, ## TODO: add to index for constraint? make composite pk? **set_values)\ .on_conflict_do_update( constraint='animals_pkey', set_={ 'shelter_id': shelter_id, **set_values, 'updated_at': func.now() }) session.execute(insert_stmt) session.commit()
def _add_observations(self, datastream_id, records, model):
    if not isinstance(records, list):
        records = petl.dicts(records)

    for wti in tqdm(records):
        t = self._timestamp_extract(wti[model.timestamp_column])
        t = MT_TIMEZONE.localize(t)
        v = wti[model.mapped_column]
        if v is not None:
            payload = {
                'phenomenonTime': t.isoformat(timespec='milliseconds'),
                'resultTime': t.isoformat(timespec='milliseconds'),
                'result': v,
                'Datastream': make_id(datastream_id)
            }
            self._post_item(f'Observations', payload)
def row_data(self, row_index):
    """
    Returns a row in table

    `Args:`
        row_index: int

    `Returns:`
        dict
            A dictionary of the row with the column as the key and the
            cell as the value.
    """
    self._index_count += 1
    if self._index_count >= DIRECT_INDEX_WARNING_COUNT:
        logger.warning("""
            You have indexed directly into this Table multiple times.
            This can be inefficient, as data transformations you've made
            will be computed _each time_ you index into the Table.
            If you are accessing many rows of data, consider switching to
            this style of iteration, which is much more efficient:
            `for row in table:`
        """)
    return petl.dicts(self.table)[row_index]
def test_dicts_shortrows():
    table = (('foo', 'bar'), ('a', 1), ('b',))
    actual = dicts(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': None})
    ieq(expect, actual)
def test_dicts():
    table = (("foo", "bar"), ("a", 1), ("b", 2))
    actual = dicts(table)
    expect = ({"foo": "a", "bar": 1}, {"foo": "b", "bar": 2})
    ieq(expect, actual)
def test_dicts():
    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = dicts(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': 2})
    ieq(expect, actual)
def collect_import_objects(self): """ Query and put together all QA objects which will be imported. """ def get_tables(mappings): """ Recursively collect all WOCAT tables of the mappings. Args: mappings: list. Returns: list. A list of tables. """ tables = [] for mapping in mappings: table = mapping.get('wocat_table') if table: tables.append(table) tables.extend(get_tables(mapping.get('mapping', []))) tables.extend(get_tables(mapping.get('conditions', []))) return tables self.output('Fetching data from WOCAT QA database.', v=1) # Extend the default tables by adding the ones from the mapping. tables = self.default_tables for qg_properties in self.mapping.values(): questions = qg_properties.get('questions', {}) for q_properties in questions.values(): tables.extend(get_tables(q_properties.get('mapping', []))) # Remove duplicates tables = list(set(tables)) # Try to query the lookup table and collect its values. try: lookup_query = """ SELECT * FROM {schema}.{table_name}; """.format(schema=self.schema, table_name=self.lookup_table_name) lookup_table = {} for row in petl.dicts(petl.fromdb(self.connection, lookup_query)): lookup_table[row.get('id')] = row except AttributeError: lookup_table = {} # So far, lookup_text is never used. Therefore it can be left empty. lookup_table_text = {} # Try to query file infos try: lookup_query_files = """ SELECT * FROM {schema}.{table_name}; """.format(schema=self.schema, table_name=self.file_info_table) file_infos = {} for row in petl.dicts( petl.fromdb(self.connection, lookup_query_files)): file_infos[row.get('blob_id')] = row except AttributeError: file_infos = {} for table_name in tables: query = 'SELECT {columns} FROM {schema}.{table_name};'.format( columns='*', schema=self.schema, table_name=table_name) queried_table = petl.fromdb(self.connection, query) row_errors = False for row in petl.dicts(queried_table): if row_errors is True: continue # Inconsistent naming throughout the tables questionnaire_identifier = self.questionnaire_identifier if table_name == 'approach': questionnaire_identifier = 'id' elif table_name == 'qa_quality_review': questionnaire_identifier = 'qa_id' identifier = row.get(questionnaire_identifier) if identifier is None: self.output('No identifier found for table "{}".'.format( table_name), v=1, l='error') row_errors = True if identifier in self.import_objects_exclude: continue import_object = self.get_import_object(identifier) if import_object is None: import_object = QAImportObject( identifier, self.command_options, lookup_table, lookup_table_text, file_infos, self.image_url) import_object.add_custom_mapping_messages( self.custom_mapping_messages) self.import_objects.append(import_object) # Set the code if it is available in the current table code = row.get(self.questionnaire_code) if code: import_object.set_code(code) # The main contributor is the compiler compiler_id = row.get(self.questionnaire_owner) if compiler_id: # If the main contributer is "Not registered" (ID 661), use # the default compiler if compiler_id == 661: compiler_id = self.default_compiler_id import_object.add_mapping_message( 'Using "Unknown User" as compiler in QCAT as main ' 'contributor in QA was "Not registered"') # The following QAs have a main contributor which is not # available through the API call. Set the default user and # add a mapping message. 
elif identifier in [131, 128, 89, 47, 106, 82, 195, 212, 76, 107, 84, 139, 130, 276, 72, 147, 138, 43, 44, 46, 49, 50, 52, 57, 173, 171, 170, 166, 125, 78, 102, 45, 197, 48]: compiler_id = self.default_compiler_id import_object.add_mapping_message( 'The compiler needs to be set manually. Use the ' 'main contributor of QA.') import_object.set_owner(compiler_id) # Use the creation date available on the approach table created = row.get('date') if created and table_name == 'approach': creation_time = datetime.strptime( created, WOCAT_DATE_FORMAT) import_object.created = timezone.make_aware( creation_time, timezone.get_current_timezone()) import_object.add_wocat_data(table_name, row)
etl.header(table)


# data()
########

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.data(table)
list(d)


# dicts()
#########

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.dicts(table)
d
list(d)


# namedtuples()
###############

import petl as etl
table = [['foo', 'bar'], ['a', 1], ['b', 2]]
d = etl.namedtuples(table)
d
list(d)


# records()
###############
def test_dicts_shortrows():
    table = (("foo", "bar"), ("a", 1), ("b",))
    actual = dicts(table)
    expect = ({"foo": "a", "bar": 1}, {"foo": "b", "bar": None})
    ieq(expect, actual)
def __iter__(self):
    return iter(petl.dicts(self.table))
def test_dicts_shortrows():
    table = (('foo', 'bar'), ('a', 1), ('b',))
    actual = dicts(table)
    expect = ({'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': None})
    ieq(expect, actual)
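# The short-row tests above pin down petl's padding behaviour: fields missing
# from a short row come back as None. A runnable one-liner version:
import petl

table = [('foo', 'bar'), ('a', 1), ('b',)]
assert list(petl.dicts(table)) == [
    {'foo': 'a', 'bar': 1},
    {'foo': 'b', 'bar': None},
]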
########

import petl as etl
table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.data(table)
list(d)


# dicts()
#########

import petl as etl
table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.dicts(table)
d
list(d)


# namedtuples()
###############

import petl as etl
table = [["foo", "bar"], ["a", 1], ["b", 2]]
d = etl.namedtuples(table)
d
list(d)
def transform_and_aggregate_datetimes(query_results, rollup): """transform datetime to the correct TZ; aggregate the values in the query results based on the datetime rollup args. Aggregation is performed for: * hourly or daily time intervals * total NOTE: in order to handle potential No-Data values in the DB during aggregation, we convert them to 0. The `src` field then indicates if any values in the rollup were N/D. Then, if the value field in the aggregated row still shows 0 after summation, *and* the src field shows N/D, we turn that zero into None. If there was a partial reading (e.g., the sensor has values for the first half hour but N/D for the second, and we are doing an hourly rollup), then the values will stay there, but the source field will indicate both N/D and whatever the source was for the workable sensor values. TODO: move this work over to the database query """ t1 = etl\ .fromdicts(query_results)\ .convert('xts', lambda v: v.astimezone(TZ).isoformat(), failonerror=True) #.rename('xts', 'ts') # print("t1") # print(t1) # print("rollup", rollup) if rollup in [INTERVAL_DAILY, INTERVAL_HOURLY]: petl_aggs = OrderedDict( val=('val', _sumround), # sum the rainfall vales src=('src', _listset) # create a list of all rainfall sources included in the rollup ) t2 = etl\ .convert( t1, 'xts', lambda v: _rollup_date(v, rollup), # convert datetimes to their rolled-up value in iso-format failonerror=True )\ .convert( 'val', lambda v: 0 if v is None else v, # convert rainfall values to 0 if no-data failonerror=True )\ .aggregate( ('xts', 'sid'), petl_aggs # aggregate rainfall values (sum) and sources (list) for each timestamp+ID combo, )\ .convert( 'val', lambda v, r: None if ('N/D' in r.src and v == 0) else v, # replace 0 values with no data if aggregated source says its N/D pass_row=True, failonerror=True )\ .sort('sid') # .convert( # 'xts', # lambda v: TZ.localize(parse(v)).isoformat(), # convert that datetime to iso format w/ timezone # failonerror=True # ) # print("t2 time rollup") elif rollup in [INTERVAL_SUM]: petl_aggs = OrderedDict( val=('val', _sumround), # sum the rainfall vales src=('src', _listset), # create a list of all rainfall sources included in the rollup xts=('xts', _minmax) # create a iso datetime range string from the min and max datetimes found ) t2 = etl\ .aggregate( t1, 'sid', petl_aggs # aggregate rainfall values (sum) and sources (list), and datetimes (str) for each ID, )\ .convert( 'val', lambda v, r: None if ('N/D' in r.src and v == 0) else v, # replace 0 values with no data if aggregated source says its N/D pass_row=True )\ .sort('sid')\ # print("t2 sum") else: t2 = t1 # print("t2 = t1") # print(t2) # h = etl.header(t2) # rename the timestamp and sensor id fields, # print("t2 header:", list(etl.header(t2))) # rename_kw = {} # for h1, h0 in [('xts', 'ts'), ('sid', 'id')]: # if h1 in h: # rename_kw[h1] = h0 # if len(rename_kw.items()) > 0: # {'xts':'ts', 'sid':'id'} t3 = etl.rename(t2, {'xts':'ts', 'sid':'id'}, strict=False) # else: # t3 = t2 # print("t3") # print(t3) # convert to list of dicts and return return list(etl.dicts(t3))
def transform(mmj_menu_items, mmj_categories, prices, organization_id, source_db, debug): """ Transform data """ # source data table source_dt = utils.view_to_list(mmj_menu_items) cut_menu_data = [ 'id', 'vendor_id', 'menu_id', 'dispensary_id', 'strain_id', 'created_at', 'updated_at', 'category_id', 'name', 'sativa', 'indica', 'on_hold', 'product_type', 'image_file_name', 'medicine_amount', 'product_type' ] cut_prices = [ 'menu_item_id', 'dispensary_id', 'price_half_gram', 'price_gram', 'price_two_gram', 'price_eigth', 'price_quarter', 'price_half', 'price_ounce' ] # Cut out all the fields we don't need to load menu_items = etl.cut(source_dt, cut_menu_data) prices_data = etl.cut(prices, cut_prices) menu_items = (etl.addfield( menu_items, 'createdAtEpoch').addfield('unitOfMeasure').addfield( 'locationProductDetails').addfield('keys').addfield('restockLevel') ) # Two-step transform and cut. First we need to cut the name # and id from the source data to map to. cut_source_cats = etl.cut(mmj_categories, 'name', 'id', 'measurement') source_values = etl.values(cut_source_cats, 'name', 'id') # Then we nede a dict of categories to compare against. # id is stored to match against when transforming and mapping categories mmj_categories = dict([(value, id) for (value, id) in source_values]) mappings = OrderedDict() mappings['id'] = 'id' mappings['createdAt'] = 'created_at' mappings['updatedAt'] = 'updated_at' mappings['createdAtEpoch'] = lambda x: utils.create_epoch(x.created_at) mappings['name'] = 'name' mappings['shareOnWM'] = lambda x: _wm_integration(x.id, source_db) """ 1 = Units 2 = Grams (weight) """ mappings['unitOfMeasure'] = \ lambda x: _map_uom(x.category_id, source_db) fields = etl.fieldmap(menu_items, mappings) data = etl.merge(menu_items, fields, key='id') items = [] for item in etl.dicts(data): breakpoint_pricing = (etl.select( prices_data, lambda x: x.dispensary_id == item['dispensary_id']).rename({ 'price_eigth': 'price_eighth' }).cutout('menu_item_id')) # Set image url for load to download url = None if debug and item['image_file_name'] is not None: url = ("https://wm-mmjmenu-images-development.s3." "amazonaws.com/menu_items/images/{0}/large/" "{1}").format(item['id'], item['image_file_name']) elif item['image_file_name'] is not None: url = ("https://wm-mmjmenu-images-production.s3." 
"amazonaws.com/menu_items/images/{0}/large/" "{1}").format(item['id'], item['image_file_name']) item['image_file_name'] = url item['categoryId'] = _map_categories(item['category_id'], item['sativa'], item['indica'], mmj_categories, menu_items) item['keys'] = { 'dispensary_id': item['dispensary_id'], 'id': item['id'], 'menu_id': item['menu_id'], 'vendor_id': item['vendor_id'], 'strain_id': item['strain_id'], 'category_id': item['category_id'] } # set a default netMJ value if the menu item is a unit product if item['unitOfMeasure'] is 2: item['netMarijuana'] = int(item['medicine_amount']) for key in item['keys'].keys(): if not item['keys'][key]: del item['keys'][key] item['locationProductDetails'] = { 'id': item['id'], 'active': _active(item['on_hold']) } item['restockLevel'] = _restock_level(item['dispensary_id'], item['product_type'], source_db) if item['shareOnWM'] is None: item['shareOnWM'] = False for price in etl.dicts(breakpoint_pricing): try: price_two_gram = price['price_two_gram'] except KeyError: price_two_gram = 0.0 item['locationProductDetails']['weightPricing'] = { 'price_half_gram': utils.dollars_to_cents(price['price_half_gram']), 'price_two_gram': utils.dollars_to_cents(price_two_gram), 'price_gram': utils.dollars_to_cents(price['price_gram']), 'price_eighth': utils.dollars_to_cents(price['price_eighth']), 'price_quarter': utils.dollars_to_cents(price['price_quarter']), 'price_half': utils.dollars_to_cents(price['price_half']), 'price_ounce': utils.dollars_to_cents(price['price_ounce']) } del item['vendor_id'] del item['indica'] del item['dispensary_id'] del item['id'] del item['strain_id'] del item['on_hold'] del item['menu_id'] del item['sativa'] del item['category_id'] del item['updated_at'] del item['created_at'] del item['product_type'] if item['image_file_name'] is None: del item['image_file_name'] # set up final structure for API items.append(item) # Remove inactive items for item in items: if item['locationProductDetails']['active'] is False: items.remove(item) if debug: result = json.dumps(items, sort_keys=True, indent=4, default=utils.json_serial) print(result) return items
def transform(source_data, organization_id, debug): """ Load the transformed data into the destination(s) """ # source data table source_dt = utils.view_to_list(source_data) cut_data = [ 'id', 'dispensary_id', 'picture_file_name', 'name', 'email', 'address', 'phone_number', 'dob', 'license_type', 'registry_no', 'membership_id', 'given_caregivership', 'tax_exempt', 'drivers_license_no', 'points', 'locked_visits', 'locked_visits_reason', 'caregiver_id', 'picture_file_name', 'card_expires_at', 'created_at', 'updated_at', 'physician_id', 'custom_membership_id', 'organization_membership_id', 'city', 'state', 'zip_code', 'address', 'organization_id' ] member_data = etl.cut(source_dt, cut_data) members = ( etl .addfield(member_data, 'identificationType') .addfield('createdAtEpoch') ) member_mapping = OrderedDict() member_mapping['id'] = 'id' member_mapping['caregiver_id'] = 'caregiver_id' member_mapping['dispensary_id'] = 'dispensary_id' member_mapping['physician_id'] = 'physician_id' member_mapping['custom_membership_id'] = 'custom_membership_id' member_mapping['organization_membership_id'] = 'organization_membership_id' member_mapping['picture_file_name'] = 'picture_file_name' member_mapping['dateOfBirth'] = 'dob' member_mapping['name'] = 'name' member_mapping['phone_number'] = 'phone_number' member_mapping['email'] = 'email' member_mapping['organization_id'] = 'organization_id' # MEDICAL 1, RECREATIONAL 2 member_mapping['memberType'] = \ lambda m: 'MEDICAL' if m.license_type == 1 else 'RECREATIONAL' member_mapping['mmjCard'] = 'registry_no' member_mapping['isCaregiver'] = \ lambda x: utils.true_or_false(x.given_caregivership) member_mapping['identificationNumber'] = 'drivers_license_no' member_mapping['points'] = 'points' member_mapping['card_expires_at'] = 'card_expires_at' member_mapping['taxExempt'] = lambda x: utils.true_or_false(x.tax_exempt) member_mapping['locked_visits'] = 'locked_visits' member_mapping['locked_visits_reason'] = 'accountStatusNotes' member_mapping['address'] = 'address' member_mapping['city'] = 'city' member_mapping['zip_code'] = 'zip_code' member_mapping['state'] = 'state' member_mapping['createdAt'] = 'created_at' member_mapping['updatedAt'] = 'updated_at' member_mapping['accountStatus'] = \ lambda x: utils.account_status(x.locked_visits) member_fields = etl.fieldmap(members, member_mapping) members = [] for item in etl.dicts(member_fields): item['keys'] = { 'id': item['id'], 'caregiver_id': item['caregiver_id'], 'dispensary_id': item['dispensary_id'], 'physician_id': item['physician_id'], 'custom_membership_id': item['custom_membership_id'], 'organization_membership_id': item['organization_membership_id'], 'picture_file_name': item['picture_file_name'], 'organization_id': item['organization_id'], } if item['card_expires_at'] is not None: item['expiryDate'] = item['card_expires_at'] # remove any item['keys'] tuples with None values for key in item['keys'].keys(): if not item['keys'][key]: del item['keys'][key] # set up final structure for API item['identificationType'] = 'Drivers License' # We may not need this in the data item['address'] = [{ 'line1': item['address'], 'city': item['city'], 'state': item['state'], 'zip': item['zip_code'], }] # replace None value dobs with the epoch beginning of time if not item['dateOfBirth']: dob = time.strftime('%Y-%m-%d %H:%M:%S.000Z', time.gmtime(0)) item['dateOfBirth'] = datetime.strptime(dob, '%Y-%m-%d %H:%M:%S.000Z') #del item['address'] del item['city'] del item['zip_code'] del item['state'] del item['dispensary_id'] del 
item['id'] del item['physician_id'] del item['caregiver_id'] del item['custom_membership_id'] del item['organization_membership_id'] del item['organization_id'] del item['picture_file_name'] del item['locked_visits_reason'] del item['locked_visits'] del item['card_expires_at'] members.append(item) if debug: result = json.dumps(members, sort_keys=True, indent=4, default=utils.json_serial) print(result) return members
"""Return a list of valid NUTS codes.""" with open(GEOCODES_FILE) as stream: lines = csv.DictReader(stream) geocodes = [] for i, line in enumerate(lines): # The first line has an empty NUTS-code if i > 0: geocode = line['NUTS-Code'] geocodes.append(geocode) logging.debug('Loaded %d NUTS geocodes', len(geocodes)) return tuple(geocodes) GEOCODES = list(dicts(fromcsv(GEOCODES_FILE))) def get_all_codelists(): """Return all codelists as a dictionary of dictionaries.""" codelists = {} for codelist_file in os.listdir(CODELISTS_DIR): codelist_name, _ = os.path.splitext(codelist_file) codelist = get_codelist(codelist_name) codelists.update({codelist_name: codelist}) return codelists
def sales_summary(start_dt=None, end_dt=None): """tally up gross (sale over list) profits TODO: tally up net profites (gross profit vs inventory purchase total) TODO: Keyword Arguments: start_dt {[type]} -- datetime for start of query (default: {None}) end_dt {[type]} -- datetime for start of query [description] (default: {None}) Returns: [dict] -- various types of sales information, stored in a dictionary. """ # products = db.session.query(Product).all() # sales = db.session.query(Sale).all() # retrieve existing tables products_records = etl.fromdb(db.engine, 'SELECT * FROM product') sales_records = etl.fromdb(db.engine, 'SELECT * FROM sale') # join product info to sales data sales_data = etl.join(sales_records, products_records, lkey='product_id', rkey='id') # prep joined sales data for tabulation sales_data = etl.convert(sales_data, 'date', lambda dt: format_date(dt)) sales_data = etl.sort(sales_data, 'date') sales_data = etl.convert(sales_data, 'quantity', lambda q: handle_none(q, replace_with=1)) sales_data = etl.addfield(sales_data, 'profit', lambda rec: calculate_profit(rec)) sales_data = etl.addfield(sales_data, 'gross_sales', lambda rec: calculate_gross_sales(rec)) # summarize data into charting-friendly data structures chart_count = etl.fold(sales_data, 'date', operator.add, 'quantity', presorted=True) chart_count = etl.rename(chart_count, {'key': 'x', 'value': 'y'}) chart_count, chart_count_missing_date = etl.biselect( chart_count, lambda rec: rec.x is not None) # print(chart_count) # etl.lookall(chart_count) chart_gross = etl.fold(sales_data, 'date', operator.add, 'gross_sales', presorted=True) chart_gross = etl.rename(chart_gross, {'key': 'x', 'value': 'y'}) chart_gross, chart_gross_missing_date = etl.biselect( chart_gross, lambda rec: rec.x is not None) # print(chart_gross) # etl.lookall(chart_gross) chart_profit = etl.fold(sales_data, 'date', operator.add, 'profit', presorted=True) chart_profit = etl.rename(chart_profit, {'key': 'x', 'value': 'y'}) chart_profit, chart_profit_missing_date = etl.biselect( chart_profit, lambda rec: rec.x is not None) # tabulate some figures gross_sales = 0 profits = 0 for sale in etl.dicts(sales_data): profits += calculate_profit(sale) gross_sales += calculate_gross_sales(sale) # for i in etl.dicts(chart_count): # print(i) # for i in etl.dicts(chart_gross): # print(i) return { 'gross_sales': gross_sales, 'profits': profits, 'chart_gross': list(etl.dicts(chart_gross)), 'chart_gross_missing_date': list(etl.dicts(chart_gross_missing_date)), 'chart_profit': list(etl.dicts(chart_profit)), 'chart_profit_missing_date': list(etl.dicts(chart_profit_missing_date)), 'chart_count': list(etl.dicts(chart_count)), 'chart_count_missing_date': list(etl.dicts(chart_count_missing_date)) }
def __repr__(self):
    return repr(petl.dicts(self.table))
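# The self.table methods above (to_dicts, __getitem__, row_data, __iter__,
# __repr__) all belong to the same kind of thin wrapper around a petl table.
# A minimal sketch of such a wrapper; the class name and constructor are
# assumptions for illustration, not the original library's API.
import petl


class DictTable:
    """Illustrative wrapper that exposes a petl table as dict rows."""

    def __init__(self, table):
        self.table = petl.wrap(table)

    def to_dicts(self):
        return list(petl.dicts(self.table))

    def __iter__(self):
        return iter(petl.dicts(self.table))

    def __getitem__(self, index):
        return petl.dicts(self.table)[index]

    def __repr__(self):
        return repr(petl.dicts(self.table))


t = DictTable([('foo', 'bar'), ('a', 1), ('b', 2)])
print(t[0])          # {'foo': 'a', 'bar': 1}
print(t.to_dicts())  # [{'foo': 'a', 'bar': 1}, {'foo': 'b', 'bar': 2}]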