def test_dataframe():

    def dfprint(label, val):
        sc.colorize('cyan', f'\n{label}')
        print(val)
        return None

    print('Testing dataframe:')

    a = sc.dataframe(cols=['x', 'y'], data=[[1238, 2], [384, 5], [666, 7]])
    dfprint('Create dataframe', a)

    dfprint('Print out a column', a['x'])
    dfprint('Print out a row', a[0])
    dfprint('Print out an element', a['x', 0])

    a[0] = [123, 6]
    dfprint('Set values for a whole row', a)

    a['y'] = [8, 5, 0]
    dfprint('Set values for a whole column', a)

    a['z'] = [14, 14, 14]
    dfprint('Add new column', a)

    a.addcol('m', [14, 15, 16])
    dfprint('Alternate way to add new column', a)

    a.rmcol('z')
    dfprint('Remove a column', a)

    a.pop(1)
    dfprint('Remove a row', a)

    a.append([555, 2, -1])
    dfprint('Append a new row', a)

    a.insert(1, [660, 3, -1])
    dfprint('Insert a new row', a)

    a.sort()
    dfprint('Sort by the first column', a)

    a.sort('y')
    dfprint('Sort by the second column', a)

    a.addrow([770, 4, -1])
    dfprint('Replace the previous row and sort', a)

    dfprint('Return the row starting with value "555"', a.findrow(555))

    a.rmrow()
    dfprint('Remove last row', a)

    a.rmrow(123)
    dfprint('Remove the row starting with element "123"', a)

    p = a.pandas()
    dfprint('Convert to pandas', p)

    q = p.add(p)
    dfprint('Do a pandas operation', q)

    a.pandas(q)
    dfprint('Convert back', a)

    a.filtercols(['m', 'x'])
    dfprint('Filter to columns m and x', a)

    b = sc.dcp(a)
    dfprint('Dataframe copying:', a == b)

    return a
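# A minimal sketch of how this test would typically be invoked; it assumes
# sciris is installed and imported as `sc` at module level (as in the script
# at the end of this section). The main-guard wrapper here is illustrative,
# not part of the original test.
if __name__ == '__main__':
    df = test_dataframe()
    print('Done.')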
def loaddata():
    print('Loading data...')
    dataurl = 'https://raw.githubusercontent.com/rstudio/shiny-examples/master/120-goog-index/data/trend_data.csv'
    rawdata = sc.wget(dataurl).splitlines()
    data = []
    for r, rawline in enumerate(rawdata):
        line = rawline.split(',')
        if r == 0: # Read header
            cols = line
        else: # Read data
            tag = line[0]
            yearnum = convertdate(line[1], '%Y-%m-%dT%I:%M:%fZ')
            value = float(line[2]) if r > 0 else line[2]
            data.append([tag, yearnum, value])
    df = sc.dataframe(cols=cols, data=data)
    return df
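# The loader above calls a `convertdate()` helper that is not defined in this
# excerpt. Below is a minimal sketch of such a helper, assuming it converts a
# date string (parsed with the given strptime format) into a decimal year; the
# name and signature match the call above, but the body is hypothetical.
from datetime import datetime

def convertdate(datestr, fmt):
    ''' Convert a date string to a decimal year (hypothetical implementation) '''
    date = datetime.strptime(datestr, fmt)
    start = datetime(date.year, 1, 1)
    end = datetime(date.year + 1, 1, 1)
    fraction = (date - start).total_seconds() / (end - start).total_seconds()
    return date.year + fraction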
def common_interventions(region=None, income=None, byplatform=False, max_entries=10, entry=None, label=''):
    sc.heading('Top interventions figure')

    if not byplatform:
        category_list = interv_data['Category 1'].tolist()
    else:
        category_list = interv_data['Platform'].tolist()
    categories = sorted(set(category_list))
    if byplatform:
        order = [0, 2, 1, 3]
        categories = [categories[o] for o in order]

    keycols = ['Short name', 'Category 1', 'Category 2', 'Platform', 'ICER']
    df = sc.dataframe(cols=keycols + ['Percent'], nrows=len(interv_data))
    for key in keycols:
        df[key] = interv_data[key]
    df['Percent'] = 0.0
    df.sort('Short name')

    nspends, nintervs = R[0]['alloc'].shape
    all_counts = pl.zeros((nspends, nintervs))
    include_counts = sc.dcp(all_counts)
    for co, country in enumerate(R.keys()):
        proceed = True
        if region and country_data['who_region', co] != region:
            proceed = False # Could use continue
        if income and country_data['income_group', co] != income:
            proceed = False # Could use continue
        if proceed:
            alloc = R[country]['alloc']
            counts = pl.array(alloc > 0, dtype=float)
            if entry is None:
                entries = range(nspends)
            else:
                entries = [entry]
            for i in entries:
                for j in range(nintervs):
                    all_counts[i, j] += 1
                    include_counts[i, j] += counts[i, j]

    for j in range(nintervs):
        include = include_counts[:, j].sum()
        total = all_counts[:, j].sum()
        df['Percent', j] = include / total # Ensure sorted!!

    data = sc.odict().make(keys=categories, vals=[])
    counts = sc.odict().make(keys=categories, vals=0)
    df.sort(col='Percent', reverse=True)
    for j in range(nintervs):
        if byplatform:
            this_category = df['Platform', j]
        else:
            this_category = df['Category 1', j]
        if counts[this_category] < max_entries:
            data[this_category].append(df[j].tolist())
            counts[this_category] += 1

    if dosave:
        df.export('results/rapid_top-interventions.xlsx')

    fig = pl.figure(figsize=(9, 17))
    ax = fig.add_axes([0.5, 0.1, 0.45, 0.85])
    count = 50
    ticklocs = []
    ticklabels = []
    if not byplatform:
        darkest = [
            pl.array([0.5, 0.1, 0.0]),
            pl.array([0.0, 0.1, 0.5]),
            pl.array([0.5, 0.0, 0.5]),
            pl.array([0.1, 0.5, 0.0]),
        ]
    else:
        darkest = [
            (0.5, 0.2, 0.0),
            (0.0, 0.2, 0.5),
            (0.0, 0.5, 0.0),
            (0.0, 0.5, 0.3),
        ]
    darkest = darkest[::-1]
    position = df.cols.index('Percent')
    for k, key, vals in sc.odict(data).enumitems():
        count -= 2
        count2 = 0
        pl.text(-2, count, key, fontweight='bold', horizontalalignment='right', fontsize=6)
        maxval = len(vals)
        for row in vals:
            count -= 1
            count2 += 1
            thiscolor = darkest[k] + (count2 / (maxval * 2)) * pl.array([1, 1, 1])
            ticklocs.append(count)
            ticklabels.append(row[0])
            percentage = float(row[position]) * 100 # WARNING, fragile!!
            pl.barh(count, percentage, facecolor=thiscolor, edgecolor='none')
    ax.set_yticks(ticklocs)
    ax.set_yticklabels(ticklabels, fontsize=4)
    ax.set_title(label)
    pl.xlim([0, 100])
    pl.xlabel('Frequency of inclusion of intervention in EUHC package (%)')
    if dosave:
        connector = '' if not label else '-'
        pl.savefig(f'results/rapid_top-interventions{connector}{label}.png', dpi=200)
    return fig
def makepackage(self, burdenset=None, intervset=None, frpwt=None, equitywt=None, verbose=True, die=False):
    ''' Make results '''
    # Handle inputs
    if burdenset is not None: self.burdenset = burdenset # Warning, name is used both as key and actual set!
    if intervset is not None: self.intervset = intervset
    if frpwt is None: frpwt = 0.25
    if equitywt is None: equitywt = 0.25
    self.frpwt = frpwt
    self.equitywt = equitywt

    burdenset = self.projectref().burden(key=self.burdenset)
    intervset = self.projectref().interv(key=self.intervset)
    intervset.parse() # Ensure it's parsed
    colnames = intervset.colnames

    # Create new dataframe
    origdata = sc.dcp(intervset.data)
    critical_cols = ['active', 'shortname', 'unitcost', 'spend', 'icer', 'frp', 'equity']
    df = sc.dataframe()
    for col in critical_cols: # Copy columns over
        df[col] = sc.dcp(origdata[colnames[col]])
    df['parsedbc'] = sc.dcp(origdata['parsedbc']) # Since not named
    df.filter_out(key=0, col='active', verbose=True)

    # Calculate people covered (spending/unitcost)
    df['coverage'] = hp.arr(df['spend']) / (self.eps + hp.arr(df['unitcost']))

    # Pull out DALYS and prevalence
    df.addcol('total_dalys', value=0) # Value=0 by default, but just to be explicit
    df.addcol('max_dalys', value=0)
    df.addcol('total_prevalence', value=0)
    df.addcol('dalys_averted', value=0)
    notfound = []
    lasterror = None
    for r in range(df.nrows):
        theseburdencovs = df['parsedbc', r]
        for burdencov in theseburdencovs:
            key = burdencov[0]
            val = burdencov[1] # WARNING, add validation here
            try:
                thisburden = burdenset.data.findrow(key=key, col=burdenset.colnames['cause'], asdict=True, die=True)
                df['total_dalys', r]      += thisburden[burdenset.colnames['dalys']]
                df['max_dalys', r]        += thisburden[burdenset.colnames['dalys']] * val
                df['total_prevalence', r] += thisburden[burdenset.colnames['prevalence']]
            except Exception as E:
                lasterror = E # Stupid Python 3
                print('HIIII %s' % str(E))
                print(type(df['total_dalys', r]))
                print(type(df['max_dalys', r]))
                print(type(df['total_prevalence', r]))
                print(type(thisburden[burdenset.colnames['dalys']]))
                print(type(thisburden[burdenset.colnames['prevalence']]))
                notfound.append(key)

    # Validation
    if len(notfound):
        errormsg = 'The following burden(s) were not found: "%s"\nError:\n%s' % (notfound, str(lasterror))
        raise hp.HPException(errormsg)
    invalid = []
    for r in range(df.nrows):
        df['dalys_averted', r] = df['spend', r] / (self.eps + df['icer', r])
        if df['dalys_averted', r] > df['max_dalys', r]:
            errormsg = 'Data input error: DALYs averted for "%s" greater than total DALYs (%0.0f vs. %0.0f); please reduce total spending, increase ICER, increase DALYs, or increase max coverage' % (df['shortname', r], df['dalys_averted', r], df['max_dalys', r])
            df['dalys_averted', r] = df['max_dalys', r] # WARNING, reset to maximum rather than give error if die=False
            invalid.append(errormsg)
    if len(invalid):
        errors = '\n\n'.join(invalid)
        if die:
            raise Exception(errors)
        else:
            print(errors)

    # To populate with optimization results and fixed spending
    self.budget = hp.arr(df['spend']).sum()
    df.addcol('opt_spend')
    df.addcol('opt_dalys_averted')
    df.addcol('fixed')

    # Store colors
    nintervs = df.nrows
    colors = sc.gridcolors(nintervs + 2, asarray=True)[2:] # Skip black and white
    colordict = sc.odict()
    for c, name in enumerate(df['shortname']):
        colordict[name] = colors[c]
    self.colordict = colordict

    self.data = df # Store it
    if verbose:
        print('Health package %s recalculated from burdenset=%s and intervset=%s' % (self.name, self.burdenset, self.intervset))
    return None
import sciris as sc

count = 0
def dfprint(label, val):
    global count
    count += 1
    sc.colorize('blue', '\n%s. ' % count + label)
    print(val)
    return None

print('Testing dataframe:')

a = sc.dataframe(cols=['x', 'y'], data=[[1238, 2], [384, 5], [666, 7]])
dfprint('Create dataframe', a)

dfprint('Print out a column', a['x'])
dfprint('Print out a row', a[0])
dfprint('Print out an element', a['x', 0])

a[0] = [123, 6]
dfprint('Set values for a whole row', a)

a['y'] = [8, 5, 0]
dfprint('Set values for a whole column', a)

a['z'] = [14, 14, 14]
dfprint('Add new column', a)

a.addcol('m', [14, 15, 16])
dfprint('Alternate way to add new column', a)

a.rmcol('z')
dfprint('Remove a column', a)

a.pop(1)