def __init__(self, spreadsheet): # database session config_file = os.path.join(os.path.dirname(__file__), 'openhea.cfg') self.config = OpenHEAConfig() read = self.config.read(config_file) if len(read) != 1: print 'Need openhea.cfg setup with database parameters' sys.exit(1) cs = self.config.sqlalchemy_connection_string() engine = create_engine(cs, echo=True) Session = sessionmaker(bind=engine) self.session = Session() # open workbook self.workbook = xlrd.open_workbook(spreadsheet)
class DataImporter: def __init__(self, spreadsheet): # database session config_file = os.path.join(os.path.dirname(__file__), 'openhea.cfg') self.config = OpenHEAConfig() read = self.config.read(config_file) if len(read) != 1: print 'Need openhea.cfg setup with database parameters' sys.exit(1) cs = self.config.sqlalchemy_connection_string() engine = create_engine(cs, echo=True) Session = sessionmaker(bind=engine) self.session = Session() # open workbook self.workbook = xlrd.open_workbook(spreadsheet) def saveSiteData(self): """Expecting site_data to look like: [ ['One sample point', ''] ['Country name', 'Namibia'] ['LZ name', 'Caprivi Lowland Maize and Cattle Zone'] ['Name of village or settlement', 'avillage'] ['Interview date', '40321.0'] ['Interviewer name', 'James Acidri'] ['Interview number', '1'] ['Start of reference/ consumption year', 'March'] ] Note that the date being a number is an Excel thing - see code for how we deal with it """ site_data = sheetToGridNoBlank(self.workbook.sheet_by_index(0)) testPrint(site_data) DATACOL = 1 project = site_data[1][DATACOL] livelihoodzone = site_data[2][DATACOL] # TODO: not currently in database schema #village_name = site_data[3][DATACOL] date_tuple = xlrd.xldate_as_tuple(site_data[4][DATACOL], self.workbook.datemode) datecreated = date(date_tuple[0], date_tuple[1], date_tuple[2]) createdby = site_data[5][DATACOL] # TODO: not currently in database schema #interview_number = site_data[6][DATACOL] consumptionyearbegins = site_data[7][DATACOL] lz = Livelihoodzone( livelihoodzone=livelihoodzone, createdby=createdby, datecreated=datecreated, consumptionyearbegins=consumptionyearbegins) self.session.add(lz) self.session.commit() def saveExpenditureData(self): """Expecting expenditure_data to look like: [ ['', '', '', '', '', u'HOUSEHOLD EXPENDITURE', '', '', '', '', '', '', '', '', ''], ['', '', '', u'WG1 Lower', u'WG1 upper', u'WG2 Lower', u'WG2 Upper', u'WG3 Lower', u'WG3 Upper', u'WG4 Lower', u'WG4 Upper', u'WG1', u'WG2', u'WG3', u'WG4'], [u'Category', u'Food type', u'Unit', '', '', '', u'No. Units purchased', '', '', '', '', '', u'Price per unit', '', ''], [u'Staple food', u'Maize meal', u'Kg', 390.0, 390.0, 208.0, 208.0, 182.0, 182.0, 97.5, 97.5, 4.0, 4.0, 4.0, 4.0], [u'Non-staple food', u'Sugar', u'Kg', 11.0, 11.0, 22.0, 22.0, 36.0, 36.0, 18.0, 18.0, 7.5, 11.0, 6.0, 8.5], ['', '', '', '', '', '', u'Annual expenditure', '', '', '', '', '', '', '', ''], [u'Household items', u'Candles', u'N$', 92.0, 92.0, 95.0, 95.0, 164.0, 164.0, 235.0, 235.0, '', '', '', ''], ['', u'Soap/Vaseline', u'N$', 258.0, 258.0, 480.0, 480.0, 326.0, 326.0, 597.0, 597.0, '', '', '', ''], ['', u'Kerosine', u'N$', 0.0, 0.0, 0.0, 0.0, 140.0, 140.0, 360.0, 360.0, '', '', '', ''], [u'Essential inputs', u'Tools', '', 200.0, 200.0, 200.0, 200.0, 300.0, 300.0, 300.0, 300.0, '', '', '', ''] ] """ expenditure_data = sheetToGridNoBlank(self.workbook.sheet_by_index(3)) testPrint(expenditure_data) # check the first row is what we expect title_row = expenditure_data.pop(0) stripped_title_row = [x for x in title_row if x != ''] assert len(stripped_title_row) == 1 wealth_groups = {} expenditure = [] standard_of_living = [] wg_row = expenditure_data.pop(0) for index, col in enumerate(wg_row): if col.lower().endswith('lower') and wg_row[index+1].lower().endswith('upper'): wealth_group_name = col.split()[0] assert wealth_group_name.lower() == wg_row[index+1].lower().split()[0] wealth_groups[wealth_group_name] = { 'lower_col': index, 'upper_col': index+1, } expenditure_data.pop(0) data = expenditure category = '' for row in expenditure_data: strip_row = [x for x in row if x != ''] # change data dictionary after "Annual Expenditure" if len(strip_row) == 1 and strip_row[0].lower().startswith('annual'): data = standard_of_living continue # cache category - it is often not repeated, so we will keep using the same # value until it changes datadict = {} if row[0]: category = row[0] datadict['category'] = category datadict['type'] = row[1] datadict['unit'] = row[2] for wg in wealth_groups.keys(): datadict[wg] = { 'lower': row[wealth_groups[wg]['lower_col']], 'upper': row[wealth_groups[wg]['upper_col']], } data.append(datadict) def saveWealthGroupAssetsData(self): """Expecting wgassets_data to look like: [['', u'Wealth group characteristics', '', '', '', '', '', '', '', '', '', '', '', ''], ['', '', '', u'WG1', u'WG2', u'WG3', u'WG4', u'WG5 etc', '', '', '', '', '', ''], ['', u'Wealth group name', '', u'very poor', u'poor ', u'middle ', u'better off', '', '', '', '', '', '', ''], ['', u'Percent in wealth group', '', 0.31, 0.39, 0.22, 0.08, '', '', '', '', '', '', ''], ['', u'Number of people in household', '', 7.0, 7.0, 6.0, 5.0, '', '', '', '', '', '', ''], ['', u'Wealth group rank 1= poorest', '', 1.0, 2.0, 3.0, 4.0, '', '', '', '', '', '', ''], [u'ASSETS', '', '', '', '', u'Asset holdings', '', '', '', '', '', '', u'Asset price', ''], ['', '', '', u'WG1', u'WG1', u'WG2', u'WG2', u'WG3', u'WG3', u'WG4 ', u'WG4', u'WG5 etc', '', ''], ['', '', '', u'Lower', u'Upper', u'Lower', u'Upper', u'Lower', u'Upper', u'Lower', u'Upper', '', u'Lower', u'Upper'], [u'Category', u'Asset Type', u'Unit', '', '', '', '', '', '', '', '', '', '', ''], [u'Land', u'Upland', u'Acre', 1.25, 2.0, 2.5, 3.0, 20.0, 25.0, 22.5, 30.0, '', '', ''], ['', u'Owned Irrigated', u'Acre', 0.5, 1.0, 1.0, 2.0, 1.0, 2.0, '', '', '', '', ''], [u'Trees', u'Mango', u'Item', 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0, 5.0, '', '', ''], [u'Other tradeable goods', u'Cell phone', u'Item', 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, '', '', ''], ['', u'Ox plough', u'Item', 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, '', '', ''], ['', u'Livestock assets setup missing', '', '', '', '', '', '', '', '', '', '', '', ''], [u'Livestock', u'Cattle', u'Item', 0.0, 5.0, 3.0, 6.0, 7.0, 14.0, 35.0, 87.0, '', 300.0, 300.0], ['', u'Goats', u'Item', 0.0, 6.0, 7.0, 11.0, 12.0, 23.0, 20.0, 22.0, '', 50.0, 50.0], ['', u'?cash?foodstocks', '', '', '', '', '', '', '', '', '', '', '', '']] """ wgassets_data = sheetToGridNoBlank(self.workbook.sheet_by_index(1)) # check the first row has one cell (the title) title_row = wgassets_data[0] stripped_title_row = [x for x in title_row if x != ''] assert len(stripped_title_row) == 1 wealth_groups = {} category = '' data = [] wealth_groups_list = set(wgassets_data[7]) wealth_groups_list.remove('') wealth_groups = {} for wg in wealth_groups_list: wealth_groups[wg] = { 'upper':'', 'lower':'', } for row in wgassets_data[10:]: this_wealth_groups = wealth_groups.copy() # cache category - it is often not repeated, so we will keep using the same # value until it changes if row[0]: category = row[0] datadict = {} datadict['category'] = category datadict['type'] = row[1] datadict['unit'] = row[2] for index,val in enumerate(row[3:]): if val != '' and wgassets_data[7][index+3] != '': this_wealth_groups[wgassets_data[7][index+3]][wgassets_data[8][index+3].lower()] = val datadict['data'] = this_wealth_groups data.append(datadict)