def getData(crimefile=infile, min_year=2001):
    """Parse the crime CSV into a list of per-row dicts tagged with a state.

    The file mixes three kinds of rows:

    * empty rows, which are skipped;
    * rows whose first cell matches ``state_title``; the first captured
      group is passed through ``getStateAbbr`` and becomes the current
      state for the rows that follow;
    * rows whose first cell parses as an integer, which are treated as
      data and converted with ``dataRowToDict``.

    Any other row (first cell not an integer) is ignored.

    Args:
        crimefile: Path of the CSV file to read.
        min_year: Data rows whose ``'year'`` value is below this are
            dropped. Defaults to 2001.

    Returns:
        A list of the dicts produced by ``dataRowToDict``, each with an
        added ``'state'`` key, in file order. ``'state'`` is ``None`` for
        data rows that appear before any state header.
    """
    out = []
    # Start with no state so a data row that precedes the first state
    # header does not raise UnboundLocalError.
    state = None

    # newline='' is what the csv module asks for; the old 'rU' mode is
    # rejected by Python 3.11+. The with-block closes the file even if a
    # helper raises while a row is being processed.
    with open(crimefile, newline='') as f:
        for row in csv.reader(f):
            # no data, move along
            if not row:
                continue

            # possible we have a new state
            state_shift = re.match(state_title, row[0])
            if state_shift:
                state = getStateAbbr(state_shift.groups()[0])

            # other text rows (including state headers) we can ignore
            try:
                int(row[0])
            except ValueError:
                continue

            # data!
            rowdata = dataRowToDict(row)
            rowdata['state'] = state

            # only keep rows from min_year onwards
            if rowdata['year'] < min_year:
                continue

            out.append(rowdata)

    return out