예제 #1
0
def parse_file(fn):
    """Yield the parsed earmark rows of the xls file ``fn``.

    The sheet is loaded with ``xls2list.xls2list``; its first three rows are
    skipped because they hold no data.  Iteration ends at the first row whose
    parsed ``description`` is empty.  Every yielded row is given a sequential
    ``id`` starting at 1.
    """
    sheet = xls2list.xls2list(fn)
    for seq, raw in enumerate(sheet[3:], 1):
        earmark = parse_row(raw)
        if not earmark.description:
            # Every earmark carries a description, so a row without one
            # marks the end of the earmarks.
            return
        # The ids found in the sheet are not unique; number the rows from 1
        # instead.
        earmark.id = seq
        yield earmark
예제 #2
0
def parse_file(fn):
    """Generate one parsed row per earmark in the xls file ``fn``.

    Rows come from ``xls2list.xls2list``; the three leading rows, which do
    not hold data, are dropped.  Each remaining row goes through
    ``parse_row`` and generation stops as soon as a parsed row has an empty
    ``description``.  Yielded rows are numbered 1, 2, 3, ... through their
    ``id`` attribute.
    """
    data_rows = xls2list.xls2list(fn)[3:]
    counter = 0
    for raw_row in data_rows:
        parsed = parse_row(raw_row)
        # All earmarks have a description; the first row lacking one means
        # there are no more earmarks to read.
        if not parsed.description:
            break
        # The original ids are far from unique, so assign a running number
        # (starting at 1 rather than 0).
        counter += 1
        parsed.id = counter
        yield parsed
예제 #3
0
def parse_state(state):
    """Yield one record per location found in the SOI spreadsheet for ``state``.

    The spreadsheet at ``SOI_PATH % state`` is loaded with
    ``xls2list.xls2list``.  Rows before index 11 are headers; after that the
    sheet is consumed in strides of 8 rows, of which the first 7 form one
    location's bundle (one row per bracket).  Parsing stops at the first
    bundle whose leading cell is ``None``.  Bundles whose location is
    "MISSOURI" are skipped as duplicated data.

    Each yielded ``web.storage`` has:

    * ``loc`` -- the bundle's leading cell; a float there is rendered as a
      zero-padded 5-character string (presumably a ZIP code -- confirm).
    * ``brackets`` -- one ``web.storage`` per bundle row holding
      ``bracket_low``, ``n_filers``, ``agi``, ``tot_tax``, ``n_dependents``,
      ``n_eitc``, ``tot_eitc``, ``tot_charity`` and ``n_prepared``, plus the
      derived ratios that could be computed: ``pct_prepared``,
      ``pct_charity``, ``avg_eitc``, ``pct_eitc``, ``avg_dependents``,
      ``avg_taxburden`` and ``avg_income``.  A ratio whose inputs are missing
      or whose denominator is zero is left unset, except ``avg_eitc`` which
      is 0 for a zero denominator.
    * ``gini`` -- only set when ``gini_est`` does not raise ``MissingData``.
    """
    # `unicode` only exists on Python 2; fall back to `str` so the text-cell
    # checks keep working on Python 3 as well.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    header_rows = 11  # everything above this row is headers
    bundle_rows = 7   # rows read for each location
    stride = 8        # rows per location group; the 8th row is never read

    # (derived attribute, numerator field, denominator field), in the order
    # the attributes are added to each bracket.
    ratios = (
        ('pct_prepared', 'n_prepared', 'n_filers'),
        ('pct_charity', 'tot_charity', 'agi'),
        ('avg_eitc', 'tot_eitc', 'n_eitc'),
        ('pct_eitc', 'n_eitc', 'n_filers'),
        ('avg_dependents', 'n_dependents', 'n_filers'),
        ('avg_taxburden', 'tot_tax', 'agi'),
        ('avg_income', 'agi', 'n_filers'),
    )

    def fixnum(x, multiply=1):
        # Text cells containing '*' are treated as missing values.
        if isinstance(x, text_type) and '*' in x:
            return None
        return x * multiply

    def bracket_floor(label):
        # Return the lower bound encoded in a row's first cell.
        if (isinstance(label, text_type) and label.strip() == 'Total'
                ) or isinstance(label, float):
            return None
        if label.strip() == "Under $10,000":
            return 0
        # Keep only the digits of the first whitespace-separated token.
        return int(''.join(ch for ch in label.split()[0] if ch.isdigit()))

    def add_ratios(br):
        # Attach every derived ratio that can be computed from br's fields.
        for name, num, den in ratios:
            try:
                value = float(getattr(br, num)) / getattr(br, den)
            except TypeError:
                # One of the inputs is missing (None): leave the ratio unset.
                continue
            except ZeroDivisionError:
                if name != 'avg_eitc':
                    continue
                value = 0
            setattr(br, name, value)

    stats = xls2list.xls2list(SOI_PATH % state)

    for start in range(header_rows, len(stats) - bundle_rows, stride):
        bundle = stats[start:start + bundle_rows]
        if bundle[0][0] is None:
            break

        out = web.storage()
        out.loc = bundle[0][0]
        if isinstance(out.loc, float):
            out.loc = str(int(out.loc)).zfill(5)

        if out.loc.strip() == "MISSOURI":
            continue  # duped data

        out.brackets = []
        for line in bundle:
            # The parsed lower bound is kept in the bracket record only; the
            # spreadsheet row itself is left untouched.
            br = web.storage(
                bracket_low=bracket_floor(line[0]),
                n_filers=fixnum(line[1]),
                agi=fixnum(line[4], 1000),
                tot_tax=fixnum(line[35], 1000),
                n_dependents=fixnum(line[3]),
                n_eitc=fixnum(line[36]),
                tot_eitc=fixnum(line[37], 1000),
                tot_charity=fixnum(line[26], 1000),
                n_prepared=fixnum(line[38]),
            )
            add_ratios(br)
            out.brackets.append(br)

        try:
            out.gini = gini_est(out.brackets)
        except MissingData:
            pass

        yield out
예제 #4
0
def parse_state(state):
    """Yield one record per location found in the SOI spreadsheet for ``state``.

    The spreadsheet at ``SOI_PATH % state`` is loaded with
    ``xls2list.xls2list``.  Rows before index 11 are headers; after that the
    sheet is consumed in strides of 8 rows, of which the first 7 form one
    location's bundle (one row per bracket).  Parsing stops at the first
    bundle whose leading cell is ``None``.  Bundles whose location is
    "MISSOURI" are skipped as duplicated data.

    Each yielded ``web.storage`` has:

    * ``loc`` -- the bundle's leading cell; a float there is rendered as a
      zero-padded 5-character string (presumably a ZIP code -- confirm).
    * ``brackets`` -- one ``web.storage`` per bundle row holding
      ``bracket_low``, ``n_filers``, ``agi``, ``tot_tax``, ``n_dependents``,
      ``n_eitc``, ``tot_eitc``, ``tot_charity`` and ``n_prepared``, plus the
      derived ratios that could be computed: ``pct_prepared``,
      ``pct_charity``, ``avg_eitc``, ``pct_eitc``, ``avg_dependents``,
      ``avg_taxburden`` and ``avg_income``.  A ratio whose inputs are missing
      or whose denominator is zero is left unset, except ``avg_eitc`` which
      is 0 for a zero denominator.
    * ``gini`` -- only set when ``gini_est`` does not raise ``MissingData``.
    """
    # `unicode` only exists on Python 2; fall back to `str` so the text-cell
    # checks keep working on Python 3 as well.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    header_rows = 11  # everything above this row is headers
    bundle_rows = 7   # rows read for each location
    stride = 8        # rows per location group; the 8th row is never read

    # (derived attribute, numerator field, denominator field), in the order
    # the attributes are added to each bracket.
    ratios = (
        ('pct_prepared', 'n_prepared', 'n_filers'),
        ('pct_charity', 'tot_charity', 'agi'),
        ('avg_eitc', 'tot_eitc', 'n_eitc'),
        ('pct_eitc', 'n_eitc', 'n_filers'),
        ('avg_dependents', 'n_dependents', 'n_filers'),
        ('avg_taxburden', 'tot_tax', 'agi'),
        ('avg_income', 'agi', 'n_filers'),
    )

    def fixnum(x, multiply=1):
        # Text cells containing '*' are treated as missing values.
        if isinstance(x, text_type) and '*' in x:
            return None
        return x * multiply

    def bracket_floor(label):
        # Return the lower bound encoded in a row's first cell.
        if (isinstance(label, text_type) and label.strip() == 'Total'
                ) or isinstance(label, float):
            return None
        if label.strip() == "Under $10,000":
            return 0
        # Keep only the digits of the first whitespace-separated token.
        return int(''.join(ch for ch in label.split()[0] if ch.isdigit()))

    def add_ratios(br):
        # Attach every derived ratio that can be computed from br's fields.
        for name, num, den in ratios:
            try:
                value = float(getattr(br, num)) / getattr(br, den)
            except TypeError:
                # One of the inputs is missing (None): leave the ratio unset.
                continue
            except ZeroDivisionError:
                if name != 'avg_eitc':
                    continue
                value = 0
            setattr(br, name, value)

    stats = xls2list.xls2list(SOI_PATH % state)

    for start in range(header_rows, len(stats) - bundle_rows, stride):
        bundle = stats[start:start + bundle_rows]
        if bundle[0][0] is None:
            break

        out = web.storage()
        out.loc = bundle[0][0]
        if isinstance(out.loc, float):
            out.loc = str(int(out.loc)).zfill(5)

        if out.loc.strip() == "MISSOURI":
            continue  # duped data

        out.brackets = []
        for line in bundle:
            # The parsed lower bound is kept in the bracket record only; the
            # spreadsheet row itself is left untouched.
            br = web.storage(
                bracket_low=bracket_floor(line[0]),
                n_filers=fixnum(line[1]),
                agi=fixnum(line[4], 1000),
                tot_tax=fixnum(line[35], 1000),
                n_dependents=fixnum(line[3]),
                n_eitc=fixnum(line[36]),
                tot_eitc=fixnum(line[37], 1000),
                tot_charity=fixnum(line[26], 1000),
                n_prepared=fixnum(line[38]),
            )
            add_ratios(br)
            out.brackets.append(br)

        try:
            out.gini = gini_est(out.brackets)
        except MissingData:
            pass

        yield out