def extract_year_month(input):
    """
    Turn a (month name, year) pair such as ("January", 2011) into the
    tuple (2011, 1).

    `input[0]` is lower-cased and handed to `month2num`; `input[1]` is
    converted with `int()`.  Returns `(year, month)` when `month2num` gives
    back a truthy value, otherwise `None`.
    """
    month_number = month2num(input[0].lower())
    # The year is converted before the month is checked, so a malformed year
    # raises even when the month name is not recognised.
    year_number = int(input[1])
    if not month_number:
        return None
    return year_number, month_number
def process_summons(filename):
    """
    Read the first sheet of the Excel workbook at `filename` and write its
    figures to stdout as one tab-separated line.

    The line starts with the geography (row 1, column 0), the year (last four
    characters of row 3, column 2) and the month (row 2, column 0, passed
    through `month2num`), followed by one value for every name in
    `DATA_COLUMNS`.  A `KeyError` is raised if a name in `DATA_COLUMNS` was
    not produced from the sheet.
    """
    empty_cell = 0   # ctype xlrd reports for an empty cell
    number_cell = 2  # ctype xlrd reports for a numeric cell

    def first_line(cell):
        """Return the text before the first newline in the cell."""
        return cell.value.split(u'\n')[0]

    def last_line(cell):
        """Return the text after the last newline in the cell."""
        return cell.value.split(u'\n')[-1]

    def read_label(current, above):
        """Return the label for this row, borrowing from the cell above
        when the current cell is empty."""
        if current.ctype == empty_cell:
            return last_line(above)
        return first_line(current)

    def read_count(current, above):
        """Return the integer for this row, borrowing from the cell above
        when the current cell is empty."""
        if current.ctype == empty_cell:
            return int(last_line(above))
        if current.ctype == number_cell:
            return int(current.value)
        return int(first_line(current))

    sheet = xlrd.open_workbook(filename).sheet_by_index(0)
    geography = sheet.row(1)[0].value
    month_number = month2num(sheet.row(2)[0].value)
    year_number = int(sheet.row(3)[2].value[-4:])

    values = {}
    for row_index in xrange(4, 39):
        above = sheet.row(row_index - 1)
        current = sheet.row(row_index)

        label = read_label(current[0], above[0])
        month_to_date = read_count(current[1], above[1])
        year_to_date = read_count(current[2], above[2])

        mtd_key = columnize(label + u' mtd')
        ytd_key = columnize(label + u' ytd')
        values[mtd_key] = month_to_date
        values[ytd_key] = year_to_date

    leading = [unicode(geography), unicode(year_number), unicode(month_number)]
    sys.stdout.write(u'\t'.join(leading))
    for column in DATA_COLUMNS:
        sys.stdout.write(u'\t' + unicode(values[column]))
    sys.stdout.write(u'\n')
def process_summons(filename):
    """
    Convert the first sheet of a single Excel file into one tab-separated
    line on stdout.

    Output fields, in order: geography (row 1, column 0), year (last four
    characters of row 3, column 2), month (row 2, column 0 via `month2num`),
    then the collected value for each entry of `DATA_COLUMNS`.  Rows 4
    through 38 supply a label in column 0 and two integers in columns 1 and
    2, stored under `columnize(label + u' mtd')` and
    `columnize(label + u' ytd')`.
    """

    def own_line(cells, cells_above, index):
        """Pick the line of text belonging to this row: the last line of the
        cell above when this cell is empty (ctype 0), otherwise the first
        line of this cell."""
        if cells[index].ctype == 0:
            return cells_above[index].value.split(u'\n')[-1]
        return cells[index].value.split(u'\n')[0]

    def own_count(cells, cells_above, index):
        """Return the row's integer: numeric cells (ctype 2) are converted
        directly, everything else goes through `own_line`."""
        if cells[index].ctype == 2:
            return int(cells[index].value)
        return int(own_line(cells, cells_above, index))

    worksheet = xlrd.open_workbook(filename).sheet_by_index(0)
    place = worksheet.row(1)[0].value
    month_no = month2num(worksheet.row(2)[0].value)
    year_no = int(worksheet.row(3)[2].value[-4:])

    totals = {}
    for n in xrange(4, 39):
        cells_above = worksheet.row(n - 1)
        cells = worksheet.row(n)

        name = own_line(cells, cells_above, 0)
        mtd_total = own_count(cells, cells_above, 1)
        ytd_total = own_count(cells, cells_above, 2)

        mtd_name = columnize(name + u' mtd')
        ytd_name = columnize(name + u' ytd')
        totals[mtd_name] = mtd_total
        totals[ytd_name] = ytd_total

    # The line is written piecewise: the three leading fields, then one
    # tab-prefixed value per data column, then the newline.
    sys.stdout.write(u'\t'.join([unicode(place), unicode(year_no),
                                 unicode(month_no)]))
    for column_name in DATA_COLUMNS:
        sys.stdout.write(u'\t' + unicode(totals[column_name]))
    sys.stdout.write(u'\n')
if all([c != 200 for c in codes.values()]): sys.stderr.write(u"No more archives! {codes} at {year}/{month} " u"\n".format(year=year, month=month, codes=codes)) break # Also download current acc reports in case NYPD forgot to add them to the # zip archive sys.stderr.write(u"Downloading current acc reports...\n") resp = requests.get(CURRENT_EXCEL_ROOT + u'cityacc.xlsx') sh = xlrd.open_workbook(file_contents=resp.content).sheet_by_index(0) _, cur_month_name, cur_year = sh.row(1)[0].value.split() month = month2num(cur_month_name) year = int(cur_year) path = os.path.join(archive_path, str(year), u'{:0>2}'.format(month)) try: os.makedirs(path) except OSError: pass for boro in ('city', 'bk', 'bx', 'mn', 'qn', 'si'): pdf_resp = requests.get(u'{0}{1}{2}'.format(CURRENT_PDF_ROOT, boro, 'acc.pdf')) filename = pdf_resp.url.split('/')[-1] open(os.path.join(path, filename), 'w').write(pdf_resp.content) sys.stderr.write(u"Wrote {0} to {1}\n".format(filename, path)) excel_resp = requests.get(u'{0}{1}{2}'.format(CURRENT_EXCEL_ROOT, boro, 'acc.xlsx'))
if all([c != 200 for c in codes.values()]): sys.stderr.write(u"No more archives! {codes} at {year}/{month} " u"\n".format(year=year, month=month, codes=codes)) break # Also download current acc reports in case NYPD forgot to add them to the # zip archive sys.stderr.write(u"Downloading current acc reports...\n") resp = requests.get(CURRENT_EXCEL_ROOT + u'cityacc.xlsx') sh = xlrd.open_workbook(file_contents=resp.content).sheet_by_index(0) _, cur_month_name, cur_year = sh.row(1)[0].value.split() month = month2num(cur_month_name) year = int(cur_year) path = os.path.join(archive_path, str(year), u'{:0>2}'.format(month)) try: os.makedirs(path) except OSError: pass for boro in ('city', 'bk', 'bx', 'mn', 'qn', 'si'): pdf_resp = requests.get(u'{0}{1}{2}'.format(CURRENT_PDF_ROOT, boro, 'acc.pdf')) filename = pdf_resp.url.split('/')[-1] open(os.path.join(path, filename), 'w').write(pdf_resp.content) sys.stderr.write(u"Wrote {0} to {1}\n".format(filename, path))