def extract_species_distributions(outfile):
    """
    Extracts species distribution data from TNC catchment data
    and writes it to a presence-absence matrix CSV.

    Every record of the species shapefile (catchment id, species name) is
    mapped to the tributary that contains the catchment, using the
    hydrography database. Records whose catchment id is not present in the
    hydrography, or whose species name is blank, are skipped.

    Output layout: the header row is an empty cell followed by the sorted
    tributary ids; each following row is a species name followed by one
    0/1 flag per tributary (1 = species recorded in at least one catchment
    of that tributary). Rows are sorted by species name.

    Parameters:
        outfile: path of the CSV file to write (truncated if it exists)

    Returns:
        outfile, unchanged
    """

    # imports (kept local so the module can be imported without arcpy)
    import sys
    import csv
    import arcpy
    import numpy

    # input files
    hydroCodePath = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\Lamprey Cap Tradeoffs\analyses' # hydrography code path
    hydroMDB = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\Lamprey Cap Tradeoffs\raw_data\GL_pruned_hydrography.mdb'
    sppData = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\Lamprey Cap Tradeoffs\raw_data\Catchments_with_FishPresence_Updated_4-25-2016\catchments_w_fish_presence_4-25-2016.shp'

    # other parameters
    cidField = 'HydroID'
    speciesField = 'Species'

    # import helper functions; make sure the hydrography code path is the
    # first entry on sys.path without growing the list on repeated calls
    if sys.path[:1] != [hydroCodePath]:
        sys.path.insert(0, hydroCodePath)
    from hydrography.hydrography import Hydrography
    from hydrography.load_data import load_hydro_mdb

    # load hydrography data
    hydrodata = load_hydro_mdb(hydroMDB)
    hydrography = Hydrography(hydrodata)

    # catchment id -> tributary id, and tributary id -> matrix column index
    cid2tid = dict((c.id, c.tributary.id) for c in hydrography.get_catchments())
    tids = sorted(set(cid2tid.values()))
    ntid = len(tids)
    tid2ind = dict((tid, i) for i, tid in enumerate(tids))

    # load species trib data: species name -> 0/1 vector over tributaries
    spp2tid = {}
    # the context manager releases the cursor (and its locks) when done
    with arcpy.da.SearchCursor(sppData, [cidField, speciesField]) as cursor:
        for cid, name in cursor:
            tid = cid2tid.get(cid)
            if tid is None:
                continue  # catchment is not part of the hydrography
            # treat a null species value like a blank one
            name = (name or '').strip()
            if not name:
                continue
            if name not in spp2tid:
                spp2tid[name] = numpy.zeros(ntid, dtype=numpy.uint8)
            spp2tid[name][tid2ind[tid]] = 1

    # write species data to output csv
    # ('wb' is the mode the Python 2 csv module expects; the with-block
    # guarantees the file is flushed and closed)
    with open(outfile, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow([''] + tids)
        for name in sorted(spp2tid):
            writer.writerow([name] + spp2tid[name].tolist())

    return outfile
def main():
    """
    Builds the GDX input files for the habitat optimization GAMS model.

    Workflow:
      1. load the hydrography database and map each barrier id to the
         species associated with it (map_species_to_barriers)
      2. define how barrier table columns are converted into the GAMS
         model fields and write them to table.csv (make_table)
      3. read the GAMS parameter definitions from the .gms file
      4. load the table and definitions, build the GDX files and print
         the strings returned by make_gdx, one per line
    """

    # ~~ IMPORTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
    import os
    from make_gdx import make_gdx, read_gms, load_data, prune_barriers
    from hydrography.hydrography import Hydrography
    from hydrography.load_data import load_hydro_mdb

    # ~~ DEFINE INPUTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #

    # hydrography and species inputs
    hydroMDB = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\Lamprey Cap Tradeoffs\raw_data\GL_pruned_hydrography.mdb'
    barriers = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\Lamprey Cap Tradeoffs\raw_data\GL_pruned_hydrography.mdb\barriers'
    sppData = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\Lamprey Cap Tradeoffs\raw_data\Catchments_with_FishPresence_Updated_4-25-2016\catchments_w_fish_presence_4-25-2016.shp'

    # benchmark inputs and outputs
    tableCSV = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\code\benchmarks\data\table.csv'
    defFile = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\code\benchmarks\data\definitions.csv'
    gmsFile = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\code\benchmarks\scripts\Habitat_Opt.gms'
    outFolder = r'C:\Users\milt\Dropbox\UW Madison Post Doc\Lamprey Control\code\benchmarks\data\gdxs'

    # barrier table column names
    bidColumn = 'BID'
    dsidColumn = 'BID_DS'

    # get hydrography to map from bids to species presence
    hydrodata = load_hydro_mdb(hydroMDB)
    hydrography = Hydrography(hydrodata)

    # assess which species might use which sections above which barriers
    #   (i.e. if that species is found in the river network of the barrier)
    bid2spp = map_species_to_barriers(hydrography, sppData)
    # the hydrography objects are not needed past this point
    del hydrography, hydrodata

    # ~~ FIELD CONVERTERS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #

    def species_habitat(species_name):
        """Returns f(b, h): h if the species is listed for barrier b, else 0."""
        def habitat(b, h):
            if species_name in bid2spp[b]:
                return h
            return 0.
        return habitat

    fish1_hab = species_habitat('Esox lucius')
    fish2_hab = species_habitat('Moxostoma anisurum')
    fish3_hab = species_habitat('Acipenser fulvescens')
    lamp_hab = species_habitat('Petromyzon marinus')

    def lamp_hab_change(b, h):
        # lamprey habitat scaled by -0.95
        return -0.95 * lamp_hab(b, h)

    def as_long(s):
        # ids may arrive as float-formatted values, hence the float() step
        return long(float(s))

    def unchanged(s):
        return s

    def root_flag(s):
        # a downstream id of -1 marks a root barrier
        if int(float(s)) == -1:
            return 1
        return 0

    def always_one():
        return 1

    def pass_change(s):
        return 1 - float(s)

    # mapping from hydrography database fields to output fields for gams model
    #   output field -> (input columns, converter applied to those columns)
    habColumns = [bidColumn, 'HAB_UP']
    fields = {
        'bid': ([bidColumn], as_long),
        'dsid': ([dsidColumn], as_long),
        'hab_fish1': (list(habColumns), fish1_hab),
        'hab_fish2': (list(habColumns), fish2_hab),
        'hab_fish3': (list(habColumns), fish3_hab),
        'hab_lamprey': (list(habColumns), lamp_hab),
        'pass_fish1': (['PASS04'], unchanged), # swimming guilds from Allison's paper
        'pass_fish2': (['PASS07'], unchanged),
        'pass_fish3': (['PASS10'], unchanged),
        'pass_lamprey': (['PASS07'], unchanged),
        'cost': (['COST'], unchanged),
        'is_root': ([dsidColumn], root_flag),
        'candidate': ([], always_one),
        'passchange_fish1': (['PASS04'], pass_change),
        'passchange_fish2': (['PASS07'], pass_change),
        'passchange_fish3': (['PASS10'], pass_change),
        'passchange_lamprey': (['PASS07'], pass_change),
        'habchange_lamprey': (list(habColumns), lamp_hab_change),
    }

    # ~~ CREATE GDX ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #

    # make table.csv
    make_table(barriers, fields, tableCSV)

    # load the gams parameter definitions from the gams model file
    gamsParameters = read_gms(gmsFile)

    # load the data from tables and definitions file
    data = load_data(tableCSV, defFile, gamsParameters)

    # make the gdx's for every model run and report what make_gdx returned
    results = make_gdx(data, outFolder, parameters=gamsParameters, zip=False)
    print('\n'.join(results))