def main():
    """Command line entry point: parse options, load annotation, run parse_vcf.

    Options are read from ``sys.argv`` with optparse.  When ``-g`` is given,
    ``-f`` is mandatory: the fasta is loaded with ``genome2dict`` and the
    annotation with ``load_gff`` (file name ends with ``.gff``) or
    ``load_gtf`` (anything else).  Without ``-g`` the three lookup
    dictionaries stay empty.  Finally ``parse_vcf`` is called with the
    collected settings.

    Exits through ``parser.error`` (usage message on stderr, status 2) when
    ``-g`` is given without ``-f`` or when a given file does not exist.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage, version="%prog 1.0")  # allow_interspersed_args=True

    parser.add_option("-g", dest="gtf",
                      help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta",
                      help="genome fasta [can be gzipped]")
    parser.add_option("-i", dest="fpath",
                      help="input file [stdin]")
    parser.add_option("-o", dest="outfn",
                      help="output fname [stdout]")
    parser.add_option("-d", dest="minDepth", default=10, type=int,
                      help="minimal depth [%default]")
    parser.add_option("-m", dest="minFreq", default=0.8, type=float,
                      help="min frequency of alternative base [%default]")
    # The three flags below default to True; passing the flag stores False.
    parser.add_option("-n", dest="indels", default=True, action="store_false",
                      help="ignore indels")
    parser.add_option("-b", dest="bothStrands", default=True, action="store_false",
                      help="report events confirmed by single strand algs")
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o), ))

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # if annotation
        # load genome
        if not o.fasta:  # fasta has to be provided
            # Was `parser.errer`, which raised AttributeError instead of
            # printing the usage error.
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # load genome annotation
        if not os.path.isfile(o.gtf):  # check if correct file
            parser.error("No such file: %s" % o.gtf)
        # load gtf/gff
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n" % (len(id2gene), o.gtf))

    # parse pileup
    parse_vcf(o.fpath, o.outfn, ctg2cds, id2gene, ctg2seq,
              o.minDepth, o.minFreq, o.indels, o.bothStrands)
def get_options():
    """Parse, validate and normalise the command line options.

    Reads ``sys.argv`` with optparse.  Exactly one positional argument is
    required; it is returned as the output file prefix.

    Post-processing applied to the parsed options:

    * unless ``--debug`` is set, existing ``<prefix>.csv`` / ``<prefix>.pkl``
      files are removed (with a warning);
    * ``UserAuth`` becomes ``[user, password]`` or ``[]``;
    * ``chnrank`` is split on commas into a list;
    * a complete lat/lon box is ordered (min <= max) and disables the radius
      search; an incomplete box or radius specification is reset to ``None``;
    * date/time strings are converted with ``UTCDateTime``; a fixed range
      (``--start`` / ``--end``) disables the non-specific range options.

    Returns:
        tuple: ``(opts, outpref)`` - the optparse values object and the
        output file prefix.

    Exits through ``parser.error`` (status 2) when the positional argument is
    missing or ``--User-Auth`` is not of the form ``user:password``.

    Relies on ``exists`` and ``UTCDateTime`` being available at module level.
    """
    from os import remove as rmfile
    from optparse import OptionParser, OptionGroup

    parser = OptionParser(
        usage="Usage: %prog [options] <station list filename>",
        description="Program to query a datacenter using the obspy fdsn client. "
        "All station returned in this query are saved into both a csv format 1sls "
        "file as well as a stationdb (stdb.StDbElement) pickled dictionary. The input "
        "argument, <station file name> is the prefix for the output file, which is by "
        "default <station file name>.csv and <station file name>.pkl.")

    # General Settings
    parser.add_option(
        "-D", "--debug", action="store_true", dest="debug", default=False,
        help="Debug mode. After the client query is complete (and successful), instead of "
        "parsing the inventory, it is instead pickled to <station file name>_query_debug.pkl "
        "which can be loaded in ipython to examine manually.")
    parser.add_option(
        "--long-keys", action="store_true", dest="lkey", default=False,
        help="Specify Key format. Default is Net.Stn. Long keys are Net.Stn.Chn")
    parser.add_option(
        "-a", "--ascii", action="store_false", dest="use_binary", default=True,
        help="Specify to write ascii Pickle files instead of binary. Ascii are larger file size, "
        "but more likely to be system independent.")

    # Server Settings
    ServerGroup = OptionGroup(
        parser, title="Server Settings",
        description="Settings associated with "
        "which datacenter to log into.")
    ServerGroup.add_option(
        "--Server", action="store", type=str, dest="Server", default="IRIS",
        help="Specify the server to connect to. Options include: BGR, ETH, GEONET, GFZ, INGV, IPGP, "
        "IRIS, KOERI, LMU, NCEDC, NEIP, NERIES, ODC, ORFEUS, RESIF, SCEDC, USGS, USP. [Default IRIS]")
    ServerGroup.add_option(
        "--User-Auth", action="store", type=str, dest="UserAuth", default="",
        help="Enter your IRIS Authentification Username and Password (--User-Auth='username:authpassword') "
        "to access and download restricted data. [Default no user and password]")

    # Selection Settings
    SelectGroup = OptionGroup(
        parser, title="Channel Priority/Selection Settings",
        description="Settings "
        "associated with selecting the channels to retain.")
    SelectGroup.add_option(
        "--channel-rank", action="store", type=str, dest="chnrank", default="HH,BH,LH",
        help="If requesting more than one type of channel, specify a comma separated list of the first two "
        "lettres of the desired components to retain. Default is HH > BH > LH : ['HH,BH,LH']")

    # Channel Settings
    ChannelGroup = OptionGroup(
        parser, title="Station-Channel Settings",
        description="Options to narrow down "
        "the specific channels based on network, station, etc")
    ChannelGroup.add_option(
        "-N", "--networks", action="store", type=str, dest="nets", default="*",
        help="Specify a comma separated list of network codes to search for [Default *]")
    ChannelGroup.add_option(
        "-S", "--stations", action="store", type=str, dest="stns", default="*",
        help="Specify a comma separated list of station names. If you want wildcards, enclose in quotes [Default *]")
    ChannelGroup.add_option(
        "-L", "--locations", action="store", type=str, dest="locs", default="*",
        help="Specify a comma separated list of location codes. If you want wildcards, enclose in quotes [Default *]")
    ChannelGroup.add_option(
        "-C", "--channels", action="store", type=str, dest="chns", default="HH*,BH*,LH*",
        help="Specify a comma separated, wildcarded list of channel names. [Default HH*,BH*,LH*]")

    # Geographic Settings
    BoxGroup = OptionGroup(
        parser, title="Geographic Lat/Lon Box Search",
        description="Define the coordinates "
        "of a lat/lon box in which to select stations. If filled out, takes precedence over values for "
        "Radius Search (below).")
    BoxGroup.add_option(
        "--minlat", "--min-latitude", action="store", type="float", dest="minlat", default=None,
        help="Specify minimum latitude to search (must specify all of minlat, maxlat, minlon, maxlon).")
    BoxGroup.add_option(
        "--maxlat", "--max-latitude", action="store", type="float", dest="maxlat", default=None,
        help="Specify maximum latitude to search (must specify all of minlat, maxlat, minlon, maxlon).")
    BoxGroup.add_option(
        "--minlon", "--min-longitude", action="store", type="float", dest="minlon", default=None,
        help="Specify minimum longitude to search (must specify all of minlat, maxlat, minlon, maxlon).")
    BoxGroup.add_option(
        "--maxlon", "--max-longitude", action="store", type="float", dest="maxlon", default=None,
        help="Specify maximum longitude to search (must specify all of minlat, maxlat, minlon, maxlon).")

    RadGroup = OptionGroup(
        parser, title="Geographic Radius Search",
        description="Central point and min/max "
        "radius search settings. Box Search Settings take precedence over radius search.")
    RadGroup.add_option(
        "--lat", "--latitude", action="store", type="float", dest="lat", default=None,
        help="Specify a Lat (if any of --lon --min-radius and --max-radius are empty, an error will prompt).")
    RadGroup.add_option(
        "--lon", "--longitude", action="store", type="float", dest="lon", default=None,
        help="Specify a Lon (if any of --lat --min-radius and --max-radius are empty, an error will prompt).")
    RadGroup.add_option(
        "--minr", "--min-radius", action="store", type="float", dest="minr", default=0.,
        help="Specify a minimum search radius (in degrees) around the point defined by --lat and --lon "
        "(if any of --lat --lon and --max-radius are empty, an error will prompt). [Default 0. degrees]")
    RadGroup.add_option(
        "--maxr", "--max-radius", action="store", type="float", dest="maxr", default=None,
        help="Specify a maximum search radius (in degrees) around the point defined by --lat and --lon "
        "(if any of --lat --lon and --min-radius are empty, an error will prompt).")

    # Temporal Settings
    FixedRangeGroup = OptionGroup(
        parser, title="Fixed Time Range Settings",
        description="Find all stations "
        "operating within the start and end date/time. If either are filled out, they take precedence over "
        "Non-Specific time range search (below)")
    FixedRangeGroup.add_option(
        "--start", "--start-date", action="store", type=None, dest="stdate", default=None,
        help="Specify the Start Date/Time in a UTCDateTime compatible String (ie, 2010-01-15 15:15:45.2). [Default Blank]")
    FixedRangeGroup.add_option(
        "--end", "--end-date", action="store", type=None, dest="enddate", default=None,
        help="Specify the End Date/Time in a UTCDateTime compatible String (ie, 2010-01-15 15:15:45.2). [Default Blank]")

    VarRangeGroup = OptionGroup(
        parser, title="Non-Specific Time Range Settings",
        description="Time settings "
        "with less specificity. Ensure that those you specify do not interfere with each other. If above Fixed "
        "Range values are set, they will take precedence over these values.")
    VarRangeGroup.add_option(
        "--start-before", action="store", type=None, dest="stbefore", default=None,
        help="Specify a Date/Time which stations must start before (must be UTCDateTime compatible string, "
        "ie 2010-01-15 15:15:45.2). [Default empty]")
    VarRangeGroup.add_option(
        "--start-after", action="store", type=None, dest="stafter", default=None,
        help="Specify a Date/Time which stations must start after (must be UTCDateTime compatible string, "
        "ie 2010-01-15 15:15:45.2). [Default empty]")
    VarRangeGroup.add_option(
        "--end-before", action="store", type=None, dest="endbefore", default=None,
        help="Specify a Date/Time which stations must end before (must be UTCDateTime compatible string, "
        "ie 2010-01-15 15:15:45.2). [Default empty]")
    VarRangeGroup.add_option(
        "--end-after", action="store", type=None, dest="endafter", default=None,
        help="Specify a Date/Time which stations must end after (must be UTCDateTime compatible string, "
        "ie 2010-01-15 15:15:45.2). [Default empty]")

    # Add All Groups
    parser.add_option_group(ServerGroup)
    parser.add_option_group(SelectGroup)
    parser.add_option_group(ChannelGroup)
    parser.add_option_group(BoxGroup)
    parser.add_option_group(RadGroup)
    parser.add_option_group(FixedRangeGroup)
    parser.add_option_group(VarRangeGroup)

    # Run Parser
    (opts, args) = parser.parse_args()

    # Check output file name
    if len(args) != 1:
        parser.error("Need station database file")
    outpref = args[0]

    # Outside debug mode, stale output files are removed up front.
    if not opts.debug:
        if exists(outpref + ".csv") and exists(outpref + ".pkl"):
            print("Warning: Output Files " + outpref + ".csv and " + outpref +
                  ".pkl already exist. These will be overwritten...")
            rmfile(outpref + ".pkl")
            rmfile(outpref + ".csv")
        elif exists(outpref + ".csv"):
            print("Warning: Output File " + outpref + ".csv already exists. It will be overwritten...")
            rmfile(outpref + ".csv")
        elif exists(outpref + ".pkl"):
            print("Warning: Output File " + outpref + ".pkl already exists. It will be overwritten...")
            rmfile(outpref + ".pkl")

    # Parse User Authentification
    if not len(opts.UserAuth) == 0:
        tt = opts.UserAuth.split(':')
        if not len(tt) == 2:
            # Was `parser.errer`, which raised AttributeError instead of
            # printing the usage error.
            parser.error("Error: Incorrect Username and Password Strings for User Authentification")
        else:
            opts.UserAuth = tt
    else:
        opts.UserAuth = []

    # Parse Channel Rank to List
    opts.chnrank = opts.chnrank.split(',')

    # Check Geographic Settings
    if opts.minlat is not None or opts.maxlat is not None or \
            opts.minlon is not None or opts.maxlon is not None:
        if opts.minlat is None or opts.maxlat is None or \
                opts.minlon is None or opts.maxlon is None:
            # Not all value set
            opts.minlat = None
            opts.maxlat = None
            opts.minlon = None
            opts.maxlon = None
            print("Warning: one of minlat,maxlat,minlon,maxlon were not set. All values reset to None. ")
            print("")
        else:
            # Ensure proper min/max set (right-hand sides are evaluated
            # before either attribute is rebound).
            opts.minlat, opts.maxlat = (min([opts.minlat, opts.maxlat]),
                                        max([opts.minlat, opts.maxlat]))
            opts.minlon, opts.maxlon = (min([opts.minlon, opts.maxlon]),
                                        max([opts.minlon, opts.maxlon]))
            print("Performing Geographic Box Search:")
            print(" LL: {0:9.4f}, {1:8.4f}".format(opts.minlat, opts.minlon))
            print(" UR: {0:9.4f}, {1:8.4f}".format(opts.maxlat, opts.maxlon))
            print(" ")
            # set all other box parameters to none
            opts.minr = None
            opts.maxr = None
            opts.lat = None
            opts.lon = None
    # NOTE(review): --minr defaults to 0., so this branch is entered (and the
    # warning below printed) even when no radius option was given at all;
    # confirm whether that is intended.
    elif opts.lat is not None or opts.lon is not None or \
            opts.minr is not None or opts.maxr is not None:
        if opts.lat is None or opts.lon is None or \
                opts.minr is None or opts.maxr is None:
            opts.lat = None
            opts.lon = None
            opts.minr = None
            opts.maxr = None
            print("Warning: one of lat,lon,minr,maxr were not set. All values reset to None. ")
            print(" ")
        else:
            # Ensure minr/maxr set.  Both bounds are computed before either
            # is assigned; assigning minr first made max() see the already
            # overwritten value and lose the larger radius.
            opts.minr, opts.maxr = (min([opts.minr, opts.maxr]),
                                    max([opts.minr, opts.maxr]))
            print("Performing Geographic Radius Search: ")
            # NOTE(review): values are printed as (lon, lat), unlike the box
            # search which prints (lat, lon); confirm the intended order.
            print(" Centre Point: {0:9.4f}, {1:8.4f}".format(opts.lon, opts.lat))
            print(" Radius: {0:6.2f} to {1:6.2f} degrees".format(opts.minr, opts.maxr))
            print(" ")

    # Check Time Settings
    time_fmt = "%Y-%m-%d %H:%M:%S"
    if opts.stdate is not None or opts.enddate is not None:
        # Use Fixed Range, not other
        opts.stbefore = None
        opts.stafter = None
        opts.endbefore = None
        opts.endafter = None
        # Fix End Date
        if opts.enddate is None:
            opts.enddate = UTCDateTime("2599-12-31 23:59:59.9")
        else:
            opts.enddate = UTCDateTime(opts.enddate)
        # Assign stdate as UTCDateTime
        if opts.stdate is not None:
            opts.stdate = UTCDateTime(opts.stdate)
        print("Performing Fixed Time Range Search: ")
        # stdate stays None when only --end was given; calling strftime on it
        # unconditionally raised AttributeError.
        if opts.stdate is not None:
            print(" Start: " + opts.stdate.strftime(time_fmt))
        else:
            print(" Start: (not specified)")
        print(" End: " + opts.enddate.strftime(time_fmt))
        print(" ")
    else:
        # No Fixed Range Set. Are other values set?
        if opts.stbefore is not None or opts.stafter is not None or \
                opts.endbefore is not None or opts.endafter is not None:
            print("Performing Non-Specific Time Search: ")
            if opts.stbefore is not None:
                opts.stbefore = UTCDateTime(opts.stbefore)
                print(" Start Before: " + opts.stbefore.strftime(time_fmt))
            if opts.stafter is not None:
                opts.stafter = UTCDateTime(opts.stafter)
                print(" Start After: " + opts.stafter.strftime(time_fmt))
            if opts.endbefore is not None:
                opts.endbefore = UTCDateTime(opts.endbefore)
                print(" End Before: " + opts.endbefore.strftime(time_fmt))
            if opts.endafter is not None:
                opts.endafter = UTCDateTime(opts.endafter)
                print(" End After: " + opts.endafter.strftime(time_fmt))
            print(" ")
        else:
            print("Warning: No Time Range Specified for Search")
            print(" ")

    # Station/Channel Search Parameters
    print("Station/Channel Search Parameters:")
    print(" Network: {0:s}".format(opts.nets))
    print(" Stations: {0:s}".format(opts.stns))
    print(" Locations: {0:s}".format(opts.locs))
    print(" Channels: {0:s}".format(opts.chns))
    print(" Channel Rank: {0:s}".format(",".join(opts.chnrank)))
    print(" ")
    if opts.debug:
        # NOTE(review): ".kcsv" looks like a typo in this message; left as-is
        # because the intended debug file names are not visible here.
        print("Output Files: {0:s}_query_debug.pkl and {0:s}_query_debug.kcsv".format(outpref))
    else:
        print("Output Files: {0:s}.csv and {0:s}.pkl".format(outpref))
    print(" ")

    # Return Values
    return opts, outpref
def main():
    """Command line entry point: validate inputs, load annotation, run check_snps.

    Options are read from ``sys.argv`` with optparse; the positional
    arguments are VCF files, each of which must exist.  When ``-g`` is given,
    ``-f`` is mandatory: the fasta is loaded with ``genome2dict`` and the
    annotation with ``load_gff`` (file name ends with ``.gff``) or
    ``load_gtf`` (anything else).  Candidate coordinates are then read with
    ``load_vcf`` and passed to ``check_snps`` together with the two BAM
    paths and the filtering settings.

    Exits through ``parser.error`` (usage message on stderr, status 2) when
    no VCF is given, a given file does not exist, or ``-g`` is used without
    ``-f``.
    """
    usage = "usage: %prog [options] *.vcf"
    parser = OptionParser(usage=usage, version="%prog 1.0")  # allow_interspersed_args=True

    parser.add_option("-g", dest="gtf",
                      help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta",
                      help="genome fasta")
    parser.add_option("-1", dest="bam1",
                      help="sample bam")
    parser.add_option("-2", dest="bam2",
                      help="reference bam")
    parser.add_option("-o", dest="outfn",
                      help="output fname [stdout]")
    parser.add_option("-d", dest="minDepth", default=5, type=int,
                      help="""minimal depth; note both samples need to have pass depth filtering [%default]""")
    parser.add_option("-m", dest="minFreq", default=0.8, type=float,
                      help="min frequency of alternative base [%default]")
    # The three flags below default to True; passing the flag stores False.
    parser.add_option("-n", dest="indels", default=True, action="store_false",
                      help="ignore indels [%default]")
    parser.add_option("-b", dest="bothStrands", default=True, action="store_false",
                      help="report events confirmed by single strand algs")
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o), ))

    if not args:
        parser.error("At least one vcf file has to be specified!")

    for fn in args:
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # if annotation
        # load genome
        if not o.fasta:  # fasta has to be provided
            # Was `parser.errer`, which raised AttributeError instead of
            # printing the usage error.
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # load genome annotation
        if not os.path.isfile(o.gtf):  # check if correct file
            parser.error("No such file: %s" % o.gtf)
        # load gtf/gff
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n" % (len(id2gene), o.gtf))

    # load possible SNPs coordinates
    coords = load_vcf(args, o.indels)
    # check with mpileup
    check_snps(coords, o.bam1, o.bam2, o.fasta, o.outfn, ctg2cds, id2gene, ctg2seq,
               o.minDepth, o.minFreq, o.indels, o.bothStrands)
def main():
    """Command line entry point: validate inputs, load annotation, run check_snps.

    Options are read from ``sys.argv`` with optparse; the positional
    arguments are VCF files, each of which must exist.  When ``-g`` is given,
    ``-f`` is mandatory: the fasta is loaded with ``genome2dict`` and the
    annotation with ``load_gff`` (file name ends with ``.gff``) or
    ``load_gtf`` (anything else).  Candidate coordinates are then read with
    ``load_vcf`` and passed to ``check_snps`` together with the two BAM
    paths and the filtering settings.

    Exits through ``parser.error`` (usage message on stderr, status 2) when
    no VCF is given, a given file does not exist, or ``-g`` is used without
    ``-f``.
    """
    usage = "usage: %prog [options] *.vcf"
    parser = OptionParser(usage=usage, version="%prog 1.0")  # allow_interspersed_args=True

    parser.add_option("-g", dest="gtf", help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta", help="genome fasta")
    parser.add_option("-1", dest="bam1", help="sample bam")
    parser.add_option("-2", dest="bam2", help="reference bam")
    parser.add_option("-o", dest="outfn", help="output fname [stdout]")
    parser.add_option(
        "-d",
        dest="minDepth",
        default=5,
        type=int,
        help="""minimal depth; note both samples need to have pass depth filtering [%default]"""
    )
    parser.add_option("-m",
                      dest="minFreq",
                      default=0.8,
                      type=float,
                      help="min frequency of alternative base [%default]")
    # The three flags below default to True; passing the flag stores False.
    parser.add_option("-n",
                      dest="indels",
                      default=True,
                      action="store_false",
                      help="ignore indels [%default]")
    parser.add_option("-b",
                      dest="bothStrands",
                      default=True,
                      action="store_false",
                      help="report events confirmed by single strand algs")
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o), ))

    if not args:
        parser.error("At least one vcf file has to be specified!")

    for fn in args:
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # if annotation
        # load genome
        if not o.fasta:  # fasta has to be provided
            # Was `parser.errer`, which raised AttributeError instead of
            # printing the usage error.
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # load genome annotation
        if not os.path.isfile(o.gtf):  # check if correct file
            parser.error("No such file: %s" % o.gtf)
        # load gtf/gff
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n" %
                             (len(id2gene), o.gtf))

    # load possible SNPs coordinates
    coords = load_vcf(args, o.indels)
    # check with mpileup
    check_snps(coords, o.bam1, o.bam2, o.fasta, o.outfn, ctg2cds, id2gene,
               ctg2seq, o.minDepth, o.minFreq, o.indels, o.bothStrands)