def getassembly(args, parsedict):
    """
    Return the Assembly object the CLI should work on, with the values
    from parsedict applied to it. Does not launch ipcluster.

    args      : parsed CLI namespace; reads .steps, .force and .params.
    parsedict : mapping of param name -> value; must contain the keys
                'project_dir' and 'assembly_name'.

    A new Assembly is created when step 1 is requested together with
    force. Otherwise the saved Assembly is loaded from
    [project_dir]/[assembly_name]; if that raises IPyradWarningExit a new
    Assembly is created, but only when step 1 is among the requested steps.

    Raises IPyradWarningExit when loading fails and step 1 was not
    requested. Calls sys.exit(-1) when set_params raises IndexError.
    """
    ## assembly_name is used to create a new Assembly in the cwd, while
    ## assembly_file (which includes the project dir) is used for loading
    ## an existing one. Putting a full path in the name would "work" but is
    ## potentially dangerous, hence the two separate values.
    project_dir = ip.core.assembly._expander(parsedict['project_dir'])
    assembly_name = parsedict['assembly_name']
    assembly_file = os.path.join(project_dir, assembly_name)

    ## make sure the working directory exists.
    if not os.path.exists(project_dir):
        os.mkdir(project_dir)

    step1_requested = '1' in args.steps
    try:
        if step1_requested and args.force:
            ## step 1 with force: start over with a new assembly
            data = ip.Assembly(assembly_name, cli=True)
        else:
            data = ip.load_json(assembly_file, cli=True)
            data._cli = True
    except IPyradWarningExit:
        ## nothing could be loaded; only step 1 may start from scratch
        if not step1_requested:
            raise IPyradWarningExit(
                " Error: You must first run step 1 on the assembly: {}"
                .format(assembly_file))
        data = ip.Assembly(assembly_name, cli=True)

    ## push every parsed value into the assembly
    for param, value in parsedict.items():
        if param == "assembly_name":
            ## the name is immutable: only hand it to set_params when the
            ## user tried to change it, so that set_params can object.
            if value != data.name:
                data.set_params(param, value)
            continue

        ## all other params are handled by set_params
        try:
            data.set_params(param, value)
        except IndexError:
            print(" Malformed params file: {}".format(args.params))
            print(" Bad parameter {} - {}".format(param, value))
            sys.exit(-1)

    return data
def getassembly(args, parsedict):
    """
    Return the Assembly object the CLI should work on, with the values
    from parsedict applied to it. Does not launch ipcluster.

    args      : parsed CLI namespace; reads .steps and .force.
    parsedict : mapping keyed by param number as a string; '0' holds the
                assembly name and '1' the project directory.

    A new Assembly is created when step 1 is requested together with
    force. Otherwise the saved Assembly is loaded from
    [project_dir]/[assembly_name]; if that raises IPyradWarningExit a new
    Assembly is created, but only when step 1 is among the requested steps.

    Raises IPyradWarningExit when loading fails and step 1 was not
    requested.
    """
    ## the name alone is used to create a new Assembly in the cwd, while
    ## the joined path is used for loading an existing one.
    project_dir = ip.core.assembly.expander(parsedict['1'])
    assembly_name = parsedict['0']
    assembly_file = os.path.join(project_dir, assembly_name)

    ## make sure the working directory exists.
    if not os.path.exists(project_dir):
        os.mkdir(project_dir)

    try:
        start_fresh = '1' in args.steps and args.force
        if start_fresh:
            data = ip.Assembly(assembly_name)
        else:
            data = ip.load_json(assembly_file)
    except IPyradWarningExit:
        ## nothing could be loaded; only step 1 may start from scratch
        if '1' not in args.steps:
            raise IPyradWarningExit(""" Error: Steps >1 ({}) requested but no current assembly found - {} """.format(args.steps, assembly_file))
        data = ip.Assembly(assembly_name)

    ## push every parsed value into the assembly
    name_key = str(0)
    for param, value in parsedict.items():
        ## the name is immutable: only hand it to set_params when the user
        ## tried to change it (set_params will then raise an Exit error).
        if param == name_key and value == data.name:
            continue
        data.set_params(param, value)

    return data
def parse_params(params):
    """ Parse a params file and return an Assembly configured from it.

    params : path to the params file.

    The first line is a header; its first whitespace-delimited token must
    be 6 characters long, and the file must have more than 30 lines.
    Each following line contributes the text before any '##' comment as
    the value of params '1', '2', ... in file order (at most 29 values).

    Returns the Assembly created from param '14' with every parsed value
    applied through set_params.

    Raises AssertionError when the file does not look like a compatible
    params file. The checks are explicit raises rather than `assert`
    statements so that they still run under `python -O`.
    """
    with open(params) as paramsin:
        plines = paramsin.readlines()

    ## an empty file or a blank first line has no header token to inspect
    if not plines or not plines[0].split():
        raise AssertionError("params file error. Format not recognized.")

    ## check header: big version changes can be distinguished by the header
    if len(plines[0].split()[0]) != 6:
        raise AssertionError(
            "params file is not compatible with ipyrad v.{}. "
            .format(ip.__version__)
            + "Create a new params file with: ipyrad -n")

    ## check length
    if len(plines) <= 30:
        raise AssertionError("params file error. Format not recognized.")

    ## make into a dict keyed by the param number as a string
    items = [line.split("##")[0].strip() for line in plines[1:]]
    parsedict = {str(num): val for num, val in zip(range(1, 30), items)}

    ## NOTE(review): this print looks like leftover debugging output; it is
    ## kept because removing it would change what the CLI writes to stdout.
    print('parsedict:\n', parsedict)

    ## create a default Assembly object. datatype is set first because
    ## there may be a preferred order for entering some params, e.g.,
    ## datatype to know if data are paired.
    data = ip.Assembly(parsedict['14'])
    data.set_params("datatype", parsedict['10'])

    ## set_params for all keys in parsedict.
    for param in parsedict:
        data.set_params(param, parsedict[param])

    return data
def test_assembly(data):
    """ Report whether a loaded assembly is behind the current version.

    A throw-away Assembly with the same name is created and its key sets
    are compared with those of `data`. Returns True when the new Assembly
    has a paramsdict key or a _hackersonly key that `data` lacks, and
    False otherwise. Keys present only in `data` do not count.
    """
    reference = ip.Assembly(data.name, quiet=True)

    ## params known to the current version but missing from the loaded one
    current_params = set(reference.paramsdict.keys())
    loaded_params = set(data.paramsdict.keys())
    outdated = bool(current_params - loaded_params)

    ## same comparison for the hackersonly dict
    loaded_hackers = set(data._hackersonly.keys())
    current_hackers = set(reference._hackersonly.keys())
    if current_hackers - loaded_hackers:
        outdated = True

    return outdated
def main():
    """ Command-line entry point for svd4tet.

    Prints the banner, parses the CLI arguments, then either loads an
    existing Assembly from JSON (args.json) or builds a temporary one
    around a sequence file (args.output and args.seq). In the latter case
    sample names are the first whitespace-delimited field of every
    non-blank line after the first line of the sequence file. Finally the
    ipcluster settings are stored, the cluster is initialised and
    ipa.svd4tet.run is invoked through the Assembly's client wrapper.

    Raises IPyradWarningExit when no JSON is given and -o or -s is missing.
    """
    ## not in ipython
    ip.__interactive__ = 0

    header = \
    "\n --------------------------------------------------"+\
    "\n Analysis tools for ipyrad [v.{}]".format(ip.__version__)+\
    "\n svd4tet -- fast quartet and tree inference "+\
    "\n --------------------------------------------------"
    print(header)

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## if JSON, load it
    if args.json:
        data = ip.load_json(args.json)
        ## NOTE(review): self-assignment; at most it verifies that the
        ## attribute exists on the loaded assembly. Kept unchanged.
        data.outfiles.svdinput = data.outfiles.svdinput

    ## else create a tmp assembly for the seqarray
    else:
        if not args.output:
            raise IPyradWarningExit(" -o output_prefix required")
        if not args.seq:
            raise IPyradWarningExit(" -s sequence file required")

        ## create new JSON (Assembly) object
        data = ip.Assembly(args.output, quiet=True)
        data.outfiles.svdinput = args.seq
        data.set_params(1, "./")

        ## parse samples from the sequence file
        names = []
        with open(args.seq, 'r') as infile:
            ## the first line is read and discarded (presumably a header
            ## line -- confirm against the expected input format). The
            ## default keeps an empty file from leaking StopIteration.
            next(infile, None)
            for line in infile:
                fields = line.split()
                ## blank lines (e.g., at the end of the file) hold no name
                if fields:
                    names.append(fields[0])

        ## store as Samples in Assembly
        data.samples = {name: ip.Sample(name) for name in names}

    ## store ipcluster info
    data._ipcluster["cores"] = args.cores
    if args.MPI:
        data._ipcluster["engines"] = "MPI"
    else:
        data._ipcluster["engines"] = "Local"

    ## launch ipcluster and register for later destruction
    data = ipcontroller_init(data)

    ## run svd4tet
    run_args = [data, args.boots, args.method, args.nquartets, args.force]
    data._clientwrapper(ipa.svd4tet.run, run_args, 45)
def getassembly(args, parsedict):
    """
    Return the Assembly object the CLI should work on, with the non-empty
    values from parsedict applied to it. Does not launch ipcluster.

    args      : parsed CLI namespace; reads .force and .steps.
    parsedict : mapping keyed by param number as a string; '1' holds the
                working directory, whose last path component doubles as
                the assembly name.

    Side effect: the process changes directory into the working directory.

    Any exception raised by set_params is reported on stdout and re-raised.
    """
    working_directory = parsedict['1']
    assembly_name = os.path.split(working_directory)[1]
    assembly_file = os.path.join(working_directory, assembly_name)

    ## make sure the working directory exists, then move into it.
    if not os.path.exists(working_directory):
        os.mkdir(working_directory)
    ## NOTE(review): assembly_file was built before this chdir; if the
    ## working directory is a relative path the file is then looked up
    ## relative to the new cwd -- confirm this is intended.
    os.chdir(working_directory)

    ## forcing step 1 means: do not load an existing Assembly
    if args.force and '1' in args.steps:
        data = ip.Assembly(assembly_name)
    else:
        ## try loading an existing one, fall back to a new one
        try:
            data = ip.load.load_assembly(assembly_file, launch=False)
        except AssertionError:
            LOGGER.info(
                "No current assembly found, create new - {}"
                .format(assembly_file))
            data = ip.Assembly(assembly_name)

    ## apply every param that has a value
    for param, value in parsedict.items():
        if not value:
            continue
        try:
            data.set_params(param, value)
        except Exception as inst:
            print(inst)
            print(
                "Bad parameter in the params file - param {} value {}"
                .format(param, value))
            raise

    return data
def get_assembly(self):
    """
    Load the saved Assembly or create a new one, apply the values from
    self.parsedict to it and store it on self.data. Does not launch
    ipcluster.

    A new Assembly is created only when step 1 is among the requested
    steps; in every other case the Assembly is loaded from
    [project_dir]/[assembly_name].json.

    Raises IPyradError when step 1 is requested without force and the
    JSON file already exists.
    """
    project_dir = self.parsedict['project_dir']
    assembly_name = self.parsedict['assembly_name']

    # tolerate an assembly name that already carries the extension
    json_file = os.path.join(project_dir, assembly_name)
    if not json_file.endswith(".json"):
        json_file += ".json"

    # make sure the working directory exists.
    if not os.path.exists(project_dir):
        os.mkdir(project_dir)

    steps = self.args.steps
    if steps and '1' in steps:
        # step 1 starts a new assembly; refuse to clobber a saved one
        # unless force was given
        if not self.args.force and os.path.exists(json_file):
            raise IPyradError(
                "Assembly already exists, use force to overwrite")
        data = ip.Assembly(assembly_name, cli=True)
    else:
        data = ip.load_json(json_file, cli=True)

    # refresh the assembly with the params file values in case they
    # changed; the assembly name itself is never re-set
    for key, param in self.parsedict.items():
        if key != "assembly_name":
            data.set_params(key, param)

    # store it.
    self.data = data
def update_assembly(data):
    """
    Bring a loaded Assembly up to the current version and return it.

    A new Assembly() supplies the current parameter set. Parameters that
    exist only in `data` are reported and dropped, parameters that exist
    only in the current version are reported and added with their default
    values, and parameters present in both keep the value from `data`.
    The _hackersonly dict is always replaced by the current defaults.

    Side effects: prints a report, sleeps for 5 seconds so the user can
    read it, and calls data.save().
    """
    print("##############################################################")
    print("Updating assembly to current version")

    ## New assembly object to update from.
    new_assembly = ip.Assembly("update", quiet=True)

    ## Hackersonly dict gets automatically overwritten
    ## Always use the current version for params in this dict.
    data._hackersonly = deepcopy(new_assembly._hackersonly)

    new_params = set(new_assembly.paramsdict.keys())
    my_params = set(data.paramsdict.keys())

    ## Find all params in loaded assembly that aren't in the new assembly.
    removed_params = my_params.difference(new_params)
    for i in removed_params:
        print("Removing parameter: {} = {}".format(i, data.paramsdict[i]))

    ## Find all params that are in the new paramsdict and not in the old one.
    added_params = new_params.difference(my_params)
    for i in added_params:
        print("Adding parameter: {} = {}".format(i, new_assembly.paramsdict[i]))

    print("\nPlease take note of these changes. Every effort is made to\n"\
          +"ensure compatibility across versions of ipyrad. See online\n"\
          +"documentation for further details about new parameters.")
    time.sleep(5)
    print("##############################################################")

    ## Rebuild the paramsdict whenever the key sets differ in either
    ## direction. The rebuild used to run only when params were added, so a
    ## param announced as removed above survived if nothing was added.
    ## The new dict starts from the current defaults and is populated with
    ## the loaded values, skipping anything that was removed.
    if added_params or removed_params:
        for i in data.paramsdict:
            if i not in removed_params:
                new_assembly.paramsdict[i] = data.paramsdict[i]
        data.paramsdict = deepcopy(new_assembly.paramsdict)

    data.save()
    return data
def parse_params(args):
    """ Read the params file named by args.params into a dict.

    Returns a dict that pairs the paramsdict keys of a default Assembly,
    in order, with the values found on the non-blank lines after the
    header line (the text before any '##' comment, stripped).

    Calls sys.exit with a message when the file cannot be opened.
    Raises IPyradWarningExit when the first token of the header line is
    not 7 characters long, or when the first line holds no token at all.
    """
    ## read the whole params file.
    try:
        with open(args.params) as paramsin:
            plines = paramsin.readlines()
    except IOError:
        sys.exit(" No params file found")

    ## check header: big version changes can be distinguished by the header
    try:
        header_token = plines[0].split()[0]
    except IndexError:
        raise IPyradWarningExit(""" Error: Params file should not have any empty lines at the top of the file. Verify there are no blank lines and rerun ipyrad. Offending file - {} """.format(args.params))

    if len(header_token) != 7:
        raise IPyradWarningExit(""" Error: file '{}' is not compatible with ipyrad v.{}. Please create and update a new params file using the -n argument. For info on which parameters have changed see the changelog: (http://ipyrad.readthedocs.io/releasenotes.html) """.format(args.params, ip.__version__))

    ## collect the values. Blank lines are ignored wherever they occur,
    ## which also covers blank lines at the end of the file.
    items = []
    for line in plines[1:]:
        if line.strip() != "":
            items.append(line.split("##")[0].strip())

    ## the keys come, in order, from a default Assembly
    keys = ip.Assembly('null', quiet=True).paramsdict.keys()
    parsedict = dict((str(key), val) for key, val in zip(keys, items))
    return parsedict
def _flagnew(self):
    """
    Write a default params file named 'params-<new>.txt', where <new> is
    self.args.new, and print a message saying where it was created.
    self.args.force is passed on to both the Assembly and write_params.
    """
    new_name = self.args.new
    force = self.args.force

    # a temporary assembly knows how to write its default params
    tmpassembly = ip.Assembly(new_name, quiet=True, cli=True, force=force)

    # write the new params file
    params_file = "params-{}.txt".format(new_name)
    tmpassembly.write_params(params_file, force=force)

    # print log to screen
    location = os.path.realpath(os.path.curdir)
    print("\n New file 'params-{}.txt' created in {}\n".format(
        new_name, location))
def parse_params(self):
    """
    Read the params file named by self.args.params and store its values
    on self.parsedict. Nothing is returned.

    Raises IPyradError when no params file was given or it does not exist.
    """
    # the params file must be named and must exist
    params_file = self.args.params
    if not params_file or not os.path.exists(params_file):
        raise IPyradError("\n No params file found\n")

    with open(params_file) as paramsin:
        lines = paramsin.readlines()

    # values: text before any '##' comment on each non-blank line after
    # the header line
    vals = []
    for line in lines[1:]:
        if line.strip():
            vals.append(line.split("##")[0].strip())

    # keys, in order, from a tmp assembly; the first character of each
    # stored key is dropped (presumably a leading underscore -- confirm)
    tmp_keys = ip.Assembly('null', quiet=True).params._keys
    keys = [key[1:] for key in tmp_keys]

    # store as a dict
    self.parsedict = dict((str(key), val) for key, val in zip(keys, vals))
def main():
    """ Command-line entry point for ipyrad.

    Parses the CLI arguments and dispatches, in this order, to:
      -n        write a default params file, then exit
      --merge   merge assemblies, then exit
      --download  fetch SRA data, then exit
      -p ...    parse the params file and branch (-b), run steps (-s)
                and/or show results (-r)

    Before dispatching it removes a stale debug flag file, optionally
    enables debug mode, and appends the header, start time, arguments and
    platform info to the debug log file.

    Exits through sys.exit on the -n, merge and download paths and on the
    "params file without an action" usage error.
    Raises IPyradWarningExit when --MPI is given without -c.
    """
    ## turn off traceback for the CLI
    ip.__interactive__ = 0

    ## Check for a new version on anaconda
    _check_version()

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## Turn the debug output written to ipyrad_log.txt up to 11!
    ## Clean up the old one first, it's cleaner to do this here than
    ## at the end (exceptions, etc)
    if os.path.exists(ip.__debugflag__):
        os.remove(ip.__debugflag__)

    if args.debug:
        print("\n ** Enabling debug mode ** ")
        ip._debug_on()
        ## make sure debug mode is switched off again when the process ends
        atexit.register(ip._debug_off)

    ## create new paramsfile if -n
    if args.new:
        ## Create a tmp assembly, call write_params to make default params.txt
        try:
            tmpassembly = ip.Assembly(args.new, quiet=True, cli=True)
            tmpassembly.write_params("params-{}.txt".format(args.new),
                                     force=args.force)
        except Exception as inst:
            print(inst)
            sys.exit(2)

        print("\n New file 'params-{}.txt' created in {}\n".\
              format(args.new, os.path.realpath(os.path.curdir)))
        ## NOTE(review): the success path exits with status 2 as well.
        sys.exit(2)

    ## if params then must provide action argument with it
    if args.params:
        if not any([args.branch, args.results, args.steps]):
            print(""" Must provide action argument along with -p argument for params file. e.g., ipyrad -p params-test.txt -r ## shows results e.g., ipyrad -p params-test.txt -s 12 ## runs steps 1 & 2 e.g., ipyrad -p params-test.txt -b newbranch ## branch this assembly """)
            sys.exit(2)

    ## an action without a params file only prints a usage message.
    ## NOTE(review): unlike the check above there is no sys.exit here, so
    ## execution continues (header, log write, merge/download if given).
    if not args.params:
        if any([args.branch, args.results, args.steps]):
            print(""" Must provide params file for branching, doing steps, or getting results. 
e.g., ipyrad -p params-test.txt -r ## shows results e.g., ipyrad -p params-test.txt -s 12 ## runs steps 1 & 2 e.g., ipyrad -p params-test.txt -b newbranch ## branch this assembly """)

    ## if branching, or merging do not allow steps in same command
    ## print spacer
    if any([args.branch, args.merge]):
        args.steps = ""
        print("")

    ## always print the header when doing steps
    header = \
    "\n -------------------------------------------------------------"+\
    "\n ipyrad [v.{}]".format(ip.__version__)+\
    "\n Interactive assembly and analysis of RAD-seq data"+\
    "\n -------------------------------------------------------------"

    ## Log the current version. End run around the LOGGER
    ## so it'll always print regardless of log level.
    with open(ip.__debugfile__, 'a') as logfile:
        logfile.write(header)
        logfile.write("\n Begin run: {}".format(time.strftime("%Y-%m-%d %H:%M")))
        logfile.write("\n Using args {}".format(vars(args)))
        logfile.write("\n Platform info: {}".format(os.uname()))

    ## if merging just do the merge and exit
    if args.merge:
        print(header)
        merge_assemblies(args)
        ## NOTE(review): exits with status 1 after the merge call returns.
        sys.exit(1)

    ## if download data do it and then exit. Runs single core in CLI.
    if args.download:
        ## an optional second value names the download directory
        if len(args.download) == 1:
            downloaddir = "sra-fastqs"
        else:
            downloaddir = args.download[1]
        sratools_download(args.download[0], workdir=downloaddir, force=args.force)
        ## NOTE(review): exits with status 1 after the download call returns.
        sys.exit(1)

    ## create new Assembly or load existing Assembly, quit if args.results
    elif args.params:
        parsedict = parse_params(args)

        if args.branch:
            branch_assembly(args, parsedict)

        elif args.steps:
            ## print header
            print(header)

            ## Only blank the log file if we're actually going to run a new
            ## assembly. This used to be in __init__, but had the side effect
            ## of occasionally blanking the log file in an undesirable fashion
            ## for instance if you run a long assembly and it crashes and
            ## then you run `-r` and it blanks the log, it's crazymaking.
            ## The log is only reset once it is larger than 50000000 bytes.
            if os.path.exists(ip.__debugfile__):
                if os.path.getsize(ip.__debugfile__) > 50000000:
                    with open(ip.__debugfile__, 'w') as clear:
                        clear.write("file reset")

            ## run Assembly steps
            ## launch or load assembly with custom profile/pid
            data = getassembly(args, parsedict)

            ## set CLI ipcluster terms
            data._ipcluster["threads"] = args.threads

            ## if ipyclient is running (and matched profile) then use that one
            if args.ipcluster:
                ipyclient = ipp.Client(profile=args.ipcluster)
                data._ipcluster["cores"] = len(ipyclient)

            ## if not then we need to register and launch an ipcluster instance
            else:
                ## set CLI ipcluster terms
                ipyclient = None
                data._ipcluster["cores"] = args.cores if args.cores else detect_cpus()
                data._ipcluster["engines"] = "Local"
                if args.MPI:
                    data._ipcluster["engines"] = "MPI"
                    if not args.cores:
                        raise IPyradWarningExit("must provide -c argument with --MPI")

                ## register to have a cluster-id with "ip- name"
                data = register_ipcluster(data)

            ## set to print headers
            data._headers = 1

            ## run assembly steps; args.steps is a string of step digits
            ## (e.g., "12"), so list() yields one entry per step
            steps = list(args.steps)
            data.run(
                steps=steps,
                force=args.force,
                preview=args.preview,
                show_cluster=1,
                ipyclient=ipyclient)

        ## results are shown after branching or running steps, if requested
        if args.results:
            showstats(parsedict)
make_stats(data, raws) finally: ## cleans up chunk files and stats pickles tmpfiles = glob.glob(os.path.join(data.dirs.fastqs, "chunk*")) tmpfiles += glob.glob(os.path.join(data.dirs.fastqs, "tmp_*.gz")) tmpfiles += glob.glob(os.path.join(data.dirs.fastqs, "*.pickle")) if tmpfiles: for tmpfile in tmpfiles: os.remove(tmpfile) if __name__ == "__main__": ## run test import ipyrad as ip #from ipyrad.core.assembly import Assembly ## get current location PATH = os.path.abspath(os.path.dirname(__file__)) ## get location of test files IPATH = os.path.dirname(os.path.dirname(PATH)) DATA = os.path.join(IPATH, "tests", "test_rad") TEST = ip.Assembly("test-demultiplex") #TEST = ip.load_assembly(os.path.join(DATA, "testrad")) TEST.set_params(1, "./") TEST.set_params(2, "./tests/data/sim_rad_test_R1_.fastq.gz") TEST.set_params(3, "./tests/data/sim_rad_test_barcodes.txt") TEST.step1()
def load_json(path, quiet=False):
    """ Load a JSON-serialized Assembly and rebuild it as an object of the
    current Assembly format.

    path  : location of the saved assembly, tried both as given and with
            '.json' appended.
    quiet : when False, the assembly name and saved path are printed.

    Returns a new Assembly ("null") populated with the saved params,
    hackersonly values, shared attributes, svd results (if any) and
    Sample objects with their stats, stats_dfs and files.

    Raises IPyradWarningExit when no candidate file could be read, or when
    the saved assembly holds no samples.

    NOTE(review): relies on Python 2 dict behaviour (.iteritems(), and
    indexing the list returned by .keys()).
    """

    ## load the JSON string and try with name+.json
    ## NOTE(review): both candidates are always tried. There is no break,
    ## so a successful read of `path` replaces one of `path`+".json", and
    ## `inpath` always ends up as the last candidate. The messages further
    ## down that format `inpath` depend on that.
    checkfor = [path+".json", path]
    for inpath in checkfor:
        inpath = inpath.replace("~", os.path.expanduser("~"))
        try:
            with open(inpath, 'r') as infile:
                ## uses _tup_and_byte to ensure ascii and tuples are correct
                fullj = json.loads(infile.read(), object_hook=_tup_and_byte)
        except IOError:
            pass

    ## create a new empty Assembly
    ## If neither candidate could be read, `fullj` was never bound and the
    ## first lookup raises UnboundLocalError, which is turned into the
    ## "could not find" exit below.
    try:
        oldname = fullj["assembly"].pop("name")
        olddir = fullj["assembly"]["dirs"]["project"]
        oldpath = os.path.join(olddir, os.path.splitext(oldname)[0]+".json")
        null = ip.Assembly(oldname, quiet=True)

    except (UnboundLocalError, AttributeError) as inst:
        raise IPyradWarningExit(""" Could not find saved Assembly file (.json) in expected location. Checks in: [project_dir]/[assembly_name].json Checked: {} """.format(inpath))

    ## print msg with shortpath
    if not quiet:
        oldpath = oldpath.replace(os.path.expanduser("~"), "~")
        print(" loading Assembly: {}".format(oldname))
        print(" from saved path: {}".format(oldpath))

    ## First get the samples. Create empty sample dict of correct length
    samplekeys = fullj["assembly"].pop("samples")
    null.samples = {name: "" for name in samplekeys}

    ## Next get paramsdict and use set_params to convert values back to
    ## the correct dtypes. Allow set_params to fail because the object will
    ## be subsequently updated by the params from the params file, which may
    ## correct any errors/incompatibilities in the old params file
    oldparams = fullj["assembly"].pop("paramsdict")
    for param, val in oldparams.iteritems():
        ## a fix for backward compatibility with deprecated options
        if param not in ["assembly_name", "excludes", "outgroups"]:
            try:
                null.set_params(param, val)
            except IPyradWarningExit as inst:
                #null.set_params(param, "")
                LOGGER.warning(""" Load assembly error setting params. Not critical b/c new params file may correct the problem. 
Recorded here for debugging: {} """.format(inst))

    ## Import the hackersonly dict. In this case we don't have the nice
    ## set_params so we're shooting from the hip to reset the values
    try:
        oldhackersonly = fullj["assembly"].pop("_hackersonly")
        for param, val in oldhackersonly.iteritems():
            ## both branches store the value as-is (None stays None)
            if val == None:
                null._hackersonly[param] = None
            else:
                null._hackersonly[param] = val

    except Exception as inst:
        LOGGER.warning(""" Load assembly error resetting hackersonly dict element. We will just use the default value in the current assembly.""")
        #Here was the param that failed: {} - {}
        #The error: {}
        #""".format(param, val, inst))

    ## Check remaining attributes of Assembly and Raise warning if attributes
    ## do not match up between old and new objects
    newkeys = null.__dict__.keys()
    oldkeys = fullj["assembly"].keys()

    ## find shared keys and deprecated keys
    sharedkeys = set(oldkeys).intersection(set(newkeys))
    lostkeys = set(oldkeys).difference(set(newkeys))

    ## raise warning if there are lost/deprecated keys
    if lostkeys:
        LOGGER.warning(""" load_json found {a} keys that are unique to the older Assembly. - assembly [{b}] v.[{c}] has: {d} - current assembly is v.[{e}] """.format(a=len(lostkeys), b=oldname, c=fullj["assembly"]["_version"], d=lostkeys, e=null._version))

    ## load in remaining shared Assembly attributes to null
    for key in sharedkeys:
        null.__setattr__(key, fullj["assembly"][key])

    ## load in svd results if they exist
    try:
        if fullj["assembly"]["svd"]:
            null.__setattr__("svd", fullj["assembly"]["svd"])
            null.svd = ObjDict(null.svd)
    except Exception:
        LOGGER.debug("skipping: no svd results present in old assembly")

    ## Now, load in the Sample objects json dicts
    sample_names = fullj["samples"].keys()
    if not sample_names:
        raise IPyradWarningExit(""" No samples found in saved assembly. If you are just starting a new assembly the file probably got saved erroneously, so it's safe to try removing the assembly file (e.g., rm {}.json) and restarting. 
If you fully completed step 1 and you see this message you should probably contact the developers. """.format(inpath))

    ## the first saved sample serves as the template for which keys the
    ## older Samples carry
    sample_keys = fullj["samples"][sample_names[0]].keys()
    stats_keys = fullj["samples"][sample_names[0]]["stats"].keys()
    stats_dfs_keys = fullj["samples"][sample_names[0]]["stats_dfs"].keys()
    ind_statkeys = \
        [fullj["samples"][sample_names[0]]["stats_dfs"][i].keys() \
        for i in stats_dfs_keys]
    ind_statkeys = list(itertools.chain(*ind_statkeys))

    ## check against a null sample
    nsamp = ip.Sample()
    newkeys = nsamp.__dict__.keys()
    newstats = nsamp.__dict__["stats"].keys()
    newstatdfs = nsamp.__dict__["stats_dfs"].keys()
    ## NOTE(review): `.values` is read from the result of .keys(), which
    ## presumably is a pandas Index here (stats_dfs entries being pandas
    ## objects) -- confirm against ip.Sample.
    newindstats = [nsamp.__dict__["stats_dfs"][i].keys() for i in newstatdfs]
    newindstats = list(itertools.chain(*[i.values for i in newindstats]))

    ## different in attributes?
    diffattr = set(sample_keys).difference(newkeys)
    diffstats = set(stats_keys).difference(newstats)
    diffindstats = set(ind_statkeys).difference(newindstats)

    ## Raise warning if any oldstats were lost or deprecated
    alldiffs = diffattr.union(diffstats).union(diffindstats)
    if any(alldiffs):
        LOGGER.warning(""" load_json found {a} keys that are unique to the older Samples. - assembly [{b}] v.[{c}] has: {d} - current assembly is v.[{e}] """.format(a=len(alldiffs), b=oldname, c=fullj["assembly"]["_version"], d=alldiffs, e=null._version))

    ## save stats and statsfiles to Samples
    for sample in null.samples:
        ## create a null Sample
        null.samples[sample] = ip.Sample()

        ## save stats
        sdat = fullj["samples"][sample]['stats']
        ## Reorder the keys so they ascend by step, only include
        ## stats that are actually in the sample. newstats is a
        ## list of the new sample stat names, and stats_keys
        ## are the names of the stats from the json file.
        ## (the filter yields the same list on every pass after the first)
        newstats = [x for x in newstats if x in stats_keys]
        null.samples[sample].stats = pd.Series(sdat).reindex(newstats)

        ## save stats_dfs, ordered like the null Sample's stats_dfs entry
        for statskey in stats_dfs_keys:
            null.samples[sample].stats_dfs[statskey] = \
                pd.Series(fullj["samples"][sample]["stats_dfs"][statskey])\
                .reindex(nsamp.__dict__["stats_dfs"][statskey].keys())

        ## save Sample files
        for filehandle in fullj["samples"][sample]["files"].keys():
            null.samples[sample].files[filehandle] = \
                fullj["samples"][sample]["files"][filehandle]

    ## build the Assembly object stats_dfs
    for statskey in stats_dfs_keys:
        indstat = null._build_stat(statskey)
        if not indstat.empty:
            null.stats_dfs[statskey] = indstat

    ## add remaining attributes to null Samples; the four keys discarded
    ## here are left out of the generic copy below
    shared_keys = set(sample_keys).intersection(newkeys)
    shared_keys.discard("stats")
    shared_keys.discard("files")
    shared_keys.discard("stats_files")
    shared_keys.discard("stats_dfs")

    for sample in null.samples:
        ## set the others
        for key in shared_keys:
            null.samples[sample].__setattr__(key, fullj["samples"][sample][key])

    ## ensure objects are object dicts
    null.dirs = ObjDict(null.dirs)
    null.stats_files = ObjDict(null.stats_files)
    null.stats_dfs = ObjDict(null.stats_dfs)
    null.populations = ObjDict(null.populations)
    null.outfiles = ObjDict(null.outfiles)

    return null
def main():
    """ Command-line entry point for ipyrad.

    Parses the CLI arguments and then, in this order: writes a default
    params file and exits (-n); prints parameter info and exits (info);
    or parses the params file (-p) to branch an assembly, run assembly
    steps and/or show results.

    Exits through sys.exit on the -n and info paths and when -p is given
    without a branch, results or steps argument.
    """
    ## turn off traceback for the CLI
    ip.__interactive__ = 0

    ## parse params file input (returns to stdout if --help or --version)
    args = parse_command_line()

    ## -n: write a default params.txt through a tmp assembly, then exit
    if args.new:
        try:
            tmpassembly = ip.Assembly(args.new, quiet=True)
            tmpassembly.write_params(
                "params-{}.txt".format(args.new), force=args.force)
        except Exception as inst:
            print(inst)
            sys.exit(2)

        created_in = os.path.realpath(os.path.curdir)
        print("\n New file `params-{}.txt` created in {}\n".format(
            args.new, created_in))
        sys.exit(2)

    ## a params file is useless without an action argument
    if args.params and not any([args.branch, args.results, args.steps]):
        print(""" Must provide action argument along with -p argument for params file. e.g., ipyrad -p params-test.txt -r ## shows results e.g., ipyrad -p params-test.txt -s 12 ## runs steps 1 & 2 """)
        sys.exit(2)

    ## branching and info do not allow steps in the same command;
    ## print a spacer line
    if any([args.branch, args.info]):
        args.steps = ""
        print("")

    ## header that is printed when doing steps
    header = "".join([
        "\n --------------------------------------------------",
        "\n ipyrad [v.{}]".format(ip.__version__),
        "\n Interactive assembly and analysis of RADseq data",
        "\n --------------------------------------------------",
    ])

    ## info: print the info and exit. The comparison against False is
    ## deliberate: any value that does not compare equal to False enters
    ## this branch, and a falsy one (e.g., an empty value) prints the
    ## general info instead of a single param.
    if not args.info == False:
        if args.info:
            ip.paramsinfo(int(args.info))
        else:
            ip.paramsinfo()
        sys.exit(1)

    ## create new Assembly or load existing Assembly, quit if args.results
    elif args.params:
        parsedict = parse_params(args)

        if args.branch:
            branch_assembly(args, parsedict)

        elif args.steps:
            print(header)

            ## launch or load assembly with custom profile/pid
            data = getassembly(args, parsedict)

            ## if cores was entered, limit cores to this number, otherwise
            ## the assembly keeps whatever it was created with
            if args.cores:
                data.cpus = args.cores

            data._ipcluster["engines"] = "MPI" if args.MPI else "Local"

            ## launch ipcluster and register for later destruction
            data = ipcontroller_init(data)

            ## set to print headers
            data._headers = 1

            ## run assembly steps, one list entry per character of -s
            data.run(
                steps=list(args.steps),
                force=args.force,
                preview=args.preview)

        if args.results:
            showstats(parsedict)
## update sample stats fsamplehits.update(samplehits) fbarhits.update(barhits) fmisses.update(misses) fdbars.update(dbars) statdicts = perfile, fsamplehits, fbarhits, fmisses, fdbars return statdicts if __name__ == "__main__": ## run test import ipyrad as ip #from ipyrad.core.assembly import Assembly ## get current location #PATH = os.path.abspath(os.path.dirname(__file__)) ## get location of test files #IPATH = os.path.dirname(os.path.dirname(PATH)) #DATA = os.path.join(IPATH, "tests", "test_rad") TEST = ip.Assembly("profile_s1") TEST.set_params(1, "./maintest") TEST.set_params(2, "./ipsimdata/sim_rad_test_R1_.fastq.gz") TEST.set_params(3, "./ipsimdata/sim_rad_test_barcodes.txt") print(TEST.cpus) TEST.cpus = 4 TEST.step1()