def __init__(self, verbose=False):
    Analysis.__init__(self, verbose=verbose)
    # Settings:
    self.tries = 20            # number of repetitions for a certain signal height
    self.min_percent = 0.001   # quantiles
    self.max_percent = 100     # quantiles
    self.binning = 100
    self.minimum_statistics = 100
    self.extremaconfiguration = "1.04 / 55 / 0.97 / 45"
def __init__(self, settingsFile, analysisId, genome=None, expType=None):
    Analysis.__init__(self, settingsFile, analysisId=analysisId, genome=genome)
    if expType is not None:
        self.type = expType
    self._resultsDir = None  # Outside of galaxy and tmpDir struct. Same as inputFile location.
    self._stayWithinGalaxy = self._settings.getBoolean('stayWithinGalaxy', False)
    self._galaxyInputs = {}      # May be in galaxy-dist/database/files or else a symlinked lib
    self._galaxyOutputs = {}     # In galaxy-dist/database/files
    self._nonGalaxyOutputs = {}  # In resultsDir (same directory as the input files)
    self._fileSets = {
        'galaxyInput': self._galaxyInputs,
        'galaxyOutput': self._galaxyOutputs,
        'nonGalaxyInput': self._inputFiles,
        'nonGalaxyOutput': self._nonGalaxyOutputs,
        'target': self._targetOutput,
        'intermediate': self._interimFiles,
    }
    self._deliverToGalaxyKeys = None
    self.createAnalysisDir()  # The encode pipeline creates this via the manifest.
    self.declareLogFile()
def __init__(self, settingsFile, manifestFile, resume=0):
    manifest = Settings(manifestFile)
    Analysis.__init__(self, settingsFile, manifest['expName'])
    self.resume = resume
    self.name = manifest['expName']
    self.dataType = manifest['dataType']
    self.readType = manifest['readType']
    self.replicates = []
    self.json = {}
    if self.readType == 'single':
        if 'fileRep1' in manifest:
            self.replicates.append(1)
            self.registerInputFile('tagsRep1.fastq', manifest['fileRep1'])
        if 'fileRep2' in manifest:
            self.replicates.append(2)
            self.registerInputFile('tagsRep2.fastq', manifest['fileRep2'])
    elif self.readType == 'paired':
        if 'fileRd1Rep1' in manifest:
            self.replicates.append(1)
            self.registerInputFile('tagsRd1Rep1.fastq', manifest['fileRd1Rep1'])
            self.registerInputFile('tagsRd2Rep1.fastq', manifest['fileRd2Rep1'])
        if 'fileRd1Rep2' in manifest:
            self.replicates.append(2)
            self.registerInputFile('tagsRd1Rep2.fastq', manifest['fileRd1Rep2'])
            self.registerInputFile('tagsRd2Rep2.fastq', manifest['fileRd2Rep2'])
    self.interimDir = None
    self.targetDir = None
    self.pipeline = None
    if self.dataType == 'DNAse':
        self.pipeline = DnasePipeline(self)
    else:
        pass
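# Illustrative manifest for the constructor above. The key names are taken directly from
# the lookups in __init__; the concrete on-disk format parsed by Settings() is an
# assumption here, shown only as a sketch:
#
#   expName      exp001
#   dataType     DNAse
#   readType     paired
#   fileRd1Rep1  /path/to/rep1_read1.fastq
#   fileRd2Rep1  /path/to/rep1_read2.fastq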
def view_waveform(self, start=0, end=None, precision=50):
    """
    cat: info
    desc: show the waveform of this audio
    args:
        [start: seconds/beats to begin view window. default beginning]
        [end: seconds/beats to end view window. -1 selects end. default end]
        [precision: percent of how detailed the plot should be. default 50]
    """
    start = inpt_validate(start, 'beatsec')
    if (end is None) or (end == "-1"):
        end = self.size_samps()
    else:
        end = inpt_validate(end, 'beatsec')
        if end.samples_value >= self.size_samps():
            end = self.size_samps()
    if end <= start:
        err_mess("End cannot be before or equal to start")
        return
    precision = inpt_validate(precision, 'pcnt', allowed=[5, 10000])
    info_block("Generating waveform at {0}%...".format(precision))
    anlsys = Analysis(self, start=start, end=end)
    frame_len = (end - start) / (precision * 2)
    anlsys.set_frame_lengths(frame_len)
    left = anlsys.arr[:, 0]
    right = anlsys.arr[:, 1]
    anlsys.plot(left, right, fill=True)
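# Hypothetical call sites for view_waveform (the owning audio class is not shown in this
# snippet; "rec" stands in for any instance that provides size_samps() and the Analysis
# hooks used above):
#
#   rec.view_waveform()                                # whole file at default 50% detail
#   rec.view_waveform(start=4, end=-1, precision=80)   # from beat/second 4 to the end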
def __init__(self, analysis=None, test_campaign=None, dut=None, begin=None, end=None, averaging=None, verbose=None):
    Analysis.__init__(self, test_campaign if analysis is None else analysis.TCString, verbose=verbose, sub_dir='currents')

    # Settings
    self.Averaging = averaging
    self.TimeZone = timezone('Europe/Zurich')
    self.DataDir = join(self.TCDir, 'hv')

    # Config
    self.Ana = analysis
    self.IsCollection = hasattr(analysis, 'Runs')
    self.Type = self.Ana.Type if analysis is not None and self.IsCollection else 'None'
    self.RunSelection = RunSelector(testcampaign=self.TCString)
    self.RunLogs = self.RunSelection.RunInfos
    self.Run = self.RunSelection.Run if analysis is None else self.Ana.FirstAnalysis.Run if self.IsCollection else self.Ana.Run
    if self.IsCollection:
        self.Runs = self.Ana.Runs  # required for plotting
        self.RunPlan = self.load_run_plan()  # required for plotting
    self.HVConfig = self.load_parser()
    self.Bias = self.Ana.Bias if hasattr(self.Ana, 'Bias') else None
    self.Draw.ServerDir = analysis.Draw.ServerDir if analysis is not None else None

    # Times
    self.Begin, self.End = self.load_times(begin, end, dut)

    # DUT
    self.DUT = self.init_dut(dut)

    # HV Device Info
    self.Number = self.load_device_number()
    self.Channel = self.load_device_channel()
    self.Name = self.HVConfig.get('HV{}'.format(self.Number), 'name')
    self.Brand = remove_digits(self.Name.split('-')[0])
    self.Model = self.HVConfig.get('HV{}'.format(self.Number), 'model')
    self.Precision = .005 if '237' in self.Name else .05

    # Data
    self.IgnoreJumps = True
    self.Data = self.load_data()
def onFail(self, step):
    """
    Override Analysis.onFail() to include galaxy-specific things.
    """
    # if self.dryRun:
    self.printPaths(log=step.log)  # For posterity
    step.log.out('')  # skip a line
    self.runCmd('ls -l ' + step.dir, dryRun=False, log=step.log)
    step.log.out('')

    retVal = Analysis.onFail(self, step)
    if retVal > 255:  # This case has been returning 0 !!!
        retVal = 55
    if retVal == 0:
        retVal = 1  # Must fail!
    return retVal
def createAnalysisDir(self):
    Analysis.createAnalysisDir(self)
    self.interimDir = self.dir + 'interim/'
    os.mkdir(self.interimDir)
    self.targetDir = self.dir + 'target/'
    os.mkdir(self.targetDir)
def onFail(self, step):
    self.pipeline.stop()
    Analysis.onFail(self, step)
    raise Exception('just failing')
    'wmintaunu': {
        'data_path': '../data/test_mc16a_wmintaunu/*/*.root',
        # 'data_path': '/data/atlas/HighMassDrellYan/test_mc16a/wmintaunu_*/*.root',
        'cutfile_path': '../options/cutfile_EXAMPLE.txt',
        'TTree_name': 'truth',
        'year': '2015+2016',
        'hard_cut': r'Muon $|#eta|$',
        'lepton': 'tau',
        'label': r'$W^-\rightarrow\tau\nu\rightarrow\mu\nu$',
    }
}

my_analysis = Analysis(datasets, analysis_label='test_analysis', force_rebuild=False,
                       log_level=10, log_out='both', timedatelog=False, separate_loggers=False)

my_analysis.print_latex_table(['wminmunu', 'wmintaunu'])
# my_analysis.apply_cuts()
# my_analysis.merge_datasets('wminmunu', 'wmintaunu', apply_cuts=r'Muon $|#eta|$')

my_analysis.plot_hist(['wminmunu', 'wmintaunu'], 'MC_WZmu_el_eta_born',
                      bins=(30, -5, 5), weight='truth_weight',
                      normalise='lumi', lepton='muon', yerr='rsumw2',
def run(self):
    if not len(self.Runs):
        return info('There are no runs to convert :-)')
    self.multi() if self.Multi else self.auto_convert()


if __name__ == '__main__':

    parser = ArgumentParser()
    parser.add_argument('-m', action='store_true', help='turn parallel processing ON')
    parser.add_argument('-tc', nargs='?', default=None)
    parser.add_argument('s', nargs='?', default=None, help='run number where to start, default [None], = stop if no end is provided', type=int)
    parser.add_argument('e', nargs='?', default=None, help='run number where to stop, default [None]')
    parser.add_argument('-v', action='store_false', help='turn verbose OFF')
    parser.add_argument('-t', action='store_true', help='turn test mode ON')
    parser.add_argument('-pad', action='store_true', help='convert only pad runs')
    parser.add_argument('-pixel', action='store_true', help='convert only pixel runs')
    args = parser.parse_args()

    from src.analysis import Analysis

    z = AutoConvert(args.m, args.s, args.e, Analysis.find_testcampaign(args.tc),
                    'pad' if args.pad else 'pixel' if args.pixel else None, args.v)
    if not args.t:
        if len(z.Runs):
            print_banner(f'Starting {"multi" if z.Multi else "auto"} conversion for runs {z.Runs[0]} - {z.Runs[-1]}', color='green')
            z.run()
            print_banner('Finished Conversion!', color='green')
        else:
            info('There is nothing to convert :-)\n', blank_lines=1)
    plot.efficiency(arange(e.size), e, make_bins(e.size, n=n))


def get_efficiencies():
    return [get_raw_efficiency(i) for i in range(z.NPlanes)]


if __name__ == '__main__':

    parser = ArgumentParser()
    parser.add_argument('run')
    parser.add_argument('-tc', nargs='?', default=None)
    args = parser.parse_args()

    from src.analysis import Analysis

    tc = Analysis.find_testcampaign(args.tc)

    if isint(args.run):
        z = Run(int(args.run), tc)
        t = z.Tree
    else:
        rootfile = TFile(args.run)
        try:
            run = int(argv[1].split('/')[-1].strip('.root').split('00')[-1])
        except (IndexError, ValueError):
            if argv[1].endswith('Tracks.root'):
                run = int(argv[1].split('/')[-1].strip('_withTracks.roottest'))
            elif 'Tracked' in argv[1]:
                run = int(argv[1].split('/')[-1].strip('.root').strip('TrackedRun'))
            tc = remove_letters(Path(args.run).absolute().parts[3]).replace('_', '') if 'psi' in args.run else tc
        else:
def __lint_dir(directory: str) -> None:
    '''
    Extract the `.md` files from a directory, then lint and format all of them.

    Args:
        directory (str): Directory containing the `.md` files.

    Raises:
        FileNotFoundError: `.md` file not found.
    '''
    save_file_dir = click.prompt('save directory.', default=directory)
    md_set_path = glob(os.path.join(directory, '*.md'))
    if not md_set_path:
        raise FileNotFoundError('`.md` file not found.')
    for md_file in md_set_path:
        old_file_name = os.path.splitext(os.path.basename(md_file))[0]
        new_file_name = click.prompt(
            f'save file name. (read file: {old_file_name}.md)',
            default='lint_' + old_file_name)
        analysis = Analysis(save_file_dir, md_file)
        analysis.check_blank_line()
        analysis.check_title()
        analysis.check_header()
        analysis.check_link(vaild_link=True)
        analysis.check_image()
        analysis.export_md(new_file_name)
def __lint_file(file_path: str) -> None:
    '''
    Lint and format a single file.

    Args:
        file_path (str): The `.md` file to lint and format.

    Raises:
        FileNotFoundError: It does not have the extension `.md`.
    '''
    if os.path.splitext(os.path.basename(file_path))[1] != '.md':
        raise FileNotFoundError('It does not have the extension `.md`.')
    directory = os.path.dirname(file_path)
    save_file_dir = click.prompt('save directory.', default=directory)
    old_file_name = os.path.splitext(os.path.basename(file_path))[0]
    new_file_name = click.prompt(
        f'save file name. (read file: {old_file_name}.md)',
        default='lint_' + old_file_name)
    analysis = Analysis(save_file_dir, file_path)
    analysis.check_blank_line()
    analysis.check_title()
    analysis.check_header()
    analysis.check_link(vaild_link=True)
    analysis.check_image()
    analysis.export_md(new_file_name)
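# Hypothetical invocations of the two helpers above (the click command that dispatches to
# them is not part of this snippet, so these calls are illustrative only):
#
#   __lint_file('./docs/README.md')   # lint a single Markdown file
#   __lint_dir('./docs')              # lint every .md file in a directory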
# Open --------------------------------------------------------------------------------------------------------
if args.open:
    pkg = open_pkl(args.open)
    # set vd
    vd = {'pkg': 'Content in .pkl'}
    args.shell = True

# Analyze -----------------------------------------------------------------------------------------------------
if args.analyze:
    from src.analysis import Analysis

    if args.analyze == 'model':
        # This analyzes the training results stored with each model in the .pkl
        if args.data:
            analysis = Analysis(p_data=args.data)
        else:
            analysis = Analysis(p_data=_d_model_)
        analysis.use_f1 = args.use_f1
        data = analysis.compile_batch_train_results()
        vd = {'data': 'Compiled training data (Pandas dataframe)'}

    if args.analyze == 'precision' and args.pred_data and args.pred_data != 'param':
        # Generate a series of predictions with incremental rounding cutoffs (greater precision)
        Analysis().step_precisions(d_out=args.out, model=m, data=m.datas[-1],
                                   predictions=res['pred'], evaluations=res['eval'])
                   tc=None, dut=None, has_collection=True, return_parser=True, has_verbose=True)
p.add_argument('-s', '--show', action='store_true', help='activate show')
p.add_argument('-ms', '--master_selection', action='store_true', help='run master selection')
p.add_argument('-d', '--diamond', nargs='?', default=None, help='diamond for show runplans')
args = p.parse_args()

from src.analysis import Analysis

z = RunSelector(Analysis.find_testcampaign(args.testcampaign), args.runplan, args.dut, args.verbose)

if args.show:
    if args.runplan is not None:
        print_banner(z.TCString)
        z.select_runs_from_runplan(args.runplan)
        z.show_selected_runs()
    else:
        z.show_run_plans(diamond=args.diamond)
if args.master_selection:
    z.master_selection()
             x_tit='Column DUT', y_tit='Row DUT', canvas=c.cd(2))
    # endregion DRAW
    # ----------------------------------------


if __name__ == '__main__':

    from pixel.run import PixelRun
    from src.converter import Converter
    from src.analysis import Analysis

    # e.g. (489/490, 201610), (147, 201810)
    pp = init_argparser(return_parser=True)
    pp.add_argument('dut', nargs='?', default=None, type=int)
    pp.add_argument('-x', action='store_true')
    pp.add_argument('-y', action='store_true')
    pargs = pp.parse_args()

    this_tc = Analysis.find_testcampaign(pargs.testcampaign)
    zrun = PixelRun(pargs.run, testcampaign=this_tc, load_tree=False, verbose=True)
    z = PixAlignment(Converter(zrun), dut_plane=pargs.dut, mode='x' if pargs.x else 'y' if pargs.y else '')
    z.reload()
def DoSignalHeightScan(self, heights=None, hits_per_height=300000):
    gc.disable()
    starttime = datetime.today()

    # ROOT Logfile:
    # path = "MC/Performance_Results/" + str(starttime)
    path = "MC/Performance_Results/_" + str(self.minimum_statistics) + "_" + str(self.binning) + "_" + str(hits_per_height) + "_"
    os.makedirs(path)
    rootfile = TFile(path + '/MCPerformanceLog.root', 'RECREATE')
    LogTree = TTree('LogTree', 'MC Log Tree')
    RealSignalAmplitude = array('f', [0])
    Repetition = array('i', [0])
    TrueNPeaks = array('i', [0])
    Ninjas = array('i', [0])
    Ghosts = array('i', [0])
    Minimas = array('i', [0])
    RecSA_Quantiles = array('f', [0])
    RecSA_MinMax = array('f', [0])
    LogTree.Branch('RealSignalAmplitude', RealSignalAmplitude, 'RealSignalAmplitude/F')
    LogTree.Branch('Repetition', Repetition, 'Repetition/I')
    LogTree.Branch('TrueNPeaks', TrueNPeaks, 'TrueNPeaks/I')
    LogTree.Branch('Ninjas', Ninjas, 'Ninjas/I')
    LogTree.Branch('Ghosts', Ghosts, 'Ghosts/I')
    LogTree.Branch('Minimas', Minimas, 'Minimas/I')
    LogTree.Branch('RecSA_Quantiles', RecSA_Quantiles, 'RecSA_Quantiles/F')
    LogTree.Branch('RecSA_MinMax', RecSA_MinMax, 'RecSA_MinMax/F')

    # copy config files:
    shutil.copy("Configuration/MonteCarloConfig.cfg", path + "/MonteCarloConfig.cfg")
    shutil.copy("Configuration/AnalysisConfig.cfg", path + "/AnalysisConfig.cfg")

    if heights is None:
        heights = [0.0, 0.05, 0.08, 0.1, 0.125, 0.15, 0.175, 0.2, 0.3, 0.5, 0.8, 1.0]

    # info file:
    infofile = open(path + "/info.txt", "w")
    infofile.write("DoSignalHeightScan\n\n")
    infofile.write("Timestamp: " + str(starttime) + "\n\n")
    infofile.write("Number of Repetitions for each Amplitude: " + str(self.tries) + "\n")
    infofile.write("Number of different Amplitudes: " + str(len(heights)) + "\n")
    infofile.write("Hits per Amplitude: " + str(hits_per_height) + "\n")
    infofile.write("Quantiles: " + str(self.min_percent) + "/" + str(self.max_percent) + "\n")
    infofile.write("Binning: " + str(self.binning) + "\n")
    infofile.write("Minimum Statistics: " + str(self.minimum_statistics) + "\n")
    infofile.write("Extrema Configuration: " + self.extremaconfiguration)

    success_prob = []
    ghost_prob = []
    cycle_nr = 0
    cycles = self.tries * len(heights)
    for height in heights:
        # add more statistics for each height, not just one try..
        fails = 0
        tot_ghosts = 0
        peaks_generated = 0
        for repetition in range(self.tries):
            cycle_nr += 1
            print("\n{0}th repetition with Signal height set to: {1}\n".format(repetition, height))
            run_object = MCRun(validate=False, verbose=self.verbose, run_number=364)
            run_object.MCAttributes['PeakHeight'] = height
            run_object.SetNumberOfHits(hits_per_height)
            print("newAnalysis = Analysis(run_object)")
            newAnalysis = Analysis(run_object, verbose=self.verbose)
            print("newAnalysis.FindMaxima()")
            newAnalysis.FindMaxima(binning=self.binning, minimum_bincontent=self.minimum_statistics)
            print("newAnalysis.FindMinima()")
            newAnalysis.FindMinima(binning=self.binning, minimum_bincontent=self.minimum_statistics)
            npeaks = newAnalysis.ExtremeAnalysis.ExtremaResults['TrueNPeaks']
            ninjas = newAnalysis.ExtremeAnalysis.ExtremaResults['Ninjas']
            ghosts = newAnalysis.ExtremeAnalysis.ExtremaResults['Ghosts']
            maxima = newAnalysis.ExtremeAnalysis.ExtremaResults['FoundMaxima']
            minima = newAnalysis.ExtremeAnalysis.ExtremaResults['FoundMinima']

            # Reconstruct Signal Amplitude:
            if len(maxima) * len(minima) > 0:
                maxbin = newAnalysis.ExtremeAnalysis.Pad.GetBinByCoordinates(*(maxima[0]))
                maxbin.FitLandau()
                minbin = newAnalysis.ExtremeAnalysis.Pad.GetBinByCoordinates(*(minima[0]))
                minbin.FitLandau()
                rec_sa_minmax = maxbin.Fit['MPV'] / minbin.Fit['MPV'] - 1.
            else:
                rec_sa_minmax = -99
            q = array('d', [1. * self.min_percent / 100., 1. * self.max_percent / 100.])
            y = array('d', [0, 0])
            newAnalysis.ExtremeAnalysis.CreateMeanSignalHistogram()
            newAnalysis.ExtremeAnalysis.MeanSignalHisto.GetQuantiles(2, y, q)
            rec_sa_quantiles = y[1] / y[0] - 1.

            # Fill ROOT file:
            RealSignalAmplitude[0] = height
            Repetition[0] = repetition
            TrueNPeaks[0] = npeaks
            Ninjas[0] = ninjas
            Ghosts[0] = ghosts
            Minimas[0] = len(minima)
            RecSA_Quantiles[0] = rec_sa_quantiles
            RecSA_MinMax[0] = rec_sa_minmax
            LogTree.Fill()

            assert npeaks > 0, 'no peak in MC created'
            peaks_generated += npeaks
            fails += ninjas
            tot_ghosts += ghosts
            # self.AddAnalysis(newAnalysis)
            del newAnalysis
            del run_object
            elapsed_time = datetime.today() - starttime
            estimated_time = elapsed_time / cycle_nr * cycles
            remaining_time = estimated_time - elapsed_time
            print("\n\nAPPROXIMATED TIME LEFT: " + str(remaining_time) + "\n")
        success = 1. * (peaks_generated - fails) / peaks_generated
        ghost = 4. * ghosts / self.tries
        success_prob.append(success)
        ghost_prob.append(ghost)

    print("Write ROOT-file")
    rootfile.Write()
    rootfile.Close()
    print("ROOT File written. Write infofile")
    infofile.write("\nTotal Time elapsed: " + str(datetime.today() - starttime))
    infofile.close()
    print("infofile written")

    # canvas = ROOT.TCanvas('canvas', 'canvas')  # HERE IT CRASHES DUE TO MEMORY PROBLEMS
    # canvas.cd()
    # graph1 = ROOT.TGraph()
    # graph1.SetNameTitle('graph1', 'success')
    # graph1.SaveAs(path + "/SuccessGraph.root")
    # graph2 = ROOT.TGraph()
    # graph2.SetNameTitle('graph2', 'ghosts')
    # graph2.SaveAs(path + "/GhostsGraph.root")
    # for i in range(len(heights)):
    #     graph1.SetPoint(i, heights[i], success_prob[i])
    #     graph2.SetPoint(i, heights[i], ghost_prob[i])
    # graph1.Draw('ALP*')
    # graph2.Draw('SAME LP*')
    # self.SavePlots("PerformanceResult", "png", path + "/")

    answer = input('Wanna crash?')
    ROOT.gDirectory.GetList().ls()
    ROOT.gROOT.GetListOfFiles().ls()
    if answer == 'yes':
        gc.collect()
SLACK_URL = args.url
DATA_FILE = f"last_post_data_{args.script_name}.json"
# endregion

# Get data and convert accordingly
cb = Coinbase(Currencies.default(), args.interval)
prices = cb.price_list()
cur_price = prices[0]

# Get history from last runs, use it to work out what test to make
history = History(DATA_FILE)

# Get stats from coinbase data
# If change isn't large enough, then update history and exit
stats = HourData(prices, EMA_NUM_HOURS)
if Analysis.ema_checks(stats, history, EMA_THRESHOLD_PERCENT, EMA_RESET_PERCENT):
    sys.exit(1)
if not Analysis.should_post(history, stats, prices, EMA_THRESHOLD_PERCENT):
    sys.exit(1)

logging.info("Message should be posted, generating attachment")
attachments = Slack.generate_post(prices, stats, Currencies.default())
image_url = SlackImages.get_image(stats.is_diff_positive)

logging.info("Posting to slack")
Slack.post_to_slack(BOT_NAME, image_url, "", attachments, SLACK_URL, SLACK_CHANNEL)

history.price = stats.cur_price
history.rising = stats.is_diff_positive
history.ema_reset = False
def main(argv=None):
    # hardcoded defaults
    RESULT_DIR = '%s%sresult' % (sys.path[0], os.sep)
    PARAM_FILE = '%s%sparameter.conf' % (sys.path[0], os.sep)
    STEPS = ['preprocessing', 'annotate', 'assembly', 'analysis']

    # get the starting time
    starting_time = time.time()

    # set up the argument parser for stdin arguments
    parser = argparse.ArgumentParser(add_help=True)

    # define arguments
    parser.add_argument('input', nargs='+', action='store', help='single or paired input files in <fastq> format')
    parser.add_argument('--version', action='version', version='%(prog)s 0.5')
    parser.add_argument('-v', dest='verbose', action='store_true', default=False, help='more detailed output (default = False)')
    parser.add_argument('-t', dest='threads', type=int, action='store', default=multiprocessing.cpu_count() - 1,
                        help='number of threads to use (default = %d)' % (multiprocessing.cpu_count() - 1))
    parser.add_argument('-p', dest='param', action='store', default=PARAM_FILE, help='use alternative config file (default = parameter.conf)')
    parser.add_argument('-s', dest='skip', action='store', default='', choices=['preprocessing', 'assembly', 'annotation', 'analysis'],
                        help='skip steps in the pipeline (default = None)')
    parser.add_argument('-o', dest='output', action='store', default=RESULT_DIR, help='use alternative output folder')
    parser.add_argument('-a', dest='assembler', default='MetaVelvet', choices=['metavelvet', 'flash', 'both'],
                        help='assembling program to use (default = MetaVelvet)')
    parser.add_argument('-c', dest='annotation', default='both', choices=['metacv', 'blastn', 'both'],
                        help='classifier to use for annotation (default = both)')
    parser.add_argument('--use_contigs', dest='use_contigs', action='store_true', default=False,
                        help='should MetaCV use assembled reads or RAW reads (default = RAW)')
    parser.add_argument('--notrimming', dest='trim', action='store_false', default=True, help='trim and filter input reads? (default = True)')
    parser.add_argument('--noquality', dest='quality', action='store_false', default=True, help='create no quality report (default = True)')
    parser.add_argument('--noreport', dest='krona', action='store_false', default=True,
                        help='create no pie chart with the annotated taxonomical data (default = True)')
    parser.add_argument('--merge', dest='merge_uncombined', action='store_true', default=False,
                        help='merge concatenated reads with not concatenated (default = False)')

    args = parser.parse_args()

    # init the pipeline
    RESULT_DIR = args.output if args.output else RESULT_DIR

    # check if the param file exists
    if os.path.isfile(args.param):
        PARAM_FILE = args.param
    else:
        if os.path.isfile(PARAM_FILE):
            sys.stderr.write('ERROR 3: Parameter File could not be found!\n')
            sys.stderr.write('Use standard Parameter File:\n%s\n\n' % (PARAM_FILE))
        else:
            raise ParamFileNotFound(args.param)

    # check if the input exists
    if not all(os.path.isfile(file) for file in args.input):
        raise InputNotFound(to_string(args.input))

    if __name__ == '__main__':
        # create output dir and log folder
        create_outputdir(RESULT_DIR)
        create_outputdir(RESULT_DIR + os.sep + 'log')

        # create the global settings object
        settings = General(args.threads, args.verbose, args.skip, starting_time, args.trim,
                           args.quality, args.krona, args.use_contigs, args.merge_uncombined,
                           args.assembler, args.annotation, 1)

        # set up the input, outputs and important files
        files = FileSettings(absolute_path(args.input), os.path.normpath(RESULT_DIR), PARAM_FILE)
        exe = Executables(PARAM_FILE)

        # get all skipped steps
        skip = to_string(settings.get_skip())

        try:
            print('hello')
            # START the modules of the pipeline and wait until completion
            if skip in 'preprocessing' and skip:
                skip_msg(skip)
            else:
                # init the preprocessing module
                pre = Preprocess(settings.get_threads(), settings.get_step_number(), settings.get_verbose(),
                                 settings.get_actual_time(), files.get_input(), files.get_logdir(),
                                 exe.get_FastQC(), settings.get_quality(), files.get_quality_dir(),
                                 parse_parameter(FastQC_Parameter(PARAM_FILE)), exe.get_TrimGalore(),
                                 settings.get_trim(), files.get_trim_dir(),
                                 parse_parameter(TrimGalore_Parameter(PARAM_FILE)))
                # run preprocessing functions
                results = pre.manage_preprocessing()
                # update pipeline variables with results
                settings.set_step_number(results[0])
                if len(results) > 1:
                    files.set_input(absolute_path(results[1]))
                    files.set_preprocessed_output(absolute_path(results[1]))

            if skip in 'assembly' and skip:
                skip_msg(skip)
            else:
                # init the assembly module
                assembly = Assembly(settings.get_threads(), settings.get_step_number(), settings.get_verbose(),
                                    settings.get_actual_time(), files.get_logdir(), files.get_input(),
                                    settings.get_assembler(), exe.get_Flash(), files.get_concat_dir(),
                                    parse_parameter(FLASH_Parameter(PARAM_FILE)), settings.get_merge_uncombined(),
                                    exe.get_Velveth(), exe.get_Velvetg(), exe.get_MetaVelvet(),
                                    files.get_assembly_dir(), Velveth_Parameter(PARAM_FILE).get_kmer(PARAM_FILE),
                                    parse_parameter(Velveth_Parameter(PARAM_FILE)),
                                    parse_parameter(Velvetg_Parameter(PARAM_FILE)),
                                    parse_parameter(MetaVelvet_Parameter(PARAM_FILE)))
                # run assembly functions
                results = assembly.manage_assembly()
                # update pipeline variables with results
                settings.set_step_number(results[0])
                files.set_input(absolute_path(results[1]))
                files.set_concatinated_output(absolute_path(results[2]))
                files.set_assembled_output(absolute_path(results[3]))

            if skip in 'annotation' and skip:
                skip_msg(skip)
            else:
                # init the annotation module
                anno = Annotation(settings.get_threads(), settings.get_step_number(), settings.get_verbose(),
                                  settings.get_actual_time(), files.get_logdir(), files.get_input(),
                                  files.get_raw(), settings.get_annotation(), settings.get_use_contigs(),
                                  exe.get_Blastn(), exe.get_Blastn_DB(), exe.get_Converter(),
                                  files.get_blastn_dir(), Blastn_Parameter(PARAM_FILE).outfmt,
                                  parse_parameter(Blastn_Parameter(PARAM_FILE)), exe.get_MetaCV(),
                                  exe.get_MetaCV_DB(), files.get_metacv_dir(),
                                  MetaCV_Parameter(PARAM_FILE).get_seq(), MetaCV_Parameter(PARAM_FILE).get_mode(),
                                  MetaCV_Parameter(PARAM_FILE).get_orf(), MetaCV_Parameter(PARAM_FILE).get_total_reads(),
                                  MetaCV_Parameter(PARAM_FILE).get_min_qual(), MetaCV_Parameter(PARAM_FILE).get_taxon(),
                                  MetaCV_Parameter(PARAM_FILE).get_name())
                # run the annotation functions
                results = anno.manage_annotation()
                settings.set_step_number(results[0])
                files.set_blastn_output(absolute_path(results[1]))
                files.set_metacv_output(absolute_path(results[2]))

            if skip in 'analysis' and skip:
                skip_msg(skip)
            else:
                # init the analysis module
                analysis = Analysis(settings.get_threads(), settings.get_step_number(), settings.get_verbose(),
                                    settings.get_actual_time(), files.get_logdir(), settings.get_annotation(),
                                    files.get_output(), files.get_parsed_db_dir(), files.get_annotated_db_dir(),
                                    files.get_subseted_db_dir(), files.get_krona_report_dir(),
                                    files.get_blastn_output(), files.get_metacv_output(), exe.get_Parser(),
                                    parse_parameter(blastParser_Parameter(PARAM_FILE)),
                                    blastParser_Parameter(PARAM_FILE).get_name(), exe.get_Annotate(),
                                    parse_parameter(Rannotate_Parameter(PARAM_FILE)),
                                    Rannotate_Parameter(PARAM_FILE).get_name(),
                                    Rannotate_Parameter(PARAM_FILE).get_taxon_db(), exe.get_Subset(),
                                    subsetDB_Parameter(PARAM_FILE).get_bitscore(),
                                    subsetDB_Parameter(PARAM_FILE).get_classifier(),
                                    subsetDB_Parameter(PARAM_FILE).get_rank(),
                                    subsetDB_Parameter(PARAM_FILE).get_taxon_db(), exe.get_Krona_Blast(),
                                    parse_parameter(Krona_Parameter(PARAM_FILE)),
                                    Krona_Parameter(PARAM_FILE).get_name(), settings.get_krona(),
                                    exe.get_Perl_lib())
                # run the analysis function
                results = analysis.manage_analysis()
                files.set_parser_output(absolute_path(results[0]))
                files.set_annotated_output(absolute_path(results[1]))

        except KeyboardInterrupt:
            sys.stdout.write('\nERROR 1: Operation cancelled by user!\n')
            sys.exit(1)

        # print ending message
        print_verbose('\nPIPELINE COMPLETE!\n\n')
        print_running_time(settings.get_actual_time())
import src.persistence.article_service as articles_service
from kafka import KafkaConsumer
import src.config as config
import src.constants as constants
from json import loads
from src.producer import Producer
import src.api.service as api
from threading import Thread
from src.analysis import Analysis

TFIDF_TOPIC = 'tfidf-input'
UNIQUE_TOPIC = 'unique-articles-input'

analysis = Analysis()

unique_consumer = KafkaConsumer(
    UNIQUE_TOPIC,
    bootstrap_servers=[config.CONNECTION['host'] + ':' + config.CONNECTION['port']],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    group_id='articles_consumer',
    value_deserializer=lambda x: loads(x.decode(constants.UTF_ENCODING)))

tfidf_producer = Producer(TFIDF_TOPIC)


def format_message(message):
    message['title'] = ' '.join(message['title'])
    message['text'] = ' '.join(message['text'])
    return message
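# Minimal consumption-loop sketch for the setup above, illustrative only: kafka-python's
# KafkaConsumer is iterable and yields records whose .value has already passed through the
# value_deserializer. Forwarding to tfidf_producer is left as a comment because the custom
# Producer API from src.producer is not shown in this snippet.
def consume_unique_articles():
    for record in unique_consumer:
        # record.value is a dict here thanks to the JSON deserializer configured above
        article = format_message(record.value)
        # ... hand `article` to `analysis` / `tfidf_producer` as the real module sees fit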
    #     'datapath': '/data/atlas/HighMassDrellYan/test_mc16a/zzllnunu/*.root',
    #     'cutfile': 'options/jesal_cutflow/cutfile_jesal.txt',
    #     'TTree_name': 'nominal_Loose',
    #     'is_slices': False,
    # },
    # 'zzqqll': {
    #     'datapath': '/data/atlas/HighMassDrellYan/test_mc16a/zzqqll/*.root',
    #     'cutfile': 'options/jesal_cutflow/cutfile_jesal.txt',
    #     'TTree_name': 'nominal_Loose',
    #     'is_slices': False,
    # },
}

my_analysis = Analysis(datasets, analysis_label='jesal_cutflow', force_rebuild=False, log_level=10, log_out='console')

my_analysis.plot_1d(x='mu_mt', bins=(50, 120, 4000), title=r'$W\rightarrow\tau\nu$ (13 TeV)', scaling='xs', log_y=True)

# pipeline
# my_analysis.plot_mass_slices(ds_name='wmintaunu_slices', xvar='MC_WZ_m',
#                              inclusive_dataset='wmintaunu_inclusive', logx=True, to_pkl=True)
# my_analysis.plot_mass_slices(ds_name='wplustaunu_slices', xvar='MC_WZ_m',
#                              inclusive_dataset='wplustaunu_inclusive', logx=True, to_pkl=True)
# my_analysis.plot_mass_slices(ds_name='wminmunu_slices', xvar='MC_WZ_m',
#                              inclusive_dataset='wminmunu_inclusive', logx=True, to_pkl=True)
        'lepton': 'muon',
        'label': r'$W^+\rightarrow\mu\nu$',
    },
    'wplustaunu': {
        'data_path': '/data/atlas/HighMassDrellYan/test_mc16a/wplustaunu/*.root',
        'cutfile_path': '../options/jesal_cutflow/DY_peak.txt',
        'lepton': 'tau',
        'label': r'$W^+\rightarrow\tau\nu\rightarrow\mu\nu$',
    }
}

analysis = Analysis(datasets, 'mutau_compare', log_level=10, log_out='both', timedatelog=False,
                    year='2015+2016', force_rebuild=False, TTree_name='nominal_Loose')

analysis.merge_datasets("wminmunu", "wminmunu_hm", verify=True)
analysis.merge_datasets("wmintaunu", "wmintaunu_hm", verify=True)
analysis.merge_datasets("wplusmunu", "wplusmunu_hm", verify=True)
analysis.merge_datasets("wplustaunu", "wplustaunu_hm", verify=True)

# normalised
analysis.plot_hist(['wminmunu', 'wmintaunu'], 'met_met', weight='reco_weight',
                   title='reco 139fb$^{-1}$', bins=(30, 150, 5000),
# Open --------------------------------------------------------------------------------------------------------
if args.open:
    pkg = open_pkl(args.open)
    # set vd
    vd = {'pkg': 'Content in .pkl'}
    args.shell = True

# Analyze -----------------------------------------------------------------------------------------------------
if args.analyze:
    from src.analysis import Analysis

    if args.analyze == 'model':
        # This analyzes the training results stored with each model in the .pkl
        if args.data:
            analysis = Analysis(p_data=args.data)
        else:
            analysis = Analysis(p_data=_d_model_)
        analysis.use_f1 = args.use_f1
        data = analysis.compile_batch_train_results()
        vd = {'data': 'Compiled training data (Pandas dataframe)'}

# Enable interaction \_____________________________________________________________________________________________
if args.shell:
    interact(var_desc=vd, local=locals())

# For testing new code \___________________________________________________________________________________________
if args.test:
    d = Dummy().init_networks()
    print(' Generated pure network: %s' %
                         out=d.joinpath(step), geo=self.toml_name(self.Steps[i - 1]) if i else None,
                         section=step, cfg='align')
            else:
                warning('geo file already exists!')
        print_elapsed_time(t)

    def recon(self, raw=False):
        """ step 3: based on the alignment generate the tracks with proteus. """
        self.Out.parent.mkdir(exist_ok=True)
        self.run('pt-recon', out=self.Out, cfg=None if raw else self.toml_name())
    # endregion RUN
    # ----------------------------------------


if __name__ == '__main__':

    from src.analysis import Analysis, Dir

    a = Analysis()
    sdir = Path(a.Config.get('SOFTWARE', 'dir')).expanduser().joinpath(a.Config.get('SOFTWARE', 'proteus'))
    f_ = a.BeamTest.Path.joinpath('data', f'run{11:06d}.root')
    z = Proteus(sdir, a.BeamTest.Path.joinpath('proteus'), Dir.joinpath('proteus'), f_,
                a.Config.getint('align', 'max events'), a.Config.getint('align', 'skip events'))
    },
    'wplustaunu': {
        'data_path': '/data/atlas/HighMassDrellYan/mc16a/wplustaunu/*.root',
        'cutfile_path': '../options/joanna_cutflow/DY_peak.txt',
        'lepton': 'tau',
        'label': r'$W^+\rightarrow\tau\nu\rightarrow\mu\nu$',
    }
}

analysis = Analysis(
    datasets,
    'mutau_compare_full',
    data_dir='/data/keanu/framework_outputs/mutau_compare_full/',
    log_level=10,
    log_out='both',
    timedatelog=True,
    year='2015+2016',
    force_rebuild=False,
    TTree_name='truth',
    hard_cut='M_W')

analysis.print_latex_table()

analysis.merge_datasets("wminmunu", "wminmunu_hm")
analysis.merge_datasets("wmintaunu", "wmintaunu_hm")
analysis.merge_datasets("wplusmunu", "wplusmunu_hm")
analysis.merge_datasets("wplustaunu", "wplustaunu_hm")

# =========================
# ===== TRUTH - UNCUT =====