def output(self):
    """Local target.

    Looks below ``<workdir>/processes/<method>/<kingdom>/CDS`` for files
    whose name ends in ``sig.csv`` and derives the opaver output path from
    the first match (matches are sorted so the choice is stable).

    Returns:
        A ``LocalTarget`` for
        ``<workdir>/processes/opaver/<kingdom>/<method>/<cond_fdr>/opaver.tsv``,
        or ``None`` when no matching file exists.
    """
    dge_dir = os.path.join(self.workdir, "processes", self.method,
                           self.kingdom, "CDS")
    # "**" only recurses when it is a path component of its own; gluing it
    # directly onto the directory name ("CDS**") made it behave like "*".
    pattern = os.path.join(dge_dir, "**", "*sig.csv")
    for sig_file in sorted(glob.glob(pattern, recursive=True)):
        # The part of the file name before "__sig.csv" names the condition.
        cond_fdr = os.path.basename(sig_file).split("__sig.csv")[0]
        opaver_outfile = os.path.join(self.workdir, "processes", "opaver",
                                      self.kingdom, self.method, cond_fdr,
                                      "opaver.tsv")
        return LocalTarget(opaver_outfile)
    return None
def output(self):
    """Map ``(section id, subsection id)`` pairs to catalog ``.rst`` targets.

    Every 'section' tag is combined with every 'subsection' tag. When
    ``self.preview`` is truthy, only the very first pair receives a target.
    """
    session = current_session()
    targets = {}
    pairs_seen = 0
    for section in session.query(OBSTag).filter(OBSTag.type == 'section'):
        subsections = session.query(OBSTag).filter(OBSTag.type == 'subsection')
        for subsection in subsections:
            pairs_seen += 1
            # In preview mode stop after the first pair; later sections
            # hit this check immediately and contribute nothing.
            if pairs_seen > 1 and self.preview:
                break
            rst_path = 'catalog/source/{section}/{subsection}.rst'.format(
                section=strip_tag_id(section.id),
                subsection=strip_tag_id(subsection.id))
            targets[(section.id, subsection.id)] = LocalTarget(rst_path)
    return targets
def __init__(self):
    """Resolve Scala and Scalding locations.

    Each value is read from the ``scalding`` config section; the fallbacks
    come from the ``SCALA_HOME`` / ``SCALDING_HOME`` environment variables
    or, failing that, from paths under ``/usr/share``.
    """
    config = configuration.get_config()

    scala_fallback = os.environ.get('SCALA_HOME', '/usr/share/scala')
    self.scala_home = config.get('scalding', 'scala-home', scala_fallback)

    scalding_fallback = os.environ.get('SCALDING_HOME', '/usr/share/scalding')
    self.scalding_home = config.get('scalding', 'scalding-home',
                                    scalding_fallback)

    # NOTE: these fallbacks hang off the environment/default Scalding home,
    # not off a 'scalding-home' value set in the config file.
    provided_fallback = os.path.join(scalding_fallback, 'provided')
    libjars_fallback = os.path.join(scalding_fallback, 'libjars')
    self.provided_dir = config.get('scalding', 'scalding-provided',
                                   provided_fallback)
    self.libjars_dir = config.get('scalding', 'scalding-libjars',
                                  libjars_fallback)

    self.tmp_dir = LocalTarget(is_tmp=True)
def test_format_newline(self):
    """Check line-ending handling of a target using ``SysNewLine``.

    Mixed ``\\r``, ``\\n`` and ``\\r\\n`` input must read back through the
    target with ``\\n`` only, while the raw file holds ``\\r\\n``.
    """
    target = LocalTarget(self.path, luigi.format.SysNewLine)

    with target.open('w') as writer:
        writer.write(b'a\rb\nc\r\nd')

    with target.open('r') as reader:
        via_target = reader.read()

    with open(self.path, 'rb') as raw_file:
        on_disk = raw_file.read()

    self.assertEqual(b'a\nb\nc\nd', via_target)
    self.assertEqual(b'a\r\nb\r\nc\r\nd', on_disk)
def output(self):
    """Expected output of featureCounts.

    Reads the third column of the GFF file, keeps the supported feature
    types that occur there, and points at the count table of one of them.

    Returns:
        A ``LocalTarget`` for ``<feature>_count.tsv`` inside
        ``<workdir>/processes/featureCounts/<kingdom>``.

    Raises:
        IndexError: if the GFF contains none of the supported feature types.
    """
    supported = ['CDS', 'rRNA', 'tRNA', 'exon', 'gene', 'transcript']
    counts_dir = os.path.join(self.workdir, "processes", "featureCounts",
                              self.kingdom)
    gff_fp = os.path.abspath(self.gff_file)
    gff_table = pd.read_csv(gff_fp, sep="\t", header=None, comment='#')
    present = set(gff_table[2].tolist())
    # Walk the fixed ``supported`` list instead of the set: string sets
    # iterate in a hash-randomised order, which made the chosen feature (and
    # therefore the target path) differ from one interpreter run to the next.
    features = [feat for feat in supported if feat in present]
    return LocalTarget(os.path.join(counts_dir, features[-1] + "_count.tsv"))
def output(self):
    """Expected output of featureCounts.

    Reads the third column of the GFF file, keeps the supported feature
    types that occur there, and points at the count table of one of them.

    Returns:
        A ``LocalTarget`` for ``<out_dir>/<feature>_count.tsv``.

    Raises:
        IndexError: if the GFF contains none of the supported feature types.
    """
    supported = ['CDS', 'rRNA', 'tRNA', 'exon', 'gene', 'transcript']
    gff_fp = os.path.abspath(self.gff)
    gff_table = pd.read_csv(gff_fp, sep="\t", header=None, comment='#')
    present = set(gff_table[2].tolist())
    # Walk the fixed ``supported`` list instead of the set: string sets
    # iterate in a hash-randomised order, which made the chosen feature (and
    # therefore the target path) differ from one interpreter run to the next.
    features = [feat for feat in supported if feat in present]
    return LocalTarget(self.out_dir + "/" + features[-1] + "_count.tsv")
def getTarget(fileName, date, debug, awsAccessKeyId, awsSecretKey):
    """Build the target for ``fileName`` inside a per-day directory.

    The directory name is ``date`` formatted as ``YYYY-MM-DD``. With
    ``debug`` set, the path lives under ``LOCAL_TARGET``, is logged, and a
    ``LocalTarget`` is returned. Otherwise the path lives under
    ``FILE_ROOT`` and an ``S3Target`` is returned, backed by an ``S3Client``
    built from the two AWS credentials.
    """
    day_dir = date.strftime("%Y-%m-%d")

    if not debug:
        remote_path = os.path.join(FILE_ROOT, day_dir, fileName)
        s3_client = S3Client(awsAccessKeyId, awsSecretKey)
        return S3Target(path=remote_path, client=s3_client)

    local_path = os.path.join(LOCAL_TARGET, day_dir, fileName)
    logger.info("Debug - writing to %s", local_path)
    return LocalTarget(local_path)
def valid_io_modes(self, *a, **kw):
    """Return the subset of theoretical modes that ``io.FileIO`` accepts.

    Each candidate from ``self.theoretical_io_modes(*a, **kw)`` is tried
    against an existing temporary file. A mode counts as valid when the
    open succeeds or fails only with ``EEXIST``; modes rejected with
    ``ValueError`` are dropped, and any other ``IOError`` propagates.
    """
    scratch = LocalTarget(is_tmp=True)
    scratch.open('w').close()  # make sure the file exists before probing

    accepted = set()
    for candidate in self.theoretical_io_modes(*a, **kw):
        try:
            io.FileIO(scratch.path, candidate).close()
        except ValueError:
            continue
        except IOError as exc:
            if exc.errno != EEXIST:
                raise
        accepted.add(candidate)
    return accepted
def test_target_compute_dict(tmpdir):
    """``target_compute_dict`` flags a node until its target files exist.

    The node's entry must be ``True`` while none of its three target files
    exist and ``False`` once all of them have been created.
    """
    # open some files
    files = [tmpdir.join(str(i)) for i in range(3)]
    tgts = [LocalTarget(str(s)) for s in files]
    targ = Targeted(lambda x: x, tgts)
    tgt_graph = {targ: []}

    compute = target_compute_dict(tgt_graph)
    assert compute[targ] == True

    # touch the files
    for f in files:
        # Close each handle explicitly instead of relying on garbage
        # collection, so the file is on disk before the next check.
        with f.open("w") as handle:
            handle.write("")

    compute = target_compute_dict(tgt_graph)
    assert compute[targ] == False
def output(self):
    """Return the single non-empty file matching the pattern.

    ``self.pattern`` is globbed inside ``self.dirPath``.

    Returns:
        A ``LocalTarget`` for the one matching file.

    Raises:
        Exception: if the pattern does not match exactly one path, if the
            match is not a regular file, or if the file is empty.
    """
    # Do a fuzzy check on the filenames because it's too much effort to
    # generate the full expected file names
    filePattern = os.path.join(self.dirPath, self.pattern)
    matchingFiles = glob.glob(filePattern)

    if len(matchingFiles) != 1:
        # Report the real count: this branch is also taken for zero matches.
        raise Exception(
            "Something went wrong, expected exactly one file for pattern " +
            self.pattern + " but found " + str(len(matchingFiles)))

    matched = matchingFiles[0]
    if not os.path.isfile(matched):
        raise Exception("Something went wrong, " + matched +
                        " is not a file")
    if not os.path.getsize(matched) > 0:
        raise Exception("Something went wrong, file size is 0 for " +
                        matched)
    return LocalTarget(matched)
def test_tmp(self):
    """Exercise the lifecycle of a temporary ``LocalTarget``.

    The target must not exist before or during the write, must exist once
    the writer is closed, must read back what was written, and its file
    must be gone after the target object is deleted.
    """
    tmp_target = LocalTarget(is_tmp=True)
    self.assertFalse(tmp_target.exists())
    self.assertFalse(os.path.exists(tmp_target.path))

    writer = tmp_target.open('w')
    print('test', file=writer)
    # Nothing is visible at the final path while the writer is still open.
    self.assertFalse(tmp_target.exists())
    self.assertFalse(os.path.exists(tmp_target.path))

    writer.close()
    self.assertTrue(tmp_target.exists())
    self.assertTrue(os.path.exists(tmp_target.path))

    reader = tmp_target.open('r')
    self.assertEqual(reader.readline(), 'test\n')
    reader.close()

    tmp_path = tmp_target.path
    del tmp_target  # should remove the underlying file
    self.assertFalse(os.path.exists(tmp_path))
def test_open_modes(self):
    """Open a temporary target with every mode and check the outcome.

    Valid write and read modes must open and close cleanly; every invalid
    mode must make ``open`` raise. Each mode is printed as it is tried.
    """
    target = LocalTarget(is_tmp=True)

    print('Valid write mode:', end=' ')
    for write_mode in self.valid_write_io_modes_for_luigi():
        print(write_mode, end=' ')
        target.open(write_mode).close()
    print()

    print('Valid read mode:', end=' ')
    for read_mode in self.valid_read_io_modes_for_luigi():
        print(read_mode, end=' ')
        target.open(read_mode).close()
    print()

    print('Invalid mode:', end=' ')
    for bad_mode in self.invalid_io_modes_for_luigi():
        print(bad_mode, end=' ')
        with self.assertRaises(Exception):
            target.open(bad_mode)
    print()
def output(self):
    """Return the JSON target holding this parameter set's scores.

    The five hyper-parameters are rendered into an identifier string, which
    is hashed with SHA-256 to give a fixed-length file name.

    Returns:
        A ``LocalTarget`` at ``<output_folder>/pipeline_cross_val_scores/
        pipeline_cross_val_score__<sha256 hex>.json``.
    """
    file_id = "min_df-{min_df}__" \
              "max_df-{max_df}__" \
              "percentile-{percentile}__" \
              "alpha={alpha}__" \
              "random_state={random_state}".format(
                  min_df=self.min_df,
                  max_df=self.max_df,
                  percentile=self.percentile,
                  alpha=self.alpha,
                  random_state=self.random_state
              )
    # hashlib only accepts bytes on Python 3; passing the text string
    # directly raises TypeError there.
    file_id = hashlib.sha256(file_id.encode("utf-8")).hexdigest()
    task_file = "pipeline_cross_val_score__{}.json".format(file_id)
    scores_path = "{}/pipeline_cross_val_scores/{}"
    return LocalTarget(scores_path.format(self.output_folder, task_file))
def __init__(self, path, file_type='regular', root_dir=None, format=None,
             **kwargs):
    """Wrap either a remote or a local target, depending on ``commons()``.

    ``path`` is resolved against ``root_dir`` when given, otherwise against
    the remote or local root from ``commons()``. In remote mode a
    ``RemoteTarget`` is created (with the SSH port injected into
    ``kwargs``); for ``file_type == 'apk'`` the remote file is additionally
    fetched to a randomly named file in the system temp directory. In local
    mode a ``LocalTarget`` is created. The parent initialiser receives the
    local copy's path for remote APKs and the wrapped target's path
    otherwise.
    """
    self.is_remote = commons().is_remote

    if root_dir:
        full_path = os.path.join(root_dir, path)
    elif self.is_remote:
        full_path = os.path.join(commons().remote_root, path)
    else:
        full_path = os.path.join(commons().local_root, path)

    self.file_type = file_type
    self.format = format

    if self.is_remote:
        ssh_host = commons().SSH_HOST
        kwargs['port'] = commons().SSH_PORT
        self._target = RemoteTarget(full_path, ssh_host, format=format,
                                    **kwargs)
        if file_type == 'apk':
            # create temporary local copy
            local_name = 'luigi-{}-{}.apk'.format(
                os.path.basename(path), random.randint(0, 999999999))
            self.local_path = os.path.join(tempfile.gettempdir(), local_name)
            self._target.get(self.local_path)
    else:
        self._target = LocalTarget(full_path, format=format, **kwargs)

    use_local_copy = self.is_remote and self.file_type == 'apk'
    path = self.local_path if use_local_copy else self._target.path
    super(ExternalFileTarget, self).__init__(path)  # XXX: check if this is right
def run_task(self):
    """Runs the task with fake targets.

    ``self.archive_root`` is packed into a temporary gzipped tarball that is
    handed to a ``CourseContentTask`` through a mocked ``input``. The task
    writes to a ``FakeTarget``, whose buffer is then unpacked into a fresh
    temporary directory (removed on cleanup). The course directory inside
    that extraction root is stored in ``self.output_course_root``.
    """
    extract_root = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, extract_root)

    with tempfile.NamedTemporaryFile() as packed_input:
        with tarfile.open(mode='w:gz', fileobj=packed_input) as tar_writer:
            tar_writer.add(self.archive_root, arcname='')
        # Rewind so the archive can be read back from the start.
        packed_input.seek(0)

        task = obfuscate.CourseContentTask(
            course=sentinel.ignored,
            output_directory=sentinel.ignored,
            data_directory=sentinel.ignored,
            auth_user_path=sentinel.ignored,
            auth_userprofile_path=sentinel.ignored,
        )

        mocked_input = {'data': [LocalTarget(path=packed_input.name)]}
        task.input = MagicMock(return_value=mocked_input)

        captured_output = FakeTarget()
        task.output = MagicMock(return_value=captured_output)
        task.user_info_requirements = get_mock_user_info_requirements()
        reset_user_info_for_testing()

        task.run()

        with tarfile.open(mode='r:gz',
                          fileobj=captured_output.buffer) as tar_reader:
            tar_reader.extractall(extract_root)

    course_dir = get_filename_safe_course_id(self.COURSE_ID)
    self.output_course_root = os.path.join(extract_root, course_dir)
def output(self):
    """Return the success-marker target followed by the parsed-wiki directory target."""
    success_marker = LocalTarget('data/external/wikipedia/parsed-wiki_SUCCESS')
    parsed_dir = LocalTarget('data/external/wikipedia/parsed-wiki/')
    return [success_marker, parsed_dir]
def output(self):
    """Yield the single local target at ``WIKI_DUMP_REDIRECT_PICKLE``."""
    redirect_pickle = LocalTarget(WIKI_DUMP_REDIRECT_PICKLE)
    yield redirect_pickle
def output(self):
    """Return the local target at ``WIKI_DISAMBIGUATION_PAGES``."""
    disambiguation_target = LocalTarget(WIKI_DISAMBIGUATION_PAGES)
    return disambiguation_target
def output(self):
    """Return the local target at ``ALL_WIKI_REDIRECTS``."""
    redirects_target = LocalTarget(ALL_WIKI_REDIRECTS)
    return redirects_target
def output(self):
    """Return the marker-file target for the NLTK download."""
    marker_path = 'data/external/nltk_download_SUCCESS'
    return LocalTarget(marker_path)
def output(self):
    """Return the local target at ``WIKI_INSTANCE_OF_PICKLE``."""
    instance_of_target = LocalTarget(WIKI_INSTANCE_OF_PICKLE)
    return instance_of_target
def output(self):
    """Return a one-element list holding the target at ``WIKI_LOOKUP_PATH``."""
    lookup_target = LocalTarget(WIKI_LOOKUP_PATH)
    return [lookup_target]
def output(self):
    """Return the local target at ``WIKI_TITLES_PICKLE``."""
    titles_target = LocalTarget(WIKI_TITLES_PICKLE)
    return titles_target
def output(self):
    """Return the goalies pickle target, opened with the ``Nop`` format.

    The ``~`` prefix of the path is expanded to the current user's home.
    """
    import os
    pickle_path = os.path.expanduser('~/Temp/luigi/goalies-json.pckl')
    return LocalTarget(pickle_path, format=Nop)
def output(self):
    """Return a local target for the absolute form of ``self.path``."""
    absolute_path = os.path.abspath(self.path)
    return LocalTarget(absolute_path)
def output(self):
    """Return the target for this fold's guesser report pickle.

    The location is resolved by ``AbstractGuesser.output_path`` from the
    guesser module, class and config number.
    """
    report_name = f'guesser_report_{self.fold}.pickle'
    report_path = AbstractGuesser.output_path(
        self.guesser_module, self.guesser_class, self.config_num,
        report_name)
    return LocalTarget(report_path)
def output(self):
    """Return the target for the dataset's ``raw`` location under ``datasets/``."""
    raw_path = "datasets/%s/raw" % self.dataset_name
    return LocalTarget(raw_path)
def output(self):
    """Return the local target at ``WIKIDATA_CLAIMS``."""
    claims_target = LocalTarget(WIKIDATA_CLAIMS)
    return claims_target
def output(self):
    """QC output: the ``<sample>.stats.txt`` target inside ``self.outdir``."""
    stats_name = self.sample + ".stats.txt"
    return LocalTarget(self.outdir + "/" + stats_name)
def output(self):
    """Return the target for the dataset's ``cleaned/cleaned.csv`` file."""
    cleaned_csv = "datasets/%s/cleaned/cleaned.csv" % self.dataset_name
    return LocalTarget(cleaned_csv)