Exemplo n.º 1
0
    def sample_specific(self, done_files=None):
        """Fragment user-supplied sample-specific sequences and write them out.

        Reads every file in SAMPLE_SPECIFIC_DIR whose extension is allowed,
        maps the organism (taken from the file name before the first '.')
        onto the taxonomic tree, and for each configured fragment length
        writes the fragments to <project_dir>/sampled_fasta/<frag_len>/<file>
        via utils.write_fragments.

        :param done_files: kept for interface compatibility; currently unused.
        """
        if done_files is None:  # avoid the shared-mutable-default pitfall
            done_files = []
        my_log = logging.getLogger('train:sample_specific')
        #     check if sample specific data is provided:
        if self.config.settings["sample_specific_dir"] == "":
            my_log.debug('no sample specific data')
            return
        if utils.dir_is_empty(self.config.settings["sample_specific_dir"]):
            my_log.debug('no sample specific data')
            return

        genomes_exclude_ss = []  # never populated here; only used by the summary log below
        n_sequences_ss = 0
        my_log.info("Processing sample specific data (all data will be used)...")

        for e in os.listdir(self.config.settings["sample_specific_dir"]):
            # valid extension?
            if len(self.config.settings["extensions"]) != 0:
                ext = e.split(".")[-1]
                if ext == "" or ext not in self.config.settings["extensions"]:
                    continue
            organism = e.split(".", 1)[0]
            if organism == "":
                my_log.warning("Invalid sample specific file: {} skipping..".format(e))
                continue

            # map organism onto the tree (use it directly if it already is a node)
            if organism in self.nodes:
                node = organism
            else:
                node = self.get_mapped_organism(organism)

            if node is None:
                my_log.info("Could not map {} on the tree".format(organism))
                continue
            elif str(node) == "1":  # presumably taxid 1 (root) — no informative mapping
                my_log.info("Skipping {} due to lack of mapping".format(organism))
                continue

            seq_concat, definition = utils.get_sequence_infos(os.path.join(self.config.settings["sample_specific_dir"], e))

            for i in range(len(self.config.settings["fragment_len"])):
                fl = self.config.settings["fragment_len"][i]
                try:
                    step = self.config.settings["sample_specific_step"][i]
                except IndexError:  # should not happen at all
                    step = fl

                if step == 0 or step is None:
                    step = fl

                if len(seq_concat) < fl:
                    my_log.debug("No sample specific data for organism {o} at frag_len {fl}".format(o=organism, fl=fl))
                    continue

                # BUGFIX: use floor division — true division yields a float
                # and range(float) raises TypeError under Python 3.
                number_frags = (len(seq_concat) - fl) // step
                sample_dir = os.path.join(self.config.settings["project_dir"], "sampled_fasta")
                fl_dir = os.path.join(sample_dir, str(fl))
                fastafile = os.path.join(fl_dir, e)

                utils.write_fragments(fl, step, seq_concat, definition, node, fastafile, range(number_frags))

            #    if self.stat is not None:      #  this is very inefficient at the moment !!!
            #        self.stat.succesfully_written(fastafile)
            #        self.stat.write_backup(self.backupdir)

            n_sequences_ss += 1

        if n_sequences_ss == 0:
            my_log.error("no data processed in SAMPLE_SPECIFIC_DIR")
        if len(genomes_exclude_ss) != 0:
            my_log.info("(excluded {} genomes from SS)".format(str(len(genomes_exclude_ss))))
        my_log.info("{} SS sequences done.".format(str(n_sequences_ss)))
Exemplo n.º 2
0
async def main(loop):
	"""Top-level mod-installation pipeline.

	Phases (each wrapped in a Timer so durations can be logged at the end):
	download mods, extract archives, pre-process, compute per-mod file
	convergence, hard-link the converged files into the game directory,
	purge files that no longer belong, post-process, run BOSS to sort the
	load order, and regenerate plugins.txt.

	``loop`` is the asyncio event loop, used only to build the aiohttp
	ClientSession for downloads.
	"""
	download_timer = Timer()
	extract_timer = Timer()
	mod_converge_timer = Timer()
	apply_converge_timer = Timer()
	purge_timer = Timer()
	preproc_timer = Timer()
	postproc_timer = Timer()
	load_order_timer = Timer()
	enable_plugins_timer = Timer()

	# Install order matters: later mods win file conflicts because they
	# overwrite earlier entries in converged_paths below.
	mod_list = [
		# Install outside of Data/
		FastExit(),
		FourGBPatch(),
		OBSE(),
		ENB(),
		ENBoost(),
		MoreHeap(),
		# OBSE Plugins
		OneTweak(),
		# OBSETester(),
		MenuQue(),
		ConScribe(),
		Pluggy(),
		NifSE(),
		# Performance
		Streamline(),
		OSR(),
		# Necessary Tweaks
		ATakeAllAlsoBooks(),
		DarnifiedUI(),
		DarnifiedUIConfigAddon(),
		HarvestFlora(),
		HarvestContainers(),
		# Textures
		QTP3R(),
		GraphicImprovementProject(),
		ZiraHorseCompilationModpack(),
		RingRetexture(),
		KafeisArmoredCirclets(),
		KoldornsSewerTextures2(),
		KoldornsCaveTextures2(),
		MEAT(),
		BomretTexturePackForShiveringIslesWithUSIP(),
		# Gameplay
		RealisticLeveling(),
		HUDStatusBars(),
		UV3(),
		# Install Last
		INITweaks(),
		ArchiveInvalidationInvalidated(),
	]
	# converged_paths: lowercased relative dest path -> absolute source path.
	# Seed it with the vanilla game files so mods overlay on top of them.
	# (recurse_files appears to yield paths relative to its argument — they
	# are joined onto the root dirs below; confirm in its definition.)
	converged_paths = {}
	path_mod_owner = {}  # which mod owns which path
	for path in recurse_files(Config.VANILLA_DIR):
		converged_paths[str(path).lower()] = Config.VANILLA_DIR / path

	log.info('downloading')
	with download_timer:
		for mod in mod_list:
			async with aiohttp.ClientSession(loop=loop) as session:
				await mod.download(session)

	if False:  # stop after download?  (manual debug toggle)
		log.info('stopping after download')
		return

	log.info('extracting')
	with extract_timer:
		for mod in mod_list:
			await mod.extract()

	if False:  # stop after extract?  (manual debug toggle)
		log.info('stopping after extract')
		return

	log.info('pre-processing')
	with preproc_timer:
		for mod in mod_list:
			await mod.preprocess()

	log.info('calulcating convergance for each mod')
	with mod_converge_timer:
		for mod in mod_list:
			log.info(f'converging {mod.mod_name}')
			# mod.modify() yields (absolute source file, relative dest path)
			# pairs; validate both before recording them.
			for source_path, dest_path in mod.modify():
				if not isinstance(dest_path, Path):
					raise Exception(f'{dest_path} is not a Path!')
				elif dest_path.is_absolute():
					raise Exception(f'{dest_path} is not absolute')
				if not isinstance(source_path, Path):
					raise Exception(f'{source_path} is not a Path!')
				elif not source_path.is_absolute():
					raise Exception(f'{source_path} is not absolute')
				elif not source_path.exists():
					raise Exception(f'{source_path} does not exist, bad modify code')
				elif not source_path.is_file():
					raise Exception(f'{source_path} is not a regular file.')

				# Later mods overwrite earlier ones here (conflict resolution).
				converged_paths[str(dest_path).lower()] = source_path
				path_mod_owner[str(dest_path).lower()] = mod.mod_name

	log.info('applying convergance')
	with apply_converge_timer:
		for dest_path, source_path in converged_paths.items():
			dest_path = Config.game.root_dir / dest_path
			# Re-link only when the destination is missing or is not already
			# the same file as the converged source.
			if not dest_path.exists() or not samefile(str(dest_path), str(source_path)):
				if dest_path.exists():
					dest_path.unlink()  # FIXME move to purged dir?
				dest_path.parent.mkdir(exist_ok=True, parents=True)
				try:
					create_hardlink(str(source_path), str(dest_path))
				except FileNotFoundError:
					raise Exception(f'failed to hard link {source_path} to {dest_path} {source_path} (or {dest_path.parent}) not found')

	log.info('purging')
	with purge_timer:
		# Load the ownership map from the previous run; files we know a mod
		# owned can be deleted outright, unknown files are moved aside.
		try:
			with Config.mod_ownership_path.open('rb') as f:
				old_path_mod_owner = json.load(f)
		except FileNotFoundError:
			old_path_mod_owner = {}
		# Timestamped quarantine dir (':' stripped for filesystem safety).
		purged_root = Config.PURGED_DIR / datetime.now().isoformat().replace(':', '')
		for path in recurse_files(Config.game.root_dir):

			# Keep converged files, config-ish extensions, and tool dirs.
			if (
				str(path).lower() not in converged_paths and
				not path.suffix.lower() in {'.ini', '.cfg', '.json', '.log'} and
				#TODO don't purge xml files unless they're menu files
				not path.parts[0].lower() in {'obmm', 'mopy'}
			):
				if str(path).lower() in old_path_mod_owner:
					# A mod owned it last run: safe to delete.
					(Config.game.root_dir / path).unlink()
				else:
					# Unknown origin: move into the quarantine dir instead.
					purged_path = purged_root / path
					purged_path.parent.mkdir(exist_ok=True, parents=True)
					(Config.game.root_dir / path).rename(purged_path)

		log.info('purging empty directories')
		for d in recurse_dirs(Config.game.root_dir):
			if dir_is_empty(d):
				d.rmdir()

	log.info('postprocessing')
	with postproc_timer:
		for mod in mod_list:
			await mod.postprocess()
	log.info('Done Applying Changes')

	log.info('modifying load order')
	with load_order_timer:
		# Locate BOSS via its uninstall registry key and run it silently
		# ('-s') for this game to sort the plugin load order.
		boss_uninstall_string = get_regkey('HKLM', r'SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\BOSS', 'UninstallString')
		boss_install_location = Path(shlex.split(boss_uninstall_string)[0]).parent
		boss_exe_path = boss_install_location / 'boss.exe'

		proc = await asyncio.create_subprocess_exec(
			str(boss_exe_path), '-s', '-g', Config.game.BOSS_NAME,
			cwd=str(boss_install_location),
			stderr=sys.stderr,
			stdout=sys.stdout,
		)
		await proc.wait()

	log.info('enabling all .esp and .esm files')
	with enable_plugins_timer:
		PLUGINS_HEADER = textwrap.dedent('''
		# This file is used to tell Oblivion which data files to load.
		# WRITE YOUR OWN PYTHON SCRIPT TO MODIFY THIS FILE (lol)
		# Please do not modify this file by hand.
		''').strip()

		# Rewrite plugins.txt atomically, listing every .esm then every .esp.
		with atomic_save(str(Config.game.app_data_path / 'plugins.txt')) as f:
			with io.TextIOWrapper(f, 'ascii') as ef:
				ef.write(PLUGINS_HEADER)
				ef.write('\n')
				for esm in Config.game.root_dir.glob('Data/*.esm'):
					ef.write(esm.name)
					ef.write('\n')
				for esp in Config.game.root_dir.glob('Data/*.esp'):
					ef.write(esp.name)
					ef.write('\n')

	# Persist the ownership map for the purge phase of the next run.
	log.info('saving which mod owns which file')
	with atomic_save(str(Config.mod_ownership_path)) as f:
		with io.TextIOWrapper(f, 'ascii') as ef:
			json.dump(path_mod_owner, ef)

	log.info(f'download_timer = {download_timer}')
	log.info(f'extract_timer = {extract_timer}')
	log.info(f'mod_converge_timer = {mod_converge_timer}')
	log.info(f'apply_converge_timer = {apply_converge_timer}')
	log.info(f'purge_timer = {purge_timer}')
	log.info(f'preproc_timer = {preproc_timer}')
	log.info(f'postproc_timer = {postproc_timer}')
	log.info(f'load_order_timer = {load_order_timer}')
	log.info(f'enable_plugins_timer = {enable_plugins_timer}')
Exemplo n.º 3
0
    def process_ncbi(self):
        """Download/prepare NCBI reference data and index the processed files.

        If NCBI_PROCESSED_DIR is empty, optionally (with a prompt, or
        automatically when self.yes is set) downloads the bacterial GenBank
        archive from NCBI, unpacks it, and converts it to fasta via
        process_ncbi.Process_NCBI.  Then scans NCBI_PROCESSED_DIR, adds each
        organism to self.organisms and records its file(s) in
        self.organism_file_map, honouring self.config.genomes_exclude.

        Exits the process (sys.exit(1)) on download/unpack/processing failure
        or when the user declines the download and no data is present.
        """
        my_log = logging.getLogger('train:process_ncbi')
        if utils.dir_is_empty(self.config.settings["ncbi_processed_dir"]):
            my_log.warning("The NCBI_PROCESSED_DIR is empty, it is possible to download data from NCBI.")
            my_log.info("I can automatically download Bacterial & Archael data "
                        "(for more possibilities see INSTALL.txt).")

            if self.yes:
                answ = "y"
            else:
                my_log.info("Download sequence data from NCBI? [Y/N] (default=Y, timeout 2 minutes)")
                answ = utils.get_answer_timeout()

            if answ != "y":
                error_message = "There is no training data available, provide some and run the program again." \
                                "Read INSTALL.txt for details on how this can be done."
                my_log.critical(error_message)
                sys.exit(1)

            my_log.info("Download may take some time ...")
            tmp_dir = os.path.join(self.config.settings["project_dir"], "tmp")
            os.mkdir(tmp_dir)
            data_archive = os.path.join(tmp_dir, 'all.gbk.tar.gz')

            # NOTE(review): os.system with an interpolated path is fragile
            # (quoting/injection); paths come from local config here, but
            # subprocess.run([...]) would be safer.
            success = os.system("wget -O {} ftp://ftp.ncbi.nih.gov/genomes/Bacteria/all.gbk.tar.gz".format(data_archive))
            if success != 0:
                my_log.critical("Error in downloading sequence data from NCBI.")
                sys.exit(1)

            # unpack data
            unpack_cmd = "tar xfz {a} -C {tmp}".format(a=data_archive, tmp=tmp_dir)
            success = os.system(unpack_cmd)

            if success != 0:
                my_log.critical("Error in unpacking the downloaded sequence data.")
                sys.exit(1)

            # process the data and create the fasta files in ncbi_dir
            process_object = process_ncbi.Process_NCBI(tmp_dir, self.config.settings["ncbi_processed_dir"])
            success = process_object.run()
            if not success:
                sys.exit(1)

            # clean the downloaded NCBI data
            shutil.rmtree(tmp_dir)

        # get all the organism names from the files in the ncbi_dir
        # this can be used for generating generic clades
        n_sequences = 0
        files = glob.glob("{dir}{sep}*.*".format(dir=self.config.settings["ncbi_processed_dir"],
                                                 sep=os.path.sep))
        for f in files:
            ext = f.split(".")[-1]
            # Filter by extension only when a real extension list is configured;
            # the setting is either [] or [""] when no filtering is wanted.
            # BUGFIX: the previous "or"-joined condition also filtered when the
            # list was [""], which skipped every file.
            extensions = self.config.settings["extensions"]
            if len(extensions) > 0 and not (len(extensions) == 1 and len(extensions[0]) == 0):
                if ext not in extensions:
                    continue
            if "." not in f:
                my_log.debug("Invalid file: {}..skipping".format(f))
                continue
            organism = f.split(os.path.sep)[-1].split(".")[0]

            # exclude this genome if asked to
            if organism in self.config.genomes_exclude:
                self.genomes_excluded.append(organism)
                continue

            n_sequences += 1
            self.organisms.add(organism)
            # BUGFIX: a dict has no .append(); the old code raised
            # AttributeError on a second file for the same organism.
            self.organism_file_map.setdefault(organism, []).append(f)

        # BUGFIX: "is not 0" compares identity, not value — use != .
        if len(self.genomes_excluded) != 0:
            my_log.info("excluded {} sequences.".format(str(len(self.genomes_excluded))))