def make_post_clone(git_local, repo_name, remote_base):
    """Return a callable to invoke immediately after cloning *repo_name*.

    For the NIF-Ontology repo, when the (large, untracked) go.owl file
    is missing locally, the returned callable downloads it from
    *remote_base* via wget; in every other case it is a no-op.
    """
    go_path = jpth(git_local, repo_name, 'ttl/external/go.owl')
    if repo_name != 'NIF-Ontology' or os.path.exists(go_path):
        return lambda: None

    go_url = jpth(remote_base, 'ttl/external/go.owl')

    def post_clone():
        print('Retrieving go.owl since it is not in the repo.')
        # -O<path> form (no space) matches the original command string
        os.system('wget -O' + go_path + ' ' + go_url)

    return post_clone
def locate_config_file(location_spec, git_local):
    """Resolve *location_spec* to a real filesystem path.

    Specs under the default --scigraph-config-folder are re-anchored
    (via refile) relative to this module when running from a git
    checkout, or relative to the installed pyontutils tree otherwise;
    a leading ``~`` is expanded.  The result is realpath'd.
    """
    default_folder = defaults['--scigraph-config-folder']
    if location_spec.startswith(default_folder):
        here = os.path.realpath(__file__)
        in_checkout = os.path.exists(
            jpth(os.path.dirname(here), '..', '.git'))
        if in_checkout:
            anchor = here
        else:
            anchor = jpth(git_local, 'pyontutils', 'pyontutils',
                          'some_file.wat')
        location_spec = refile(anchor, location_spec)
    elif location_spec.startswith('~'):
        location_spec = os.path.expanduser(location_spec)
    return os.path.realpath(location_spec)
def build_services_config(self):
    """Render services.yaml from its template and write it under zip_location.

    Injects the curie map, synchronizes graphConfiguration.location
    with self.graph_folder (template wins when no explicit override was
    given), and substitutes HOSTNAME/PORT into the preview and view
    service metadata urls.

    Raises:
        self.MissingTemplateError: if no services config was provided.
    """
    if self.services_config is None:
        # fix: dropped pointless f-prefix on a placeholder-free string
        raise self.MissingTemplateError(
            'You have not provided a services config!')
    services_config_template = self.locate_config_template(
        self.services_config)
    curies_location = self.curies
    curies = getCuries(curies_location)
    with open(services_config_template, 'rt') as f:
        services_config = yaml.safe_load(f)
    services_config['graphConfiguration']['curies'] = curies
    if self.graph_folder != combined_defaults['--graph-folder']:
        # explicit override from the command line
        services_config['graphConfiguration'][
            'location'] = self.graph_folder
    else:
        # no override: adopt whatever location the template declares
        self.graph_folder = services_config['graphConfiguration'][
            'location']
    port = services_config['server']['connector']['port']
    url = services_config['serviceMetadata']['preview']['url']
    services_config['serviceMetadata']['preview']['url'] = url.format(
        HOSTNAME=self.services_host, PORT=port)
    url = services_config['serviceMetadata']['view']['url']
    services_config['serviceMetadata']['view']['url'] = url.format(
        HOSTNAME=self.services_host, PORT=port)
    #print(self.graph_folder)
    services_config_path = jpth(self.zip_location,
                                self.services_config)  # save loc
    p = Path(services_config_path)
    if not p.parent.exists():
        p.parent.mkdir(parents=True)
    with open(services_config_path, 'wt') as f:
        yaml.dump(services_config, f, default_flow_style=False)
def remote_graph(self, commands_only=False): dependencies = self.deploy_graph(), #if self.graph_folder == ontload_defaults['--graph-folder']: services_config_file = jpth( self.services_folder, 'services.yaml') # FIXME param services.yaml # DONT TRUST THEIR LIES READ IT FROM THE DISK use_python = ( "import sys\\n" "import yaml\\n" "with open(\\\"$F\\\", \\\"rt\\\") as f:\\n" " sys.stdout.write(yaml.safe_load(f)[\\\"graphConfiguration\\\"][\\\"location\\\"])" ) get_graph_folder = f'$(F={services_config_file}; echo -e "{use_python}" | python)' commands = self.runOnServices( f'export GRAPH_FOLDER={get_graph_folder}', 'export GRAPH_PARENT_FOLDER=$(dirname $GRAPH_FOLDER)', 'unzip -q NIF-Ontology-*-graph-*.zip', 'export GRAPH_NAME=$(echo NIF-Ontology-*-graph-*/)', f'sudo chown -R {self.services_user}:{self.services_user} $GRAPH_NAME', 'mv $GRAPH_NAME $GRAPH_PARENT_FOLDER/', 'sudo systemctl stop scigraph-services', 'unlink $GRAPH_FOLDER', 'ln -sT $GRAPH_PARENT_FOLDER/$GRAPH_NAME $GRAPH_FOLDER', 'sudo systemctl start scigraph-services', defer_shell_expansion=True) #oper=AND) return self.runOnExecutor(*dependencies, commands, oper=AND)
def _config_path(self, config): """ Implements the rule that filenames only for configs are assumed to live in --scigraph-config-folder""" if '/' not in config: return jpth(self.scigraph_config_folder, config) else: return config
def build_config(self):
    """Build every service config file, or verify they already exist.

    In check_built mode only verifies that all five configs are present
    under zip_location and raises NotBuiltError otherwise.  Normal mode
    renders services.yaml, then registers template variables for the
    start/stop scripts, the systemd unit and the java vmoptions via the
    setVars helper, and finally writes everything with build().
    """
    if self.check_built:
        configs = (self.services_config, self.start_script,
                   self.stop_script, self.systemd_config,
                   self.java_config)
        if not all(
                os.path.exists(jpth(self.zip_location, c))
                for c in configs):
            print('The configs have not been built.')
            raise NotBuiltError('The configs have not been built.')
        else:
            print(self.zip_location)
            return
    self.build_services_config()
    setVars, build = self._config_helper()
    # This reminder that something is a bit weird with using templates,
    # we would rather just let the files be the config
    # themselves, but then one of them would have to be 'wrong'... hrm
    # variables (names should match {variables} in the templates)
    setVars('start_template', self.start_script,
            ('services_user', self.services_user),
            ('java_config_path', jpth(self.etc, self.java_config)),
            ('services_jar_path', jpth(self.services_folder,
                                       self.services_jar)),
            ('services_config_path', jpth(self.services_folder,
                                          self.services_config)),
            ('services_log', jpth(self.services_log_loc,
                                  self.services_log)))
    setVars('stop_template', self.stop_script,
            ('services_user', self.services_user))
    setVars('systemd_config_template', self.systemd_config,
            ('path_to_start_script', jpth(self.services_folder,
                                          self.start_script)),
            ('path_to_stop_script', jpth(self.services_folder,
                                         self.stop_script)))
    # terror of terrors... this is not going in by default
    # why was this even enabled?!
    # debug_jmx is built but deliberately NOT substituted below
    debug_jmx = '\n' + '\n'.join(
        ('-Dcom.sun.management.jmxremote.port=8082',
         '-Dcom.sun.management.jmxremote.authenticate=false',
         '-Dcom.sun.management.jmxremote.ssl=false'))
    setVars('java_template', self.java_config,
            ('heap_dump_path', jpth(self.services_log_loc,
                                    self.heap_dump)),
            ('services_host', self.services_host),
            ('garbage_collection_log', jpth(self.services_log_loc,
                                            self.garbage_collection_log)),
            ('debug_jmx', ''))  # debug_jmx)
    build()
def loadall(git_local, repo_name, local=False, dobig=False):
    """Parse every .ttl file under <git_local>/<repo_name>/ttl (up to
    three directory levels deep) into one rdflib Graph, then chase
    transitive owl:imports, and return the combined graph.

    local: when True, skip the owl:imports crawl entirely.
    dobig: default for whether files matching bigleaves are parsed.
    """
    memoryCheck(2665488384)  # refuse to start without ~2.5 GiB available
    local_base = jpth(git_local, repo_name)
    lb_ttl = os.path.realpath(jpth(local_base, 'ttl'))
    #match = (rdflib.term.URIRef('http://purl.org/dc/elements/1.1/member'),  # iao.owl
    #rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
    #rdflib.term.URIRef('http://www.w3.org/2002/07/owl#AnnotationProperty'))
    done = []  # basenames / iris already parsed, to avoid re-parsing
    filenames = [
        f for g in ('*', '*/*', '*/*/*')
        for f in glob(lb_ttl + '/' + g + '.ttl')
    ]
    graph = rdflib.Graph()
    for f in filenames:
        print(f)
        done.append(os.path.basename(f))
        graph.parse(f, format='turtle')
        #if match in graph:
        #raise BaseException('Evil file found %s' % f)

    def repeat(dobig=dobig):  # we don't really know when to stop, so just adjust
        # one pass over the owl:imports currently in the graph; new
        # imports pulled in here are picked up by the next call
        for s, o in graph.subject_objects(owl.imports):
            if os.path.basename(o) not in done and o not in done:
                #if (o, rdf.type, owl.Ontology) not in graph:
                print(o)
                done.append(o)
                ext = os.path.splitext(o)[1]
                fmt = 'turtle' if ext == '.ttl' else 'xml'
                if noneMembers(o, *bigleaves) or dobig:
                    graph.parse(o, format=fmt)
                    #if match in graph:
                    #raise BaseException('Evil file found %s' % o)

    #if local:
    #repeat(False)
    #else:
    if not local:
        # fixed number of passes as a crude fixed point; see note above
        for i in range(10):
            repeat(True)
    return graph
def make_folder_zip(wild=False):
    """Compute the graph output folder path, its zip path and — unless
    *wild* (glob form) is requested — the shell command to create the zip."""
    target = folder_name(scigraph_commit, wild)
    graph_path = jpth(zip_location, target)
    zip_path = graph_path + '.zip'
    if wild:
        return graph_path, zip_path
    zip_command = ' '.join(('cd', os.path.dirname(zip_path), ';',
                            'zip -r', os.path.basename(zip_path), target))
    return graph_path, zip_path, zip_command
def oneshots_services(self, commands_only=False):
    """One-time provisioning of the services host: create the services
    and log folders, the java config file and the graph folder, and
    hand ownership of each to the services user."""
    java_config_path = jpth(self.etc, self.java_config)
    owner = f'{self.services_user}:{self.services_user}'
    return self.runOnServices(
        f'sudo mkdir {self.services_folder}',
        f'sudo chown {owner} {self.services_folder}',
        f'sudo mkdir {self.services_log_loc}',
        f'sudo chown {owner} {self.services_log_loc}',
        f'sudo touch {java_config_path}',
        f'sudo chown {owner} {java_config_path}',
        f'sudo mkdir -p {self.graph_folder}',
        f'sudo chown {owner} {self.graph_folder}',
        oper=AND)
def build(templates=_templates):
    """Render every registered template with its kwargs and write the
    result under self.zip_location; finally print that location
    (the caller reads it back over ssh)."""
    for template_path, fmt_kwargs, config_file in templates.values():
        with open(template_path, 'rt') as f:
            rendered = f.read().format(**fmt_kwargs)
        out_path = jpth(self.zip_location, config_file)
        with open(out_path, 'wt') as f:
            f.write(rendered)
    print(self.zip_location)  # sent back over ssh
def build_services_config(self):
    """Render services.yaml from its template and write it under zip_location.

    Injects the curie map and synchronizes graphConfiguration.location
    with self.graph_folder (the template's value wins when no explicit
    override was given on the command line).
    """
    services_config_template = self.locate_config_template(
        self.services_config)
    curies_location = self.locate_config(self.curies)
    curies, _ = getCuries(curies_location)
    with open(services_config_template, 'rt') as f:
        # fix: yaml.load without an explicit Loader is deprecated and
        # allows arbitrary object construction; safe_load matches the
        # other config loaders in this file
        services_config = yaml.safe_load(f)
    services_config['graphConfiguration']['curies'] = curies
    if self.graph_folder != combined_defaults['--graph-folder']:
        # explicit override from the command line
        services_config['graphConfiguration'][
            'location'] = self.graph_folder
    else:
        # no override: adopt whatever location the template declares
        self.graph_folder = services_config['graphConfiguration'][
            'location']
    #print(self.graph_folder)
    services_config_path = jpth(self.zip_location,
                                self.services_config)  # save loc
    with open(services_config_path, 'wt') as f:
        yaml.dump(services_config, f, default_flow_style=False)
def _set_up_repo_state(local_base, git_remote, org, git_local, repo_name,
                       branch, commit, post_clone):
    """Clone or open the ontology repo and check out *branch*/*commit*.

    Clones from <git_remote>/<org>/<repo_name>.git when *local_base*
    does not exist (running *post_clone* afterwards), pulls to get up
    to date, and checks out *commit* unless it is 'HEAD'.

    Returns:
        (repo, nob): the Repo object and the branch that was active on
        entry, so callers can restore it later.
    """
    git_base = jpth(git_remote, org, repo_name)
    if not os.path.exists(local_base):
        repo = Repo.clone_from(git_base + '.git', local_base)
        post_clone()  # FIXME if this does not complete we need to warn or something, it causes errors
    else:
        repo = Repo(local_base)
    nob = repo.active_branch  # remember where we started
    try:
        nab = getBranch(repo, branch)
        nab.checkout()
    except ValueError:  # usually indicates a remote branch
        repo.git.checkout(branch)
        nab = repo.active_branch
    repo.remote().pull()  # make sure we are up to date
    if commit != 'HEAD':
        repo.git.checkout(commit)
    return repo, nob
def deploy_scp(local_path, remote_spec):
    """Print the copy command (cp for localhost, scp otherwise) and the
    command that records *local_path*'s basename in a LATEST file next
    to the destination.  The default spec 'user@localhost:/tmp/' is a
    no-op placeholder.  NOTE: commands are printed, not executed."""
    name = os.path.basename(local_path)
    if remote_spec == 'user@localhost:/tmp/':
        print(f'Default so not scping {local_path}')
        return
    ssh_target, remote_path = remote_spec.split(':', 1)  # XXX bad things?
    latest = jpth(os.path.dirname(remote_path), 'LATEST')
    if 'localhost' in remote_spec:
        if '~' in remote_path:
            remote_path = os.path.expanduser(remote_path)
            latest = os.path.expanduser(latest)
        remote_spec = remote_path
        copier = 'cp'
        update_latest = f'echo {name} > {latest}'
    else:
        copier = 'scp'
        update_latest = f'ssh {ssh_target} "echo {name} > {latest}"'
    print(f'{copier} {local_path} {remote_spec}')
    print(update_latest)
def make_graphload_config(graphload_config, graph_path, remote_base,
                          local_base, zip_location, config_path=None):
    """Instantiate graphload.yaml from *graphload_config* + '.template'.

    Sets graphConfiguration.location to *graph_path*, rewrites each
    ontology entry's url from *remote_base* to *local_base*, and writes
    the result to *config_path* (default: a dated file in
    *zip_location*).

    Returns:
        (config, config_path, ontologies): the loaded dict, where it
        was written, and the list of (rewritten) ontology urls.
    """
    template_path = graphload_config + '.template'
    with open(template_path, 'rt') as f:
        config = yaml.safe_load(f)
    config['graphConfiguration']['location'] = graph_path
    localized = []
    for ont in config['ontologies']:
        entry = dict(ont)  # keep key order; only 'url' is rewritten
        if 'url' in entry:
            entry['url'] = entry['url'].replace(remote_base, local_base)
        localized.append(entry)
    config['ontologies'] = localized
    if config_path is None:
        config_path = jpth(zip_location, 'graphload-' + TODAY() + '.yaml')
    with open(config_path, 'wt') as f:
        yaml.dump(config, f, default_flow_style=False)
    ontologies = [ont['url'] for ont in config['ontologies']]
    return config, config_path, ontologies
def scigraph_build(zip_location, git_remote, org, git_local, branch, commit,
                   clean=False, check_built=False, cleanup_later=False):
    """Clone/update SciGraph, build it with maven if needed, and stage
    the services zip in *zip_location*.

    Skips the maven build when the checked-out commit matches the one
    recorded in last-built-commit.log (unless *clean*).  check_built
    turns the build step into a verification that raises NotBuiltError.

    Returns:
        (scigraph_commit, load_base, services_zip, reset_state) where
        load_base is the mvn exec command template for the owl loader
        and reset_state restores the originally active branch.
    """
    COMMIT_LOG = 'last-built-commit.log'
    repo_name = 'SciGraph'
    remote = jpth(git_remote, org, repo_name)
    local = jpth(git_local, repo_name)
    commit_log_path = jpth(local, COMMIT_LOG)
    # command template; {config_path} is filled in by the caller
    load_base = ('cd {}; '.format(jpth(local, 'SciGraph-core')) +
                 'mvn exec:java '
                 '-Dexec.mainClass="io.scigraph.owlapi.loader.BatchOwlLoader" '
                 '-Dexec.args="-c {config_path}"')
    if not os.path.exists(local):
        repo = Repo.clone_from(remote + '.git', local)
    else:
        repo = Repo(local)
    if not os.path.exists(commit_log_path):
        last_commit = None
    else:
        with open(commit_log_path, 'rt') as f:
            last_commit = f.read().strip()
    sob = repo.active_branch  # branch to restore on exit
    try:
        sab = getBranch(repo, branch)
        sab.checkout()
    except ValueError:  # usually indicates a remote branch
        repo.git.checkout(branch)
        sab = repo.active_branch
    repo.remote().pull()
    if commit != 'HEAD':
        repo.git.checkout(commit)
    scigraph_commit = repo.head.object.hexsha

    def zip_name(wild=False):
        # wild form yields a glob pattern matching any build date
        return (repo_name + '-' + branch + '-services' + '-' +
                ('*' if wild else TODAY()) + '-' +
                scigraph_commit[:COMMIT_HASH_HEAD_LEN] + '.zip')

    def reset_state(original_branch=sob):
        original_branch.checkout()

    with execute_regardless(reset_state, only_exception=cleanup_later):
        # FIXME this fails when we need to load the graph if we start on master :/
        # main
        if scigraph_commit != last_commit or clean:
            print('SciGraph not built at commit', commit, 'last built at',
                  last_commit)
            build_command = ('cd ' + local +
                             '; mvn clean -DskipTests -DskipITs install'
                             '; cd SciGraph-services'
                             '; mvn -DskipTests -DskipITs package')
            if check_built:
                print('SciGraph has not been built.')
                raise NotBuiltError('SciGraph has not been built.')
            out = os.system(build_command)
            print(out)
            if out:
                # nonzero exit: record FAILURE so the next run rebuilds
                scigraph_commit = 'FAILURE'
            with open(commit_log_path, 'wt') as f:
                f.write(scigraph_commit)
        else:
            print('SciGraph already built at commit', scigraph_commit)

        wildcard = jpth(zip_location, zip_name(wild=True))
        try:
            services_zip = glob(wildcard)[
                0]  # this will error if the zip was moved
            return scigraph_commit, load_base, services_zip, reset_state
        except IndexError:
            pass  # we need to copy the zip out again

        # services zip
        zip_filename = 'scigraph-services-*-SNAPSHOT.zip'
        services_zip_temp = glob(
            jpth(local, 'SciGraph-services', 'target', zip_filename))[0]
        services_zip = jpth(zip_location, zip_name())
        shutil.copy(services_zip_temp, services_zip)
        return scigraph_commit, load_base, services_zip, reset_state
def __init__(self, zip_location, git_remote, org, git_local, repo_name,
             branch, commit, remote_base, load_base, graphload_config,
             patch_config, patch, scigraph_commit,
             post_clone=lambda: None, check_built=False):
    """Check out the ontology repo, build the graphload config, run the
    SciGraph loader and zip the resulting graph.

    Side effects on self: sets self.ontologies, self.zip_path,
    self.itrips and self.config.
    """
    local_base = jpth(git_local, repo_name)
    git_base = jpth(git_remote, org, repo_name)
    if not os.path.exists(local_base):
        repo = Repo.clone_from(git_base + '.git', local_base)
        post_clone(
        )  # FIXME if this does not complete we need to warn or something, it causes errors
    else:
        repo = Repo(local_base)
    nob = repo.active_branch  # branch to restore on exit
    try:
        nab = getBranch(repo, branch)
        nab.checkout()
    except ValueError:  # usually indicates a remote branch
        repo.git.checkout(branch)
        nab = repo.active_branch
    repo.remote().pull()  # make sure we are up to date
    if commit != 'HEAD':
        repo.git.checkout(commit)

    # TODO consider dumping metadata in a file in the folder too?
    def folder_name(scigraph_commit, wild=False):
        # graph folder name encodes repo, branch, date and both commits
        ontology_commit = repo.head.object.hexsha[:COMMIT_HASH_HEAD_LEN]
        return (repo_name + '-' + branch + '-graph' + '-' +
                ('*' if wild else TODAY()) + '-' +
                scigraph_commit[:COMMIT_HASH_HEAD_LEN] + '-' +
                ontology_commit)

    def make_folder_zip(wild=False):
        # wild form yields glob patterns matching any build date
        folder = folder_name(scigraph_commit, wild)
        graph_path = jpth(zip_location, folder)
        zip_path = graph_path + '.zip'
        if wild:
            return graph_path, zip_path
        zip_name = os.path.basename(zip_path)
        zip_dir = os.path.dirname(zip_path)
        zip_command = ' '.join(
            ('cd', zip_dir, ';', 'zip -r', zip_name, folder))
        return graph_path, zip_path, zip_command

    graph_path, zip_path, zip_command = make_folder_zip()
    wild_graph_path, wild_zip_path = make_folder_zip(wild=True)
    (config, config_path,
     ontologies) = self.make_graphload_config(graphload_config, graph_path,
                                              remote_base, local_base,
                                              zip_location)
    load_command = load_base.format(
        config_path=config_path)  # 'exit 1' to test
    print(load_command)

    # replace raw github imports with ontology.neuinfor iris to simplify import chain
    fix_imports = "find " + local_base + " -name '*.ttl' -exec sed -i 's/<http.\+\/ttl\//<http:\/\/ontology.neuinfo.org\/NIF\/ttl\//' {} \;"
    os.system(fix_imports)

    def reset_state(original_branch=nob):
        # drop the sed edits, then restore the original branch
        repo.git.checkout('--', local_base)
        original_branch.checkout()

    with execute_regardless(
            reset_state):  # FIXME start this immediately after we obtain nob?
        # main
        if patch:
            # FIXME TODO XXX does scigraph load from the catalog!??!??
            # because it seems like doid loads correctly without using local_versions
            # which would be cool, if confusing
            local_versions = tuple(do_patch(patch_config, local_base))
        else:
            local_versions = tuple()
        itrips = local_imports(
            remote_base, local_base, ontologies,
            local_versions=local_versions,
            dobig=True)  # SciGraph doesn't support catalog.xml
        catalog = make_catalog(itrips)
        with open(Path(local_base, 'catalog.xml'), 'wt') as f:
            f.write(catalog)

        maybe_zip_path = glob(wild_zip_path)
        if not maybe_zip_path:
            if check_built:
                print('The graph has not been loaded.')
                raise NotBuiltError('The graph has not been loaded.')
            failure = os.system(load_command)
            if failure:
                # failed load: remove the partial graph folder
                if os.path.exists(graph_path):
                    shutil.rmtree(graph_path)
            else:
                os.rename(
                    config_path,  # save the config for eaiser debugging
                    jpth(graph_path, os.path.basename(config_path)))
                failure = os.system(zip_command)  # graphload zip
        else:
            zip_path = maybe_zip_path[0]  # this way we get the actual date
            print('Graph already loaded at', zip_path)

    # this needs to be run when the branch is checked out
    # FIXME might be worth adding this to the load config?
    self.ontologies = [
        get_iri(load_header(rec['url'])) for rec in config['ontologies']
    ]
    self.zip_path = zip_path
    self.itrips = itrips
    self.config = config
def run(args):
    """Dispatch the ontload CLI (docopt *args* dict): graph, scigraph,
    imports, chain or extra mode; writes an import-closure html report
    when import triples were collected; drops into IPython on --debug.
    """
    # modes
    graph = args['graph']
    scigraph = args['scigraph']
    imports = args['imports']
    chain = args['chain']
    extra = args['extra']
    # required
    repo_name = args['<repo>']
    remote_base = args['<remote_base>']
    ontologies = args['<ontologies>']
    # options
    git_remote = args['--git-remote']
    git_local = args['--git-local']
    zip_location = args['--zip-location']
    scigraph_config_folder = args['--scigraph-config-folder']
    graphload_config = args['--graphload-config']
    org = args['--org']
    branch = args['--branch']
    commit = args['--commit']
    scp = args['--scp-loc']
    sorg = args['--scigraph-org']
    sbranch = args['--scigraph-branch']
    scommit = args['--scigraph-commit']
    sscp = args['--scigraph-scp-loc']
    curies_location = args['--curies']
    check_built = args['--check-built']
    debug = args['--debug']
    log = args['--logfile']  # TODO

    # post parse mods
    if remote_base == 'NIF':
        remote_base = 'http://ontology.neuinfo.org/NIF'
    if '~' in git_local:
        git_local = os.path.expanduser(git_local)
    # bare filenames are resolved against --scigraph-config-folder
    if '/' not in graphload_config:
        graphload_config = jpth(scigraph_config_folder, graphload_config)
    if '/' not in curies_location:
        curies_location = jpth(scigraph_config_folder, curies_location)
    graphload_config = locate_config_file(graphload_config, git_local)
    curies_location = locate_config_file(curies_location, git_local)
    curies, curie_prefixes = getCuries(curies_location)

    itrips = None  # import triples; set by the graph/imports/chain modes

    if repo_name is not None:
        local_base = jpth(git_local, repo_name)

    if graph:
        # build SciGraph first, keep its branch checked out until the
        # graph load finishes (cleanup_later=True)
        (scigraph_commit, load_base, services_zip,
         scigraph_reset_state) = scigraph_build(zip_location, git_remote,
                                                sorg, git_local, sbranch,
                                                scommit,
                                                check_built=check_built,
                                                cleanup_later=True)
        with execute_regardless(scigraph_reset_state):
            graph_zip, itrips = repro_loader(zip_location, git_remote,
                                             org, git_local, repo_name,
                                             branch, commit, remote_base,
                                             load_base, graphload_config,
                                             scigraph_commit,
                                             check_built=check_built)
        if not check_built:
            deploy_scp(services_zip, sscp)
            deploy_scp(graph_zip, scp)
        print(services_zip)
        print(graph_zip)
        if '--local' in args:
            return
    elif scigraph:
        (scigraph_commit, load_base, services_zip,
         _) = scigraph_build(zip_location, git_remote, sorg, git_local,
                             sbranch, scommit, check_built=check_built)
        if not check_built:
            deploy_scp(services_zip, sscp)
        print(services_zip)
        if '--local' in args:
            return
    elif imports:
        # TODO mismatch between import name and file name needs a better fix
        itrips = local_imports(remote_base, local_base, ontologies)
    elif chain:
        itrips = local_imports(remote_base, local_base, ontologies,
                               readonly=True)
    elif extra:
        graph = loadall(git_local, repo_name)
        mg, ng_ = normalize_prefixes(graph, curies)
        ng_.add_known_namespaces(
            'NIFRID')  # not officially in the curies yet...
        for_burak(ng_)
        debug = True
    else:
        raise BaseException('How did we possibly get here docopt?')

    if itrips:
        import_graph = rdflib.Graph()
        [import_graph.add(t) for t in itrips]
        tree, extra = import_tree(import_graph)
        with open(
                jpth(
                    zip_location,
                    '{repo_name}-import-closure.html'.format(
                        repo_name=repo_name)), 'wt') as f:
            f.write(extra.html.replace('NIFTTL:',
                                       ''))  # much more readable

    if debug:
        embed()
def scigraph_build(zip_location, git_remote, org, git_local, branch, commit,
                   clean=False, check_built=False, cleanup_later=False,
                   quiet=False):
    """Clone/update SciGraph, stamp its poms with the short commit hash,
    build with maven if needed, stage the services zip in *zip_location*
    and install a `scigraph-load` wrapper script in zip_location/bin
    (which is also prepended to PATH).

    Returns:
        (scigraph_commit, services_zip, reset_state) where reset_state
        restores the originally active branch.
    """
    COMMIT_LOG = 'last-built-commit.log'
    repo_name = 'SciGraph'
    remote = jpth(git_remote, org, repo_name)
    local = jpth(git_local, repo_name)
    commit_log_path = jpth(local, COMMIT_LOG)
    if not os.path.exists(local):
        repo = Repo.clone_from(remote + '.git', local)
    elif not Path(local, '.git').exists():
        # folder exists but is not a git checkout: clone into it
        repo = Repo.clone_from(remote + '.git', local)
    else:
        repo = Repo(local)
    if not os.path.exists(commit_log_path):
        last_commit = None
    else:
        with open(commit_log_path, 'rt') as f:
            last_commit = f.read().strip()
    sob = repo.active_branch  # branch to restore on exit
    try:
        sab = getBranch(repo, branch)
        sab.checkout()
    except ValueError:  # usually indicates a remote branch
        repo.git.checkout(branch)
        sab = repo.active_branch
    repo.remote().pull()
    if commit != 'HEAD':
        repo.git.checkout(commit)
    scigraph_commit = repo.head.object.hexsha
    scigraph_commit_short = scigraph_commit[:COMMIT_HASH_HEAD_LEN]

    # NOTE: zip_location must be a pathlib.Path here (uses / and .exists)
    bin_location = zip_location / 'bin'
    os.environ['PATH'] = bin_location.as_posix() + ':' + os.environ.get(
        'PATH', '')
    if not bin_location.exists():
        bin_location.mkdir()
    # hack to make the scigraph-load we are about to create available as a command
    # so that it matches the usual scigraph-load behavior

    def zip_name(wild=False):
        # wild form yields a glob pattern matching any build date
        return (repo_name + '-' + branch + '-services' + '-' +
                ('*' if wild else TODAY()) + '-' + scigraph_commit_short +
                '.zip')

    def reset_state(original_branch=sob):
        original_branch.checkout()

    with execute_regardless(reset_state, only_exception=cleanup_later):
        # FIXME this fails when we need to load the graph if we start on master :/
        # main
        if scigraph_commit != last_commit or clean:
            print('SciGraph not built at commit', commit, 'last built at',
                  last_commit)
            quiet = '--quiet ' if quiet else ''
            # the sed invocations pin every pom's <version> to the
            # short commit hash so artifact names are predictable
            build_command = (
                'cd ' + local + f';export HASH={scigraph_commit_short}'
                ';sed -i "/<name>SciGraph<\/name>/{N;s/<version>.\+<\/version>/<version>${HASH}<\/version>/}" pom.xml'
                ';sed -i "/<artifactId>scigraph<\/artifactId>/{N;s/<version>.\+<\/version>/<version>${HASH}<\/version>/}" SciGraph-analysis/pom.xml'
                ';sed -i "/<groupId>io.scigraph<\/groupId>/{N;s/<version>.\+<\/version>/<version>${HASH}<\/version>/}" SciGraph-core/pom.xml'
                ';sed -i "/<artifactId>scigraph<\/artifactId>/{N;s/<version>.\+<\/version>/<version>${HASH}<\/version>/}" SciGraph-entity/pom.xml'
                ';sed -i "/<groupId>io.scigraph<\/groupId>/{N;s/<version>.\+<\/version>/<version>${HASH}<\/version>/}" SciGraph-services/pom.xml'
                f'; mvn {quiet}clean -DskipTests -DskipITs install'
                '; cd SciGraph-services'
                f'; mvn {quiet}-DskipTests -DskipITs package')
            if check_built:
                print('SciGraph has not been built.')
                raise NotBuiltError('SciGraph has not been built.')
            out = os.system(build_command)
            print(out)
            if out:
                # nonzero exit: record FAILURE so the next run rebuilds
                scigraph_commit = 'FAILURE'
            with open(commit_log_path, 'wt') as f:
                f.write(scigraph_commit)
        else:
            print('SciGraph already built at commit', scigraph_commit)

        wildcard = jpth(zip_location, zip_name(wild=True))
        try:
            services_zip = glob(wildcard)[
                0]  # this will error if the zip was moved
            return scigraph_commit, services_zip, reset_state
        except IndexError:
            pass  # we need to copy the zip out again

        # services zip
        zip_filename = f'scigraph-services-{scigraph_commit_short}.zip'
        services_zip_temp = Path(local, 'SciGraph-services', 'target',
                                 zip_filename)
        services_zip = jpth(zip_location, zip_name())
        shutil.copy(services_zip_temp, services_zip)

        core_jar = Path(
            local, 'SciGraph-core', 'target',
            f'scigraph-core-{scigraph_commit_short}-jar-with-dependencies.jar')
        # wrapper script; \\ renders a literal backslash line
        # continuation in the emitted shell script
        # NOTE(review): exact internal whitespace of this script could
        # not be recovered from the mangled source — the shell ignores
        # it, but confirm against the original file
        scigraph_load = f'''#!/usr/bin/env sh
/usr/bin/java \\
-cp "{core_jar.as_posix()}" \\
io.scigraph.owlapi.loader.BatchOwlLoader $@'''
        slf = bin_location / 'scigraph-load'
        with open(slf, 'wt') as f:
            f.write(scigraph_load)
        os.chmod(slf, 0o0755)
        return scigraph_commit, services_zip, reset_state
def run(args):
    """Dispatch the ontload CLI (docopt *args* dict): graph, scigraph,
    config, imports, chain, extra or patch mode; writes per-ontology
    import-closure html reports when import triples were collected;
    drops into IPython on --debug.
    """
    # modes
    graph = args['graph']
    scigraph = args['scigraph']
    config = args['config']
    imports = args['imports']
    chain = args['chain']
    extra = args['extra']
    # required
    repo_name = args['<repo>']
    remote_base = args['<remote_base>']
    ontologies = args['<ontologies>']
    # options
    git_remote = args['--git-remote']
    git_local = args['--git-local']
    zip_location = args['--zip-location']
    graphload_config = args['--graphload-config']
    org = args['--org']
    branch = args['--branch']
    commit = args['--commit']
    scp = args['--scp-loc']
    sorg = args['--scigraph-org']
    sbranch = args['--scigraph-branch']
    scommit = args['--scigraph-commit']
    sscp = args['--scigraph-scp-loc']
    patch_config = args['--patch-config']
    curies_location = args['--curies']
    patch = args['--patch']
    check_built = args['--check-built']
    debug = args['--debug']
    log = args['--logfile']  # TODO

    if args['--view-defaults']:
        for k, v in defaults.items():
            print(f'{k:<22} {v}')
        return

    # post parse mods
    if remote_base == 'NIF':
        remote_base = 'http://ontology.neuinfo.org/NIF'

    curies = getCuries(curies_location)
    curie_prefixes = set(curies.values())

    itrips = None  # import triples; set by the graph/imports/chain modes

    if repo_name is not None:
        local_base = jpth(git_local, repo_name)

    if graph:
        if args['--build-scigraph']:
            # build SciGraph first, keep its branch checked out until
            # the graph load finishes (cleanup_later=True)
            (scigraph_commit, load_base, services_zip,
             scigraph_reset_state) = scigraph_build(
                 zip_location, git_remote, sorg, git_local, sbranch,
                 scommit, check_built=check_built, cleanup_later=True)
        else:
            scigraph_commit = 'dev-9999'
            services_zip = 'None'
            load_base = 'scigraph-load -c {config_path}'  # now _this_ is easier
            scigraph_reset_state = lambda: None
        with execute_regardless(scigraph_reset_state):
            rl = ReproLoader(zip_location, git_remote, org, git_local,
                             repo_name, branch, commit, remote_base,
                             load_base, graphload_config, patch_config,
                             patch, scigraph_commit,
                             check_built=check_built)
        itrips, config = rl.itrips, rl.config
        if not check_built:
            deploy_scp(services_zip, sscp)
            deploy_scp(rl.zip_path, scp)
        if not ontologies:
            ontologies = rl.ontologies
        print(services_zip)
        print(rl.zip_path)
        if '--local' in args:
            return
    elif scigraph:
        (scigraph_commit, load_base, services_zip,
         _) = scigraph_build(zip_location, git_remote, sorg, git_local,
                             sbranch, scommit, check_built=check_built)
        if not check_built:
            deploy_scp(services_zip, sscp)
        print(services_zip)
        if '--local' in args:
            return
    elif config:
        # write a graphload config only, no build
        graph_path = args['<graph_path>']
        config_path = args['--graph-config-out']
        local_base = jpth(git_local, repo_name)
        ReproLoader.make_graphload_config(graphload_config, graph_path,
                                          remote_base, local_base,
                                          zip_location, config_path)
    elif imports:
        # TODO mismatch between import name and file name needs a better fix
        itrips = local_imports(remote_base, local_base, ontologies)
    elif chain:
        itrips = local_imports(remote_base, local_base, ontologies,
                               readonly=True)
    elif extra:
        graph = loadall(git_local, repo_name)
        mg, ng_ = normalize_prefixes(graph, curies)
        ng_.add_known_namespaces(
            'NIFRID')  # not officially in the curies yet...
        for_burak(ng_)
        debug = True
    elif patch:
        local_base = jpth(git_local, repo_name)
        local_versions = tuple(do_patch(patch_config, local_base))
    else:
        raise BaseException('How did we possibly get here docopt?')

    if itrips:
        import_graph = rdflib.Graph()
        [import_graph.add(t) for t in itrips]
        for tree, extra in import_tree(import_graph, ontologies):
            name = Path(next(iter(tree.keys()))).name
            with open(jpth(zip_location, f'{name}-import-closure.html'),
                      'wt') as f:
                f.write(extra.html.replace('NIFTTL:',
                                           ''))  # much more readable

    if debug:
        embed()
def locate_config(self, config):
    """Resolve *config* to its full path inside the located
    scigraph config folder."""
    resolved = self._config_path(config)
    folder = self.locate_folder(self.scigraph_config_folder)
    return jpth(folder, resolved)
def __init__(self, zip_location, git_remote, org, git_local, repo_name,
             branch, commit, remote_base, load_base,
             graphload_config_template, graphload_ontologies, patch_config,
             patch, scigraph_commit, post_clone=lambda: None,
             fix_imports_only=False, check_built=False):
    """Check out the ontology repo, build the graphload config, run the
    loader command and zip the resulting graph.

    fix_imports_only: stop after rewriting the ttl import iris.
    Side effects on self: sets self.ontologies, self.zip_path,
    self.itrips and self.config.
    """
    date_today = TODAY()
    load_from_repo = True  # constant toggle for a repo-less load path
    local_base = jpth(git_local, repo_name)
    if load_from_repo:
        repo, nob = self._set_up_repo_state(local_base, git_remote, org,
                                            git_local, repo_name, branch,
                                            commit, post_clone)
        ontology_commit = repo.head.object.hexsha[:COMMIT_HASH_HEAD_LEN]
    else:
        ontology_commit = 'NONE'

    config_path, config = self.make_graphload_config(
        graphload_config_template, graphload_ontologies, zip_location,
        date_today)
    # hash of the config participates in the output folder name
    config_hash = identity_json(config, sort_lists=True).hex()
    (graph_path, zip_path, zip_command,
     wild_zip_path) = self._set_up_paths(zip_location, repo_name, branch,
                                         scigraph_commit, ontology_commit,
                                         config_hash, date_today)
    # NOTE config is modified in place
    ontologies = self.configure_config(config, graph_path, remote_base,
                                       local_base, config_path)
    load_command = load_base.format(
        config_path=config_path)  # 'exit 1' to test
    log.info(load_command)

    if load_from_repo:
        # replace raw github imports with ontology.neuinfor iris to simplify import chain
        # FIXME this is hardcoded and will not generalize ...
        fix_imports = ("find " + local_base + (
            " -name '*.ttl' -exec sed -i"
            " 's,<http.\+/ttl/,<http://ontology.neuinfo.org/NIF/ttl/,' {} \;"))
        os.system(fix_imports)

    if load_from_repo and not fix_imports_only:
        def reset_state(original_branch=nob):
            # drop the sed edits, then restore the original branch
            repo.git.checkout('--', local_base)
            original_branch.checkout()
    else:
        reset_state = lambda: None

    with execute_regardless(
            reset_state):  # FIXME start this immediately after we obtain nob?
        # main
        if load_from_repo:
            if patch:
                # FIXME TODO XXX does scigraph load from the catalog!??!??
                # because it seems like doid loads correctly without using local_versions
                # which would be cool, if confusing
                local_versions = tuple(do_patch(patch_config, local_base))
            else:
                local_versions = tuple()
            itrips = local_imports(
                remote_base, local_base, ontologies,
                local_versions=local_versions,
                dobig=True)  # SciGraph doesn't support catalog.xml
            catalog = make_catalog(itrips)
            with open(Path(local_base, 'catalog.xml'), 'wt') as f:
                f.write(catalog)
        else:
            itrips = []
            pass

        maybe_zip_path = glob(wild_zip_path)
        if fix_imports_only:
            pass
        elif not maybe_zip_path:
            if check_built:
                print('The graph has not been loaded.')
                raise NotBuiltError('The graph has not been loaded.')
            #breakpoint()
            failure = os.system(load_command)
            if failure:
                # failed load: remove the partial graph folder
                if os.path.exists(graph_path):
                    shutil.rmtree(graph_path)
            else:
                os.rename(
                    config_path,  # save the config for eaiser debugging
                    graph_path / config_path.name)
                cpr = config_path.with_suffix(config_path.suffix + '.raw')
                os.rename(cpr, graph_path / cpr.name)
                failure = os.system(zip_command)  # graphload zip
        else:
            zip_path = maybe_zip_path[0]  # this way we get the actual date
            print('Graph already loaded at', zip_path)

    # this needs to be run when the branch is checked out
    # FIXME might be worth adding this to the load config?
    self.ontologies = [
        get_iri(load_header(rec['url'])) for rec in config['ontologies']
    ]
    self.zip_path = zip_path
    self.itrips = itrips
    self.config = config
def get_latest_commit(self, repo):
    """Pull *repo* on the build host and return the command set that
    prints its HEAD commit hash."""
    checkout = jpth(self.git_local, repo)
    return self.runOnBuild(f'cd {checkout}',
                           'git pull 1>/dev/null',
                           'git rev-parse HEAD')
def run(args):
    """Dispatch on the docopt-parsed *args* dict.

    Modes (mutually exclusive): graph (full repro load), scigraph
    (build scigraph services), config (write a graphload config),
    imports/chain (compute import triples), extra (prefix
    normalization report), patch (apply patches only).

    For the imports/chain modes the collected triples are rendered to
    per-ontology import-closure html files under the zip location.
    """
    # modes
    graph = args['graph']
    scigraph = args['scigraph']
    config = args['config']
    imports = args['imports']
    chain = args['chain']
    extra = args['extra']

    # required
    repo_name = args['<repo>']
    remote_base = args['<remote_base>']
    ontologies = args['<ontologies>']

    # options
    git_remote = args['--git-remote']
    git_local = Path(args['--git-local']).resolve()
    zip_location = Path(args['--zip-location']).resolve()
    graphload_config = Path(args['--graphload-config']).resolve()
    graphload_config_template = graphload_config  # NOTE XXX
    if args['--graphload-ontologies'] is not None:
        graphload_ontologies = Path(args['--graphload-ontologies']).resolve()
    else:
        graphload_ontologies = None

    org = args['--org']
    branch = args['--branch']
    commit = args['--commit']
    scp = args['--scp-loc']
    sorg = args['--scigraph-org']
    sbranch = args['--scigraph-branch']
    scommit = args['--scigraph-commit']
    sscp = args['--scigraph-scp-loc']
    scigraph_quiet = args['--scigraph-quiet']
    patch_config = args['--patch-config']
    curies_location = args['--curies']
    patch = args['--patch']
    check_built = args['--check-built']
    debug = args['--debug']
    log = args['--logfile']  # TODO
    # NOTE(review): the assignment above shadows any module-level ``log`` inside this function — confirm intended
    fix_imports_only = args['--fix-imports-only']

    load_base = 'scigraph-load -c {config_path}'  # now _this_ is easier

    if args['--view-defaults']:
        for k, v in defaults.items():
            print(f'{k:<22} {v}')
        return

    # post parse mods
    if remote_base == 'NIF':
        remote_base = 'http://ontology.neuinfo.org/NIF'

    itrips = None

    if repo_name is not None:
        local_base = jpth(git_local, repo_name)

    if graph:
        if args['--path-build-scigraph']:  # path-build-scigraph
            path_build_scigraph = Path(args['--path-build-scigraph'])
            (scigraph_commit, services_zip,
             scigraph_reset_state) = scigraph_build(
                 path_build_scigraph, git_remote, sorg, path_build_scigraph,
                 sbranch, scommit, check_built=check_built,
                 cleanup_later=True, quiet=scigraph_quiet)
        else:
            # no scigraph build requested; use placeholder values
            scigraph_commit = 'dev-9999'
            services_zip = 'None'
            scigraph_reset_state = lambda: None

        with execute_regardless(scigraph_reset_state):
            rl = ReproLoader(zip_location, git_remote, org, git_local,
                             repo_name, branch, commit, remote_base,
                             load_base, graphload_config_template,
                             graphload_ontologies, patch_config, patch,
                             scigraph_commit,
                             fix_imports_only=fix_imports_only,
                             check_built=check_built,)

        if not fix_imports_only:
            # point the LATEST symlink at the freshly produced zip
            FILE_NAME_ZIP = Path(rl.zip_path).name
            LATEST = Path(zip_location) / 'LATEST'
            if LATEST.exists() and LATEST.is_symlink():
                LATEST.unlink()

            LATEST.symlink_to(FILE_NAME_ZIP)

            itrips, config = rl.itrips, rl.config

            if not ontologies:
                ontologies = rl.ontologies

            print(services_zip)
            print(rl.zip_path)
            # NOTE(review): membership test is True whenever docopt defines
            # a --local option at all; args['--local'] may be intended — confirm
            if '--local' in args:
                return

    elif scigraph:
        (scigraph_commit, services_zip,
         _) = scigraph_build(zip_location, git_remote, sorg, git_local,
                             sbranch, scommit, check_built=check_built,
                             quiet=scigraph_quiet)
        print(services_zip)
        # NOTE(review): same membership-test concern as above — confirm
        if '--local' in args:
            return

    elif config:
        #graph_path = Path(args['<graph_path>']).resolve()
        config_path = Path(args['--graph-config-out']).resolve()
        #local_base = Path(git_local, repo_name).resolve()
        date_today = TODAY()
        ReproLoader.make_graphload_config(graphload_config_template,
                                          graphload_ontologies,
                                          zip_location, date_today,
                                          config_path)

    elif imports:
        # TODO mismatch between import name and file name needs a better fix
        itrips = local_imports(remote_base, local_base, ontologies)
    elif chain:
        itrips = local_imports(remote_base, local_base, ontologies,
                               readonly=True)
    elif extra:
        from nifstd_tools.utils import memoryCheck
        curies = getCuries(curies_location)
        curie_prefixes = set(curies.values())
        memoryCheck(2665488384)
        graph = loadall(git_local, repo_name)
        new_graph = normalize_prefixes(graph, curies)
        for_burak(new_graph)
        debug = True
    elif patch:
        local_base = jpth(git_local, repo_name)
        local_versions = tuple(do_patch(patch_config, local_base))
    else:
        raise BaseException('How did we possibly get here docopt?')

    if itrips:
        import_graph = OntGraph()
        [import_graph.add(t) for t in itrips]

        # write one import-closure html report per import tree root
        for tree, extra in import_tree(import_graph, ontologies):
            name = Path(next(iter(tree.keys()))).name
            with open(jpth(zip_location, f'{name}-import-closure.html'), 'wt') as f:
                f.write(extra.html.replace('NIFTTL:', ''))  # much more readable

    if debug:
        breakpoint()
def repro_loader(zip_location, git_remote, org, git_local, repo_name, branch,
                 commit, remote_base, load_base, graphload_config,
                 scigraph_commit, post_clone=lambda: None, check_built=False):
    """Legacy functional version of the repro graph load.

    Clones/updates the ontology repo, checks out the requested
    branch/commit, renders the graphload config from its ``.template``
    file, runs the scigraph load command and zips the graph folder,
    restoring repo state afterwards.

    :param load_base: format string for the scigraph-load shell command
    :param post_clone: callback invoked after a fresh clone (default no-op)
    :param check_built: raise ``NotBuiltError`` when the graph zip is
        missing instead of building it
    :return: ``(zip_path, itrips)``
    """
    local_base = jpth(git_local, repo_name)
    git_base = jpth(git_remote, org, repo_name)
    if not os.path.exists(local_base):
        repo = Repo.clone_from(git_base + '.git', local_base)
        post_clone()  # FIXME if this does not complete we need to warn or something, it causes errors
    else:
        repo = Repo(local_base)
    nob = repo.active_branch
    try:
        nab = getBranch(repo, branch)
        nab.checkout()
    except ValueError:  # usually indicates a remote branch
        repo.git.checkout(branch)
        nab = repo.active_branch
    repo.remote().pull()  # make sure we are up to date
    if commit != 'HEAD':
        repo.git.checkout(commit)

    # TODO consider dumping metadata in a file in the folder too?
    def folder_name(scigraph_commit, wild=False):
        # embed both commits (and the date, or a glob wildcard) in the name
        ontology_commit = repo.head.object.hexsha[:COMMIT_HASH_HEAD_LEN]
        # FIX: TODAY is a callable elsewhere in this file (TODAY());
        # concatenating the function object raised TypeError here
        return (repo_name + '-' + branch + '-graph'
                + '-' + ('*' if wild else TODAY())
                + '-' + scigraph_commit[:COMMIT_HASH_HEAD_LEN]
                + '-' + ontology_commit)

    def make_folder_zip(wild=False):
        # derive graph folder, zip path, and (non-wild only) the zip command
        folder = folder_name(scigraph_commit, wild)
        graph_path = jpth(zip_location, folder)
        zip_path = graph_path + '.zip'
        if wild:
            return graph_path, zip_path
        zip_name = os.path.basename(zip_path)
        zip_dir = os.path.dirname(zip_path)
        zip_command = ' '.join(('cd', zip_dir, ';', 'zip -r', zip_name, folder))
        return graph_path, zip_path, zip_command

    graph_path, zip_path, zip_command = make_folder_zip()
    _, wild_zip_path = make_folder_zip(wild=True)  # wild graph path unused

    # config graphload.yaml from template
    graphload_config_template = graphload_config + '.template'
    with open(graphload_config_template, 'rt') as f:
        # FIX: was yaml.load(f) with no Loader — unsafe on untrusted input
        # and deprecated in PyYAML >= 5.1; safe_load matches the class
        # based implementation in this file
        config = yaml.safe_load(f)

    config['graphConfiguration']['location'] = graph_path
    # remap remote ontology urls onto the local clone
    config['ontologies'] = [{k: v.replace(remote_base, local_base)
                             if k == 'url' else v
                             for k, v in ont.items()}
                            for ont in config['ontologies']]

    config_path = jpth(zip_location, 'graphload-' + TODAY() + '.yaml')  # FIX: call TODAY
    with open(config_path, 'wt') as f:
        yaml.dump(config, f, default_flow_style=False)
    ontologies = [ont['url'] for ont in config['ontologies']]
    load_command = load_base.format(config_path=config_path)
    print(load_command)

    def reset_state(original_branch=nob):
        original_branch.checkout()
        # return to original state (reset --hard)
        repo.head.reset(index=True, working_tree=True)  # FIXME we need to not run anything if there are files added to staging

    with execute_regardless(reset_state):  # FIXME start this immediately after we obtain nob?
        # main
        itrips = local_imports(remote_base, local_base, ontologies)  # SciGraph doesn't support catalog.xml
        maybe_zip_path = glob(wild_zip_path)
        if not maybe_zip_path:
            if check_built:
                print('The graph has not been loaded.')
                raise NotBuiltError('The graph has not been loaded.')
            failure = os.system(load_command)
            if failure:
                # remove the partial graph folder left by a failed load
                if os.path.exists(graph_path):
                    shutil.rmtree(graph_path)
            else:
                os.rename(config_path,  # save the config for easier debugging
                          jpth(graph_path, os.path.basename(config_path)))
                failure = os.system(zip_command)  # graphload zip
        else:
            zip_path = maybe_zip_path[0]  # this way we get the actual date
            print('Graph already loaded at', zip_path)

    return zip_path, itrips