def fetch_nexson(self, study_id, output_filepath=None, store_raw=False):
    '''Calls export_gzipNexSON URL and unzips response.

    :param study_id: phylografter study id; a leading 'pg_' prefix is stripped
    :param output_filepath: if None, the parsed NexSON dict is returned;
        otherwise the result is written there and True is returned
    :param store_raw: if True (and output_filepath is given), the raw
        uncompressed bytes are written instead of re-serialized JSON
    Raises HTTP error, gzip module error, or RuntimeError
    '''
    if study_id.startswith('pg_'):
        study_id = study_id[3:]  # strip pg_ prefix
    uri = self.domain + '/study/export_gzipNexSON.json/' + study_id
    _LOG.debug('Downloading %s using "%s"\n', study_id, uri)
    resp = requests.get(uri, headers=GZIP_REQUEST_HEADERS, allow_redirects=True)
    resp.raise_for_status()
    # gzip/IO errors simply propagate, per the docstring contract; the
    # original wrapped this in a no-op `try: ... except: raise`.
    results = gzip.GzipFile(mode='rb', fileobj=StringIO(resp.content)).read()
    if is_str_type(results):
        if output_filepath is None:
            return anyjson.loads(results)
        if store_raw:
            write_to_filepath(results, output_filepath)
        else:
            write_as_json(anyjson.loads(results), output_filepath)
        return True
    raise RuntimeError('gzipped response from phylografter export_gzipNexSON.json, but not a string is:', results)
def commit_and_try_merge2master(git_action, file_content, study_id, auth_info,
                                parent_sha, commit_msg='', merged_sha=None):
    """Actually make a local Git commit and push it to our remote

    :param git_action: object providing locking, tmpfile-write and path helpers
    :param file_content: str/unicode JSON blob, or a dict to be serialized
    :param study_id: id of the study to write
    :param auth_info: author/committer identity for the commit
    :param parent_sha: SHA to use as the parent of the new commit
    :param commit_msg: commit message
    :param merged_sha: SHA of a previously merged commit, if any
    :return: dict with keys error, resource_id, branch_name, description,
        sha, merge_needed
    :raises GitWorkflowError: if the study cannot be written
    """
    #_LOG.debug('commit_and_try_merge2master study_id="{s}" \
    # parent_sha="{p}" merged_sha="{m}"'.format(
    # s=study_id, p=parent_sha, m=merged_sha))
    merge_needed = False
    fc = tempfile.NamedTemporaryFile()
    try:
        if is_str_type(file_content):
            fc.write(file_content)
        else:
            write_as_json(file_content, fc)
        fc.flush()
        f = "Could not acquire lock to write to study #{s}".format(s=study_id)
        acquire_lock_raise(git_action, fail_msg=f)
        try:
            try:
                commit_resp = git_action.write_study_from_tmpfile(
                    study_id, fc, parent_sha, auth_info, commit_msg)
            except Exception as e:
                _LOG.exception('write_study_from_tmpfile exception')
                # str(e), not e.message: the .message attribute was removed in Python 3
                raise GitWorkflowError(
                    "Could not write to study #%s ! Details: \n%s" % (study_id, str(e)))
            written_fp = git_action.path_for_study(study_id)
            branch_name = commit_resp['branch']
            new_sha = commit_resp['commit_sha']
            _LOG.debug(
                'write of study {s} on parent {p} returned = {c}'.format(
                    s=study_id, p=parent_sha, c=str(commit_resp)))
            m_resp = _do_merge2master_commit(
                git_action, new_sha, branch_name, written_fp,
                merged_sha=merged_sha,
                prev_file_sha=commit_resp.get('prev_file_sha'))
            new_sha, branch_name, merge_needed = m_resp
        finally:
            git_action.release_lock()
    finally:
        fc.close()
    # What other useful information should be returned on a successful write?
    r = {
        "error": 0,
        "resource_id": study_id,
        "branch_name": branch_name,
        "description": "Updated study #%s" % study_id,
        "sha": new_sha,
        "merge_needed": merge_needed,
    }
    _LOG.debug('returning {r}'.format(r=str(r)))
    return r
def fetch_nexson(self, study_id, output_filepath=None, store_raw=False):
    '''Calls export_gzipNexSON URL and unzips response.

    :param study_id: phylografter study id; a leading 'pg_' prefix is stripped
    :param output_filepath: if None, the parsed NexSON dict is returned;
        otherwise the result is written there and True is returned
    :param store_raw: if True (and output_filepath is given), the raw
        uncompressed bytes are written instead of re-serialized JSON
    Raises HTTP error, gzip module error, or RuntimeError
    '''
    if study_id.startswith('pg_'):
        study_id = study_id[3:]  # strip pg_ prefix
    uri = self.domain + '/study/export_gzipNexSON.json/' + study_id
    _LOG.debug('Downloading %s using "%s"\n', study_id, uri)
    resp = requests.get(uri, headers=GZIP_REQUEST_HEADERS, allow_redirects=True)
    resp.raise_for_status()
    # gzip/IO errors simply propagate, per the docstring contract; the
    # original wrapped this in a no-op `try: ... except: raise`.
    results = gzip.GzipFile(mode='rb', fileobj=StringIO(resp.content)).read()
    if is_str_type(results):
        if output_filepath is None:
            return anyjson.loads(results)
        if store_raw:
            write_to_filepath(results, output_filepath)
        else:
            write_as_json(anyjson.loads(results), output_filepath)
        return True
    raise RuntimeError('gzipped response from phylografter export_gzipNexSON.json, but not a string is:', results)
def _write_to_next_free(tag, blob):
    """Serialize `blob` as JSON to the first unused /tmp/peyotl-<tag><n> path.

    WARNING: not thread safe; just an easy debugging routine.
    """
    counter = 0
    while True:
        candidate = '/tmp/peyotl-{t}{i}'.format(t=tag, i=counter)
        if not os.path.exists(candidate):
            break
        counter += 1
    write_as_json(blob, candidate)
def write_study(self, study_id, file_content, branch, author):
    """Given a study_id, temporary filename of content, branch and auth_info

    Deprecated but needed until we merge api local-dep to master...

    :param study_id: id of the study to write
    :param file_content: str/unicode JSON blob, or a dict to be serialized
    :param branch: branch name of the form '<gh_user>_study_...'
    :param author: author string passed to `git commit`
    :return: SHA of HEAD after the commit
    :raises GitWorkflowError: on any failure other than an empty commit
    """
    parent_sha = None
    gh_user = branch.split('_study_')[0]
    fc = tempfile.NamedTemporaryFile()
    if is_str_type(file_content):
        fc.write(file_content)
    else:
        write_as_json(file_content, fc)
    fc.flush()
    try:
        study_filepath = self.path_for_study(study_id)
        study_dir = os.path.split(study_filepath)[0]
        if parent_sha is None:
            self.checkout_master()
            parent_sha = self.get_master_sha()
        branch = self.create_or_checkout_branch(gh_user, study_id, parent_sha,
                                                force_branch_name=True)
        # create a study directory if this is a new study EJM- what if it isn't?
        if not os.path.isdir(study_dir):
            os.makedirs(study_dir)
        shutil.copy(fc.name, study_filepath)
        git(self.gitdir, self.gitwd, "add", study_filepath)
        try:
            git(self.gitdir, self.gitwd, "commit", author=author,
                message="Update Study #%s via OpenTree API" % study_id)
        except Exception as e:
            # We can ignore this if no changes are new,
            # otherwise raise a 400
            # str(e), not e.message: the .message attribute was removed in Python 3
            if "nothing to commit" in str(e):  # @EJM is this dangerous?
                pass
            else:
                _LOG.exception('"git commit" failed')
                self.reset_hard()
                raise
        new_sha = git(self.gitdir, self.gitwd, "rev-parse", "HEAD")
    except Exception as e:
        _LOG.exception('write_study exception')
        raise GitWorkflowError(
            "Could not write to study #%s ! Details: \n%s" % (study_id, str(e)))
    finally:
        fc.close()
    return new_sha
def atomic_write_json_if_not_found(obj, dest, request):
    """Write `obj` as JSON to `dest` unless `dest` already exists.

    The JSON is written to a private temp file first and then renamed into
    place, so readers never see a partially written file.

    :return: True if this call created `dest`, False if it already existed.

    NOTE(review): the exists-check + rename pair is still racy across
    processes; os.rename is atomic on POSIX, but two concurrent writers can
    both pass the checks and the last rename wins.
    """
    if os.path.exists(dest):
        return False
    scratch_dir = get_private_dir(request)  # renamed from `dir`, which shadowed the builtin
    handle, tmpfn = tempfile.mkstemp(suffix='.json', dir=scratch_dir, text=True)
    # mkstemp opens the file and returns a file descriptor,
    # but we are using write_as_json to open with the right encoding
    os.close(handle)
    write_as_json(obj, tmpfn, indent=2, sort_keys=True)
    if os.path.exists(dest):
        return False
    os.rename(tmpfn, dest)
    return True
def write_document(self, gh_user, doc_id, file_content, branch, author,
                   commit_msg=None):
    """Given a document id, temporary filename of content, branch and auth_info

    Deprecated but needed until we merge api local-dep to master...

    :param gh_user: GitHub username used for the working branch
    :param doc_id: id of the document to write
    :param file_content: str/unicode JSON blob, or a dict to be serialized
    :param branch: requested branch name (rewritten by create_or_checkout_branch)
    :param author: author string passed to `git commit`
    :param commit_msg: optional commit message; a default is built from doc_id
    :return: SHA of HEAD after the commit
    :raises GitWorkflowError: on any failure other than an empty commit
    """
    parent_sha = None
    fc = tempfile.NamedTemporaryFile()
    # N.B. we currently assume file_content is text/JSON, or should be serialized from a dict
    if is_str_type(file_content):
        fc.write(file_content)
    else:
        write_as_json(file_content, fc)
    fc.flush()
    try:
        doc_filepath = self.path_for_doc(doc_id)
        doc_dir = os.path.split(doc_filepath)[0]
        if parent_sha is None:
            self.checkout_master()
            parent_sha = self.get_master_sha()
        branch = self.create_or_checkout_branch(gh_user, doc_id, parent_sha,
                                                force_branch_name=True)
        # create a document directory if this is a new doc EJM- what if it isn't?
        if not os.path.isdir(doc_dir):
            os.makedirs(doc_dir)
        shutil.copy(fc.name, doc_filepath)
        git(self.gitdir, self.gitwd, "add", doc_filepath)
        if commit_msg is None:
            commit_msg = "Update document '%s' via OpenTree API" % doc_id
        try:
            git(self.gitdir, self.gitwd, "commit", author=author, message=commit_msg)
        except Exception as e:
            # We can ignore this if no changes are new,
            # otherwise raise a 400
            # str(e), not e.message: the .message attribute was removed in Python 3
            if "nothing to commit" in str(e):  # @EJM is this dangerous?
                pass
            else:
                _LOG.exception('"git commit" failed')
                self.reset_hard()
                raise
        new_sha = git(self.gitdir, self.gitwd, "rev-parse", "HEAD")
    except Exception as e:
        _LOG.exception('write_document exception')
        raise GitWorkflowError(
            "Could not write to document #%s ! Details: \n%s" % (doc_id, str(e)))
    finally:
        fc.close()
    return new_sha
def equal_blob_check(unit_test, diff_file_tag, first, second):
    """Fail `unit_test` if `first` != `second`, after dumping both blobs to
    scratch files and logging a dict-level diff naming those files."""
    from peyotl.test.support import pathmap
    if first == second:
        return
    dd = DictDiff.create(first, second)
    obtained_fn = pathmap.next_unique_scratch_filepath(diff_file_tag + '.obtained_rt')
    expected_fn = pathmap.next_unique_scratch_filepath(diff_file_tag + '.expected_rt')
    write_as_json(first, obtained_fn)
    write_as_json(second, expected_fn)
    edit_expressions = dd.edits_expr()
    _LOG.info('\ndict diff: {d}'.format(d='\n'.join(edit_expressions)))
    if first != second:
        template = "TreeBase conversion failed see files {o} and {e}"
        message = template.format(o=obtained_fn, e=expected_fn)
        # comparing against "" is a trick to fail with `message` in the output
        unit_test.assertEqual("", message)
def write_study(self, study_id, file_content, branch, author):
    """Given a study_id, temporary filename of content, branch and auth_info

    Deprecated but needed until we merge api local-dep to master...

    :param study_id: id of the study to write
    :param file_content: str/unicode JSON blob, or a dict to be serialized
    :param branch: branch name of the form '<gh_user>_study_...'
    :param author: author string passed to `git commit`
    :return: SHA of HEAD after the commit
    :raises GitWorkflowError: on any failure other than an empty commit
    """
    parent_sha = None
    gh_user = branch.split('_study_')[0]
    fc = tempfile.NamedTemporaryFile()
    if is_str_type(file_content):
        fc.write(file_content)
    else:
        write_as_json(file_content, fc)
    fc.flush()
    try:
        study_filepath = self.path_for_study(study_id)
        study_dir = os.path.split(study_filepath)[0]
        if parent_sha is None:
            self.checkout_master()
            parent_sha = self.get_master_sha()
        branch = self.create_or_checkout_branch(gh_user, study_id, parent_sha,
                                                force_branch_name=True)
        # create a study directory if this is a new study EJM- what if it isn't?
        if not os.path.isdir(study_dir):
            os.makedirs(study_dir)
        shutil.copy(fc.name, study_filepath)
        git(self.gitdir, self.gitwd, "add", study_filepath)
        try:
            git(self.gitdir, self.gitwd, "commit", author=author,
                message="Update Study #%s via OpenTree API" % study_id)
        except Exception as e:
            # We can ignore this if no changes are new,
            # otherwise raise a 400
            # str(e), not e.message: the .message attribute was removed in Python 3
            if "nothing to commit" in str(e):  # @EJM is this dangerous?
                pass
            else:
                _LOG.exception('"git commit" failed')
                self.reset_hard()
                raise
        new_sha = git(self.gitdir, self.gitwd, "rev-parse", "HEAD")
    except Exception as e:
        _LOG.exception('write_study exception')
        raise GitWorkflowError(
            "Could not write to study #%s ! Details: \n%s" % (study_id, str(e)))
    finally:
        fc.close()
    return new_sha
def add_default_prop(obj, out):
    """Fill in default properties on `obj` in place, then write it as JSON to `out`.

    See Jim's comment on
    https://groups.google.com/forum/?fromgroups&hl=en#!searchin/opentreeoflife-software/tried$20a$20commit/opentreeoflife-software/c8b_rQvUYvA/g1p-yIfmCEcJ
    """
    _add_defaults(obj)
    write_as_json(obj, out)
def workaround_phylografter_export_diffs(obj, out):
    """Apply the phylografter NexSON workarounds to `obj` in place, then
    serialize it as JSON to `out`."""
    workaround_phylografter_nexson(obj)
    write_as_json(obj, out)
def apr_1_2014_workaround_phylografter_export_diffs(obj, out):
    """Apply the April 1, 2014 batch of phylografter export workarounds to
    `obj` in place, then serialize it as JSON to `out`."""
    _rec_resource_meta(obj, 'root')
    _coerce_boolean(obj, 'root')
    _move_ott_taxon_name_to_otu(obj)
    _move_otu_at_label_properties(obj)
    write_as_json(obj, out)
def generic_commit_and_try_merge2master_wf(git_action,
                                           file_content,
                                           doc_id,
                                           auth_info,
                                           parent_sha,
                                           commit_msg='',
                                           merged_sha=None,
                                           doctype_display_name="document"):
    """Actually make a local Git commit and push it to our remote

    :param git_action: object providing locking, tmpfile-write and path helpers
    :param file_content: str/unicode JSON blob, or a dict to be serialized
    :param doc_id: id of the document to write
    :param auth_info: author/committer identity for the commit
    :param parent_sha: SHA to use as the parent of the new commit
    :param commit_msg: commit message
    :param merged_sha: SHA of a previously merged commit, if any
    :param doctype_display_name: human-readable doc type used in messages
    :return: dict with keys error, resource_id, branch_name, description,
        sha, merge_needed
    :raises GitWorkflowError: if the document cannot be written or exceeds
        git_action.max_file_size (when that attribute is present)
    """
    # _LOG.debug('generic_commit_and_try_merge2master_wf: doc_id="{s}" \
    # parent_sha="{p}" merged_sha="{m}"'.format(
    # s=doc_id, p=parent_sha, m=merged_sha))
    merge_needed = False
    fc = tempfile.NamedTemporaryFile()
    # N.B. we currently assume file_content is text/JSON, or should be serialized from a dict
    try:
        if is_str_type(file_content):
            fc.write(file_content)
        else:
            write_as_json(file_content, fc)
        fc.flush()
        # max_file_size is optional on git_action; AttributeError is the only
        # expected miss (was a bare `except:`, which also hid real bugs)
        try:
            max_file_size = git_action.max_file_size
        except AttributeError:
            max_file_size = None
        if max_file_size is not None:
            file_size = os.stat(fc.name).st_size
            if file_size > max_file_size:
                m = 'Commit of {t} "{i}" had a file size ({a} bytes) which ' \
                    'exceeds the maximum size allowed ({b} bytes).'
                m = m.format(t=doctype_display_name, i=doc_id,
                             a=file_size, b=max_file_size)
                raise GitWorkflowError(m)
        f = "Could not acquire lock to write to %s #%s" % (
            doctype_display_name, doc_id)
        acquire_lock_raise(git_action, fail_msg=f)
        try:
            try:
                commit_resp = git_action.write_doc_from_tmpfile(
                    doc_id, fc, parent_sha, auth_info, commit_msg,
                    doctype_display_name)
            except Exception as e:
                _LOG.exception('write_doc_from_tmpfile exception')
                # str(e), not e.message: the .message attribute was removed in Python 3
                raise GitWorkflowError(
                    "Could not write to %s #%s ! Details: \n%s" % (
                        doctype_display_name, doc_id, str(e)))
            written_fp = git_action.path_for_doc(doc_id)
            branch_name = commit_resp['branch']
            new_sha = commit_resp['commit_sha']
            _LOG.debug('write of {t} {i} on parent {p} returned = {c}'.format(
                t=doctype_display_name, i=doc_id, p=parent_sha, c=str(commit_resp)))
            m_resp = _do_merge2master_commit(
                git_action, new_sha, branch_name, written_fp,
                merged_sha=merged_sha,
                prev_file_sha=commit_resp.get('prev_file_sha'))
            new_sha, branch_name, merge_needed = m_resp
        finally:
            git_action.release_lock()
    finally:
        fc.close()
    # What other useful information should be returned on a successful write?
    r = {
        "error": 0,
        "resource_id": doc_id,
        "branch_name": branch_name,
        "description": "Updated %s #%s" % (doctype_display_name, doc_id),
        "sha": new_sha,
        "merge_needed": merge_needed,
    }
    _LOG.debug('returning {r}'.format(r=str(r)))
    return r
from peyotl.api import APIWrapper
from peyotl.ott import OTT
from peyotl import get_logger
import sys

# Script: map each study's original OTU labels to the OTT name(s) for the
# mapped ott_id, and write the mapping as JSON to the given output path.
_LOG = get_logger('otu-label-comparison')
if len(sys.argv) != 2:
    sys.exit('expecting an output file path for the JSON mapping file')
outfn = sys.argv[1]
a = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'})
ott = OTT()
ott_id_to_names = ott.ott_id_to_names
orig2ott_name = {}
phylesys = a.phylesystem_api.phylesystem_obj
# NOTE(review): iter_otu and write_as_json are not imported in this script's
# visible import block -- confirm they are imported elsewhere or add imports.
for sid, blob in phylesys.iter_study_objs():
    maps = []
    for otu_id, otu in iter_otu(blob):
        ott_id = otu.get('^ot:ottId')
        if ott_id is not None:
            try:
                names = ott_id_to_names[ott_id]
            except KeyError:
                # was a bare `except:`; a retired/unknown ID is the only expected miss
                _LOG.debug('Apparently deprecated ott_id="{o}" in study="{s}"'.format(o=ott_id, s=sid))
            else:
                if not isinstance(names, tuple):
                    names = (names,)
                maps.append((otu['^ot:originalLabel'], names))
    if maps:
        orig2ott_name[sid] = maps
write_as_json(orig2ott_name, outfn)
def _main():
    """Command-line entry point for the NeXML <-> NexSON converter.

    Reads the input file (NeXML or a NexSON flavor), autodetects the format
    when no -m mode is given, and writes the converted document (UTF-8) to
    the -o filepath or standard output.
    """
    import sys, codecs, json, os
    import argparse
    _HELP_MESSAGE = '''NeXML/NexSON converter'''
    _EPILOG = '''UTF-8 encoding is used (for input and output).

Environmental variables used:
    NEXSON_INDENTATION_SETTING indentation in NexSON (default 0)
    NEXML_INDENTATION_SETTING indentation in NeXML (default is 0).
    NEXSON_LOGGING_LEVEL logging setting: NotSet, Debug, Warn, Info, Error
    NEXSON_LOGGING_FORMAT format string for logging messages.
'''
    parser = argparse.ArgumentParser(description=_HELP_MESSAGE,
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog=_EPILOG)
    parser.add_argument("input", help="filepath to input")
    parser.add_argument("-o", "--output", metavar="FILE", required=False,
                        help="output filepath. Standard output is used if omitted.")
    parser.add_argument("-s", "--sort", action="store_true", default=False,
                        help="If specified, the arbitrarily ordered items will be sorted.")
    e_choices = ["nexml",
                 str(BADGER_FISH_NEXSON_VERSION),
                 str(DIRECT_HONEY_BADGERFISH),
                 str(BY_ID_HONEY_BADGERFISH),
                 "0.0",
                 "1.0",
                 "1.2",
                 "badgerfish"]
    e_choices.sort()
    e_help = ('output format. Valid choices are: "{c}". '
              'With "0.0" and "badgerfish" as aliases for "0.0.0", and '
              '"1.2" being an alias for the most recent version of honeybadgerfish '
              '(1.2.0). The versions "1.0.0" and its alias "1.0" refer to a '
              'version that uses the honeybadgerfish syntax for meta elements, '
              'but maintained the direct object-mapping from NeXML of the '
              'badgerfish form of NexSON').format(c='", "'.join(e_choices))
    parser.add_argument("-e", "--export", metavar="FMT", required=False,
                        choices=e_choices,
                        help=e_help)
    codes = 'xjb'
    parser.add_argument("-m", "--mode", metavar="MODE", required=False,
                        choices=[i + j for i in codes for j in codes],
                        help="A less precise way to specify a mapping. The "
                             "m option is a two-letter code for {input}{output} "
                             "The letters are x for NeXML, j for NexSON, "
                             "and b for BadgerFish JSON version of NexML. "
                             "The default behavior is to autodetect the format "
                             "and convert JSON to NeXML or NeXML to NexSON.")
    args = parser.parse_args()
    inpfn = args.input
    outfn = args.output
    mode = args.mode
    export_format = args.export
    if export_format:
        # normalize user-friendly aliases to canonical version strings
        if export_format.lower() in ["badgerfish", "0.0"]:
            export_format = str(BADGER_FISH_NEXSON_VERSION)
        elif export_format.lower() == "1.0":
            export_format = str(DIRECT_HONEY_BADGERFISH)
        elif export_format.lower() == "1.2":
            export_format = str(BY_ID_HONEY_BADGERFISH)
    if export_format is not None and mode is not None:
        # Reject -e/-m combinations that contradict each other.
        # FIX(review): the last clause previously retested mode.endswith('x'),
        # which made valid pairs like "-m jx -e nexml" a false clash and never
        # validated 'j' modes; 'j' output should be a honeybadgerfish NexSON.
        if (mode.endswith('b') and (export_format != str(BADGER_FISH_NEXSON_VERSION))) \
           or (mode.endswith('x') and (export_format.lower() != "nexml")) \
           or (mode.endswith('j') and (export_format.lower() not in [str(DIRECT_HONEY_BADGERFISH),
                                                                     str(BY_ID_HONEY_BADGERFISH)])):
            sys.exit('export format {e} clashes with mode {m}. '
                     'The mode option is not needed if the export option is used.'.format(
                         e=export_format, m=mode))
    try:
        inp = codecs.open(inpfn, mode='rU', encoding='utf-8')
    except IOError:
        # was a bare `except:`; opening can only plausibly fail with IOError
        sys.exit('nexson_nexml: Could not open file "{fn}"\n'.format(fn=inpfn))
    if mode is None:
        # sniff the first printing character to distinguish XML from JSON
        # NOTE(review): an empty input file loops forever here (read(1) keeps
        # returning '') -- confirm whether that case can occur upstream.
        try:
            while True:
                first_graph_char = inp.read(1).strip()
                if first_graph_char == '<':
                    mode = 'x*'
                    break
                elif first_graph_char in '{[':
                    mode = '*x'
                    break
                elif first_graph_char:
                    raise ValueError('Expecting input to start with <, {, or [')
        except (ValueError, IOError):
            # FIX(review): literal braces must be doubled inside a .format
            # template; the original unescaped "{, or [" raised ValueError at
            # format time, masking the real message.
            sys.exit('nexson_nexml: First character of "{fn}" was not <, {{, or [\n'
                     'Input does not appear to be NeXML or NexSON\n'.format(fn=inpfn))
        if export_format is None:
            if mode.endswith('*'):
                export_format = str(DIRECT_HONEY_BADGERFISH)
            else:
                export_format = "nexml"
        inp.seek(0)
    elif export_format is None:
        if mode.endswith('j'):
            export_format = str(DIRECT_HONEY_BADGERFISH)
        elif mode.endswith('b'):
            export_format = str(BADGER_FISH_NEXSON_VERSION)
        else:
            assert mode.endswith('x')
            export_format = "nexml"
    if export_format == "nexml":
        indentation = int(os.environ.get('NEXML_INDENTATION_SETTING', 0))
    else:
        indentation = int(os.environ.get('NEXSON_INDENTATION_SETTING', 0))
    if outfn is not None:
        try:
            out = codecs.open(outfn, mode='w', encoding='utf-8')
        except IOError:
            sys.exit('nexson_nexml: Could not open output filepath "{fn}"\n'.format(fn=outfn))
    else:
        out = codecs.getwriter('utf-8')(sys.stdout)
    if mode.startswith('x'):
        blob = get_ot_study_info_from_nexml(inp, nexson_syntax_version=export_format)
    else:
        blob = json.load(inp)
        if mode.startswith('*'):
            # autodetected JSON: verify it really is a NexSON document
            try:
                n = get_nexml_el(blob)
            except Exception:
                n = None
            if not n or (not isinstance(n, dict)):
                sys.exit('No top level "nex:nexml" element found. Document does not appear to be a JSON version of NeXML\n')
            if n:
                mode = 'j' + mode[1]
    if args.sort:
        sort_arbitrarily_ordered_nexson(blob)
    if export_format == "nexml":
        if indentation > 0:
            indent = ' ' * indentation
        else:
            indent = ''
        newline = '\n'
        write_obj_as_nexml(blob, out, addindent=indent, newl=newline)
    else:
        if not mode.startswith('x'):
            blob = convert_nexson_format(blob, export_format, sort_arbitrary=True)
        write_as_json(blob, out, indent=indentation)
s = '%s' % m s = s.lower() s = s[0].upper() + s[1:] el.append(s) m_list = word_then_punc.findall(orig) if m_list: for m in m_list: s = '%s' % m s = s.lower() s = s[0].upper() + s[1:] el.append(s) el.append(orig) return el def find_ott_matches(word): w_list = cascade_with_ssp_sp_handling(word) r_set = set() u_list = [] for w in w_list: if w not in r_set: r_set.add(w) u_list.append(w) from peyotl.sugar import taxomachine return taxomachine.TNRS(u_list) for word in sys.argv[1:]: r = find_ott_matches(word) write_as_json(r, sys.stdout, indent=1)
try: v_log, adaptor = validate_nexson(obj, codes_to_skip) except NexsonError as nx: _LOG.error(nx.value) sys.exit(1) rc = 0 if args.embed: rc = 1 if v_log.has_error() else 0 annotation = v_log.prepare_annotation(author_name=SCRIPT_NAME, invocation=sys.argv[1:], ) adaptor.add_or_replace_annotation(obj, annotation['annotationEvent'], annotation['agent'], add_agent_only=args.add_agent_only) write_as_json(obj, out) if args.add_agent_only: write_as_json(annotation['annotationEvent'], err_stream, indent=2) rc = len(v_log.errors) else: if (not v_log.errors) and (not v_log.warnings): _LOG.info('Valid') else: rc = len(v_log.errors) if args.syntax.lower() == 'json': em_dict = v_log.get_err_warn_summary_dict() if em_dict: json.dump(em_dict, out, indent=2, sort_keys=True) out.write('\n') else: if v_log.errors:
# FIX(review): APIWrapper is used below but was never imported in this
# script's import block -- restore the import.
from peyotl.api import APIWrapper
from peyotl.ott import OTT
from peyotl import get_logger
import sys

# Script: map each study's original OTU labels to the OTT name(s) for the
# mapped ott_id, and write the mapping as JSON to the given output path.
_LOG = get_logger('otu-label-comparison')
if len(sys.argv) != 2:
    sys.exit('expecting an output file path for the JSON mapping file')
outfn = sys.argv[1]
a = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'})
ott = OTT()
ott_id_to_names = ott.ott_id_to_names
orig2ott_name = {}
phylesys = a.phylesystem_api.phylesystem_obj
# NOTE(review): iter_otu and write_as_json are not imported in this script's
# visible import block -- confirm they are imported elsewhere or add imports.
for sid, blob in phylesys.iter_study_objs():
    maps = []
    for otu_id, otu in iter_otu(blob):
        ott_id = otu.get('^ot:ottId')
        if ott_id is not None:
            try:
                names = ott_id_to_names[ott_id]
            except KeyError:
                # was a bare `except:`; a retired/unknown ID is the only expected miss
                _LOG.debug('Apparently deprecated ott_id="{o}" in study="{s}"'.format(o=ott_id, s=sid))
            else:
                if not isinstance(names, tuple):
                    names = (names,)
                maps.append((otu['^ot:originalLabel'], names))
    if maps:
        orig2ott_name[sid] = maps
write_as_json(orig2ott_name, outfn)
def commit_and_try_merge2master(git_action, file_content, study_id, auth_info,
                                parent_sha, commit_msg='', merged_sha=None):
    """Actually make a local Git commit and push it to our remote

    :param git_action: object providing locking, tmpfile-write and path helpers
    :param file_content: str/unicode JSON blob, or a dict to be serialized
    :param study_id: id of the study to write
    :param auth_info: author/committer identity for the commit
    :param parent_sha: SHA to use as the parent of the new commit
    :param commit_msg: commit message
    :param merged_sha: SHA of a previously merged commit, if any
    :return: dict with keys error, resource_id, branch_name, description,
        sha, merge_needed
    :raises GitWorkflowError: if the study cannot be written or exceeds
        git_action.max_file_size (when that attribute is present)
    """
    #_LOG.debug('commit_and_try_merge2master study_id="{s}" \
    # parent_sha="{p}" merged_sha="{m}"'.format(
    # s=study_id, p=parent_sha, m=merged_sha))
    merge_needed = False
    fc = tempfile.NamedTemporaryFile()
    try:
        if is_str_type(file_content):
            fc.write(file_content)
        else:
            write_as_json(file_content, fc)
        fc.flush()
        # max_file_size is optional on git_action; AttributeError is the only
        # expected miss (was a bare `except:`, which also hid real bugs)
        try:
            max_file_size = git_action.max_file_size
        except AttributeError:
            max_file_size = None
        if max_file_size is not None:
            file_size = os.stat(fc.name).st_size
            if file_size > max_file_size:
                m = 'Commit of study "{s}" had a file size ({a} bytes) which exceeds the maximum size allowed ({b} bytes).'
                m = m.format(s=study_id, a=file_size, b=max_file_size)
                raise GitWorkflowError(m)
        f = "Could not acquire lock to write to study #{s}".format(s=study_id)
        acquire_lock_raise(git_action, fail_msg=f)
        try:
            try:
                commit_resp = git_action.write_study_from_tmpfile(
                    study_id, fc, parent_sha, auth_info, commit_msg)
            except Exception as e:
                _LOG.exception('write_study_from_tmpfile exception')
                # str(e), not e.message: the .message attribute was removed in Python 3
                raise GitWorkflowError(
                    "Could not write to study #%s ! Details: \n%s" % (study_id, str(e)))
            written_fp = git_action.path_for_study(study_id)
            branch_name = commit_resp['branch']
            new_sha = commit_resp['commit_sha']
            _LOG.debug('write of study {s} on parent {p} returned = {c}'.format(
                s=study_id, p=parent_sha, c=str(commit_resp)))
            m_resp = _do_merge2master_commit(
                git_action, new_sha, branch_name, written_fp,
                merged_sha=merged_sha,
                prev_file_sha=commit_resp.get('prev_file_sha'))
            new_sha, branch_name, merge_needed = m_resp
        finally:
            git_action.release_lock()
    finally:
        fc.close()
    # What other useful information should be returned on a successful write?
    r = {
        "error": 0,
        "resource_id": study_id,
        "branch_name": branch_name,
        "description": "Updated study #%s" % study_id,
        "sha": new_sha,
        "merge_needed": merge_needed,
    }
    _LOG.debug('returning {r}'.format(r=str(r)))
    return r