def test_05_output(self):
    """ test output of pending messages """
    Messager.warning(u'Hello warning')
    Messager.info(u'Hello info')
    Messager.debug(u'Hello debug')
    Messager.error(u'Hello error')

    output = NamedTemporaryFile("w", delete=False)
    try:
        Messager.output(output)
        output.close()
        with open(output.name, "r") as output:
            self.assertEqual(output.read(),
                             u"warning : Hello warning\n"
                             u"comment : Hello info\n"
                             u"debug : Hello debug\n"
                             u"error : Hello error\n")

        Messager.clear()
        with open(output.name, "w") as output:
            Messager.output(output)
        with open(output.name, "r") as output:
            self.assertEqual(output.read(), "")
    finally:
        os.unlink(output.name)
def test_03_error(self):
    """ test error level """
    Messager.error(u'Hello 世界!')

    json_dic = {}
    Messager.output_json(json_dic)
    self.assertEqual(
        json_dic,
        {'messages': [(u'Hello \u4e16\u754c\uff01', 'error', 3)]})
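# Not part of the test suite: a minimal sketch of how pending messages are
# typically flushed into a JSON response, assuming only the Messager API
# exercised by the tests above (warning/error, output_json, clear). The
# function name is illustrative.
def example_flush_messages():
    Messager.warning(u'Document was reloaded')
    Messager.error(u'Failed to save annotation')

    response = {}
    # output_json() adds a 'messages' list of (text, level, duration) tuples,
    # as asserted in test_03_error above
    Messager.output_json(response)
    # drop the pending messages once they have been delivered
    Messager.clear()
    return response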
def _listdir(directory, user):
    # return listdir(directory)
    try:
        assert_allowed_to_read(directory, user)
        return [f for f in listdir(directory)
                if not _is_hidden(f)
                and allowed_to_read(path_join(directory, f), user)]
    except OSError as exception:
        Messager.error("Error listing %s: %s" % (directory, exception))
        raise AnnotationCollectionNotFoundError(directory)
def _enrich_json_with_text(j_dic, txt_file_path, raw_text=None):
    if raw_text is not None:
        # looks like somebody read this already; nice
        text = raw_text
    else:
        # need to read raw text
        try:
            with open_textfile(txt_file_path, 'r') as txt_file:
                text = txt_file.read()
        except IOError:
            raise UnableToReadTextFile(txt_file_path)
        except UnicodeDecodeError:
            Messager.error(
                'Error reading text file: nonstandard encoding or binary?', -1)
            raise UnableToReadTextFile(txt_file_path)

    j_dic['text'] = text

    tokeniser = options_get_tokenization(dirname(txt_file_path))

    # First, generate tokenisation
    tok_offset_gen = tokeniser_by_name(tokeniser)
    j_dic['token_offsets'] = [o for o in tok_offset_gen(text)]

    ssplitter = options_get_ssplitter(dirname(txt_file_path))

    if ssplitter == 'newline':
        from arat.server.ssplit import newline_sentence_boundary_gen
        ss_offset_gen = newline_sentence_boundary_gen
    elif ssplitter == 'regex':
        from arat.server.ssplit import regex_sentence_boundary_gen
        ss_offset_gen = regex_sentence_boundary_gen
    else:
        Messager.warning('Unrecognized sentence splitting option, '
                         'reverting to newline sentence splitting.')
        from arat.server.ssplit import newline_sentence_boundary_gen
        ss_offset_gen = newline_sentence_boundary_gen

    j_dic['sentence_offsets'] = [o for o in ss_offset_gen(text)]

    return True
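# A self-contained sketch of the offset convention consumed above:
# 'token_offsets' and 'sentence_offsets' are lists of (start, end) character
# offsets into 'text'. The two generators below are illustrative stand-ins
# written from that convention, not the actual tokeniser_by_name()/ssplit
# implementations.
import re

def _example_token_offsets(text):
    # whitespace-delimited tokens as (start, end) pairs
    return [(m.start(), m.end()) for m in re.finditer(r'\S+', text)]

def _example_newline_sentence_offsets(text):
    # newline-delimited "sentences" as (start, end) pairs
    return [(m.start(), m.end()) for m in re.finditer(r'[^\n]+', text)]

# _example_token_offsets(u'Hello world\nBye')             -> [(0, 5), (6, 11), (12, 15)]
# _example_newline_sentence_offsets(u'Hello world\nBye')  -> [(0, 11), (12, 15)]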
def retrieve_stored(document, suffix):
    stored_path = _stored_path() + '.' + suffix

    if not isfile(stored_path):
        # @ninjin: not sure what 'version' was supposed to be returned
        # here, but none was defined, so returning that
        # raise NoSVGError(version)
        raise NoSVGError('None')

    filename = document + '.' + suffix

    # sorry, quick hack to get the content-type right
    # TODO: send this with initial 'stored' response instead of
    # guessing on suffix
    if suffix == SVG_SUFFIX:
        content_type = 'image/svg+xml'
    elif suffix == PNG_SUFFIX:
        content_type = 'image/png'
    elif suffix == PDF_SUFFIX:
        content_type = 'application/pdf'
    elif suffix == EPS_SUFFIX:
        content_type = 'application/postscript'
    else:
        Messager.error('Unknown suffix "%s"; cannot determine Content-Type'
                       % suffix)
        # TODO: reasonable backoff value
        content_type = None

    # Bail out with a hack since we violated the protocol
    hdrs = [('Content-Type', content_type),
            ('Content-Disposition', 'inline; filename=' + filename)]

    with open(stored_path, 'rb') as stored_file:
        data = stored_file.read()

    return (hdrs, data)
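# Illustrative only: how a caller might consume retrieve_stored()'s return
# value, a (header list, raw bytes) pair. The function name and the way the
# headers are emitted here are assumptions, not the actual dispatcher code.
def example_send_stored(document):
    hdrs, data = retrieve_stored(document, SVG_SUFFIX)
    for name, value in hdrs:
        print('%s: %s' % (name, value))  # e.g. "Content-Type: image/svg+xml"
    return data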
def __read_term_hierarchy(input_, section=None):
    """
    Output a list of TypeHierarchyNode

    >>> _input = ["# This a comment to be ignored"]
    >>> _input.append("[spans]")
    >>> _input.append("# POS tags")
    >>> _input.append("adj")
    >>> _input.append("adv")
    >>> _input.append("art")
    >>> isinstance((__read_term_hierarchy("\\n".join(_input))[0]), TypeHierarchyNode)
    True
    """
    root_nodes = []
    last_node_at_depth = {}
    last_args_at_depth = {}

    macros = {}
    for line in input_:
        # skip empties and lines starting with '#'
        if line.strip() == '' or re.match(r'^\s*#', line):
            continue

        # interpret lines of only hyphens as separators
        # for display
        if re.match(r'^\s*-+\s*$', line):
            # TODO: proper placeholder and placing
            root_nodes.append(cst.SEPARATOR_STR)
            continue

        # interpret lines of the format <STR1>=STR2 as "macro"
        # definitions, defining <STR1> as a placeholder that should be
        # replaced with STR2 whenever it occurs.
        match_obj = re.match(r'^<([a-zA-Z_-]+)>=\s*(.*?)\s*$', line)
        if match_obj:
            name, value = match_obj.groups()
            if name in cst.RESERVED_CONFIG_NAME:
                Messager.error("Cannot redefine <%s> in configuration, "
                               "it is a reserved name." % name)
                # TODO: proper exception
                raise InvalidProjectConfigException("Reserved name: " + name)
            else:
                macros["<%s>" % name] = value
            continue

        # macro expansion
        for token in macros:
            line = line.replace(token, macros[token])

        # check for undefined macros
        for match_obj in re.finditer(r'(<.*?>)', line):
            token = match_obj.group(1)
            assert token in cst.RESERVED_CONFIG_STRING, (
                "Error: undefined macro %s "
                "in configuration. (Note that macros are section-specific.)"
            ) % token

        # choose strict tab-only separator or looser any-space
        # separator matching depending on section
        if __require_tab_separator(section):
            match_obj = re.match(r'^(\s*)([^\t]+)(?:\t(.*))?$', line)
        else:
            match_obj = re.match(r'^(\s*)(\S+)(?:\s+(.*))?$', line)
        assert match_obj, "Error parsing line: '%s'" % line
        indent, terms, args = match_obj.groups()
        terms = [i.strip() for i in terms.split("|") if i.strip() != ""]
        if args is None or args.strip() == "":
            args = []
        else:
            args = [i.strip() for i in args.split(",") if i.strip() != ""]

        # older configs allowed space in term strings, splitting those
        # from arguments by space. Trying to parse one of these in the
        # new way will result in a crash from space in arguments.
        # The following is a workaround for the transition.
        if [i for i in args if re.search(r'\s', i)] and '\t' in line:
            # re-parse in the old way (dups from above)
            match_obj = re.match(r'^(\s*)([^\t]+)(?:\t(.*))?$', line)
            assert match_obj, "Error parsing line: '%s'" % line
            indent, terms, args = match_obj.groups()
            terms = [i.strip() for i in terms.split("|") if i.strip() != ""]
            if args is None or args.strip() == "":
                args = []
            else:
                args = [i.strip() for i in args.split(",") if i.strip() != ""]
            # issue a warning
            Messager.warning("Space in term name(s) (%s) on line \"%s\" "
                             "in config. This feature is deprecated and "
                             "support will be removed in future versions. "
                             "Please revise your configuration." %
                             (",".join(['"%s"' % i for i in terms if " " in i]),
                              line), 20)

        # depth in the ontology corresponds to the number of
        # spaces in the initial indent.
        depth = len(indent)

        # expand <INHERIT> into parent arguments
        expanded_args = []
        for a in args:
            if a != '<INHERIT>':
                expanded_args.append(a)
            else:
                assert depth - 1 in last_args_at_depth, \
                    "Error no parent for '%s'" % line
                expanded_args.extend(last_args_at_depth[depth - 1])
        args = expanded_args

        n = TypeHierarchyNode(terms, args)
        if depth == 0:
            # root level, no children assignments
            root_nodes.append(n)
        else:
            # assign as child of last node at the depth of the parent
            assert depth - 1 in last_node_at_depth, \
                "Error: no parent for '%s'" % line
            last_node_at_depth[depth - 1].children.append(n)
        last_node_at_depth[depth] = n
        last_args_at_depth[depth] = args

    return root_nodes
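# Illustrative only: a tiny configuration fragment and the tree it would
# yield, assuming input_ is an iterable of lines as the loop above expects.
# The type names are made up; TypeHierarchyNode and its .children attribute
# are the ones used in the function body.
_example_lines = [
    "# comments and blank lines are skipped",
    "Protein",
    "\tGene_or_gene_product",  # one level of indent -> child of Protein
]
# nodes = __read_term_hierarchy(_example_lines)
# nodes[0]             -> TypeHierarchyNode for 'Protein' (root level)
# nodes[0].children[0] -> TypeHierarchyNode for 'Gene_or_gene_product'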
def _enrich_json_with_data(j_dic, ann_obj):
    # TODO: figure out if there's a reason for all the unicode()
    # invocations here; remove if not.

    # We collect trigger ids to be able to link the textbound later on
    trigger_ids = set()
    for event_ann in ann_obj.get_events():
        trigger_ids.add(event_ann.trigger)
        j_dic['events'].append([six.text_type(event_ann.id),
                                six.text_type(event_ann.trigger),
                                event_ann.args])

    for rel_ann in ann_obj.get_relations():
        j_dic['relations'].append([six.text_type(rel_ann.id),
                                   six.text_type(rel_ann.type),
                                   [(rel_ann.arg1l, rel_ann.arg1),
                                    (rel_ann.arg2l, rel_ann.arg2)]])

    for tb_ann in ann_obj.get_textbounds():
        #j_tb = [unicode(tb_ann.id), tb_ann.type, tb_ann.start, tb_ann.end]
        j_tb = [six.text_type(tb_ann.id), tb_ann.type, tb_ann.spans]

        # If we spotted it in the previous pass as a trigger for an
        # event or if the type is known to be an event type, we add it
        # as a json trigger.
        # TODO: proper handling of disconnected triggers. Currently
        # these will be erroneously passed as 'entities'
        if six.text_type(tb_ann.id) in trigger_ids:
            j_dic['triggers'].append(j_tb)
            # special case for BioNLP ST 2013 format: send triggers
            # also as entities for those triggers that are referenced
            # from annotations other than events (#926).
            if BIONLP_ST_2013_COMPATIBILITY:
                if tb_ann.id in ann_obj.externally_referenced_triggers:
                    try:
                        j_dic['entities'].append(j_tb)
                    except KeyError:
                        j_dic['entities'] = [j_tb, ]
        else:
            try:
                j_dic['entities'].append(j_tb)
            except KeyError:
                j_dic['entities'] = [j_tb, ]

    for eq_ann in ann_obj.get_equivs():
        j_dic['equivs'].append((['*', eq_ann.type]
                                + [e for e in eq_ann.entities]))

    for att_ann in ann_obj.get_attributes():
        j_dic['attributes'].append([six.text_type(att_ann.id),
                                    six.text_type(att_ann.type),
                                    six.text_type(att_ann.target),
                                    att_ann.value])

    for norm_ann in ann_obj.get_normalizations():
        j_dic['normalizations'].append([six.text_type(norm_ann.id),
                                        six.text_type(norm_ann.type),
                                        six.text_type(norm_ann.target),
                                        six.text_type(norm_ann.refdb),
                                        six.text_type(norm_ann.refid),
                                        six.text_type(norm_ann.reftext)])

    for com_ann in ann_obj.get_oneline_comments():
        comment = [six.text_type(com_ann.target),
                   six.text_type(com_ann.type),
                   com_ann.tail.strip()]
        tmp = j_dic.get('comments', [])
        tmp.append(comment)
        j_dic['comments'] = tmp

    if ann_obj.failed_lines:
        # The line number is off by one
        error_msg = 'Unable to parse the following line(s):\n%s' % ('\n'.join(
            [('%i: %s' % (line_num + 1,
                          six.text_type(ann_obj[line_num]))).strip()
             for line_num in ann_obj.failed_lines]))
        Messager.error(error_msg, duration=len(ann_obj.failed_lines) * 3)

    j_dic['mtime'] = ann_obj.ann_mtime
    j_dic['ctime'] = ann_obj.ann_ctime

    try:
        # XXX avoid digging the directory from the ann_obj
        docdir = os.path.dirname(ann_obj._document)
        if options_get_validation(docdir) in ('all', 'full', ):
            projectconf = ProjectConfiguration(docdir)
            issues = verify_annotation(ann_obj, projectconf)
        else:
            issues = []
    except Exception as exception:
        # TODO add an issue about the failure?
        issues = []
        Messager.error('Error: verify_annotation() failed: %s' % exception, -1)

    for i in issues:
        issue = (six.text_type(i.ann_id), i.type, i.description)
        tmp = j_dic.get('comments', [])
        tmp.append(issue)
        j_dic['comments'] = tmp

    # Attach the source files for the annotations and text
    ann_files = [splitext(p)[1][1:] for p in ann_obj.input_files]
    ann_files.append(TEXT_FILE_SUFFIX)
    ann_files = [p for p in set(ann_files)]
    ann_files.sort()
    j_dic['source_files'] = ann_files
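# For orientation only: after the enrichment helpers above have run, the
# document JSON has roughly these keys (list contents abbreviated). Most of
# the list-valued keys are expected to exist before _enrich_json_with_data()
# appends to them; 'entities' and 'comments' are created on demand.
EXAMPLE_DOC_JSON_SHAPE = {
    'text': u'...',                       # from _enrich_json_with_text()
    'token_offsets': [],                  # (start, end) pairs into 'text'
    'sentence_offsets': [],
    'entities': [], 'triggers': [], 'events': [], 'relations': [],
    'equivs': [], 'attributes': [], 'normalizations': [], 'comments': [],
    'mtime': None, 'ctime': None,         # annotation timestamps
    'source_files': ['ann', 'txt'],       # suffixes of the files read
}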
def json(self, json_dic):
    json_dic['exception'] = 'accessDenied'
    # TODO: Client should be responsible here
    Messager.error('Access Denied')
    return json_dic
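# Sketch of how such a .json() hook is typically consumed; the dispatcher
# function and the exception class name used here are assumptions for
# illustration, not the actual server code.
def example_dispatch(handler, json_dic):
    try:
        handler(json_dic)
    except AccessDeniedError as exception:  # hypothetical name for the class above
        return exception.json(json_dic)
    return json_dic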