def to_xml(self, value, param_name):
    """ Returns a new element named param_name holding one 'item' child
    per each element of value.
    """
    root = Element(param_name)

    for entry in value:
        root.append(Element('item'))

        # The entry is assigned through the last 'item' child, i.e. the one just appended.
        root.item[-1] = entry

    return root
def to_xml(self, value, param_name):
    """ Returns a new element named param_name to which the result of
    self.get_xml_dict(..., 'dict') is appended for each element of value.
    """
    root = Element(param_name)

    for mapping in value:
        child = self.get_xml_dict(mapping, 'dict')
        root.append(child)

    return root
def _get_security_list(cluster):
    """ Invokes the 'zato:security.get-list' admin service for the given cluster
    and returns the middle element of the three-element result.
    """
    request = Element('{%s}zato_message' % zato_namespace)
    request.data = Element('data')
    request.data.cluster_id = cluster.id

    # Only the second of the three values returned is of interest here.
    _, response, _ = invoke_admin_service(cluster, 'zato:security.get-list', request)

    return response
def edit(req):
    """ Sends the values submitted in the POST request to the
    'zato:security.tech-account.edit' admin service. On any error the details
    are logged and returned in an HttpResponseServerError.
    """
    prefix = 'edit-'
    post = req.POST

    cluster_id = post['cluster_id']
    tech_account_id = post['id']
    name = post[prefix + 'name']

    # A missing or empty value means the account is not active.
    is_active = bool(post.get(prefix + 'is_active'))

    cluster = req.odb.query(Cluster).filter_by(id=cluster_id).first()

    try:
        request = Element('{%s}zato_message' % zato_namespace)
        request.data = Element('data')

        data = request.data
        data.cluster_id = cluster_id
        data.tech_account_id = tech_account_id
        data.name = name
        data.is_active = is_active

        _, zato_message, soap_response = invoke_admin_service(cluster,
            'zato:security.tech-account.edit', request)

    except Exception as e:
        msg = "Could not update the technical account, e=[{e}]".format(e=format_exc(e))
        logger.error(msg)
        return HttpResponseServerError(msg)

    # NOTE(review): nothing is returned on the success path in the code visible here,
    # confirm whether a response is meant to be built after the call above.
def index(req):
    """ Renders the list of outgoing AMQP connections returned by the
    'zato:outgoing.amqp.get-list' admin service for the cluster chosen through
    the 'cluster' query string parameter, along with the 'create' and 'edit' forms.
    """
    zato_clusters = req.odb.query(Cluster).order_by('name').all()
    choose_cluster_form = ChooseClusterForm(zato_clusters, req.GET)
    cluster_id = req.GET.get('cluster')
    items = []

    create_form = CreateForm()
    edit_form = EditForm(prefix='edit')

    if cluster_id and req.method == 'GET':
        cluster = req.odb.query(Cluster).filter_by(id=cluster_id).first()

        # Both forms are given the very same definitions.
        def_ids = _get_def_ids(cluster)
        create_form.set_def_id(def_ids)
        edit_form.set_def_id(def_ids)

        request = Element('{%s}zato_message' % zato_namespace)
        request.data = Element('data')
        request.data.cluster_id = cluster_id

        _, response, _ = invoke_admin_service(cluster, 'zato:outgoing.amqp.get-list', request)

        if zato_path('data.item_list.item').get_from(response) is not None:
            for elem in response.data.item_list.item:
                item_id = elem.id.text
                name = elem.name.text
                is_active = is_boolean(elem.is_active.text)
                delivery_mode = int(elem.delivery_mode.text)
                priority = elem.priority.text
                content_type = elem.content_type.text
                content_encoding = elem.content_encoding.text
                expiration = elem.expiration.text
                user_id = elem.user_id.text
                app_id = elem.app_id.text

                # A human-friendly name of the numeric delivery mode.
                delivery_mode_text = delivery_friendly_name[delivery_mode]

                def_name = elem.def_name.text
                def_id = elem.def_id.text

                items.append(OutgoingAMQP(item_id, name, is_active, delivery_mode, priority,
                    content_type, content_encoding, expiration, user_id, app_id, def_id,
                    delivery_mode_text, def_name))

    return_data = {
        'zato_clusters': zato_clusters,
        'cluster_id': cluster_id,
        'choose_cluster_form': choose_cluster_form,
        'items': items,
        'create_form': create_form,
        'edit_form': edit_form,
    }

    # TODO: Should really be done by a decorator.
    if logger.isEnabledFor(TRACE1):
        logger.log(TRACE1, 'Returning render_to_response [{0}]'.format(return_data))

    return render_to_response('zato/outgoing/amqp.html', return_data,
        context_instance=RequestContext(req))
def handle(self, *args, **kwargs):
    """ Creates a new outgoing JMS WebSphere MQ connection out of the 'data.'
    part of the payload and starts a connector for it. Returns ZATO_OK along
    with an XML string carrying the ID of the new connection.
    """
    with closing(self.server.odb.session()) as session:
        payload = kwargs.get('payload')

        core_params = _get_params(payload,
            ['cluster_id', 'name', 'is_active', 'def_id', 'delivery_mode', 'priority'], 'data.')
        optional_params = _get_params(payload, ['expiration'], 'data.', default_value=None)

        priority = int(core_params['priority'])
        if priority < 0 or priority > 9:
            msg = 'Priority should be between 0 and 9, not [{0}]'.format(repr(priority))
            raise ValueError(msg)

        name = core_params['name']
        cluster_id = core_params['cluster_id']
        core_params['def_id'] = int(core_params['def_id'])

        # Look for a connection of the same name whose definition is on this cluster.
        duplicate = (session.query(OutgoingWMQ.id)
            .filter(ConnDefWMQ.cluster_id==cluster_id)
            .filter(OutgoingWMQ.def_id==ConnDefWMQ.id)
            .filter(OutgoingWMQ.name==name)
            .first())

        if duplicate:
            raise Exception('An outgoing JMS WebSphere MQ connection [{0}] already exists on this cluster'.format(name))

        created_elem = Element('out_jms_wmq')

        try:
            core_params['delivery_mode'] = int(core_params['delivery_mode'])
            core_params['priority'] = priority
            core_params['is_active'] = is_boolean(core_params['is_active'])

            item = OutgoingWMQ()
            for attr in ('name', 'is_active', 'def_id', 'delivery_mode', 'priority'):
                setattr(item, attr, core_params[attr])
            item.expiration = optional_params['expiration']

            session.add(item)
            session.commit()

            created_elem.id = item.id
            start_connector(self.server.repo_location, item.id, item.def_id)

            return ZATO_OK, etree.tostring(created_elem)

        except Exception as e:
            msg = 'Could not create an outgoing JMS WebSphere MQ connection, e=[{e}]'.format(e=format_exc(e))
            self.logger.error(msg)
            session.rollback()

            raise
def _get_edit_create_message(params, prefix=''):
    """ Builds the request document shared by the 'edit' and 'create' actions
    of channels and outgoing connections.
    """
    zato_message = Element('{%s}zato_message' % zato_namespace)
    zato_message.data = Element('data')

    data = zato_message.data
    data.is_internal = False
    data.connection = params['connection']
    data.transport = params['transport']
    data.id = params.get('id')
    data.cluster_id = params['cluster_id']
    data.name = params[prefix + 'name']
    data.is_active = bool(params.get(prefix + 'is_active'))
    data.url_path = params[prefix + 'url_path']
    data.method = params[prefix + 'method']
    data.soap_action = params.get(prefix + 'soap_action', '')
    data.soap_version = params.get(prefix + 'soap_version', '')
    data.service = params[prefix + 'service']

    security = params[prefix + 'security']
    if security == ZATO_NONE:
        sec_def_id = ZATO_NONE
    else:
        # Only what follows the slash is sent on.
        _, sec_def_id = security.split('/')

    data.sec_def_id = sec_def_id

    return zato_message
def index(req):
    """ Renders the list of JMS WebSphere MQ definitions returned by the
    'zato:definition.jms_wmq.get-list' admin service for the cluster chosen
    through the 'cluster' query string parameter, along with the 'create' and 'edit' forms.
    """
    zato_clusters = req.odb.query(Cluster).order_by('name').all()
    choose_cluster_form = ChooseClusterForm(zato_clusters, req.GET)
    cluster_id = req.GET.get('cluster')
    items = []

    create_form = CreateForm()
    edit_form = EditForm(prefix='edit')

    if cluster_id and req.method == 'GET':
        cluster = req.odb.query(Cluster).filter_by(id=cluster_id).first()

        request = Element('{%s}zato_message' % zato_namespace)
        request.data = Element('data')
        request.data.cluster_id = cluster_id

        _, response, _ = invoke_admin_service(cluster, 'zato:definition.jms_wmq.get-list', request)

        if zato_path('data.definition_list.definition').get_from(response) is not None:
            for elem in response.data.definition_list.definition:
                def_id = elem.id.text
                name = elem.name.text
                host = elem.host.text
                port = elem.port.text
                queue_manager = elem.queue_manager.text
                channel = elem.channel.text
                cache_open_send_queues = is_boolean(elem.cache_open_send_queues.text)
                cache_open_receive_queues = is_boolean(elem.cache_open_receive_queues.text)
                use_shared_connections = is_boolean(elem.use_shared_connections.text)
                ssl = is_boolean(elem.ssl.text)
                ssl_cipher_spec = elem.ssl_cipher_spec.text
                ssl_key_repository = elem.ssl_key_repository.text
                needs_mcd = is_boolean(elem.needs_mcd.text)
                max_chars_printed = elem.max_chars_printed.text

                items.append(ConnDefWMQ(def_id, name, host, port, queue_manager, channel,
                    cache_open_send_queues, cache_open_receive_queues, use_shared_connections,
                    ssl, ssl_cipher_spec, ssl_key_repository, needs_mcd, max_chars_printed))

    return_data = {
        'zato_clusters': zato_clusters,
        'cluster_id': cluster_id,
        'choose_cluster_form': choose_cluster_form,
        'items': items,
        'create_form': create_form,
        'edit_form': edit_form,
    }

    # TODO: Should really be done by a decorator.
    if logger.isEnabledFor(TRACE1):
        logger.log(TRACE1, 'Returning render_to_response [{0}]'.format(return_data))

    return render_to_response('zato/definition/jms_wmq.html', return_data,
        context_instance=RequestContext(req))
def convert(self, param, param_name, value, has_simple_io_config, is_xml, date_time_format=None):
    """ Converts value according to the type of param and to the name-based
    conventions kept in self.bool_parameter_prefixes, self.int_parameters
    and self.int_parameter_suffixes. Any error is logged and re-raised
    as a ZatoException.
    """
    try:
        is_bool_param = isinstance(param, Boolean)

        if any(param_name.startswith(prefix) for prefix in self.bool_parameter_prefixes) or is_bool_param:
            # value can be an empty string and asbool chokes on that
            value = asbool(value or None)

        if value:
            if is_bool_param:
                value = asbool(value)

            elif isinstance(param, CSV):
                value = value.split(',')

            elif isinstance(param, List):
                if not is_xml:
                    # This is a JSON list
                    return value

                # We are parsing XML to create a SIO request
                if isinstance(value, EtreeElement):
                    return [elem.text for elem in value.getchildren()]

                # We are producing XML out of an SIO response
                wrapper = Element(param_name)
                for item_value in value:
                    wrapper.append(Element('item'))
                    wrapper.item[-1] = item_value
                return wrapper

            elif isinstance(param, Integer):
                value = int(value)

            elif isinstance(param, Unicode):
                value = unicode(value)

            elif isinstance(param, UTC):
                value = value.replace('+00:00', '')

            elif value != ZATO_NONE and has_simple_io_config and (
                    any(param_name==elem for elem in self.int_parameters) or
                    any(param_name.endswith(suffix) for suffix in self.int_parameter_suffixes)):
                # No explicit type was given but the name tells it is an integer.
                value = int(value)

            if date_time_format and isinstance(value, datetime):
                value = value.strftime(date_time_format)

        # An empty CSV is always returned as an empty list.
        if isinstance(param, CSV) and not value:
            value = []

        return value

    except Exception as e:
        msg = 'Conversion error, param:[{}], param_name:[{}], repr(value):[{}], e:[{}]'.format(
            param, param_name, repr(value), format_exc(e))
        logger.error(msg)

        raise ZatoException(msg=msg)
def handle(self, *args, **kwargs):
    """ Updates an existing outgoing FTP connection with the values found in the
    'data.' part of the payload and passes the new configuration on to
    self.update_facade. Returns ZATO_OK along with an XML string carrying
    the connection's ID.
    """
    with closing(self.server.odb.session()) as session:
        payload = kwargs.get('payload')

        core_params = _get_params(payload,
            ['id', 'cluster_id', 'name', 'is_active', 'host', 'port', 'dircache'], 'data.')
        optional_params = _get_params(payload,
            ['user', 'acct', 'timeout'], 'data.', default_value=None)

        id = core_params['id']
        name = core_params['name']
        cluster_id = core_params['cluster_id']

        # Another connection on this cluster must not already use the name.
        duplicate = (session.query(OutgoingFTP.id)
            .filter(OutgoingFTP.cluster_id==cluster_id)
            .filter(OutgoingFTP.name==name)
            .filter(OutgoingFTP.id!=id)
            .first())

        if duplicate:
            raise Exception('An outgoing FTP connection [{0}] already exists on this cluster'.format(name))

        xml_item = Element('out_ftp')

        try:
            core_params['id'] = int(core_params['id'])
            for key in ('is_active', 'dircache'):
                core_params[key] = is_boolean(core_params[key])

            item = session.query(OutgoingFTP).filter_by(id=id).one()

            # The previous name is passed on to update_facade below.
            old_name = item.name

            for attr in ('name', 'is_active', 'cluster_id', 'dircache', 'host', 'port'):
                setattr(item, attr, core_params[attr])

            for attr in ('user', 'acct', 'timeout'):
                setattr(item, attr, optional_params[attr])

            session.add(item)
            session.commit()

            xml_item.id = item.id
            self.update_facade(core_params, optional_params, old_name)

            return ZATO_OK, etree.tostring(xml_item)

        except Exception as e:
            msg = 'Could not update the outgoing FTP connection, e=[{e}]'.format(e=format_exc(e))
            self.logger.error(msg)
            session.rollback()

            raise
def index(req):
    """ Renders the list of WS-Security definitions returned by the
    'zato:security.wss.get-list' admin service for the cluster chosen through
    the 'cluster' query string parameter, along with the 'create', 'edit'
    and 'change password' forms.
    """
    zato_clusters = req.odb.query(Cluster).order_by('name').all()
    choose_cluster_form = ChooseClusterForm(zato_clusters, req.GET)
    cluster_id = req.GET.get('cluster')
    items = []

    create_form = CreateForm()
    edit_form = EditForm(prefix='edit')
    change_password_form = ChangePasswordForm()

    if cluster_id and req.method == 'GET':
        cluster = req.odb.query(Cluster).filter_by(id=cluster_id).first()

        request = Element('{%s}zato_message' % zato_namespace)
        request.data = Element('data')
        request.data.cluster_id = cluster_id

        _, response, _ = invoke_admin_service(cluster, 'zato:security.wss.get-list', request)

        if zato_path('data.definition_list.definition').get_from(response) is not None:
            for elem in response.data.definition_list.definition:
                def_id = elem.id.text
                name = elem.name.text
                is_active = is_boolean(elem.is_active.text)
                username = elem.username.text

                # Both the raw password type and its mapped counterpart are needed.
                password_type_raw = elem.password_type.text
                password_type = ZATO_WSS_PASSWORD_TYPES[password_type_raw]

                # These four are passed on as elements, not as their text.
                reject_empty_nonce_ts = elem.reject_empty_nonce_ts
                reject_stale_username = elem.reject_stale_username
                expiry_limit = elem.expiry_limit
                nonce_freshness = elem.nonce_freshness

                # No password is passed in, hence the None.
                items.append(WSSDefinition(def_id, name, is_active, username, None,
                    password_type, reject_empty_nonce_ts, reject_stale_username,
                    expiry_limit, nonce_freshness, password_type_raw=password_type_raw))

    return_data = {
        'zato_clusters': zato_clusters,
        'cluster_id': cluster_id,
        'choose_cluster_form': choose_cluster_form,
        'items': items,
        'create_form': create_form,
        'edit_form': edit_form,
        'change_password_form': change_password_form,
    }

    # TODO: Should really be done by a decorator.
    if logger.isEnabledFor(TRACE1):
        logger.log(TRACE1, 'Returning render_to_response [%s]' % return_data)

    return render_to_response('zato/security/wss.html', return_data,
        context_instance=RequestContext(req))
def handle(self, *args, **kwargs):
    """ Updates an existing HTTPSOAP object with the values found in the 'data.'
    part of the payload. Returns ZATO_OK along with an XML string carrying
    the object's ID.
    """
    with closing(self.server.odb.session()) as session:
        payload = kwargs.get('payload')

        core_params = _get_params(payload,
            ['id', 'cluster_id', 'name', 'is_active', 'url_path', 'connection', 'transport'], 'data.')
        optional_params = _get_params(payload,
            ['method', 'soap_action', 'soap_version'], 'data.', default_value=None)

        id = core_params['id']
        name = core_params['name']
        cluster_id = core_params['cluster_id']

        # Another object on this cluster must not already use the name.
        duplicate = (session.query(HTTPSOAP.id)
            .filter(HTTPSOAP.cluster_id==cluster_id)
            .filter(HTTPSOAP.id!=id)
            .filter(HTTPSOAP.name==name)
            .first())

        if duplicate:
            raise Exception('An object of that name [{0}] already exists on this cluster'.format(name))

        xml_item = Element('http_soap')

        try:
            core_params['id'] = int(core_params['id'])
            core_params['is_active'] = is_boolean(core_params['is_active'])

            item = session.query(HTTPSOAP).filter_by(id=id).one()

            for attr in ('name', 'is_active', 'url_path', 'connection', 'transport', 'cluster_id'):
                setattr(item, attr, core_params[attr])

            for attr in ('method', 'soap_action', 'soap_version'):
                setattr(item, attr, optional_params.get(attr))

            session.add(item)
            session.commit()

            xml_item.id = item.id

            return ZATO_OK, etree.tostring(xml_item)

        except Exception as e:
            msg = 'Could not update the object, e=[{e}]'.format(e=format_exc(e))
            self.logger.error(msg)
            session.rollback()

            raise
def index(req):
    """ Renders the list of AMQP definitions returned by the
    'zato:definition.amqp.get-list' admin service for the cluster chosen through
    the 'cluster' query string parameter, along with the 'create', 'edit'
    and 'change password' forms.
    """
    zato_clusters = req.odb.query(Cluster).order_by('name').all()
    choose_cluster_form = ChooseClusterForm(zato_clusters, req.GET)
    cluster_id = req.GET.get('cluster')
    items = []

    create_form = CreateForm()
    edit_form = EditForm(prefix='edit')
    change_password_form = ChangePasswordForm()

    if cluster_id and req.method == 'GET':
        cluster = req.odb.query(Cluster).filter_by(id=cluster_id).first()

        request = Element('{%s}zato_message' % zato_namespace)
        request.data = Element('data')
        request.data.cluster_id = cluster_id

        _, response, _ = invoke_admin_service(cluster, 'zato:definition.amqp.get-list', request)

        if zato_path('data.definition_list.definition').get_from(response) is not None:
            for elem in response.data.definition_list.definition:
                def_id = elem.id.text
                name = elem.name.text
                host = elem.host.text
                port = elem.port.text
                vhost = elem.vhost.text
                username = elem.username.text
                frame_max = elem.frame_max.text
                heartbeat = elem.heartbeat.text

                # No password is passed in, hence the None.
                items.append(ConnDefAMQP(def_id, name, 'amqp', host, port, vhost, username,
                    None, frame_max, heartbeat))

    return_data = {
        'zato_clusters': zato_clusters,
        'cluster_id': cluster_id,
        'choose_cluster_form': choose_cluster_form,
        'items': items,
        'create_form': create_form,
        'edit_form': edit_form,
        'change_password_form': change_password_form,
    }

    # TODO: Should really be done by a decorator.
    if logger.isEnabledFor(TRACE1):
        logger.log(TRACE1, 'Returning render_to_response [{0}]'.format(return_data))

    return render_to_response('zato/definition/amqp.html', return_data,
        context_instance=RequestContext(req))
def _get_edit_create_message(params, prefix=''):
    """ Builds the request document shared by the 'edit' and 'create' actions.
    """
    zato_message = Element('{%s}zato_message' % zato_namespace)
    zato_message.data = Element('data')

    data = zato_message.data
    data.id = params.get('id')
    data.cluster_id = params['cluster_id']
    data.name = params[prefix + 'name']

    # A missing or empty value means the object is not active.
    data.is_active = bool(params.get(prefix + 'is_active'))

    return zato_message
def _execute(server_address, params):
    """ Asks the 'zato:scheduler.job.execute' admin service to execute
    the job whose name is given in params['name'].
    """
    log_args = (server_address, params)
    logger.info('About to submit a request for an execution of a job, server_address=[%s], params=[%s]' % log_args)

    request = Element('{%s}zato_message' % zato_namespace)
    request.job = Element('job')
    request.job.name = params['name']

    # The request is sent in its serialized form.
    invoke_admin_service(server_address, 'zato:scheduler.job.execute', etree.tostring(request))

    logger.info('Successfully submitted a request, server_address=[%s], params=[%s]' % log_args)
def handle(self, *args, **kwargs):
    """ Creates a new JMS WebSphere MQ connection definition.

    The definition's attributes are read from the 'data.' part of the payload
    given in kwargs['payload'] and converted to integers or booleans where needed.

    Returns a (ZATO_OK, XML string) pair, the XML carrying the ID of the new definition.

    Raises an Exception if a definition of the same name already exists on the
    cluster pointed to by 'cluster_id'. An error raised while saving the definition
    is logged, the session is rolled back and the error is re-raised.
    """
    with closing(self.server.odb.session()) as session:
        payload = kwargs.get('payload')
        request_params = ['cluster_id', 'name', 'host', 'port', 'queue_manager', 'channel',
            'cache_open_send_queues', 'cache_open_receive_queues', 'use_shared_connections',
            'ssl', 'ssl_cipher_spec', 'ssl_key_repository', 'needs_mcd', 'max_chars_printed']
        params = _get_params(payload, request_params, 'data.')

        name = params['name']
        params['port'] = int(params['port'])

        for key in ('cache_open_send_queues', 'cache_open_receive_queues',
                    'use_shared_connections', 'ssl', 'needs_mcd'):
            params[key] = is_boolean(params[key])

        params['max_chars_printed'] = int(params['max_chars_printed'])

        cluster_id = params['cluster_id']

        # Let's see if we already have an object of that name before committing
        # any stuff into the database. The check is against the cluster ID received
        # in the request - comparing with the Cluster.id column instead would
        # match definitions of that name on any cluster at all.
        existing_one = (session.query(ConnDefWMQ)
            .filter(ConnDefWMQ.cluster_id==cluster_id)
            .filter(ConnDefWMQ.name==name)
            .first())

        if existing_one:
            raise Exception('JMS WebSphere MQ definition [{0}] already exists on this cluster'.format(name))

        created_elem = Element('def_jms_wmq')

        try:
            # The ID is None because the definition has not been saved yet.
            def_ = ConnDefWMQ(None, name, params['host'], params['port'], params['queue_manager'],
                params['channel'], params['cache_open_send_queues'], params['cache_open_receive_queues'],
                params['use_shared_connections'], params['ssl'], params['ssl_cipher_spec'],
                params['ssl_key_repository'], params['needs_mcd'], params['max_chars_printed'],
                cluster_id)
            session.add(def_)
            session.commit()

            created_elem.id = def_.id

            return ZATO_OK, etree.tostring(created_elem)

        except Exception as e:
            msg = "Could not create a JMS WebSphere MQ definition, e=[{e}]".format(e=format_exc(e))
            self.logger.error(msg)
            session.rollback()

            raise
def index(req):
    """ Renders the list of AMQP channels returned by the
    'zato:channel.amqp.get-list' admin service for the cluster chosen through
    the 'cluster' query string parameter, along with the 'create' and 'edit' forms.
    """
    zato_clusters = req.odb.query(Cluster).order_by("name").all()
    choose_cluster_form = ChooseClusterForm(zato_clusters, req.GET)
    cluster_id = req.GET.get("cluster")
    items = []

    create_form = CreateForm()
    edit_form = EditForm(prefix="edit")

    if cluster_id and req.method == "GET":
        cluster = req.odb.query(Cluster).filter_by(id=cluster_id).first()

        # Both forms are given the very same definitions.
        def_ids = _get_def_ids(cluster)
        create_form.set_def_id(def_ids)
        edit_form.set_def_id(def_ids)

        request = Element("{%s}zato_message" % zato_namespace)
        request.data = Element("data")
        request.data.cluster_id = cluster_id

        _, response, _ = invoke_admin_service(cluster, "zato:channel.amqp.get-list", request)

        if zato_path("data.item_list.item").get_from(response) is not None:
            for elem in response.data.item_list.item:
                channel_id = elem.id.text
                name = elem.name.text
                is_active = is_boolean(elem.is_active.text)
                queue = elem.queue.text
                consumer_tag_prefix = elem.consumer_tag_prefix.text
                def_name = elem.def_name.text
                def_id = elem.def_id.text
                service_name = elem.service_name.text

                items.append(ChannelAMQP(channel_id, name, is_active, queue,
                    consumer_tag_prefix, def_id, def_name, service_name))

    return_data = dict(
        zato_clusters=zato_clusters,
        cluster_id=cluster_id,
        choose_cluster_form=choose_cluster_form,
        items=items,
        create_form=create_form,
        edit_form=edit_form,
    )

    # TODO: Should really be done by a decorator.
    if logger.isEnabledFor(TRACE1):
        logger.log(TRACE1, "Returning render_to_response [{0}]".format(return_data))

    return render_to_response("zato/channel/amqp.html", return_data,
        context_instance=RequestContext(req))
def index(req):
    """ Renders the list of JMS WebSphere MQ channels returned by the
    'zato:channel.jms_wmq.get-list' admin service for the cluster chosen through
    the 'cluster' query string parameter, along with the 'create' and 'edit' forms.
    """
    zato_clusters = req.odb.query(Cluster).order_by('name').all()
    choose_cluster_form = ChooseClusterForm(zato_clusters, req.GET)
    cluster_id = req.GET.get('cluster')
    items = []

    create_form = CreateForm()
    edit_form = EditForm(prefix='edit')

    if cluster_id and req.method == 'GET':
        cluster = req.odb.query(Cluster).filter_by(id=cluster_id).first()

        # Both forms are given the very same definitions.
        def_ids = _get_def_ids(cluster)
        create_form.set_def_id(def_ids)
        edit_form.set_def_id(def_ids)

        request = Element('{%s}zato_message' % zato_namespace)
        request.data = Element('data')
        request.data.cluster_id = cluster_id

        _, response, _ = invoke_admin_service(cluster, 'zato:channel.jms_wmq.get-list', request)

        if zato_path('data.item_list.item').get_from(response) is not None:
            for elem in response.data.item_list.item:
                channel_id = elem.id.text
                name = elem.name.text
                is_active = is_boolean(elem.is_active.text)
                queue = elem.queue.text
                def_name = elem.def_name.text
                def_id = elem.def_id.text
                service_name = elem.service_name.text

                items.append(ChannelWMQ(channel_id, name, is_active, queue, def_id,
                    def_name, service_name))

    return_data = {
        'zato_clusters': zato_clusters,
        'cluster_id': cluster_id,
        'choose_cluster_form': choose_cluster_form,
        'items': items,
        'create_form': create_form,
        'edit_form': edit_form,
    }

    # TODO: Should really be done by a decorator.
    if logger.isEnabledFor(TRACE1):
        logger.log(TRACE1, 'Returning render_to_response [{0}]'.format(return_data))

    return render_to_response('zato/channel/jms_wmq.html', return_data,
        context_instance=RequestContext(req))
def handle(self, *args, **kwargs):
    """ Updates an existing outgoing ZeroMQ connection with the values found in
    the 'data.' part of the payload and publishes the new configuration through
    self.broker_client. Returns ZATO_OK along with an XML string carrying
    the connection's ID.
    """
    with closing(self.server.odb.session()) as session:
        payload = kwargs.get('payload')

        core_params = _get_params(payload,
            ['id', 'cluster_id', 'name', 'is_active', 'address', 'socket_type'], 'data.')

        id = core_params['id']
        name = core_params['name']
        cluster_id = core_params['cluster_id']

        # Another connection on this cluster must not already use the name.
        duplicate = (session.query(OutgoingZMQ.id)
            .filter(OutgoingZMQ.cluster_id==cluster_id)
            .filter(OutgoingZMQ.name==name)
            .filter(OutgoingZMQ.id!=id)
            .first())

        if duplicate:
            raise Exception('An outgoing ZeroMQ connection [{0}] already exists on this cluster'.format(name))

        xml_item = Element('out_zmq')

        try:
            core_params['id'] = int(core_params['id'])
            core_params['is_active'] = is_boolean(core_params['is_active'])

            item = session.query(OutgoingZMQ).filter_by(id=id).one()

            # The previous name is included in the message sent out below.
            old_name = item.name

            item.name = name
            for attr in ('is_active', 'address', 'socket_type'):
                setattr(item, attr, core_params[attr])

            session.add(item)
            session.commit()

            xml_item.id = item.id

            core_params['action'] = OUTGOING.ZMQ_EDIT
            core_params['old_name'] = old_name
            self.broker_client.send_json(core_params, msg_type=MESSAGE_TYPE.TO_ZMQ_CONNECTOR_SUB)

            return ZATO_OK, etree.tostring(xml_item)

        except Exception as e:
            msg = 'Could not update the outgoing ZeroMQ connection, e=[{e}]'.format(e=format_exc(e))
            self.logger.error(msg)
            session.rollback()

            raise
def index(req):
    """ Renders the list of outgoing FTP connections returned by the
    'zato:outgoing.ftp.get-list' admin service for the cluster chosen through
    the 'cluster' query string parameter, along with the 'create', 'edit'
    and 'change password' forms.
    """
    def text_or_empty(elem):
        # An element evaluating to False is shown as an empty string.
        return elem.text if elem else ""

    zato_clusters = req.odb.query(Cluster).order_by("name").all()
    choose_cluster_form = ChooseClusterForm(zato_clusters, req.GET)
    cluster_id = req.GET.get("cluster")
    items = []

    create_form = CreateForm()
    edit_form = EditForm(prefix="edit")
    change_password_form = ChangePasswordForm()

    if cluster_id and req.method == "GET":
        cluster = req.odb.query(Cluster).filter_by(id=cluster_id).first()

        request = Element("{%s}zato_message" % zato_namespace)
        request.data = Element("data")
        request.data.cluster_id = cluster_id

        _, response, _ = invoke_admin_service(cluster, "zato:outgoing.ftp.get-list", request)

        if zato_path("data.item_list.item").get_from(response) is not None:
            for elem in response.data.item_list.item:
                conn_id = elem.id.text
                name = elem.name.text
                is_active = is_boolean(elem.is_active.text)
                host = text_or_empty(elem.host)
                user = text_or_empty(elem.user)
                acct = text_or_empty(elem.acct)
                timeout = text_or_empty(elem.timeout)
                port = text_or_empty(elem.port)
                dircache = is_boolean(elem.dircache.text)

                # No password is passed in, hence the None.
                items.append(OutgoingFTP(conn_id, name, is_active, host, user, None,
                    acct, timeout, port, dircache))

    return_data = dict(
        zato_clusters=zato_clusters,
        cluster_id=cluster_id,
        choose_cluster_form=choose_cluster_form,
        items=items,
        create_form=create_form,
        edit_form=edit_form,
        change_password_form=change_password_form,
    )

    # TODO: Should really be done by a decorator.
    if logger.isEnabledFor(TRACE1):
        logger.log(TRACE1, "Returning render_to_response [{0}]".format(return_data))

    return render_to_response("zato/outgoing/ftp.html", return_data,
        context_instance=RequestContext(req))
def handle(self, *args, **kwargs): with closing(self.server.odb.session()) as session: payload = kwargs.get('payload') request_params = ['id', 'is_active', 'name', 'username', 'password_type', 'reject_empty_nonce_ts', 'reject_stale_username', 'expiry_limit', 'nonce_freshness', 'cluster_id'] new_params = _get_params(payload, request_params, 'data.') def_id = new_params['id'] name = new_params['name'] cluster_id = new_params['cluster_id'] existing_one = session.query(WSSDefinition).\ filter(Cluster.id==cluster_id).\ filter(WSSDefinition.name==name).\ filter(WSSDefinition.id != def_id).\ first() if existing_one: raise Exception('WS-Security definition [{0}] already exists on this cluster'.format(name)) wss_elem = Element('wss') try: wss = session.query(WSSDefinition).filter_by(id=def_id).one() old_name = wss.name wss.name = name wss.is_active = new_params['is_active'] wss.username = new_params['username'] wss.password_type = new_params['password_type'] wss.reject_empty_nonce_ts = new_params['reject_empty_nonce_ts'] wss.reject_stale_username = new_params['reject_stale_username'] wss.expiry_limit = new_params['expiry_limit'] wss.nonce_freshness = new_params['nonce_freshness'] session.add(wss) session.commit() wss_elem.id = wss.id except Exception, e: msg = "Could not update the WS-Security definition, e=[{e}]".format(e=format_exc(e)) self.logger.error(msg) session.rollback() raise else:
def handle(self, *args, **kwargs):
    """ Updates an existing outgoing S3 connection with the values found in the
    'data.' part of the payload. Returns ZATO_OK along with an XML string
    carrying the connection's ID.
    """
    with closing(self.server.odb.session()) as session:
        payload = kwargs.get('payload')

        core_params = _get_params(payload,
            ['id', 'cluster_id', 'name', 'is_active', 'prefix', 'separator', 'key_sync_timeout'],
            'data.')

        id = core_params['id']
        name = core_params['name']
        cluster_id = core_params['cluster_id']

        # Another connection on this cluster must not already use the name.
        duplicate = (session.query(OutgoingS3.id)
            .filter(OutgoingS3.cluster_id==cluster_id)
            .filter(OutgoingS3.name==name)
            .filter(OutgoingS3.id!=id)
            .first())

        if duplicate:
            raise Exception('An outgoing S3 connection [{0}] already exists on this cluster'.format(name))

        xml_item = Element('out_s3')

        try:
            core_params['id'] = int(core_params['id'])
            core_params['is_active'] = is_boolean(core_params['is_active'])

            item = session.query(OutgoingS3).filter_by(id=id).one()

            # Read but not used anywhere further in this method.
            old_name = item.name

            item.name = name
            for attr in ('is_active', 'prefix', 'separator', 'key_sync_timeout'):
                setattr(item, attr, core_params[attr])

            session.add(item)
            session.commit()

            xml_item.id = item.id

            return ZATO_OK, etree.tostring(xml_item)

        except Exception as e:
            msg = 'Could not update the outgoing S3 connection, e=[{e}]'.format(e=format_exc(e))
            self.logger.error(msg)
            session.rollback()

            raise
def handle(self, *args, **kwargs):
    """ Creates a new AMQP connection definition.

    The definition's attributes are read from the 'data.' part of the payload
    given in kwargs['payload'], with 'port', 'frame_max' and 'heartbeat' converted
    to integers. The password is not taken from the request, a random one
    (uuid4().hex) is assigned instead.

    Returns a (ZATO_OK, XML string) pair, the XML carrying the ID of the new definition.

    Raises an Exception if an AMQP definition of the same name already exists on the
    cluster pointed to by 'cluster_id'. An error raised while saving the definition
    is logged, the session is rolled back and the error is re-raised.
    """
    with closing(self.server.odb.session()) as session:
        payload = kwargs.get('payload')
        request_params = ['cluster_id', 'name', 'host', 'port', 'vhost', 'username',
            'frame_max', 'heartbeat']
        params = _get_params(payload, request_params, 'data.')

        name = params['name']

        for key in ('port', 'frame_max', 'heartbeat'):
            params[key] = int(params[key])

        cluster_id = params['cluster_id']

        password = uuid4().hex
        params['password'] = password

        # Let's see if we already have an account of that name before committing
        # any stuff into the database. The check is against the cluster ID received
        # in the request - comparing with the Cluster.id column instead would
        # match definitions of that name on any cluster at all.
        existing_one = (session.query(ConnDefAMQP)
            .filter(ConnDefAMQP.cluster_id==cluster_id)
            .filter(ConnDefAMQP.def_type=='amqp')
            .filter(ConnDefAMQP.name==name)
            .first())

        if existing_one:
            raise Exception('AMQP definition [{0}] already exists on this cluster'.format(name))

        created_elem = Element('def_amqp')

        try:
            # The ID is None because the definition has not been saved yet.
            def_ = ConnDefAMQP(None, name, 'amqp', params['host'], params['port'], params['vhost'],
                params['username'], password, params['frame_max'], params['heartbeat'],
                cluster_id)
            session.add(def_)
            session.commit()

            created_elem.id = def_.id

            return ZATO_OK, etree.tostring(created_elem)

        except Exception as e:
            msg = "Could not create an AMQP definition, e=[{e}]".format(e=format_exc(e))
            self.logger.error(msg)
            session.rollback()

            raise
def _get_edit_create_message(params, prefix=""):
    """ Builds the request document shared by the 'edit' and 'create' actions.
    """
    zato_message = Element("{%s}zato_message" % zato_namespace)
    zato_message.data = Element("data")

    data = zato_message.data
    data.id = params.get("id")
    data.cluster_id = params["cluster_id"]
    data.name = params[prefix + "name"]

    # A missing or empty value means the object is not active.
    data.is_active = bool(params.get(prefix + "is_active"))

    data.def_id = params[prefix + "def_id"]
    data.queue = params[prefix + "queue"]
    data.consumer_tag_prefix = params[prefix + "consumer_tag_prefix"]
    data.service = params[prefix + "service"]

    return zato_message
def _edit_create_response(cluster, verb, id, name, def_id):
    """ Returns an HttpResponse with a JSON-serialized confirmation message
    along with the name of the definition looked up by def_id through the
    'zato:definition.amqp.get-by-id' admin service.
    """
    request = Element("{%s}zato_message" % zato_namespace)
    request.data = Element("data")
    request.data.id = def_id

    _, response, _ = invoke_admin_service(cluster, "zato:definition.amqp.get-by-id", request)

    message = "Successfully {0} the AMQP channel [{1}]".format(verb, name)
    def_name = response.data.definition.name.text

    return_data = {"id": id, "message": message, "def_name": def_name}

    return HttpResponse(dumps(return_data), mimetype="application/javascript")
def _edit_create_response(cluster, verb, id, name, cluster_id, def_id):
    """ Returns an HttpResponse with a JSON-serialized confirmation message
    along with the name of the definition looked up by def_id and cluster_id
    through the 'zato:definition.jms_wmq.get-by-id' admin service.
    """
    request = Element('{%s}zato_message' % zato_namespace)
    request.data = Element('data')

    data = request.data
    data.id = def_id
    data.cluster_id = cluster_id

    _, response, _ = invoke_admin_service(cluster, 'zato:definition.jms_wmq.get-by-id', request)

    message = 'Successfully {0} the JMS WebSphere MQ channel [{1}]'.format(verb, name)
    def_name = response.data.definition.name.text

    return_data = {'id': id, 'message': message, 'def_name': def_name}

    return HttpResponse(dumps(return_data), mimetype='application/javascript')
def _get_def_ids(cluster):
    """ Returns a dictionary mapping IDs of the cluster's AMQP definitions
    to their names, as returned by the 'zato:definition.amqp.get-list'
    admin service. The dictionary is empty if the response has no definitions.
    """
    request = Element('{%s}zato_message' % zato_namespace)
    request.data = Element('data')
    request.data.cluster_id = cluster.id

    _, response, _ = invoke_admin_service(cluster, 'zato:definition.amqp.get-list', request)

    # Nothing to iterate over if the path doesn't exist in the response
    if zato_path('data.definition_list.definition').get_from(response) is None:
        return {}

    definitions = response.data.definition_list.definition
    return dict((elem.id.text, elem.name.text) for elem in definitions)
def _edit_create_response(cluster, verb, id, name, delivery_mode_text, def_id):
    """ Returns an HTTP response confirming that an outgoing AMQP connection
    has been created or updated.

    The name of the connection's definition is fetched through the
    'zato:definition.amqp.get-by-id' admin service and returned as a JSON
    document along with the connection's ID, delivery_mode_text and a message
    built out of verb and name.
    """
    request = Element('{%s}zato_message' % zato_namespace)
    request.data = Element('data')
    request.data.id = def_id

    _, response, _ = invoke_admin_service(cluster, 'zato:definition.amqp.get-by-id', request)

    message = 'Successfully {0} the outgoing AMQP connection [{1}]'.format(verb, name)
    def_name = response.data.definition.name.text

    body = dumps({
        'id': id,
        'message': message,
        'delivery_mode_text': delivery_mode_text,
        'def_name': def_name,
    })
    return HttpResponse(body, mimetype='application/javascript')
def _get_create_edit_message(cluster, params, form_prefix=""):
    """ Builds the request document common to the 'edit' and 'create' actions,
    no matter what the type of a job is.

    The name is mandatory, the other fields read from params default to an empty
    string (or False for 'is_active') if they are missing. All of the fields are
    looked up under their names preceded by form_prefix.
    """
    message = Element('{%s}zato_message' % zato_namespace)
    message.data = Element('data')

    # Fetched back from the message so that it is the element attached
    # to the message that gets populated below.
    data = message.data

    data.name = params[form_prefix + 'name']
    data.cluster_id = cluster.id
    data.id = params.get(form_prefix + 'id', '')
    data.is_active = bool(params.get(form_prefix + 'is_active'))

    # Optional fields - the order decides the order of child elements.
    for field in ('service', 'extra', 'start_date'):
        setattr(data, field, params.get(form_prefix + field, ''))

    return message
def index(req):
    """ Renders the page listing technical accounts.

    The accounts are fetched through the 'zato:security.tech-account.get-list'
    admin service only if this is a GET request with a 'cluster' parameter,
    otherwise the list of accounts passed to the template is empty.
    """
    zato_clusters = req.odb.query(Cluster).order_by('name').all()
    choose_cluster_form = ChooseClusterForm(zato_clusters, req.GET)
    cluster_id = req.GET.get('cluster')
    items = []

    create_form = CreateForm()
    edit_form = EditForm(prefix='edit')
    change_password_form = ChangePasswordForm()

    if cluster_id and req.method == 'GET':
        cluster = req.odb.query(Cluster).filter_by(id=cluster_id).first()

        request = Element('{%s}zato_message' % zato_namespace)
        request.data = Element('data')
        request.data.cluster_id = cluster_id

        _, response, _ = invoke_admin_service(cluster, 'zato:security.tech-account.get-list', request)

        if zato_path('data.definition_list.definition').get_from(response) is not None:
            for elem in response.data.definition_list.definition:
                account = TechnicalAccount(elem.id.text, elem.name.text,
                    is_active=is_boolean(elem.is_active.text))
                items.append(account)

    return_data = {
        'zato_clusters': zato_clusters,
        'cluster_id': cluster_id,
        'choose_cluster_form': choose_cluster_form,
        'items': items,
        'create_form': create_form,
        'edit_form': edit_form,
        'change_password_form': change_password_form,
    }

    # TODO: Should really be done by a decorator.
    if logger.isEnabledFor(TRACE1):
        logger.log(TRACE1, 'Returning render_to_response [{0}]'.format(return_data))

    return render_to_response('zato/security/tech-account.html', return_data,
        context_instance=RequestContext(req))
def _process_data(self, data: Element) -> List[TestGroupReport]:
    """
    Builds one test suite report for each child of ``data``.

    Only ``testcase`` children of a suite are turned into testcase reports,
    anything else under a suite is left out (the original note on this method
    mentions entries for skipped testcases). A testcase with no children is
    reported as passed, otherwise each of its children becomes an assertion
    which fails if the child is a ``failure`` or an ``error``. Suites with no
    testcase reports are not returned.

    :param data: root element of the parsed XML output
    :return: list of test suite reports
    """
    failing_tags = ("failure", "error")
    reports = []

    for suite in data.getchildren():
        suite_name = suite.attrib["name"]
        suite_report = TestGroupReport(
            name=suite_name,
            uid=suite_name,
            category=ReportCategories.TESTSUITE,
        )

        for testcase in suite.getchildren():
            if testcase.tag != "testcase":
                continue

            classname = testcase.attrib["classname"]
            name = testcase.attrib["name"]
            short_classname = classname.split(".")[-1]

            testcase_report = TestCaseReport(
                name="{}::{}".format(short_classname, name),
                uid="{}::{}".format(classname.replace(".", "::"), name),
            )

            entries = testcase.getchildren()
            if entries:
                for entry in entries:
                    assertion_obj = RawAssertion(
                        description=entry.tag,
                        content=entry.text,
                        passed=entry.tag not in failing_tags,
                    )
                    testcase_report.append(registry.serialize(assertion_obj))
            else:
                assertion_obj = RawAssertion(
                    description="Passed",
                    content="Testcase {} passed".format(name),
                    passed=True,
                )
                testcase_report.append(registry.serialize(assertion_obj))

            testcase_report.runtime_status = RuntimeStatus.FINISHED
            suite_report.append(testcase_report)

        if len(suite_report) > 0:
            reports.append(suite_report)

    return reports
def _process_data(self, data: Element) -> List[TestGroupReport]:
    """
    Turns data read from the source file into test suite reports.

    Only ``testcase`` children of a suite are turned into testcase reports.
    A testcase with no children is reported as passed, otherwise each of its
    children becomes an assertion which fails if the child is a ``failure``
    or an ``error``. Suites with no testcase reports are not returned.

    :param data: raw data as read by the importer
    :return: list of test suite reports
    """
    # NOTE: XML output contains skipped testcases which are ignored.
    failing_tags = ("failure", "error")
    reports = []

    for suite in data.getchildren():
        suite_report = TestGroupReport(
            name=suite.attrib["name"],
            category=ReportCategories.TESTSUITE,
        )

        for testcase in suite.getchildren():
            if testcase.tag != "testcase":
                continue

            classname = testcase.attrib["classname"]
            name = testcase.attrib["name"]
            short_classname = classname.split(".")[-1]

            testcase_report = TestCaseReport(
                name="{}::{}".format(short_classname, name),
            )

            entries = testcase.getchildren()
            if entries:
                for entry in entries:
                    assertion_obj = RawAssertion(
                        description=entry.tag,
                        content=entry.text,
                        passed=entry.tag not in failing_tags,
                    )
                    testcase_report.append(registry.serialize(assertion_obj))
            else:
                assertion_obj = RawAssertion(
                    description="Passed",
                    content=f"Testcase {name} passed",
                    passed=True,
                )
                testcase_report.append(registry.serialize(assertion_obj))

            testcase_report.runtime_status = RuntimeStatus.FINISHED
            suite_report.append(testcase_report)

        if len(suite_report) > 0:
            reports.append(suite_report)

    return reports
def __init__(self, xml_input=None, annotations=None):
    """
    Sets up an empty corpus along with a stanza pipeline and, optionally,
    loads documents from ``xml_input``.

    :param xml_input: path to an XML file or to a folder to read
        documents from, nothing is read if it is not given
    :param annotations: object whose ``documents`` attribute is kept
        as ``self.annotations``
    :raises FileNotFoundError: if ``xml_input`` is given but does not exist
    """
    super().__init__("corpus", "document")
    self.corpus = Element("corpus")
    self.url_indices = []
    self.has_terms_locations = False

    processors = {
        "tokenize": "gum",
        "ner": "default",
        "lemma": "gum",
        "pos": "gum",
        "depparse": "gum"
    }
    self.nlp = stanza.Pipeline("en",
                               processors=processors,
                               verbose=False,
                               tokenize_no_ssplit=True)

    self.annotations = annotations.documents if annotations else None

    # Nothing to load
    if not xml_input:
        return

    if not os.path.exists(xml_input):
        raise FileNotFoundError(
            f"{xml_input} not found. Check the path again.")

    if os.path.isfile(xml_input):
        self.read_from_xml(xml_input)
    else:
        self.read_from_folder(xml_input)
def copyNode(node, children=False, parent=False):
    """ Copy an XML Node

    The copy has the tag and attributes of the original node and declares
    the TEI namespace as its default one.

    :param node: Etree Node
    :param children: Copy the node's text and children as well if set to True
    :param parent: Create the copied node as a subelement of parent if given
    :return: New Element
    """
    tei_nsmap = {None: "http://www.tei-c.org/ns/1.0"}

    if parent is False:
        element = Element(node.tag, attrib=node.attrib, nsmap=tei_nsmap)
    else:
        element = SubElement(parent, node.tag, attrib=node.attrib, nsmap=tei_nsmap)

    if not children:
        return element

    if node.text:
        element._setText(node.text)
    for child in xmliter(node):
        element.append(copy(child))

    return element
def get_xml_dict(self, _dict, name):
    """ Returns an element called name with one 'item' child for each entry
    of _dict. Each 'item' has a 'key' and a 'value' child holding, respectively,
    the entry's key and value.
    """
    container = Element(name)

    for entry_key, entry_value in _dict.items():
        item = Element('item')

        # Placeholders first, actual contents next
        item.key = Element('key')
        item.value = Element('value')
        item.key[-1] = entry_key
        item.value[-1] = entry_value

        container.append(item)

    return container
def add_document(self,
                 url,
                 title,
                 categories,
                 published_time,
                 content,
                 author=None,
                 topics=None,
                 links=None,
                 terms=None,
                 document_id=None):
    """
    Appends a new ``document`` element to the corpus.

    A document whose URL has already been added is ignored. If no
    ``document_id`` is given, the last 6 characters of the MD5 hex digest
    of the title are used.

    :param terms: mapping of a term to an iterable of its (begin, end)
        locations
    :raises KeyError: if ``url`` is None or empty
    """
    if url is None or len(url) == 0:
        raise KeyError("'url' is mandatory")
    if url in self.url_indices:
        log.info(f"Ignoring duplicate URL={url}")
        return

    doc = Element("document")
    title = Corpus.unicodify(title)

    if document_id is None or len(document_id) == 0:
        doc.document_id = md5(title.encode("utf-8")).hexdigest()[-6:]
    else:
        doc.document_id = document_id
    doc.url = url
    doc.title = title
    doc.author = author
    doc.published_time = published_time

    # handle lists - each wrapper element is always created, its items
    # are added only if there are any
    simple_lists = (
        ("categories", "category", categories),
        ("topics", "topic", topics),
        ("links", "link", links),
    )
    for wrapper_tag, item_tag, values in simple_lists:
        setattr(doc, wrapper_tag, Element(wrapper_tag))
        if values:
            setattr(getattr(doc, wrapper_tag), item_tag, values)

    doc.content = Element("content")
    if content:
        doc.content.p = [Corpus.unicodify(p) for p in content if p]

    # handle terms
    doc.terms = Element("terms")
    term_elements = []
    if terms:
        for word in terms:
            term_element = Element("term")
            term_element.word = word
            term_element.locations = Element("locations")

            location_elements = []
            for span in terms[word]:
                location_element = Element("location")
                location_element.begin, location_element.end = span
                location_elements.append(location_element)

            term_element.locations.location = location_elements
            term_elements.append(term_element)
    doc.terms.term = term_elements

    self.corpus.append(doc)
    self.url_indices.append(url)
def new(tag, **extra):
    """ Returns a new Element whose tag is the result of passing tag to qn.
    Any extra keyword arguments are handed over to Element as they are.
    """
    qualified_tag = qn(tag)
    return Element(qualified_tag, **extra)
def convert(self, param, param_name, value, has_simple_io_config, is_xml, date_time_format=None):
    """ Converts the value of a single parameter according to the parameter's
    type or name and returns the result.

    param - the parameter's declaration, its type decides which conversion is used
    param_name - the parameter's name, used to find booleans and integers by name
    value - the value to convert
    has_simple_io_config - name-based conversion to integers is attempted only if it is True
    is_xml - whether lists are read from or turned into XML elements
    date_time_format - if given, datetime objects are formatted with it

    Any error is logged and re-raised as a ZatoException.
    """
    try:
        # Booleans are recognized either by their names' prefixes or by their types
        if any(param_name.startswith(prefix) for prefix in self.bool_parameter_prefixes) or isinstance(param, Boolean):
            value = asbool(value or None) # value can be an empty string and asbool chokes on that

        if value and value is not None: # Can be a 0
            if isinstance(param, Boolean):
                value = asbool(value)

            elif isinstance(param, CSV):
                value = value.split(',')

            elif isinstance(param, List):
                if is_xml:
                    # We are parsing XML to create a SIO request
                    if isinstance(value, EtreeElement):
                        return [elem.text for elem in value.getchildren()]

                    # We are producing XML out of an SIO response
                    else:
                        wrapper = Element(param_name)
                        for item_value in value:
                            # An empty 'item' is appended first and only then is its value set
                            xml_item = Element('item')
                            wrapper.append(xml_item)
                            wrapper.item[-1] = item_value
                        return wrapper

                # This is a JSON list
                return value

            elif isinstance(param, Integer):
                value = int(value)

            elif isinstance(param, Unicode):
                value = unicode(value)

            elif isinstance(param, UTC):
                value = value.replace('+00:00', '')

            else:
                # No explicit type - integers may still be recognized by their names
                if value and value != ZATO_NONE and has_simple_io_config:
                    if any(param_name==elem for elem in self.int_parameters) or \
                       any(param_name.endswith(suffix) for suffix in self.int_parameter_suffixes):
                        value = int(value)

            if date_time_format and isinstance(value, datetime):
                value = value.strftime(date_time_format)

        # An empty CSV value always becomes an empty list
        if isinstance(param, CSV) and not value:
            value = []

        return value

    except Exception, e:
        msg = 'Conversion error, param:[{}], param_name:[{}], repr(value):[{}], e:[{}]'.format(
            param, param_name, repr(value), format_exc(e))
        logger.error(msg)

        raise ZatoException(msg=msg)
def _process_data(self, data: Element) -> List[TestGroupReport]:
    """
    Turns data read from the source file into test suite reports.

    ``data`` is either a ``testsuites`` element, each child of which is
    a suite, or a single suite. Only ``testcase`` children of a suite are
    turned into testcase reports, each holding one assertion. Suites with
    no testcase reports are not returned.

    :param data: raw data as read by the importer
    :return: list of test suite reports
    """
    failing_tags = ("failure", "error")
    reports = []

    if data.tag == "testsuites":
        suites = data.getchildren()
    else:
        suites = [data]

    for suite in suites:
        suite_report = TestGroupReport(
            name=suite.attrib.get("name"),
            category=ReportCategories.TESTSUITE,
        )

        for element in suite.getchildren():
            # Elements like properties, system-out, and system-err are
            # skipped.
            if element.tag != "testcase":
                continue

            case_class = element.attrib.get("classname")
            case_name = element.attrib.get("name")

            if case_class is not None:
                case_report_name = (
                    f"{case_class.split('.')[-1]}::{case_name}")
            elif case_name == suite_report.name:
                path = os.path.normpath(case_name)
                suite_report.name = path.rpartition(os.sep)[-1]
                # We use the name "Execution" to avoid collision of
                # test suite and test case.
                case_report_name = "Execution"
            else:
                case_report_name = case_name

            case_report = TestCaseReport(name=case_report_name)

            children = element.getchildren()
            if children:
                # Upon a failure, there will be a single testcase which is
                # the first child.
                content, tag, desc = "", None, None
                for child in children:
                    # Only the tag of the first child decides the outcome
                    tag = tag or child.tag
                    msg = child.attrib.get("message") or child.text
                    # The first failure/error child provides the description,
                    # everything else, for instance system-err/out, is added
                    # to the content step by step.
                    if not desc and child.tag in failing_tags:
                        desc = msg
                    else:
                        content += f"[{child.tag}]\n{msg}\n"
                assertion = RawAssertion(
                    description=desc,
                    content=content,
                    passed=tag not in failing_tags,
                )
            else:
                assertion = RawAssertion(
                    description="Passed",
                    content=f"Testcase {case_name} passed",
                    passed=True,
                )
            case_report.append(registry.serialize(assertion))

            # NOTE(review): the status is set on the suite report, before the
            # case report is appended to it - confirm it was not meant to be
            # set on case_report instead.
            suite_report.runtime_status = RuntimeStatus.FINISHED
            suite_report.append(case_report)

        if len(suite_report):
            reports.append(suite_report)

    return reports
def getvalue(self, serialize=True):
    """ Gets the actual payload's value converted to a string representing
    either XML or JSON.

    If serialize is False, the top-level object - an XML element or a dictionary -
    is returned as it is instead of being turned into a string.
    """
    # The container for the output - an element for XML, a list or a dictionary otherwise
    if self.zato_is_xml:
        if self.zato_output_repeated:
            value = Element('item_list')
        else:
            value = Element('item')
    else:
        if self.zato_output_repeated:
            value = []
        else:
            value = {}

    if self.zato_output_repeated:
        output = self.zato_output
    else:
        # A single item is built out of these attributes of self that are also
        # listed in zato_all_attrs
        output = set(dir(self)) & self.zato_all_attrs
        output = [dict((name, getattr(self, name)) for name in output)]

    if output:

        # All elements must be of the same type so it's OK to do it
        is_sa_namedtuple = isinstance(output[0], KeyedTuple)

        for item in output:
            if self.zato_is_xml:
                out_item = Element('item')
            else:
                out_item = {}

            for is_required, name in chain(self.zato_required, self.zato_optional):
                leave_as_is = isinstance(name, AsIs)
                elem_value = self._getvalue(name, item, is_sa_namedtuple, is_required, leave_as_is)

                # From now on only the plain name is needed
                if isinstance(name, ForceType):
                    name = name.name

                # Byte strings are decoded as UTF-8, unicode objects are left as they are
                if isinstance(elem_value, basestring):
                    elem_value = elem_value if isinstance(elem_value, unicode) else elem_value.decode('utf-8')

                if self.zato_is_xml:
                    setattr(out_item, name, elem_value)
                else:
                    out_item[name] = elem_value

            if self.zato_output_repeated:
                value.append(out_item)
            else:
                value = out_item

    if self.zato_is_xml:
        # The response element wraps a zato_env element with the CID and result
        # and is followed by the actual value
        em = ElementMaker(annotate=False, namespace=self.namespace, nsmap={None: self.namespace})
        zato_env = em.zato_env(em.cid(self.zato_cid), em.result(ZATO_OK))
        top = getattr(em, self.response_elem)(zato_env)
        top.append(value)
    else:
        top = {self.response_elem: value}

        # Search metadata, if there is any, is returned under the '_meta' key
        search = self.zato_meta.get('search')
        if search:
            top['_meta'] = search

    if serialize:
        if self.zato_is_xml:
            deannotate(top, cleanup_namespaces=True)
            return etree.tostring(top)
        else:
            return dumps(top)
    else:
        return top
class Corpus(XMLBase):
    """
    A collection of ``document`` elements kept under a ``corpus`` root
    element, along with helpers to read, sample, split, annotate and export
    the documents.

    Iteration, indexing, ``len``, ``iter_documents`` and ``get_root`` are
    not defined in this class - presumably they are provided by ``XMLBase``.
    """

    def __init__(self, xml_input=None, annotations=None):
        """
        Sets up an empty corpus along with a stanza pipeline and,
        optionally, loads documents from ``xml_input``.

        :param xml_input: path to an XML file or to a folder to read
            documents from, nothing is read if it is not given
        :param annotations: object whose ``documents`` attribute is kept
            as ``self.annotations``
        :raises FileNotFoundError: if ``xml_input`` is given but does not
            exist
        """
        super().__init__("corpus", "document")
        self.corpus = Element("corpus")
        self.url_indices = []
        self.has_terms_locations = False
        self.nlp = stanza.Pipeline("en",
                                   processors={
                                       "tokenize": "gum",
                                       "ner": "default",
                                       "lemma": "gum",
                                       "pos": "gum",
                                       "depparse": "gum"
                                   },
                                   verbose=False,
                                   tokenize_no_ssplit=True)
        self.annotations = annotations.documents if annotations else None
        if xml_input:
            if not os.path.exists(xml_input):
                raise FileNotFoundError(
                    f"{xml_input} not found. Check the path again.")
            elif os.path.isfile(xml_input):
                self.read_from_xml(xml_input)
            else:
                self.read_from_folder(xml_input)

    @staticmethod
    def unicodify(text):
        """
        Returns ``text`` with curly quotes replaced by straight ones and
        newlines replaced by spaces.
        """
        return text.replace("“", "\"")\
                   .replace("”", "\"")\
                   .replace("’", "'")\
                   .replace("‘", "'") \
                   .replace("\n", " ")

    def add_document(self,
                     url,
                     title,
                     categories,
                     published_time,
                     content,
                     author=None,
                     topics=None,
                     links=None,
                     terms=None,
                     document_id=None):
        """
        Appends a new ``document`` element to the corpus.

        A document whose URL has already been added is ignored. If no
        ``document_id`` is given, the last 6 characters of the MD5 hex
        digest of the title are used.

        :param terms: mapping of a term to an iterable of its (begin, end)
            locations
        :raises KeyError: if ``url`` is None or empty
        """
        if url is None or len(url) == 0:
            raise KeyError("'url' is mandatory")
        elif url in self.url_indices:
            log.info(f"Ignoring duplicate URL={url}")
            return
        new_document = Element("document")
        title = Corpus.unicodify(title)
        new_document.document_id = md5(title.encode("utf-8")).hexdigest()[-6:] if document_id is None or \
            len(document_id) == 0 else document_id
        new_document.url = url
        new_document.title = title
        new_document.author = author
        new_document.published_time = published_time
        # handle lists
        new_document.categories = Element("categories")
        if categories:
            new_document.categories.category = categories
        new_document.topics = Element("topics")
        if topics:
            new_document.topics.topic = topics
        new_document.links = Element("links")
        if links:
            new_document.links.link = links
        new_document.content = Element("content")
        if content:
            new_document.content.p = [
                Corpus.unicodify(p) for p in content if p
            ]
        # handle terms
        new_document.terms = Element("terms")
        terms_list = []
        if terms:
            for term in terms:
                term_elmt = Element("term")
                term_elmt.word = term
                term_elmt.locations = Element("locations")
                locations_list = []
                for location in terms[term]:
                    location_elmt = Element("location")
                    location_elmt.begin, location_elmt.end = location
                    locations_list.append(location_elmt)
                term_elmt.locations.location = locations_list
                terms_list.append(term_elmt)
        new_document.terms.term = terms_list
        self.corpus.append(new_document)
        self.url_indices.append(url)

    def add_document_from_element(self, document_elmt):
        """
        Adds a copy of an existing ``document`` element to the corpus by
        extracting its fields and passing them to ``add_document``.
        """
        # construct terms
        terms_list = {}
        if document_elmt.terms.countchildren() > 0:
            for term in document_elmt.terms.term:
                if term.locations.countchildren() > 0:
                    terms_list[term.word.text] = [
                        (loc.begin.text, loc.end.text)
                        for loc in term.locations.location
                    ]
        self.add_document(
            document_elmt.url.text,
            document_elmt.title.text,
            [category.text for category in document_elmt.categories.category]
            if document_elmt.categories.countchildren() > 0 else None,
            document_elmt.published_time.text,
            [p.text for p in document_elmt.content.p]
            if document_elmt.content.countchildren() > 0 else None,
            document_elmt.author.text,
            [topic.text for topic in document_elmt.topics.topic]
            if document_elmt.topics.countchildren() > 0 else None,
            [link.text for link in document_elmt.links.link]
            if document_elmt.links.countchildren() > 0 else None,
            terms_list if len(terms_list) > 0 else None,
            document_elmt.document_id,
        )

    def filter_empty(self):
        """
        Removes documents with no content from the corpus and returns
        the corpus itself.
        """
        empty_document_list = []
        for document in self.iter_documents():
            if document.content.countchildren() == 0:
                empty_document_list.append(document)
        # Removed in a separate pass so the corpus is not modified while
        # it is being iterated over
        for document in empty_document_list:
            self.get_root().remove(document)
        return self

    def read_from_xml(self, input_path):
        """
        Adds documents read from the XML file at ``input_path``.

        Terms come from ``self.annotations`` if the document's ID is found
        there, otherwise from the document's own ``terms`` element. If
        annotations are in use, documents with neither are not added.
        """
        composites = ["terms", "topics", "content", "links", "categories"]
        corpus_etree = etree.parse(input_path)
        corpus_root = corpus_etree.getroot()
        for document in corpus_root:
            new_document_attrs = {}
            annotated_terms = {}
            contain_terms_elmt = False
            for document_elmt in document:
                if document_elmt.tag == "category":
                    # A single element holding semicolon-separated categories
                    new_document_attrs[
                        "categories"] = document_elmt.text.split(
                            ";") if document_elmt.text else []
                elif document_elmt.tag == "terms":
                    # the document has existing annotations
                    for term_elmt in document_elmt:
                        word = None
                        locations = []
                        for item_elmt in term_elmt:
                            if item_elmt.tag == "word":
                                word = item_elmt.text
                            elif item_elmt.tag == "locations":
                                begin, end = None, None
                                for loc_elmt in item_elmt:
                                    for point_elmt in loc_elmt:
                                        if point_elmt.tag == "begin":
                                            begin = int(point_elmt.text)
                                        elif point_elmt.tag == "end":
                                            end = int(point_elmt.text)
                                    locations.append((begin, end))
                        annotated_terms[word] = locations
                    contain_terms_elmt = True
                elif document_elmt.tag in composites:
                    new_document_attrs[document_elmt.tag] = [
                        item.text for item in document_elmt
                    ]
                else:
                    new_document_attrs[document_elmt.tag] = document_elmt.text
            if self.annotations and new_document_attrs[
                    "document_id"] in self.annotations:
                # annotation file
                new_document_attrs["terms"] = self.annotations[
                    new_document_attrs["document_id"]]
                self.add_document(**new_document_attrs)
                self.has_terms_locations = True  # at least 1 with terms
            elif contain_terms_elmt:
                # there is no annotation file but terms element exist
                new_document_attrs["terms"] = annotated_terms
                self.add_document(**new_document_attrs)
                self.has_terms_locations = True
            elif self.annotations is None:
                # there is no annotation file and no terms element
                self.add_document(**new_document_attrs)

    def read_from_folder(self, root_folder):
        """
        Reads every ``.xml`` file found directly in each immediate
        subfolder of ``root_folder``.
        """
        in_folders = [
            folder for folder in os.listdir(root_folder)
            if os.path.isdir(os.path.join(root_folder, folder))
        ]
        for in_folder in in_folders:
            xml_files = [
                f for f in os.listdir(os.path.join(root_folder, in_folder))
                if f.endswith(".xml")
            ]
            for xml_file in xml_files:
                self.read_from_xml(
                    os.path.join(root_folder, in_folder, xml_file))

    def get_document_ids(self):
        """
        Returns the ``document_id`` element of each document.
        """
        return [document.document_id for document in self.iter_documents()]

    def get_sample(self, n, excluded_ids=None):
        """
        Returns a new corpus with up to ``n`` randomly chosen documents,
        none of whose IDs is in ``excluded_ids``.
        """
        sample_corpus = Corpus()
        indices = list(range(len(self)))
        random.shuffle(indices)
        acquired_count = 0
        i = 0
        while acquired_count < n and i < len(indices):
            document = self[indices[i]]
            i += 1
            document_id = document.document_id.text
            if excluded_ids and document_id in excluded_ids:
                continue
            sample_corpus.add_document_from_element(document)
            acquired_count += 1
        return sample_corpus

    def get_more_sample(self, n, json1_filename):
        """
        Returns a sample of up to ``n`` documents which are not yet in the
        JSON lines file ``json1_filename``.

        IDs of existing documents are derived from the part of each line's
        ``text`` that precedes the first ``|``.
        """
        existing_ids = []
        with open(json1_filename, "r") as json1_file:
            lines = json1_file.readlines()
            for line in lines:
                json_news = json.loads(line)
                current_id = md5(json_news["text"].split("|")[0].encode(
                    "utf-8")).hexdigest()[-6:]
                existing_ids.append(current_id)
        return self.get_sample(n, existing_ids)

    def get_documents_by_ids(self, ids):
        """
        Returns a new corpus with the documents whose IDs are in ``ids``.
        """
        subset_corpus = Corpus()
        for document in self:
            if document.document_id in ids:
                subset_corpus.add_document_from_element(document)
        return subset_corpus

    def get_documents_by_urls(self, urls):
        """
        Returns a new corpus with the documents whose URLs are in ``urls``.
        """
        subset_corpus = Corpus()
        for document in self:
            if document.url.text in urls:
                subset_corpus.add_document_from_element(document)
        return subset_corpus

    def get_annotated_terms_as_csv(self, csv_path):
        """
        Writes a CSV file with one row per document, holding the document's
        ID and its lower-cased terms joined with ``|``.

        Documents with no terms get an empty ``terms`` field.

        :return: True
        """
        # newline="" is what the csv module expects of files it writes to,
        # otherwise rows may end with an extra \r on some platforms
        with open(csv_path, "w", newline="") as csv_file:
            fieldnames = ["document_id", "terms"]
            csv_writer = DictWriter(csv_file, fieldnames)
            csv_writer.writeheader()
            for doc in self.iter_documents():
                document_id = doc.document_id.text
                # The terms element is always there but it has no term
                # children if the document was added without terms
                if doc.terms.countchildren() > 0:
                    all_terms = [
                        term.word.text.lower() for term in doc.terms.term
                    ]
                else:
                    all_terms = []
                csv_writer.writerow({
                    "document_id": document_id,
                    "terms": "|".join(all_terms)
                })
        return True

    def train_test_split(self, test_size, random_seed=1337):
        """
        Shuffles the documents with the given seed and splits them into
        two new corpora.

        :return: a (dev_c, test_c) tuple of corpora
        """
        dev_c = Corpus()
        test_c = Corpus()
        n = len(self) * test_size
        indices = list(range(len(self)))
        random.seed(random_seed)
        random.shuffle(indices)
        i = 0
        # NOTE(review): the first len(self) * test_size shuffled documents go
        # to dev_c and the remaining ones to test_c - confirm this is the
        # intended direction of the split.
        while i < len(indices):
            document = self[indices[i]]
            if i < n:
                dev_c.add_document_from_element(document)
            else:
                test_c.add_document_from_element(document)
            i += 1
        return dev_c, test_c

    def annotate_sentence(self, sentence, buffer_offset, term_locs=None):
        """
        Runs ``sentence`` through the stanza pipeline and returns a list of
        sentences, each being a list of per-token annotation dictionaries.

        :param buffer_offset: added to each token's character offsets
        :param term_locs: sorted list of term boundaries, tokens are given
            a ``term_tag`` only if it is not empty
        """
        term_state = ["O", "B-TERM", "I-TERM"]
        annotated_text = self.nlp(sentence)
        annotated_sentences = []
        head_dict = {0: "root"}
        for sentence in annotated_text.sentences:
            annotated_sentence = []
            for token in sentence.tokens:
                if len(token.words) > 1:
                    # Tokens made of several words are only logged
                    log.info(token)
                else:
                    word = token.words[0]
                    misc = dict(
                        token_misc.split("=")
                        for token_misc in word.misc.split("|"))
                    word_id = int(word.id)
                    head_dict[word_id] = word.text
                    start_char = buffer_offset + int(misc["start_char"])
                    end_char = buffer_offset + int(misc["end_char"])
                    annotations = {
                        "id": word_id,
                        "word": word.text,
                        "pos": word.xpos,
                        "lemma": word.lemma,
                        "deprel": word.deprel,
                        "deprel_head_id": word.head,
                        "character_offset_begin": start_char,
                        "character_offset_end": end_char,
                        "ner": token.ner
                    }
                    if term_locs is not None and len(term_locs) > 0:
                        # term_locs holds 3 entries per term location so the
                        # insertion point modulo 3 tells where the token is
                        # in relation to a term
                        annotations["term_tag"] = term_state[bisect(
                            term_locs, start_char) % 3]
                    annotated_sentence.append(annotations)
            for i, token in enumerate(annotated_sentence):
                token["deprel_head_text"] = head_dict[token["deprel_head_id"]]
                if "term_tag" in token:
                    # hacky way, should fix write_to_core_nlp_xmls insort usage
                    # if token["term_tag"][0] == "I" and (i == 0 or annotated_sentence[i-1]["term_tag"][0] == "O"):
                    #     if i == len(annotated_sentence) - 1 or annotated_sentence[i+1]["term_tag"][0] != "I":
                    #         token["term_tag"] = "S" + token["term_tag"][1:]
                    #     else:
                    #         token["term_tag"] = "B" + token["term_tag"][1:]
                    # el
                    if i == len(annotated_sentence) - 1 or annotated_sentence[
                            i + 1]["term_tag"][0] != "I":
                        # The token ends a term - a lone B becomes S,
                        # a trailing I becomes E
                        if token["term_tag"][0] == "B":
                            token["term_tag"] = "S" + token["term_tag"][1:]
                        elif token["term_tag"][0] == "I":
                            token["term_tag"] = "E" + token["term_tag"][1:]
            annotated_sentences.append(annotated_sentence)
        return annotated_sentences

    def write_to_core_nlp_xmls(self, output_folder):
        """
        Annotates each document's title and content and writes the result
        to ``<document_id>.xml`` in ``output_folder``.

        Documents whose output file already exists are skipped.
        """
        for document in tqdm(self.iter_documents(), total=len(self)):
            document_id = document.document_id.text
            if f"{document_id}.xml" not in os.listdir(output_folder):
                buffer_offset = 0
                title = document.title.text
                term_locs = []
                # Not every document has terms, even if some in the corpus do
                if self.has_terms_locations and \
                        document.terms.countchildren() > 0:
                    for term in document.terms.term:
                        if term.locations.countchildren() == 0:
                            continue
                        for location in term.locations.location:
                            insort(term_locs, int(location.begin.text) - 0.5)
                            insort(term_locs, int(location.begin.text) + 0.5)
                            insort(term_locs, int(location.end.text))
                annotated_title = self.annotate_sentence(
                    title, buffer_offset, term_locs)
                buffer_offset += len(title) + 1
                annotated_content = []
                for p in document.content.p:
                    if len(p.text.strip()) > 0:
                        text = p.text.strip()
                        p_sents = nltk.tokenize.sent_tokenize(text)
                        for p_sent in p_sents:
                            annotated_content += self.annotate_sentence(
                                p_sent, buffer_offset, term_locs)
                            buffer_offset += len(p_sent) + 1
                core_nlp_document = StanfordCoreNLPDocument()
                core_nlp_document.from_sentences(annotated_title,
                                                 annotated_content)
                core_nlp_document.write_xml_to(
                    os.path.join(output_folder, f"{document_id}.xml"))

    def write_to_jsonl(self, jsonl_path):
        """
        Writes one JSON object per line for each document, holding the
        title and paragraphs joined with ``|`` as ``text`` and the
        document's ID under ``meta``.
        """
        # terms_found = False
        with open(jsonl_path, "w") as out_file:
            for document in self.iter_documents():
                # if document.terms.countchildren() > 0:
                #     labels = []
                #     for term in document.terms.term:
                #         for location in term.locations.location:
                #             labels.append([int(location.begin.text), int(location.end.text), "UNK"])
                doc_id = document.document_id.text
                text = {
                    "text":
                    "|".join([document.title.text] +
                             [p.text for p in document.content.p]),
                    "meta": {
                        "doc_id": doc_id
                    }
                }
                # NOTE(review): html.unescape is given the whole dictionary
                # rather than the text itself, which looks like it leaves the
                # text as it is - confirm whether unescaping was intended.
                json.dump(html.unescape(text), out_file)
                out_file.write("\n")