def parseFFmpeg(self, parent):
    """Read and parse the "ffmpeg.info" payload of parent's object.

    On success the raw text is stored in self.__ffmpegRaw, the parsed JSON
    in self.__ffmpegData and its "outputs" map in self.__ffmpegOutputs.

    Args:
        parent: object exposing getObject(); may be None.

    Returns:
        True when the payload was read and parsed; False when parent or its
        object is missing, or when anything fails while reading/parsing.
    """
    if parent is None:
        return False
    storedObject = parent.getObject()
    if storedObject is None:
        return False

    payload = None
    try:
        payload = storedObject.getPayload("ffmpeg.info")
        # Stream the content out to string
        out = ByteArrayOutputStream()
        IOUtils.copy(payload.open(), out)
        self.__ffmpegRaw = out.toString("UTF-8")
        out.close()
        # And parse it
        self.__ffmpegData = JsonSimple(self.__ffmpegRaw)
        if self.__ffmpegData is None:
            return False
        self.__ffmpegOutputs = self.__ffmpegData.getJsonSimpleMap(["outputs"])
        return True
    except:
        # Deliberately broad (kept from the original): any failure while
        # reading or parsing is reported to the caller as False.
        return False
    finally:
        # Close exactly once, on every path; the payload used to be closed
        # twice on success and once more in the error handler.
        if payload is not None:
            payload.close()
def parseFFmpeg(self, parent):
    """Read the "ffmpeg.info" payload of parent's object and keep its outputs.

    On success the raw text is stored in self.__ffmpegRaw and the "/outputs"
    map of the parsed JSON in self.__ffmpegData.

    Args:
        parent: object exposing getObject(); may be None.

    Returns:
        True when the payload was read and parsed; False when parent or its
        object is missing, or when anything fails while reading/parsing.
    """
    if parent is None:
        return False
    storedObject = parent.getObject()
    if storedObject is None:
        return False

    payload = None
    try:
        payload = storedObject.getPayload("ffmpeg.info")
        # Stream the content out to string
        out = ByteArrayOutputStream()
        IOUtils.copy(payload.open(), out)
        self.__ffmpegRaw = out.toString("UTF-8")
        out.close()
        # And parse it
        jsonData = JsonConfigHelper(self.__ffmpegRaw)
        if jsonData is None:
            return False
        self.__ffmpegData = jsonData.getJsonMap("/outputs")
        return True
    except:
        # Deliberately broad (kept from the original): any failure while
        # reading or parsing is reported to the caller as False.
        return False
    finally:
        # Close exactly once, on every path; the payload used to be closed
        # twice on success and once more in the error handler.
        if payload is not None:
            payload.close()
def process(flowfile):
    """Parse one flow file with the parser module selected by its filename.

    The parser name is picked from substrings of the 'filename' attribute.
    The module at parser_path + parser + '.py' is (re)loaded when it is not
    in sys.modules yet or its file is newer than the loaded copy. The
    module's parse() result is written back to the flow file, which is then
    transferred to REL_SUCCESS. Failures are reported through fail().

    Args:
        flowfile: the flow file to process.
    """
    fn = flowfile.getAttribute('filename')

    # First matching marker wins, in this order
    parser = None
    for marker, name in (('ethylene', 'gas'), ('hr', 'heartrate'), ('power', 'power')):
        if marker in fn:
            parser = name
            break
    if parser is None:
        # Previously an unmatched filename crashed on the unbound 'parser'
        # variable and the flow file was never routed anywhere.
        fail(flowfile, 'Selecting Parser: no parser matches filename %s' % fn)
        return

    path = parser_path + parser + '.py'

    # load the parser if it has been updated
    if parser not in sys.modules or os.path.getmtime(path) > sys.modules[parser].loaded_at:
        try:
            module = imp.load_source(parser, path)
            module.loaded_at = int(time.time())
        except:
            fail(flowfile, 'Loading Module: ' + traceback.format_exc())
            return
    parse_module = sys.modules[parser]

    # Read flowfile content; a parser may declare format = 'binary' to get bytes
    data = {}
    instream = session.read(flowfile)
    try:
        if hasattr(parse_module, 'format') and parse_module.format.lower() == 'binary':
            data['content'] = IOUtils.toByteArray(instream)
        else:
            data['content'] = IOUtils.toString(instream, StandardCharsets.UTF_8)
    finally:
        # Release the stream even when reading fails
        instream.close()

    # Attempt to parse
    try:
        if hasattr(parse_module, 'attributes'):
            # Hand the parser the flow file attributes it asked for
            for attribute in parse_module.attributes:
                data[attribute] = flowfile.getAttribute(attribute)
        result = parse_module.parse(data)
        flowfile = session.write(flowfile, PyStreamCallback(result))
        session.transfer(flowfile, REL_SUCCESS)
    except:
        fail(flowfile, 'Parsing: ' + traceback.format_exc())
def getPayloadContent(self):
    """Return an HTML rendering of this object's payload, chosen by dc_format.

    Text formats are returned inside <pre>...</pre>. For "vnd.ms-" and
    "vnd.oasis.opendocument." formats the sibling ".htm" payload is parsed
    and its //div[@class='body'] node is serialised. Any other format
    yields an empty string.

    Returns:
        The HTML string, or "" when the format is not handled.
    """
    dcFormat = self.__metadata.getField("dc_format")
    # The payload id is the last path segment of the object id
    pid = self.__oid[self.__oid.rfind("/") + 1:]
    print(" *** payload content, format: %s, pid: %s *** " % (dcFormat, pid))

    contentStr = ""
    if dcFormat.startswith("text"):
        payload = self.__storage.getPayload(self.__oid, pid)
        text = StringWriter()
        IOUtils.copy(payload.getInputStream(), text)
        contentStr = "<pre>" + text.toString() + "</pre>"
    elif dcFormat.find("vnd.ms-") > -1 or dcFormat.find("vnd.oasis.opendocument.") > -1:
        # get the html version if exist....
        dot = pid.find(".")
        if dot > -1:
            # Only strip an extension that is really there; slicing with
            # find()'s -1 used to chop the last character off the pid.
            pid = pid[:dot]
        pid = pid + ".htm"
        payload = self.__storage.getPayload(self.__oid, pid)
        document = SAXReader().read(payload.getInputStream())
        slideNode = document.selectSingleNode("//div[@class='body']")
        # encode character entities correctly
        out = ByteArrayOutputStream()
        outputFormat = OutputFormat.createPrettyPrint()
        outputFormat.setSuppressDeclaration(True)
        writer = XMLWriter(out, outputFormat)
        writer.write(slideNode)
        writer.close()
        contentStr = out.toString("UTF-8")
    return contentStr
def __load(self, oid):
    """Build the HTML preview for the object with the given id.

    Args:
        oid: object id, or the literal "blank" for a placeholder page.

    Returns:
        HTML text. Text payloads that already contain class="body" are
        returned as-is; other HTML and plain text are wrapped in the page
        template; images get an <img> wrapper; anything else becomes a link.
    """
    template = """<div class="title" /><div class="page-toc" /><div class="body"><div>%s</div></div>"""
    print("Loading HTML preview for %s..." % oid)
    if oid == "blank":
        return template % "<p>This page intentionally left blank.</p>"

    storedObject = Services.getStorage().getObject(oid)
    try:
        # get preview payload or source if no preview
        pid = self.__getPreviewPid(storedObject)
        payload = storedObject.getPayload(pid)
        try:
            mimeType = payload.getContentType()
            print("pid=%s mimeType=%s" % (pid, mimeType))
            isHtml = mimeType in ("text/html", "application/xhtml+xml")
            if isHtml or mimeType.startswith("text/"):
                out = ByteArrayOutputStream()
                IOUtils.copy(payload.open(), out)
                content = out.toString("UTF-8")
                # find() returns -1 (truthy) when absent, so the old bare
                # truth test sent nearly everything down this branch.
                if content.find('class="body"') > -1:
                    ## assumes ICE content
                    return content
                if isHtml:
                    return template % content
                if mimeType == "text/plain":
                    return template % ('<pre>%s</pre>' % content)
                return content
            if mimeType.startswith("image/"):
                return template % ('<div rel="%s"><img src="%s" /></div>' % (oid, pid))
            return '<a href="%s" rel="%s">%s</a>' % (oid, mimeType, pid)
        finally:
            # These closes used to sit after the returns and never ran
            payload.close()
    finally:
        storedObject.close()
def __getFile(self, packageDir, filename):
    """Return the file packageDir/filename, creating it from a resource if absent.

    When the file does not exist its content is copied from the class-path
    resource "/workflows/<filename>".

    Args:
        packageDir: parent directory passed to File().
        filename: file name, also used as the resource name.

    Returns:
        The File object (existing or newly written).
    """
    target = File(packageDir, filename)
    if not target.exists():
        out = FileOutputStream(target)
        try:
            resource = Services.getClass().getResourceAsStream("/workflows/" + filename)
            try:
                IOUtils.copy(resource, out)
            finally:
                # The resource stream was never closed before
                if resource is not None:
                    resource.close()
        finally:
            # Close the output even when the copy fails
            out.close()
    return target
def pageContent(self):
    """Return the HTML used to display this object's content.

    Objects whose first dc_format is "text/html" are shown in an iframe
    that points at their download URL, preceded by a direct link. For
    everything else the payload named by the "preview" metadata entry is
    returned as UTF-8 text.

    Returns:
        The HTML string, or None when storage raises StorageException.
    """
    oid = self.metadata.get("id")

    # The first dc_format entry, when present, is taken as the MIME type
    formats = self.metadata.getList("dc_format")
    if formats is None or formats.isEmpty():
        mimeType = "Unknown"
    else:
        mimeType = formats.get(0)

    if mimeType != "text/html":
        # We are just rendering a HTML preview
        preview = self.metadata.get("preview")
        try:
            storedObject = Services.getStorage().getObject(oid)
            payload = storedObject.getPayload(preview)
            buffer = ByteArrayOutputStream()
            IOUtils.copy(payload.open(), buffer)
            payload.close()
            return buffer.toString("UTF-8")
        except StorageException:
            return None

    # The HTML payload is the real object, display in a frame because we
    # have no idea what kind of object it is.
    objectPath = "http://%s:%s%s/%s/download/%s/" % (
        self.req.serverName, self.serverPort, self.contextPath, self.portalId, oid)
    link = '<a class="iframe-link-alt" href="%s">View outside the frame</a>' % objectPath
    frame = '<iframe class="iframe-preview" src="%s"></iframe>' % objectPath
    return link + "<br/>" + frame
def getPayloadContent(self):
    """Return the HTML snippet that previews this payload, by MIME type.

    text/html        -> iframe pointing at the download URL
    other text/*     -> payload text wrapped in <pre>
    pdf / office     -> <body> of the sibling ".htm" payload, renamed to <div>
    image/*          -> linked <img> that references the payload id
    anything else    -> ""

    Returns:
        The HTML string ("" when nothing applies or the text payload is None).
    """
    mimeType = self.__mimeType
    print(" * single.py: payload content mimeType=%s" % mimeType)

    if mimeType.startswith("text/"):
        if mimeType == "text/html":
            return '<iframe class="iframe-preview" src="%s/%s/download/%s"></iframe>' % (
                contextPath, portalId, self.__oid)
        # The payload id is the last path segment of the object id
        pid = self.__oid[self.__oid.rfind("/") + 1:]
        payload = self.__storage.getPayload(self.__oid, pid)
        print(" * single.py: pid=%s payload=%s" % (pid, payload))
        if payload is None:
            return ""
        sw = StringWriter()
        sw.write("<pre>")
        IOUtils.copy(payload.getInputStream(), sw)
        sw.write("</pre>")
        sw.flush()
        return sw.toString()

    isDocument = (mimeType == "application/pdf"
                  or mimeType.find("vnd.ms") > -1
                  or mimeType.find("vnd.oasis.opendocument.") > -1)
    if isDocument:
        # get the html version if exist...
        pid = os.path.splitext(self.__pid)[0] + ".htm"
        print(" * single.py: pid=%s" % pid)
        payload = self.__storage.getPayload(self.__oid, pid)
        saxReader = SAXReader(Boolean.parseBoolean("false"))
        try:
            document = saxReader.read(payload.getInputStream())
            bodyNode = document.selectSingleNode("//*[local-name()='body']")
            # encode character entities correctly
            bodyNode.setName("div")
            buffer = ByteArrayOutputStream()
            prettyFormat = OutputFormat.createPrettyPrint()
            prettyFormat.setSuppressDeclaration(True)
            prettyFormat.setExpandEmptyElements(True)
            writer = XMLWriter(buffer, prettyFormat)
            writer.write(bodyNode)
            writer.close()
            return buffer.toString("UTF-8")
        except:
            # Any failure (including a missing .htm payload) degrades to a notice
            traceback.print_exc()
            return '<p class="error">No preview available</p>'

    if mimeType.startswith("image/"):
        # NOTE(review): the original also built "<oid>/<pid>" but never used
        # it; the link has always pointed at the payload id alone.
        return (
            '<a class="image" href="%(src)s" style="max-width:98%%">'
            '<img src="%(src)s" style="max-width:100%%" /></a>' % {"src": self.__pid}
        )

    return ""
def __getFile(self, packageDir, filename):
    """Return the file packageDir/filename, creating it from a resource if absent.

    When the file does not exist its content is copied from the class-path
    resource "/workflows/<filename>".

    Args:
        packageDir: parent directory passed to File().
        filename: file name, also used as the resource name.

    Returns:
        The File object (existing or newly written).
    """
    target = File(packageDir, filename)
    if target.exists():
        return target

    out = FileOutputStream(target)
    try:
        resource = Services.getClass().getResourceAsStream("/workflows/" + filename)
        try:
            IOUtils.copy(resource, out)
        finally:
            # The resource stream was never closed before
            if resource is not None:
                resource.close()
    finally:
        # Close the output even when the copy fails
        out.close()
    return target
def getResourceContent(self, plugin, field):
    """Return the text of the resource named by a plugin's metadata field.

    Args:
        plugin: passed to self.getMetadata() to look up the resource name.
        field: metadata field to read; also shown in the fallback message.

    Returns:
        The resource content read as UTF-8, or an "<em>'field' not found!</em>"
        message when the page service returns no stream.
    """
    resource = self.getMetadata(plugin, field)
    stream = self.pageService.getResource(resource)
    if not stream:
        return "<em>'%s' not found!</em>" % (field)

    try:
        writer = StringWriter()
        IOUtils.copy(stream, writer, "UTF-8")
        html = writer.toString()
    finally:
        # The stream was never released before; close it even if the copy fails
        stream.close()
    print(" *** html: %s" % html)
    return html
def getPayloadContent(self):
    """Return the HTML snippet that previews this payload, by MIME type.

    application/octet-stream -> "(Binary file)" when the indexed dc_format
                                (first and last character stripped) equals
                                the MIME type, otherwise "(File not found)"
    text/html                -> iframe pointing at the download URL
    other text/*             -> payload text wrapped in <pre>
    pdf / office             -> <body> of the sibling ".htm" payload as <div>
    anything else            -> ""

    Returns:
        The HTML string ("" when nothing applies or the text payload is None).
    """
    mimeType = self.__mimeType
    print(" * detail.py: payload content mimeType=%s" % mimeType)

    if mimeType == "application/octet-stream":
        dcFormat = self.__json.get("response/docs/dc_format")
        if dcFormat is not None:
            # Drop the first and last character of the stored value
            dcFormat = dcFormat[1:-1]
        print("%s %s" % (dcFormat, mimeType))
        if dcFormat == mimeType:
            return "<div><em>(Binary file)</em></div>"
        return "<div><em>(File not found)</em></div>"

    if mimeType.startswith("text/"):
        if mimeType == "text/html":
            return '<iframe class="iframe-preview" src="%s/%s/download/%s"></iframe>' % (
                contextPath, portalId, self.__oid)
        # The payload id is the last path segment of the object id
        pid = self.__oid[self.__oid.rfind("/") + 1:]
        payload = self.__storage.getPayload(self.__oid, pid)
        if payload is None:
            return ""
        sw = StringWriter()
        sw.write("<pre>")
        IOUtils.copy(payload.getInputStream(), sw)
        sw.write("</pre>")
        sw.flush()
        return sw.toString()

    isDocument = (mimeType == "application/pdf"
                  or mimeType.find("vnd.ms") > -1
                  or mimeType.find("vnd.oasis.opendocument.") > -1)
    if not isDocument:
        return ""

    # get the html version if exist...
    pid = os.path.splitext(self.__pid)[0] + ".htm"
    print(" * detail.py: pid=%s" % pid)
    payload = self.__storage.getPayload(self.__oid, pid)
    saxReader = SAXReader(Boolean.parseBoolean("false"))
    try:
        document = saxReader.read(payload.getInputStream())
        bodyNode = document.selectSingleNode("//*[local-name()='body']")
        # encode character entities correctly
        bodyNode.setName("div")
        buffer = ByteArrayOutputStream()
        prettyFormat = OutputFormat.createPrettyPrint()
        prettyFormat.setSuppressDeclaration(True)
        prettyFormat.setExpandEmptyElements(True)
        writer = XMLWriter(buffer, prettyFormat)
        writer.write(bodyNode)
        writer.close()
        return buffer.toString("UTF-8")
    except:
        # Any failure (including a missing .htm payload) degrades to a notice
        traceback.print_exc()
        return "<p class=\"error\">No preview available</p>"
def getAboutPage(self, plugin, type):
    """Return the "about" HTML a plugin ships, or a placeholder message.

    The page is looked up as "plugin/<type>/<plugin id>/about.html", where
    the plugin id is the plugin name with "-" replaced by "_".

    Args:
        plugin: plugin name; may be None.
        type: plugin type used as a path segment; may be None.

    Returns:
        The page content read as UTF-8, or a placeholder message when either
        argument is None or the page service returns no stream.
    """
    missing = "<em>This plugin has provided no information about itself.</em>"
    if type is None or plugin is None:
        return missing

    pid = plugin.replace("-", "_")
    resource = "plugin/%s/%s/about.html" % (type, pid)
    stream = self.pageService.getResource(resource)
    if not stream:
        return missing

    try:
        writer = StringWriter()
        IOUtils.copy(stream, writer, "UTF-8")
        return writer.toString()
    finally:
        # The stream was never released before; close it even if the copy fails
        stream.close()
def getAboutPage(self, plugin, type):
    """Return the "about" HTML a plugin ships, or a "not found" message.

    The page is looked up as "plugin/<type>/<plugin id>/about.html", where
    the plugin id is the plugin name with "-" replaced by "_".

    Args:
        plugin: plugin name; may be None.
        type: plugin type used as a path segment; may be None.

    Returns:
        The page content read as UTF-8, or a message naming the path that
        was not found (built from the raw arguments when either is None).
    """
    notFound = "<em>'plugin/%s/%s/about.html' not found!</em>"
    if type is None or plugin is None:
        return notFound % (type, plugin)

    pid = plugin.replace("-", "_")
    resource = "plugin/%s/%s/about.html" % (type, pid)
    stream = self.pageService.getResource(resource)
    if not stream:
        return notFound % (type, pid)

    try:
        writer = StringWriter()
        IOUtils.copy(stream, writer, "UTF-8")
        return writer.toString()
    finally:
        # The stream was never released before; close it even if the copy fails
        stream.close()
def getPayloadContent(self):
    """Return the HTML snippet that previews this payload, by MIME type.

    text/html        -> iframe pointing at <portalPath>/download/<oid>
    other text/*     -> payload text wrapped in <pre>
    pdf / office     -> <body> of the sibling ".htm" payload, renamed to <div>
    image/*          -> linked <img> that references the payload id
    anything else    -> ""

    Returns:
        The HTML string ("" when nothing applies or the text payload is None).
    """
    mimeType = self.__mimeType
    print(" * single.py: payload content mimeType=%s" % mimeType)

    if mimeType.startswith("text/"):
        if mimeType == "text/html":
            return '<iframe class="iframe-preview" src="%s/download/%s"></iframe>' % (
                portalPath, self.__oid)
        # The payload id is the last path segment of the object id
        pid = self.__oid[self.__oid.rfind("/") + 1:]
        payload = self.__storage.getPayload(self.__oid, pid)
        print(" * single.py: pid=%s payload=%s" % (pid, payload))
        if payload is None:
            return ""
        sw = StringWriter()
        sw.write("<pre>")
        IOUtils.copy(payload.getInputStream(), sw)
        sw.write("</pre>")
        sw.flush()
        return sw.toString()

    isDocument = (mimeType == "application/pdf"
                  or mimeType.find("vnd.ms") > -1
                  or mimeType.find("vnd.oasis.opendocument.") > -1)
    if isDocument:
        # get the html version if exist...
        pid = os.path.splitext(self.__pid)[0] + ".htm"
        print(" * single.py: pid=%s" % pid)
        payload = self.__storage.getPayload(self.__oid, pid)
        saxReader = SAXReader(Boolean.parseBoolean("false"))
        try:
            document = saxReader.read(payload.getInputStream())
            bodyNode = document.selectSingleNode("//*[local-name()='body']")
            # encode character entities correctly
            bodyNode.setName("div")
            buffer = ByteArrayOutputStream()
            prettyFormat = OutputFormat.createPrettyPrint()
            prettyFormat.setSuppressDeclaration(True)
            prettyFormat.setExpandEmptyElements(True)
            writer = XMLWriter(buffer, prettyFormat)
            writer.write(bodyNode)
            writer.close()
            return buffer.toString("UTF-8")
        except:
            # Any failure (including a missing .htm payload) degrades to a notice
            traceback.print_exc()
            return "<p class=\"error\">No preview available</p>"

    if mimeType.startswith("image/"):
        # NOTE(review): the original also built "<oid>/<pid>" but never used
        # it; the link has always pointed at the payload id alone.
        return ('<a class="image" href="%(src)s" style="max-width:98%%">'
                '<img src="%(src)s" style="max-width:100%%" /></a>' % {"src": self.__pid})

    return ""
def onCall(self, value, fileStream, fileStream2):
    """Consume the uploaded streams and report which files were received.

    Args:
        value: not used by this method.
        fileStream: single-file stream value; read only if hasNext() is true.
        fileStream2: multi-file stream value; every element is read.

    Returns:
        "Uploaded" followed by a space-separated list of the file names.
    """
    # NOTE(review): computed in the original but never used afterwards
    uploadDir = "{}/upload/".format(sponge.home)

    received = []

    # Single file.
    if fileStream.hasNext():
        IOUtils.readLines(fileStream.inputStream, StandardCharsets.UTF_8)
        received.append(" " + fileStream.filename)

    # Multiple files.
    while fileStream2.hasNext():
        upload = fileStream2.next()
        IOUtils.readLines(upload.inputStream, StandardCharsets.UTF_8)
        received.append(" " + upload.filename)

    return "Uploaded" + "".join(received)
def process(flowfile): # Read flowfile content data = {} instream = session.read(flowfile) if hasattr(parse_module, 'format') and parse_module.format.lower() == 'binary': data['content'] = IOUtils.toByteArray(instream) else: data['content'] = IOUtils.toString(instream, StandardCharsets.UTF_8) instream.close() rootNode = json.load(data) for currIndex, elem in enumerate(rootNode): rootPath = ${atlas_url} + '/api/atlas/v1/taxanomies/Catalog/terms/'+rootNode[currIndex]['name'] nextNode(rootNode[currIndex], rootPath)
def __checkMetadataPayload(self):
    """Ensure the object has a 'formData.tfpackage' payload.

    Sets self.firstHarvest to False when the payload already exists. When
    the lookup raises, sets it to True, stores a default package and
    records its id in self.packagePid.

    Raises:
        Exception: when storing the new payload raises StorageException.
    """
    packagePid = "formData.tfpackage"

    # Simple check for its existance
    try:
        self.object.getPayload(packagePid)
    except Exception:
        self.firstHarvest = True
    else:
        self.firstHarvest = False
        return

    # We need to create it
    self.log.info("Creating 'formData.tfpackage' payload for object '{}'", self.oid)

    # Prep data
    data = {}
    data["viewId"] = "default"
    data["workflow_source"] = "Edgar Import"
    data["packageType"] = "dataset"
    data["redbox:formVersion"] = self.redboxVersion
    data["redbox:newForm"] = "true"
    package = JsonSimple(JsonObject(data))

    # Store it
    inStream = IOUtils.toInputStream(package.toString(True), "UTF-8")
    try:
        self.object.createStoredPayload(packagePid, inStream)
        self.packagePid = packagePid
    except StorageException as e:
        self.log.error("Error creating 'formData.tfpackage' payload for object '{}'", self.oid, e)
        raise Exception("Error creating package payload: ", e)
def __activate__(self, context):
    """Record the current user's committee response and redirect to the detail page.

    Reads "oid", "approval" and "approval_comment" from the request, stores
    them under the user's name in the object's
    "committee-responses.metadata" payload (created if absent), then
    redirects to <portalPath>/detail/<oid>.

    Args:
        context: mapping providing "request", "Services", "page", "log",
            "response" and "portalPath".
    """
    payloadId = "committee-responses.metadata"

    request = context["request"]
    storage = context["Services"].getStorage()
    auth = context["page"].authentication
    log = context["log"]

    username = auth.get_name()
    oid = request.getParameter("oid")
    approval = request.getParameter("approval")
    approval_comment = request.getParameter("approval_comment")

    storedObj = storage.getObject(oid)

    # Start from the stored responses when the payload already exists
    if storedObj.getPayloadIdList().contains(payloadId):
        existing = storedObj.getPayload(payloadId)
        committeeResponses = JsonSimple(existing.open()).getJsonObject()
    else:
        committeeResponses = JsonObject()

    # One entry per user name; a later response replaces an earlier one
    committeeResponse = JsonObject()
    committeeResponse.put("approval", approval)
    committeeResponse.put("approval_comment", approval_comment)
    committeeResponses.put(username, committeeResponse)

    log.debug(" %s: Committee %s, approval = %s, comment = %s" % (
        oid, username, approval, approval_comment))

    updated = IOUtils.toInputStream(committeeResponses.toString(), "UTF-8")
    StorageUtils.createOrUpdatePayload(storedObj, payloadId, updated)
    context["response"].sendRedirect(context["portalPath"] + "/detail/" + oid)
def process(self, instream, outstream):
    """Write one randomly generated record to outstream.

    The incoming content is read but not used. The record has a
    timestamp-based 'id', a formatted 'datetime', a random US state code, a
    triangular-distributed 'duration' and an 'action' flag, and is written
    as the string form of the dict.

    Args:
        instream: input stream; consumed and ignored.
        outstream: output stream receiving str(record).
    """
    # To read content as a string:
    data = IOUtils.toString(instream, StandardCharsets.UTF_8)

    states = [
        'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
        'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
        'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
        'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
        'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY',
    ]

    now = datetime.datetime.now()
    output = {}
    output['id'] = now.strftime('%Y%m%d_%H%M%S')
    output['datetime'] = now.strftime('%Y-%m-%d %H:%M:%S')
    output['state'] = random.choice(states)
    output['duration'] = round(random.triangular(1, 150, 1), 2)
    # True/False ratio of 1:5
    output['action'] = random.choice(['TRUE'] * 1 + ['FALSE'] * 5)

    # Generate CSV output based on "output", which is in JSON
    # NOTE(review): the CSV line is built but the dict's string form is what
    # gets written below - confirm which format is intended.
    output_csv = ','.join([str(field) for field in output.values()])

    # Write modified content
    outstream.write(str(output))
def process(self, inputStream):
    """Split a JSON array into one flow file per item.

    Each item is written as JSON to a new flow file created from
    self.parentFlowFile and tagged with 'fragment.index' (its position) and
    'color' (the item's 'color'). All new flow files are transferred to
    REL_SUCCESS only after every item has been written.

    Args:
        inputStream: stream holding the JSON array (read as UTF-8).

    Raises:
        Re-raises any exception after printing its traceback to stdout.
    """
    try:
        # Read input FlowFile content
        items = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))

        # Create FlowFiles for array items
        splits = []
        for fragment_index, item in enumerate(items):
            color = item['color']
            child = session.create(self.parentFlowFile)
            callback = WriteCallback()
            callback.content = json.dumps(item)
            child = session.write(child, callback)
            attributes = {'fragment.index': fragment_index, 'color': color}
            child = session.putAllAttributes(child, attributes)
            splits.append(child)
            log.info(color)

        for child in splits:
            session.transfer(child, REL_SUCCESS)
    except:
        traceback.print_exc(file=sys.stdout)
        raise
def process(self, inputStream, outputStream):
    """Filter a CSV stream, blanking readings outside [LB, UB].

    The first row is copied through as the header. Later rows are dropped
    when column DATE does not parse as '%Y-%m-%d %H:%M:%S'. In the rows
    that remain, every non-empty value from column 1 onwards that is below
    LB or above UB is replaced by an empty string.

    NOTE(review): rows are terminated with "\\n\\r" exactly as before; that
    looks like a transposed CRLF - confirm with consumers before changing.

    Args:
        inputStream: CSV input, read as UTF-8.
        outputStream: receives the header and each kept row.
    """
    source = StringIO(IOUtils.toString(inputStream, StandardCharsets.UTF_8))

    # Read the CSV stream.
    rows = csv.reader(source, dialect='excel', delimiter=',')
    headerPending = True
    for row in rows:
        if headerPending:
            headerPending = False
            outputStream.write(",".join(row) + "\n\r")
            continue

        # Skip rows whose timestamp does not parse
        try:
            datetime.datetime.strptime(row[DATE], '%Y-%m-%d %H:%M:%S')
        except ValueError:
            continue

        for column in range(1, len(row)):
            cell = row[column]
            if cell == '':
                continue
            reading = float(cell)
            if reading < LB or reading > UB:
                # out of range
                row[column] = ''

        outputStream.write(",".join(row) + "\n\r")
def __activate__(self, context):
    """Copy the TF package of one object onto another and report as JSON.

    The caller must be logged in and be either an admin or the owner of the
    source object. On success the source package is written to the target
    object, the source package is rewritten (with the related oid added
    unless "relatedData" is "false"), a property value is added to the
    target's TF meta, and a reharvest is triggered.

    Args:
        context: mapping providing "page", "request", "response",
            "formData", "Services", "log" and "portalPath".
    """
    self.auth = context["page"].authentication
    self.errorMsg = ""
    self.request = context["request"]
    self.response = context["response"]
    self.formData = context["formData"]
    self.storage = context["Services"].getStorage()
    self.log = context["log"]
    self.reportManager = context["Services"].getService("reportManager")

    fromOid = self.formData.get("fromOid")
    fromObject = self.storage.getObject(fromOid)

    # Access check: logged in, and admin or owner of the source object
    if not self.auth.is_logged_in():
        self.errorMsg = "Please login."
    elif not (self.auth.is_admin() == True or self.__isOwner(fromObject)):
        self.errorMsg = "Requires Admin / Librarian / Reviewer / owner access."

    if self.errorMsg != "":
        result = '{"status": "err", "message": "%s"}' % self.errorMsg
    else:
        toOid = self.formData.get("toOid")
        toObject = self.storage.getObject(toOid)
        storeRelatedData = self.formData.get("relatedData")
        fromTFPackage = self._getTFPackage(fromObject)
        toTFPackage = self._getTFPackage(toObject)

        # Overwrite the target's package with the source's content
        fromInputStream = fromTFPackage.open()
        try:
            StorageUtils.createOrUpdatePayload(toObject, toTFPackage.getId(), fromInputStream)
        except StorageException:
            print("error setting tfPackage")
        fromTFPackage.close()

        fromTFPackageJson = JsonSimple(fromTFPackage.open()).getJsonObject()
        if storeRelatedData != "false":
            # add relatedOid info
            fromTFPackageJson = self._addRelatedOid(JsonSimple(fromTFPackage.open()), toOid)

        # Write the (possibly extended) package back to the source object
        inStream = IOUtils.toInputStream(fromTFPackageJson.toJSONString(), "UTF-8")
        try:
            StorageUtils.createOrUpdatePayload(fromObject, fromTFPackage.getId(), inStream)
        except StorageException:
            print("error setting tfPackage")

        tfMetaPropertyValue = self.formData.get("tfMetaPropertyValue")
        self._addPropertyValueToTFMeta(toObject, tfMetaPropertyValue)
        self._reharvestPackage()
        result = '{"status": "ok", "url": "%s/workflow/%s", "oid": "%s" }' % (
            context["portalPath"], toOid, toOid)

    writer = self.response.getPrintWriter("application/json; charset=UTF-8")
    writer.println(result)
    writer.close()
def __checkMetadataPayload(self, identifier):
    """Ensure the object has a 'metadata.json' payload.

    If the lookup succeeds nothing is done. Otherwise a minimal document is
    stored: an empty "recordIDPrefix", an empty "data" object and a
    "metadata" object holding the identifier as "dc.identifier". A
    StorageException while storing is logged and not re-raised.

    Args:
        identifier: value stored as "dc.identifier".
    """
    # We are just going to confirm the existance of 'metadata.json', or
    # create an empty one if it doesn't exist. Makes curation function for
    # this option and removes some log errors on the details screen.
    try:
        self.object.getPayload("metadata.json")
    except Exception:
        # The lookup raises when the payload is missing; fall through and create it
        pass
    else:
        # all is good
        return

    self.log.info("Creating 'metadata.json' payload for object '{}'", self.oid)

    # Prep data
    document = JsonSimple()
    document.getJsonObject().put("recordIDPrefix", "")
    document.writeObject("data")
    # The only real data we require is the ID for curation
    document.writeObject("metadata").put("dc.identifier", identifier)

    # Store it
    inStream = IOUtils.toInputStream(document.toString(True), "UTF-8")
    try:
        StorageUtils.createOrUpdatePayload(self.object, "metadata.json", inStream)
    except StorageException as e:
        self.log.error("Error creating 'metadata.json' payload for object '{}'", self.oid, e)
    return
def __checkMetadataPayload(self):
    """Ensure the object has a 'formData.tfpackage' payload.

    Sets self.firstHarvest to False when the payload already exists. When
    the lookup raises, sets it to True, stores a default package and
    records its id in self.packagePid.

    Raises:
        Exception: when storing the new payload raises StorageException.
    """
    pid = "formData.tfpackage"

    # Simple check for its existance
    try:
        self.object.getPayload(pid)
    except Exception:
        self.firstHarvest = True
    else:
        self.firstHarvest = False
        return

    # We need to create it
    self.log.info(
        "Creating 'formData.tfpackage' payload for object '{}'", self.oid)

    # Prep data
    defaults = [
        ("viewId", "default"),
        ("workflow_source", "Edgar Import"),
        ("packageType", "dataset"),
        ("redbox:formVersion", self.redboxVersion),
        ("redbox:newForm", "true"),
    ]
    package = JsonSimple(JsonObject(dict(defaults)))

    # Store it
    inStream = IOUtils.toInputStream(package.toString(True), "UTF-8")
    try:
        self.object.createStoredPayload(pid, inStream)
        self.packagePid = pid
    except StorageException as e:
        self.log.error(
            "Error creating 'formData.tfpackage' payload for object '{}'",
            self.oid, e)
        raise Exception("Error creating package payload: ", e)
def __getJson(self): rvtMap = HashMap() try: oid = formData.get("oid") object = Services.storage.getObject(oid) payload = object.getPayload("imsmanifest.xml") try: from xml.etree import ElementTree xmlStr = IOUtils.toString(payload.open(), "UTF-8") payload.close() xml = ElementTree.XML(xmlStr.encode("UTF-8")) ns = xml.tag[:xml.tag.find("}")+1] resources = {} for res in xml.findall(ns+"resources/"+ns+"resource"): resources[res.attrib.get("identifier")] = res.attrib.get("href") organizations = xml.find(ns+"organizations") defaultName = organizations.attrib.get("default") organizations = organizations.findall(ns+"organization") organizations = [o for o in organizations if o.attrib.get("identifier")==defaultName] organization = organizations[0] title = organization.find(ns+"title").text rvtMap.put("title", title) items = organization.findall(ns+"item") rvtMap.put("toc", self.__getJsonItems(ns, items, resources)) except Exception, e: data["error"] = "Error - %s" % str(e) print data["error"] object.close()
def __getJson(self): rvtMap = JsonObject() try: oid = self.vc("formData").get("oid") object = Services.storage.getObject(oid) payload = object.getPayload("imsmanifest.xml") try: from xml.etree import ElementTree xmlStr = IOUtils.toString(payload.open(), "UTF-8") payload.close() xml = ElementTree.XML(xmlStr.encode("UTF-8")) ns = xml.tag[:xml.tag.find("}")+1] resources = {} for res in xml.findall(ns+"resources/"+ns+"resource"): resources[res.attrib.get("identifier")] = res.attrib.get("href") organizations = xml.find(ns+"organizations") defaultName = organizations.attrib.get("default") organizations = organizations.findall(ns+"organization") organizations = [o for o in organizations if o.attrib.get("identifier")==defaultName] organization = organizations[0] title = organization.find(ns+"title").text rvtMap.put("title", title) items = organization.findall(ns+"item") rvtMap.put("toc", self.__getJsonItems(ns, items, resources)) except Exception, e: data["error"] = "Error - %s" % str(e) print data["error"] object.close()
def process(self, inputStream, outputStream):
    """Flatten a JSON object into one "<path>{key: value}" line per leaf.

    Paths start at "PA" and grow by ".<key>" for nested objects and by
    ".<key>_<n>" for the n-th nested container found inside a list. A list
    holding no nested containers is emitted whole as a single leaf.

    Args:
        inputStream: JSON input, read as UTF-8.
        outputStream: receives the flattened text encoded as UTF-8.
    """
    document = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
    lines = []

    def walk(path, node):
        # Depth-first walk that appends one line per leaf, in key order
        for key in node:
            value = node[key]
            if isinstance(value, dict):
                walk(path + "." + key, value)
            elif isinstance(value, list):
                nested = 0
                for element in value:
                    if isinstance(element, (dict, list)):
                        nested = nested + 1
                        walk(path + "." + key + "_" + str(nested), element)
                if nested == 0:
                    # No nested containers: emit the whole list as one leaf
                    lines.append(path + json.dumps({key: value}) + "\n")
            else:
                lines.append(path + json.dumps({key: value}) + "\n")

    walk("PA", document)
    outputStream.write(bytearray("".join(lines).encode('utf-8')))
def process(self, inputStream):
    """Transform the policy index content and route both result sets.

    Args:
        inputStream: input read as UTF-8 and passed, with self._attrs, to
            Processor().transform_policy_index().
    """
    text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
    outcome = Processor().transform_policy_index(text, self._attrs)
    corrects, incorrects, attributes = outcome

    # Correct records go to _succ first, then incorrect ones to _fail
    for records, relationship in ((corrects, _succ), (incorrects, _fail)):
        self._transfer(records, attributes, relationship)
def process(self, inputStream):
    """POST the input as an image to the prediction API and emit the result.

    The content (with CR/LF removed) is sent via curl as {"image": ...}.
    The last CRLF-separated part of curl's output is parsed as JSON, and
    its 'prediction_label' and 'prediction_prob' become attributes of a
    new flow file that carries the same JSON body and is transferred to
    REL_SUCCESS.

    NOTE(review): the header value 'content-type:application/ ' looks
    truncated; kept exactly as it was.

    Args:
        inputStream: input content, read as UTF-8.
    """
    splitFlowFile = session.create(self.parentFlowFile)
    writeCallback = WriteCallback()

    # To read content as a string:
    data = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
    body = '{"image":"' + re.sub('(\r|\n)', '', data) + '"}'

    command = [
        'curl', '-i', '-k',
        '-X', 'POST', 'http://dzaratsian80.field.hortonworks.com:4444/api',
        '-d', body,
        '-H', 'content-type:application/ ',
    ]
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()

    # With "-i" curl prints the headers first; the last part is parsed as JSON
    reply = json.loads(out.split('\r\n')[-1])
    prediction_label = reply['prediction_label']
    prediction_prob = reply['prediction_prob']

    writeCallback.content = body
    splitFlowFile = session.write(splitFlowFile, writeCallback)
    attributes = {
        'prediction_label': str(prediction_label),
        'prediction_prob': str(prediction_prob),
    }
    splitFlowFile = session.putAllAttributes(splitFlowFile, attributes)
    session.transfer(splitFlowFile, REL_SUCCESS)
def process(self, inputStream, outputStream):
    """Rewrite the JSON input through format_geotypes() and write it back.

    Args:
        inputStream: JSON input, read as UTF-8.
        outputStream: receives the result as indented UTF-8 JSON.
    """
    parsed = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
    converted = format_geotypes(parsed, GEOPOINTS)
    rendered = json.dumps(converted, indent=4)
    outputStream.write(bytearray(rendered.encode('utf-8')))
def saveResponse(self, context):
    """Store the assessor's response and redirect to the detail page.

    The response is saved in the object's responses payload under the
    assessor's name with the keys status, recommendation, size-agreement
    and comments, plus date once submitted. Status is "submitted" when the
    "action" request parameter starts with "submit" (case-insensitive),
    otherwise "draft". When status == "submitted", reviewer sees it.

    Args:
        context: mapping providing "response" and "portalPath".
    """
    params = self.request
    oid = params.getParameter("oid")
    action = params.getParameter("action")
    submitted = action and re.match("submit", action, re.I)
    status = "submitted" if submitted else "draft"

    recommendation = params.getParameter("recommendation")
    sizeAgreement = params.getParameter("size-agreement")
    comments = params.getParameter("comments")

    # Start from the stored responses when the payload already exists
    storedObj, fileExisted = self.hasResponses(oid)
    if fileExisted:
        committeeResponses = self.getResponses(storedObj)
    else:
        committeeResponses = JsonObject()

    assessorResponse = JsonObject()
    assessorResponse.put("status", status)
    if status == 'submitted':
        # Only submitted responses are dated
        assessorResponse.put("date", self._getToday())
    assessorResponse.put("recommendation", recommendation)
    assessorResponse.put("size-agreement", sizeAgreement)
    assessorResponse.put("comments", comments)
    committeeResponses.put(self.assessor, assessorResponse)

    updated = IOUtils.toInputStream(committeeResponses.toString(), "UTF-8")
    StorageUtils.createOrUpdatePayload(storedObj, self.PAYLOAD, updated)
    context["response"].sendRedirect(context["portalPath"] + "/detail/" + oid)
def process(self, inputStream, outputStream):
    """Flatten the JSON input with flatten_nested_json() and write it back.

    Args:
        inputStream: JSON input, read as UTF-8.
        outputStream: receives the result as indented UTF-8 JSON.
    """
    parsed = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
    rendered = json.dumps(flatten_nested_json(parsed), indent=4)
    outputStream.write(bytearray(rendered.encode('utf-8')))
def process(self, inputStream):
    """Load mapping data from the input, run the vindex transform and route results.

    The stripped input is parsed as JSON into self._attrs["mapping_data"]
    before Processor().transform_vindex() is called with those attributes.

    Args:
        inputStream: JSON input, read as UTF-8.
    """
    text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
    self._attrs["mapping_data"] = json.loads(text.strip())

    outcome = Processor().transform_vindex(attrs=self._attrs)
    corrects, incorrects, attributes = outcome

    # Correct records go to _succ first, then incorrect ones to _fail
    for records, relationship in ((corrects, _succ), (incorrects, _fail)):
        self._transfer(records, attributes, relationship)
def pageContent(self):
    """Return the HTML used to display this object's content.

    "text/html" objects are framed at their download URL. For everything
    else the "preview" payload is used: zip objects are framed at the
    preview's download URL, and any other type has the preview payload
    returned inline as UTF-8 text.

    Returns:
        The HTML string, or None when storage raises StorageException.
    """
    def resolveUrlBase():
        # Configured base URL, or one derived from the request when unset.
        # The derived URL used to be computed and thrown away, leaving the
        # literal text "None" at the front of every generated path.
        urlBase = self.config.getString(None, ["urlBase"])
        if urlBase is None:
            urlBase = "http://%s:%s%s/" % (
                self.req.serverName, self.serverPort, self.contextPath)
        return urlBase

    # Object ID
    oid = self.metadata.getFirst("id")

    # Determine MIME Type
    mimeType = "Unknown"
    mimeList = self.metadata.getList("dc_format")
    if mimeList is not None and not mimeList.isEmpty():
        mimeType = mimeList.get(0)

    # The HTML payload is the real object, display in a frame because we
    # have no idea what kind of object it is.
    if mimeType == "text/html":
        objectPath = "%s%s/download/%s/" % (resolveUrlBase(), self.portalId, oid)
        return self.frameContent(objectPath)

    # We are rendering a HTML preview...
    preview = self.metadata.getFirst("preview")

    # ... of an IMS package or zipped website. Treat as per html above.
    if mimeType == "application/zip":
        objectPath = "%s%s/download/%s/%s" % (
            resolveUrlBase(), self.portalId, oid, preview)
        return self.frameContent(objectPath)

    # ... of an HTML excerpt, such as an ICE rendition. Render in page.
    try:
        storedObject = Services.getStorage().getObject(oid)
        payload = storedObject.getPayload(preview)
        out = ByteArrayOutputStream()
        IOUtils.copy(payload.open(), out)
        payload.close()
        return out.toString("UTF-8")
    except StorageException:
        return None
def process(self, inputStream, outputStream):
    """Write a summary object built from the input's 'uuid' and 'file_name'.

    Args:
        inputStream: JSON input, read as UTF-8; must contain 'uuid' and
            'file_name'.
        outputStream: receives the new object as indented UTF-8 JSON.

    Raises:
        KeyError: when 'uuid' or 'file_name' is missing from the input.
    """
    text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
    obj = json.loads(text)
    newObj = {
        "Source": "NiFi",
        # The comma after this entry was missing, making the whole script
        # a syntax error.
        "ID": obj['uuid'],
        "file_name": obj['file_name'],
    }
    outputStream.write(bytearray(json.dumps(newObj, indent=4).encode('utf-8')))
def process(self, inputStream, outputStream):
    """Extract the words of all tokens tagged as LOCATION.

    Args:
        inputStream: JSON input, read as UTF-8, with 'sentences', each
            holding 'tokens' that carry 'ner' and 'word'.
        outputStream: receives the list of words as indented UTF-8 JSON.
    """
    annotated = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
    # Document order is kept: sentence by sentence, token by token
    locations = [
        token['word']
        for sentence in annotated['sentences']
        for token in sentence['tokens']
        if token['ner'] == 'LOCATION'
    ]
    rendered = json.dumps(locations, indent=4)
    outputStream.write(bytearray(rendered.encode('utf-8')))
def updateWorkFlowMetadata(self, workflowMetadata, toWorkflowId, toWorkflowStage):
    """Set "id" and "step" in the workflow metadata and store it on self.object.

    A StorageException while storing is reported on stdout and swallowed.

    Args:
        workflowMetadata: payload whose JSON content is the starting point.
        toWorkflowId: value stored under "id".
        toWorkflowStage: value stored under "step".
    """
    metadata = JsonSimple(workflowMetadata.open()).getJsonObject()
    for key, value in (("id", toWorkflowId), ("step", toWorkflowStage)):
        metadata.put(key, value)

    updated = IOUtils.toInputStream(metadata.toString(), "UTF-8")
    try:
        StorageUtils.createOrUpdatePayload(self.object, "workflow.metadata", updated)
    except StorageException:
        print(" ERROR updating dataset payload")
def process(self, inputStream, outputStream):
    """Replace the content with a fixed two-field JSON object.

    The input is still parsed, so invalid JSON raises before anything is
    written; its content is otherwise unused.

    Args:
        inputStream: JSON input, read as UTF-8.
        outputStream: receives the fixed object as indented UTF-8 JSON.
    """
    # Parsed only for validation; the result is not used below
    json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))

    replacement = dict([("Source", "NiFi"), ("Dest", "Stuff")])
    rendered = json.dumps(replacement, indent=4)
    outputStream.write(bytearray(rendered.encode('utf-8')))
def process(self, inputStream, outputStream):
    """Remember the incoming content on the global holder and pass it through.

    The text is stored on content.stream and then written unchanged to the
    output.

    Args:
        inputStream: input content, read as UTF-8.
        outputStream: receives the same content.
    """
    global content
    content.stream = IOUtils.toString(inputStream, StandardCharsets.UTF_8)

    # do stuff to original file: (currently a straight pass-through)
    passthrough = content.stream

    # keep original content in parent file
    outputStream.write(passthrough)
def updatePackageType(self, tfPackage, toWorkflowId):
    """Set "packageType" in the TF package and store it back on self.object.

    A StorageException while storing is reported on stdout and swallowed.

    Args:
        tfPackage: payload whose JSON content is updated; its id is reused.
        toWorkflowId: value stored under "packageType".
    """
    package = JsonSimple(tfPackage.open()).getJsonObject()
    package.put("packageType", toWorkflowId)

    updated = IOUtils.toInputStream(package.toString(), "UTF-8")
    try:
        StorageUtils.createOrUpdatePayload(self.object, tfPackage.getId(), updated)
    except StorageException:
        print(" ERROR updating dataset payload")
def process(self, inputStream, outputStream):
    """Reduce a JSON document (or list of documents) to its ``_id`` field(s).

    A JSON object becomes ``{"_id": ...}``; a JSON array becomes an array of
    such objects, one per element; any other JSON value produces the literal
    text ``none``. The result is written UTF-8 encoded.

    Args:
        inputStream: stream holding the JSON input.
        outputStream: stream receiving the reduced output.
    """
    def id_only(doc):
        # A missing "_id" yields null in the output rather than an error.
        return {"_id": doc.get("_id")}

    parsed = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
    if isinstance(parsed, list):
        rendered = json.dumps([id_only(item) for item in parsed])
    elif isinstance(parsed, dict):
        rendered = json.dumps(id_only(parsed))
    else:
        rendered = 'none'
    outputStream.write(bytearray(rendered.encode('utf-8')))
def process(self, file):
    """Flag a file that contains text passing a payment-card number check.

    Unallocated/unused block entries and non-files are skipped. Otherwise
    the file content is read as UTF-8 text, searched for digit runs that
    look like card numbers, and each candidate is verified with a Luhn
    checksum. The first valid candidate increments ``self.filesFound`` and
    posts an interesting-file-hit artifact for the file.

    Always returns ``IngestModule.ProcessResult.OK``.

    NOTE(review): the nesting below was reconstructed from a copy whose
    indentation was lost; confirm that the artifact is meant to be posted
    only once per file.
    NOTE(review): ``inputStream`` is not closed anywhere in this block.
    """
    def luhnChecksumIsValid(cardNumber):
        """Return True when the digit string passes the Luhn mod-10 check."""
        # check to make sure that the card passes a luhn mod-10 checksum
        total = 0
        oddTotal = 0
        evenTotal = 0
        # Work from the rightmost digit: positions 1, 3, 5... are summed
        # as-is, positions 2, 4, 6... are doubled first.
        reversedCardNumber = cardNumber[::-1]
        oddDigits = reversedCardNumber[0::2]
        evenDigits = reversedCardNumber[1::2]
        for count in range(0, len(oddDigits)):
            oddTotal += int(oddDigits[count])
        for count in range(0, len(evenDigits)):
            evenDigit = int(evenDigits[count])
            evenDigit = evenDigit * 2
            # A doubled value above 9 contributes the sum of its two digits,
            # which equals the value minus 9.
            if evenDigit > 9:
                evenDigit = evenDigit - 9
            evenTotal += evenDigit
        total = oddTotal + evenTotal
        return (total % 10 == 0)

    # Skip anything that is not a regular file: unallocated blocks, unused
    # blocks and non-file entries.
    if ((file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) or
            (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS) or
            (file.isFile() == False)):
        return IngestModule.ProcessResult.OK
    inputStream = ReadContentInputStream(file)
    text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
    # When configured to skip binaries, a NUL character anywhere in the
    # decoded text is taken as the sign of a binary file.
    if self.skipBinaries:
        if b'\x00' in text:
            return IngestModule.ProcessResult.OK
    # Candidate: a leading digit 1-6 followed by 13 to 23 more digits, each
    # of which may be followed by spaces or hyphens.
    initialCCPattern = '[1-6](?:\d[ -]*?){13,23}'
    possibleCCs = re.findall(initialCCPattern, text, re.IGNORECASE)
    # Per-file marker, reset for every file processed.
    self.fileFlagged = 0
    if possibleCCs:
        for cc in possibleCCs:
            # Strip every non-digit so only the bare number is checksummed.
            delim_regex = "\D+"
            cc = re.sub(delim_regex, '', cc)
            if luhnChecksumIsValid(cc):
                if self.fileFlagged == 0:
                    self.filesFound += 1
                    art = file.newArtifact(
                        BlackboardArtifact.ARTIFACT_TYPE.
                        TSK_INTERESTING_FILE_HIT)
                    att = BlackboardAttribute(
                        BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.
                        getTypeID(),
                        PaymentCardFileIngestModuleFactory.moduleName,
                        "Files With Possible Payment Card Numbers")
                    art.addAttribute(att)
                    # Announce the new artifact via the ingest services.
                    IngestServices.getInstance().fireModuleDataEvent(
                        ModuleDataEvent(
                            PaymentCardFileIngestModuleFactory.moduleName,
                            BlackboardArtifact.ARTIFACT_TYPE.
                            TSK_INTERESTING_FILE_HIT, None))
                    self.fileFlagged = 1
    return IngestModule.ProcessResult.OK
def __getPayloadContent(self, oid, pid):
    """Build an HTML preview fragment for one payload of a stored object.

    The fragment depends on the payload's content type:

    * ``text/html``: an iframe pointing at the download URL of ``oid``.
    * other ``text/*``: the payload text wrapped in ``<pre>``.
    * PDF, ``vnd.ms*`` and OpenDocument types: the ``<body>`` of the
      payload's ``.htm`` rendition, renamed to a ``<div>``.
    * ``image/*``: a linked ``<img>`` whose source is ``pid``.
    * anything else: an empty string.

    Args:
        oid: identifier of the stored object.
        pid: identifier of the payload inside that object.

    Returns:
        The HTML fragment. An error ``<div>`` is returned when the payload
        does not exist, and a "No preview available" paragraph when the
        HTML rendition is missing or cannot be rendered.
    """
    noPreview = "<p class=\"error\">No preview available</p>"
    print(" * combined.py: oid='%s' pid='%s'" % (oid, pid))
    payload = self.__storage.getPayload(oid, pid)
    if payload is None:
        return "<div>Error: No content for '%s'</div>" % oid
    mimeType = payload.contentType
    contentStr = ""
    if mimeType.startswith("text/"):
        if mimeType == "text/html":
            contentStr = '<iframe class="iframe-preview" src="%s/download/%s"></iframe>' % \
                (portalPath, oid)
        else:
            # NOTE(review): the text is inserted without HTML escaping.
            sw = StringWriter()
            sw.write("<pre>")
            IOUtils.copy(payload.getInputStream(), sw)
            sw.write("</pre>")
            sw.flush()
            contentStr = sw.toString()
    elif (mimeType == "application/pdf" or "vnd.ms" in mimeType
          or "vnd.oasis.opendocument." in mimeType):
        # get the html version if exist...
        pid = os.path.splitext(pid)[0] + ".htm"
        print(" * combined.py: pid=%s" % pid)
        payload = self.__storage.getPayload(oid, pid)
        if payload is None:
            # Handle the missing rendition explicitly instead of relying on
            # the handler below to swallow an AttributeError.
            return noPreview
        saxReader = SAXReader(False)
        try:
            document = saxReader.read(payload.getInputStream())
            slideNode = document.selectSingleNode("//*[local-name()='body']")
            slideNode.setName("div")
            out = ByteArrayOutputStream()
            format = OutputFormat.createPrettyPrint()
            format.setSuppressDeclaration(True)
            format.setExpandEmptyElements(True)
            writer = XMLWriter(out, format)
            writer.write(slideNode)
            writer.close()
            contentStr = out.toString("UTF-8")
        except:
            # Deliberately a bare except: parser failures surface as Java
            # exceptions, which a narrower Python clause may not catch.
            traceback.print_exc()
            contentStr = noPreview
    elif mimeType.startswith("image/"):
        # The link and image source use pid alone, as before; an unused
        # "oid/pid" value that used to be computed here has been dropped.
        contentStr = '<a class="image" href="%(src)s" style="max-width:98%%">' \
            '<img src="%(src)s" style="max-width:100%%" /></a>' % { "src": pid }
    return contentStr
def getPayloadContent(self):
    """Build an HTML preview fragment for this object's payload.

    The fragment depends on ``self.__mimeType``:

    * ``text/html``: an iframe pointing at the object's download URL.
    * other ``text/*``: the payload text wrapped in ``<pre>`` (empty when
      the payload cannot be found).
    * PDF and any type containing ``vnd``: the ``<body>`` element of the
      payload's ``.htm`` rendition, pretty-printed.
    * anything else: an empty string.

    Returns:
        The HTML fragment. When the ``.htm`` rendition is missing or cannot
        be parsed, the traceback is printed and a "No preview available"
        paragraph is returned.
    """
    mimeType = self.__mimeType
    print(" * detail.py: payload content mimeType=%s" % mimeType)
    contentStr = ""
    if mimeType.startswith("text/"):
        if mimeType == "text/html":
            contentStr = '<iframe class="iframe-preview" src="%s/%s/download/%s"></iframe>' % \
                (contextPath, portalId, self.__oid)
        else:
            # The payload id is the last path segment of the object id.
            pid = self.__oid[self.__oid.rfind("/")+1:]
            payload = self.__storage.getPayload(self.__oid, pid)
            print(" * detail.py: pid=%s payload=%s" % (pid, payload))
            if payload is not None:
                sw = StringWriter()
                sw.write("<pre>")
                IOUtils.copy(payload.getInputStream(), sw)
                sw.write("</pre>")
                sw.flush()
                contentStr = sw.toString()
    # Any type containing "vnd" qualifies, which already covers the
    # "vnd.oasis.opendocument." types that used to be tested separately.
    elif mimeType == "application/pdf" or "vnd" in mimeType:
        # get the html version if exist...
        pid = os.path.splitext(self.__pid)[0] + ".htm"
        print(" * detail.py: pid=%s" % pid)
        payload = self.__storage.getPayload(self.__oid, pid)
        saxReader = SAXReader(False)
        try:
            # Parsing and rendering share one guarded region. Previously a
            # failed parse was only logged and the code went on to use the
            # unbound `document`, raising NameError.
            document = saxReader.read(payload.getInputStream())
            slideNode = document.selectSingleNode("//*[local-name()='body']")
            # encode character entities correctly
            out = ByteArrayOutputStream()
            format = OutputFormat.createPrettyPrint()
            format.setSuppressDeclaration(True)
            writer = XMLWriter(out, format)
            writer.write(slideNode)
            writer.close()
            contentStr = out.toString("UTF-8")
        except:
            # Deliberately a bare except: parser failures surface as Java
            # exceptions, which a narrower Python clause may not catch.
            traceback.print_exc()
            contentStr = "<p class=\"error\">No preview available</p>"
    return contentStr
def process(self, inputStream):
    """Score one comma-separated record and emit the result as a flow file.

    A child flow file is created from ``self.parentFlowFile``. The input is
    read as UTF-8 text and split on commas; the first eight fields are sent
    (via ``curl``) to the scoring endpoint together with the bearer token
    taken from the flow file's ``accessToken`` attribute. The first returned
    record is written, comma-joined and without its element at index 8, as
    the child's content; its last element is stored in the
    ``predictions_yards_gained`` attribute.

    The child flow file is transferred to ``REL_SUCCESS`` whether or not
    scoring worked; failures are now printed as a traceback instead of
    being discarded silently.

    Args:
        inputStream: stream holding the comma-separated record.
    """
    import traceback

    splitFlowFile = session.create(self.parentFlowFile)
    writeCallback = WriteCallback()
    try:
        # To read content as a string:
        data = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        # Only the first eight columns are used. A shorter record fails the
        # unpacking and is handled like any other error below.
        (down, qtr, month_day, playtype_lag_index,
         timesecs, ydsnet, ydstogo, yrdline100) = \
            [str(field) for field in data.split(',')[:8]]
        accessToken = splitFlowFile.getAttribute('accessToken')
        # The field texts are spliced in verbatim: every value is sent as a
        # bare JSON token except PlayType_lag_index, which is quoted.
        requestBody = (
            '{"fields":["down","qtr","month_day","PlayType_lag_index","TimeSecs","ydsnet","ydstogo","yrdline100"],"records":[['
            + down + ',' + qtr + ',' + month_day + ',"'
            + playtype_lag_index + '",' + timesecs + ',' + ydsnet + ','
            + ydstogo + ',' + yrdline100 + ']]}'
        )
        # NOTE(review): '-k' disables TLS certificate verification and the
        # endpoint address is hard-coded.
        curl_input = [
            'curl', '-i', '-k', '-X', 'POST',
            'https://172.26.228.121/v2/scoring/online/32b1d108-369d-42ce-966b-48d0a20a6b38',
            '-d', requestBody,
            '-H', 'content-type:application/json',
            '-H', str('authorization: Bearer ' + str(accessToken))
        ]
        result = subprocess.Popen(curl_input, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        out, err = result.communicate()
        # '-i' makes curl print the response headers too, so only the text
        # after the last CRLF is parsed as the JSON body.
        prediction_results = json.loads(
            out.split('\r\n')[-1])['records'][0]
        predictions_yards_gained = str(prediction_results[-1])
        payload = ','.join([
            str(record) for i, record in enumerate(prediction_results)
            if i != 8
        ])
        writeCallback.content = payload
        splitFlowFile = session.write(splitFlowFile, writeCallback)
        splitFlowFile = session.putAllAttributes(
            splitFlowFile,
            {'predictions_yards_gained': predictions_yards_gained})
    except:
        # Still best-effort (the flow file is transferred regardless), but
        # no longer silent. The clause stays bare so that Java exceptions
        # raised by the session calls are covered too.
        traceback.print_exc()
    session.transfer(splitFlowFile, REL_SUCCESS)
def process(self, inputStream, outputStream):
    """Flatten a JSON record into one comma-separated line.

    The input is read as UTF-8 JSON and its ``name``, ``value``,
    ``message`` and ``timestamp`` members are written, in that order and
    joined by commas, to ``outputStream`` as UTF-8.

    Any failure is printed to stdout as a traceback and then re-raised.

    Args:
        inputStream: stream holding the JSON record.
        outputStream: stream receiving the comma-separated line.
    """
    try:
        record = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
        columns = [record[key] for key in ('name', 'value', 'message', 'timestamp')]
        line = "{},{},{},{}".format(*columns)
        outputStream.write(bytearray(line.encode('utf-8')))
    except:
        traceback.print_exc(file=sys.stdout)
        raise
def __init__(self):
    """Stream the payload addressed by the request URI to the response.

    The part of the request URI after ``<portalId>/<pageName>/`` is split
    into an object id and a payload id. When the URI contains an encoded
    slash (``%2F``) the split happens at the first ``/``, otherwise at the
    last one; both parts are then URL-decoded. The payload is copied to the
    response using its own content type, and
    ``application/octet-stream`` payloads are additionally marked as
    attachments.

    A payload that cannot be found produces a 404 plain-text response
    instead of an AttributeError.
    """
    basePath = portalId + "/" + pageName
    uri = request.getAttribute("RequestURI")
    print(" * download.py: basePath=%s uri=%s" % (basePath, uri))
    uri = uri[len(basePath) + 1 :]
    if uri.find("%2F") == -1:
        slash = uri.rfind("/")
    else:
        slash = uri.find("/")
    oid = URLDecoder.decode(uri[:slash])
    pid = URLDecoder.decode(uri[slash + 1 :])
    print(" * download.py: oid=%s pid=%s" % (oid, pid))
    payload = Services.storage.getPayload(oid, pid)
    if payload is None:
        response.setStatus(404)
        writer = response.getPrintWriter("text/plain")
        writer.println("Not found: uri='%s'" % uri)
        writer.close()
        return
    filename = os.path.split(pid)[1]
    mimeType = payload.contentType
    if mimeType == "application/octet-stream":
        response.setHeader("Content-Disposition", "attachment; filename=%s" % filename)
    out = response.getOutputStream(payload.contentType)
    try:
        IOUtils.copy(payload.inputStream, out)
    finally:
        # Close the response stream even when the copy fails part-way.
        out.close()
def __createOrUpdateArrayPayload(self, oid, payloadName, newObject):
    """Append an entry to a JSON-array payload and store the array back.

    The current array is fetched with ``__getPayloadJsonArray``,
    ``newObject`` is added to it, and the array's string form is written,
    UTF-8 encoded, as payload ``payloadName`` of the object ``oid``.

    oid: identifier of the digital object in storage
    payloadName: name of the payload holding the array
    newObject: entry to append, e.g. a JsonObject
    """
    entries = self.__getPayloadJsonArray(oid, payloadName)
    entries.add(newObject)
    target = self.Services.getStorage().getObject(oid)
    serialized = IOUtils.toInputStream(entries.toString(), "UTF-8")
    StorageUtils.createOrUpdatePayload(target, payloadName, serialized)
def getSourceSample(self, id, limit):
    """Return the beginning of an object's source payload as text.

    Args:
        id: identifier of the object in storage.
        limit: maximum number of characters to return before truncating.

    Returns:
        The payload decoded as UTF-8. Text longer than ``limit`` is cut to
        ``limit`` characters and suffixed with ``...``. An empty string is
        returned whenever there is nothing to sample (no object, no source
        payload, no text), so callers always receive a string.
    """
    # Get source payload
    storedObject = self.services.getStorage().getObject(id)
    if storedObject is None:
        return ""
    payload = storedObject.getPayload(storedObject.getSourceId())
    if payload is None:
        return ""

    # Read to a string; the payload is closed even if the copy fails.
    out = ByteArrayOutputStream()
    try:
        IOUtils.copy(payload.open(), out)
    finally:
        payload.close()
    text = out.toString("UTF-8")

    # Return response
    if text is None:
        return ""
    if len(text) > limit:
        return text[0:limit] + "..."
    return text
def __load(self, oid):
    """Return the HTML preview markup for a package item.

    * ``blank-*`` ids yield the page template around an empty table-of-
      contents placeholder.
    * ``package-*`` ids yield the template around the package description
      (read from the package manifest) and a placeholder.
    * Any other id is looked up in storage and rendered from its preview
      payload: textual content is returned as-is when it already contains a
      ``class="body"`` element, HTML is wrapped in the template, plain text
      is wrapped in ``<pre>``; images become an ``<img>`` block and
      everything else a plain link.

    Args:
        oid: identifier of the item to preview.

    Returns:
        The HTML markup as a string.
    """
    template = """<div class="title" /><div class="page-toc" /><div class="body"><div>%s</div></div>"""
    print("Loading HTML preview for %s..." % oid)
    if oid.startswith("blank-"):
        return template % ('<div class="blank-toc" id="%s-content"></div>' % oid)
    if oid.startswith("package-"):
        pipId = oid[oid.find("-")+1:]
        print("package pipId=%s" % pipId)
        manifest = self.__readManifest(pipId)
        return template % ('<div class="package-description">%s</div><div class="blank-toc" id="%s-content"></div>' %
                           (manifest.get("description"), oid))

    storedObject = Services.getStorage().getObject(oid)
    # The close() calls used to sit after the last return and never ran;
    # try/finally releases the payload and the object on every path.
    try:
        # get preview payload or source if no preview
        pid = self.__getPreviewPid(storedObject)
        payload = storedObject.getPayload(pid)
        try:
            mimeType = payload.getContentType()
            print("pid=%s mimeType=%s" % (pid, mimeType))
            isHtml = mimeType in ["text/html", "application/xhtml+xml"]
            if isHtml or mimeType.startswith("text/"):
                out = ByteArrayOutputStream()
                IOUtils.copy(payload.open(), out)
                content = out.toString("UTF-8")
                # find() returns -1 (truthy) when the marker is absent, so
                # the old `if content.find(...)` test was wrong; test for
                # membership instead.
                if 'class="body"' in content:
                    ## assumes ICE content
                    return content
                elif isHtml:
                    return template % content
                elif mimeType == "text/plain":
                    return template % ('<pre>%s</pre>' % content)
                else:
                    return content
            elif mimeType.startswith("image/"):
                return template % ('<div rel="%s" class="image"><img src="%s" /></div><div class="clear"></div>' % (oid, pid))
            else:
                return '<a href="%s" rel="%s">%s</a>' % (oid, mimeType, pid)
        finally:
            payload.close()
    finally:
        storedObject.close()
def process(self, inputStream, outputStream):
    """Reshape a ``rating`` document into primary and secondary ratings.

    The input is read as UTF-8 JSON. The ``value`` of ``rating.primary``
    becomes ``Rating``; every other entry of ``rating`` is listed under
    ``SecondaryRatings`` keyed by its name, with ``Id``, a fixed ``Range``
    of 5 and its ``Value``. The result is written as pretty-printed,
    UTF-8 encoded JSON.

    Args:
        inputStream: stream holding the source document.
        outputStream: stream receiving the reshaped document.
    """
    source = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))
    ratings = source['rating']
    secondary = {}
    result = {
        "Range": 5,
        "Rating": ratings['primary']['value'],
        "SecondaryRatings": secondary,
    }
    for name, entry in ratings.iteritems():
        if name == "primary":
            continue
        secondary[name] = {"Id": name, "Range": 5, "Value": entry['value']}
    encoded = json.dumps(result, indent=4).encode('utf-8')
    outputStream.write(bytearray(encoded))
def process(self, inputStream, outputStream):
    """Write the ``values`` member of the incoming JSON document.

    The input is read as UTF-8 JSON and its ``values`` member is converted
    to bytes and written to ``outputStream``.

    Any failure is printed to stdout as a traceback and then re-raised.

    Args:
        inputStream: stream holding the JSON document.
        outputStream: stream receiving the bytes of ``values``.
    """
    try:
        # Read input FlowFile content
        input_text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        input_obj = json.loads(input_text)
        # Transform content
        output_obj = input_obj
        # Write output content
        # The method name after `StringUtil.` was missing (a syntax error).
        # NOTE(review): toBytes is the presumed intent and expects a
        # string; confirm that 'values' holds text.
        outputStream.write(StringUtil.toBytes(output_obj['values']))
    except:
        traceback.print_exc(file=sys.stdout)
        raise
def __createPackage(self, outputFile=None):
    """Write the package as a zip archive containing an IMS manifest.

    The manifest built by ``__createManifest`` is marshalled to formatted
    XML and stored as ``imsmanifest.xml``. Every object id found in the
    package manifest is then read from storage and each of its payloads,
    except those of type Annotation, is added as
    ``resources/<oid>/<pid>``.

    Args:
        outputFile: path of the zip file to create. When omitted, the
            archive is sent to the HTTP response as an attachment named
            after the package title.
    """
    title = self.__manifest.getString(None, "title")
    imsManifest = self.__createManifest()

    # Marshal the manifest to XML text up front.
    marshaller = JAXBContext.newInstance("au.edu.usq.fascinator.ims").createMarshaller()
    marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, True)
    xmlBuffer = StringWriter()
    element = ObjectFactory.createManifest(ObjectFactory(), imsManifest)
    marshaller.marshal(element, xmlBuffer)
    xmlBuffer.close()

    # Pick the destination: the HTTP response by default, else a file.
    if outputFile is None:
        print("writing to http output stream...")
        filename = urllib.quote(title.replace(" ", "_"))
        response.setHeader("Content-Disposition", "attachment; filename=%s.zip" % filename)
        sink = response.getOutputStream("application/zip")
    else:
        print("writing to %s..." % outputFile)
        sink = FileOutputStream(outputFile)

    archive = ZipOutputStream(sink)
    archive.putNextEntry(ZipEntry("imsmanifest.xml"))
    IOUtils.write(xmlBuffer.toString(), archive)
    archive.closeEntry()

    for oid in self.__manifest.search("id"):
        stored = Services.getStorage().getObject(oid)
        for pid in stored.getPayloadIdList():
            payload = stored.getPayload(pid)
            # Annotation payloads are left out of the archive.
            if PayloadType.Annotation.equals(payload.getType()):
                continue
            archive.putNextEntry(ZipEntry("resources/%s/%s" % (oid, pid)))
            IOUtils.copy(payload.open(), archive)
            payload.close()
            archive.closeEntry()
        stored.close()

    archive.close()
    sink.close()
def __init__(self):
    """Stream the payload addressed by the request URI, or answer 404.

    The URL-decoded request URI, minus the ``<portalId>/<pageName>/``
    prefix, is resolved to an object id, payload id and payload by
    ``__resolve``. A resolved payload is copied to the response using its
    own content type (``application/octet-stream`` payloads are marked as
    attachments); otherwise a plain-text "Not found" message is sent with
    status 404.
    """
    prefix = portalId + "/" + pageName
    uri = URLDecoder.decode(request.getAttribute("RequestURI"))[len(prefix)+1:]
    oid, pid, payload = self.__resolve(uri)
    print(""" * download.py: uri='%s' oid='%s' pid='%s' payload='%s'""" % (uri, oid, pid, payload))

    if payload is None:
        response.setStatus(404)
        notFound = response.getPrintWriter("text/plain")
        notFound.println("Not found: uri='%s'" % uri)
        notFound.close()
        return

    filename = os.path.split(pid)[1]
    if payload.contentType == "application/octet-stream":
        # Generic binary content is offered as a download.
        response.setHeader("Content-Disposition", "attachment; filename=%s" % filename)
    target = response.getOutputStream(payload.contentType)
    IOUtils.copy(payload.inputStream, target)
    target.close()