def runJob(sparkSession, s3confPath, s3filePath):
    """Run the closed-tickets batch for one input file.

    Reads and validates the file at ``s3filePath``, derives the closed-ticket
    columns, writes the result to the partitioned ``copt-rod-closed-*``
    Elasticsearch index and, whatever the outcome, writes one run-status
    record to the ``copt-rod-log-`` index.

    Args:
        sparkSession: Spark session handed to the readers/helpers and used to
            build the status DataFrame.
        s3confPath: Configuration path forwarded to the validation and
            Elasticsearch helpers.
        s3filePath: Path of the input file to process; also stored in the
            ``file`` column of every indexed row.

    Raises:
        Exception: Any error raised while processing is logged, recorded in
            the status record and re-raised. Only write errors whose message
            contains ``index_closed_exception`` are tolerated.
    """
    spark = sparkSession
    logStatus = startLogStatus(s3filePath)
    dfCount = 0
    try:
        logging.info(
            "Start batch Coptero ROD for s3confPath:" + s3confPath
            + "--------------------------------------")

        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getClosedSchema(s3filePath), spark),
            spark,
            s3confPath)
        logging.info("validatedRecords.count().." + str(validatedRecords.count()))

        ticketToCloseDS = detailClosedColumns(validatedRecords, spark)
        logging.info("ticketToCloseDS.count().." + str(ticketToCloseDS.count()))

        esIndex = ticketToCloseDS \
            .withColumn("open", F.lit(Constants.OPEN_NO)) \
            .withColumn("file", F.lit(s3filePath)) \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id"))

        logging.info("Persisting ES index..")
        dfCount = esIndex.count()
        logging.info("indexDataFrame.count.." + str(dfCount))

        try:
            ElasticDsl.writeMappedESIndex(
                esIndex, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id", s3confPath)
        except Exception as e:
            message = str(e)
            # Substring test: str.find() returns -1 (truthy) when absent and
            # 0 (falsy) when the match is at the start, so it cannot be used
            # as a boolean.
            if "index_closed_exception" not in message:
                raise
            # TODO: saveToEs with a {partitioned} index works fine but still
            # ends with this exception.
            logging.info("catched index_closed_exception: " + message)

        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCount
        logStatus.exception = ""
        logStatus.end_date = ""
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as e:
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = False
        logStatus.count = dfCount
        logStatus.exception = str(e)
        logStatus.end_date = ""
        logging.error("catched: " + str(e))
        raise
    finally:
        # Always persist the run status, on success and on failure alike.
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(
            logStatus.file, logStatus.count, logStatus.success,
            logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", s3confPath)
def runJob(sparkSession, s3confPath, s3filePath):
    """Run the work-info batch for one input file.

    Reads and validates the file at ``s3filePath``, enriches it through
    ``getAgentSmcCluster`` and ``getRelations``, writes the result to the
    partitioned ``copt-rod-wif-*`` Elasticsearch index, runs the count alert
    check and, whatever the outcome, writes one run-status record to the
    ``copt-rod-log-`` index.

    Args:
        sparkSession: Spark session handed to the readers/helpers and used to
            build the status DataFrame.
        s3confPath: Configuration path forwarded to the validation, alerting
            and Elasticsearch helpers.
        s3filePath: Path of the input file to process; also stored in the
            ``file`` column of every indexed row.

    Raises:
        Exception: Any error raised while processing is logged, recorded in
            the status record and re-raised. Only write errors whose message
            contains ``index_closed_exception`` are tolerated.
    """
    spark = sparkSession
    logStatus = startLogStatus(s3filePath)
    dfCount = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath: " + s3confPath
                     + "--------------------------------------")

        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getWISchema(s3filePath), spark),
            spark,
            s3confPath)

        indexAgentSmcCluster = getAgentSmcCluster(validatedRecords, s3confPath, spark)
        indexWithRelations = getRelations(indexAgentSmcCluster, s3confPath, spark)

        partitioned = indexWithRelations \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
            .withColumn("file", F.lit(s3filePath)) \
            .withColumn("work_info_category", Utils.getWorkInfoCategory("work_info_notes"))

        dfCount = partitioned.count()
        logging.info("Persisting ES index..")
        logging.info("indexWorkInfoDataFrame.count().." + str(dfCount))

        try:
            ElasticDsl.writeMappedESIndex(
                partitioned, "copt-rod-wif-{ticket_max_value_partition}", "instanceid", s3confPath)
        except Exception as e:
            message = str(e)
            # Substring test: str.find() returns -1 (truthy) when absent and
            # 0 (falsy) when the match is at the start, so it cannot be used
            # as a boolean.
            if "index_closed_exception" not in message:
                raise
            # TODO: saveToEs with a {partitioned} index works fine but still
            # ends with this exception.
            logging.info("catched index_closed_exception: " + message)

        AlertDsl.checkCount("copt-rod-wif*", s3filePath, dfCount, spark, s3confPath)

        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCount
        logStatus.exception = ""
        logStatus.end_date = ""
        logging.info(
            "End batch Coptero ROD ----------------------------------------------------"
        )
    except Exception as e:
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = False
        logStatus.count = dfCount
        logStatus.exception = str(e)
        logStatus.end_date = ""
        logging.error("catched: " + str(e))
        raise
    finally:
        # Always persist the run status, on success and on failure alike.
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(
            logStatus.file, logStatus.count, logStatus.success,
            logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        # Without this write the status DataFrame is built and then discarded.
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", s3confPath)
def runJob(sparkSession, s3confPath, s3filePath):
    """Run the ticket-relations batch for one input file.

    Reads and validates the file at ``s3filePath``, builds a ``relation_id``
    (``<ticket_id>-<related_ticket_id>``) for every row, writes the result to
    the partitioned ``copt-rod-rel-*`` Elasticsearch index, runs the count
    alert check, calls ``persistRelations`` and, whatever the outcome, writes
    one run-status record to the ``copt-rod-log-`` index.

    Args:
        sparkSession: Spark session handed to the readers/helpers and used to
            build the status DataFrame.
        s3confPath: Configuration path forwarded to the validation, alerting
            and Elasticsearch helpers.
        s3filePath: Path of the input file to process; also stored in the
            ``file`` column of every indexed row.

    Raises:
        Exception: Any error raised while processing is logged, recorded in
            the status record and re-raised. Only write errors whose message
            contains ``index_closed_exception`` are tolerated.
    """
    spark = sparkSession
    conf = s3confPath
    logStatus = startLogStatus(s3filePath)
    dfCountRelation = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath: " + s3confPath
                     + " -------------------------------------")

        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getRelationSchema(s3filePath), spark),
            spark,
            conf)

        rodTicketRelation1 = relationColumns(validatedRecords, spark)
        rodTicketRelation = rodTicketRelation1.withColumn(
            "relation_id",
            F.concat(rodTicketRelation1["ticket_id"], F.lit('-'),
                     rodTicketRelation1["related_ticket_id"]))

        esIndexRel = rodTicketRelation.select(
            'relation_id', 'ticket_id', 'ticket_type', 'related_ticket_id',
            'related_ticket_type', 'association_type', 'submit_date',
            'relation_summary', 'status', 'submitter', 'instanceid')

        partitioned = esIndexRel \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
            .withColumn("file", F.lit(s3filePath))

        dfCountRelation = partitioned.count()
        logging.info("Persisting ES index..")
        logging.info("indexRelationDataFrame.count().." + str(dfCountRelation))

        try:
            ElasticDsl.writeMappedESIndex(
                partitioned, "copt-rod-rel-{ticket_max_value_partition}", "relation_id", conf)
        except Exception as ex:
            message = str(ex)
            # Substring test: str.find() returns -1 (truthy) when absent, so
            # using it as a boolean would swallow every unrelated error.
            if "index_closed_exception" not in message:
                raise
            logging.info("catched index_closed_exception: " + message)

        AlertDsl.checkCount("copt-rod-rel-*", s3filePath, dfCountRelation, spark, conf)

        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCountRelation
        logStatus.exception = ""
        logStatus.end_date = ""

        persistRelations(esIndexRel, conf, spark)
        # TODO: writeMappedESIndex for CRQ and PBI. A disabled variant used to
        # live here that grouped the "Incident" relations by ticket_id into a
        # "relations" list and wrote them to the copt-rod-closed-* index.

        logging.info(
            "End batch Coptero ROD ----------------------------------------------------"
        )
    except Exception as ex:
        message = str(ex)
        logStatus.success = False
        logStatus.count = 0
        logStatus.exception = message
        logStatus.end_date = ""
        logging.info("catched: " + message)
        raise
    finally:
        # Always persist the run status, on success and on failure alike.
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(
            logStatus.file, logStatus.count, logStatus.success,
            logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def buildESIndex(detailType, detail, s3confPath, s3filePath, spark):
    """Build the DataFrame to be indexed for one ticket detail type.

    Joins ``detail`` with the master entities, the FAST admin-number and
    network parquet data and the admin-number tags, adds the derived columns
    shared by every detail type, applies the joins/columns specific to
    ``detailType`` and finally renames the ``*_desc`` master descriptions to
    ``*_id``.

    Args:
        detailType: One of ``"helpdesk"``, ``"problems"`` or ``"changes"``.
        detail: DataFrame with the ticket detail rows to enrich.
        s3confPath: Path of the JSON configuration read through
            ``S3FilesDsl.readConfigJson``.
        s3filePath: Source file path, stored in the ``file`` column.
        spark: Spark session used for every read.

    Returns:
        The enriched DataFrame with the master descriptions renamed.

    Raises:
        ValueError: If ``detailType`` is not one of the supported values.
    """
    # Fail fast: without this check an unknown type leaves ``index`` unbound
    # and only fails after all reads and joins have been set up.
    supportedTypes = ("helpdesk", "problems", "changes")
    if detailType not in supportedTypes:
        raise ValueError(
            "Unsupported detailType: " + repr(detailType)
            + " (expected one of " + ", ".join(supportedTypes) + ")")

    sqlContext = SQLContext(spark)
    # TODO: use .conf instead of .json
    confJson = S3FilesDsl.readConfigJson(s3confPath)

    rodTicketANTags = AdminNumberTags.antagsColumns(
        S3FilesDsl.readFile(confJson.tags_admin_path, spark), spark)
    parquetPath = confJson.fast_parquet_path
    rodPostgreAdminNumber = sqlContext.read.parquet(parquetPath)
    logging.info("FAST joins..")
    networkFast = sqlContext.read.parquet(confJson.fast_network_parquet_path)

    logging.info("common joins..")
    # TODO: add the import of the utils constants
    # TODO: check the parameters passed to the Utils methods
    common3 = joinMasterEntities(detail, spark)
    common2 = common3.join(rodPostgreAdminNumber, ["admin_number"], "left")
    common1 = Utils.fillEmptyFastColumns(common2)

    # "open" flag derived from the status description; empty when the status
    # is in neither list.
    openFlag = F.when(common1.status_desc.isin(Constants.openStatus), Constants.OPEN_YES) \
        .when(common1.status_desc.isin(Constants.notOpenStatus), Constants.OPEN_NO) \
        .otherwise(Constants.EMPTY_STRING)

    common = common1.join(networkFast, ["admin_number"], "left") \
        .withColumn("networkinfo",
                    Utils.networkNestedObject("fast_customer", "fast_end_customer",
                                              "router_interface_vendor_type_set")) \
        .drop("router_interface_vendor_type_set") \
        .join(rodTicketANTags, ["admin_number"], "left") \
        .withColumn("open", openFlag) \
        .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
        .withColumn("admin_number_escaped", Utils.urlWhitespaces("admin_number")) \
        .withColumn("fast_max_resolution_time", Utils.validateNumeric("fast_max_resolution_time")) \
        .withColumn("file", F.lit(s3filePath)) \
        .fillna(Constants.EMPTY_STRING, ["assigned_agent"])

    if detailType == "helpdesk":
        rodTicketReportedSource = getReportedSource(spark)
        operationalManager = getOperationalManager(confJson.operational_path, spark)
        opTags = OperatingTags.operatingTagsColumns(
            S3FilesDsl.readFile(confJson.tags_operating_path, spark))
        customer = Customer.customerColumns(
            S3FilesDsl.readFile(confJson.customer_path, spark), spark)
        endCustomer = EndCustomer.endCustomerColumns(
            S3FilesDsl.readFile(confJson.end_customer_path, spark), spark)

        index1 = common \
            .join(rodTicketReportedSource, ["reported_source_id"], "left") \
            .drop("reported_source_id") \
            .join(operationalManager, ["operating_company_name", "operating_le"], "left") \
            .na.fill(Constants.EMPTY_STRING, ["operational_manager"]) \
            .join(opTags, ["operating_company_name", "operating_le"], "left") \
            .withColumn("tags", Utils.mergeArrays("tags", "operating_tags")) \
            .drop("operating_tags") \
            .join(customer, ["operating_company_name"], "left") \
            .fillna(Constants.EMPTY_STRING, ["customer_correct"]) \
            .join(endCustomer, ["operating_le"], "left") \
            .fillna(Constants.EMPTY_STRING, ["end_customer_correct"]) \
            .withColumn("end_customer_correct",
                        Utils.emptyEndCustomerCorrect("customer_correct", "end_customer_correct")) \
            .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
            .withColumn("end_user_country", Utils.kibanaCountry("end_user_country")) \
            .withColumn("smc_cluster", Utils.smcClusterFromGroup("assigned_support_group")) \
            .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name")) \
            .withColumn("product_categorization_all_tiers",
                        Utils.concat3Columns("product_categorization_tier_1",
                                             "product_categorization_tier_2",
                                             "product_categorization_tier_3")) \
            .withColumn("closure_categorization_all_tiers",
                        Utils.concat3Columns("closure_categorization_tier_1",
                                             "closure_categorization_tier_2",
                                             "closure_categorization_tier_3")) \
            .withColumn("operational_categorization_all_tiers",
                        Utils.concat3Columns("operational_categorization_tier_1",
                                             "operational_categorization_tier_2",
                                             "operational_categorization_tier_3")) \
            .withColumnRenamed("reported_source_desc", "reported_source_id")
        index = FastDsl.fastCircuitFields(index1, confJson, spark)
    elif detailType == "problems":
        index1 = common \
            .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
            .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name"))
        index = FastDsl.fastCircuitFields(index1, confJson, spark)
    else:  # "changes" (detailType was validated above)
        rodTicketReportedSource = getReportedSource(spark)
        index = common \
            .join(rodTicketReportedSource, ["reported_source_id"], "left") \
            .drop("reported_source_id") \
            .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
            .withColumn("company_country", Utils.kibanaCountry("company_country")) \
            .withColumnRenamed("reported_source_desc", "reported_source_id")

    # The user requested that the master-data descriptions be renamed as _id.
    indexRenamed = index \
        .withColumnRenamed("status_desc", "status_id") \
        .withColumnRenamed("substatus_desc", "substatus_id") \
        .withColumnRenamed("urgency_desc", "urgency_id") \
        .withColumnRenamed("priority_desc", "priority_id") \
        .withColumnRenamed("impact_desc", "impact_id")
    return indexRenamed