예제 #1
0
    def runJob(sparkSession, s3confPath, s3filePath):
        """Run the closed-tickets batch for one input file and record the outcome.

        The file at ``s3filePath`` is read with the schema returned by
        ``getClosedSchema`` and passed through ``ValidationsDsl.validateTickets``.
        The result of ``detailClosedColumns`` is extended with the ``open``,
        ``file`` and ``ticket_max_value_partition`` columns and written with
        ``ElasticDsl.writeMappedESIndex`` to
        ``copt-rod-closed-{ticket_max_value_partition}``.

        Whatever happens, a log record built from ``logStatus`` is written with
        ``ElasticDsl.writeESLogIndex`` in the ``finally`` clause.

        Args:
            sparkSession: Spark session used for every read and write.
            s3confPath: Configuration path forwarded to the validation and
                Elasticsearch helpers.
            s3filePath: Path of the input file to process.

        Raises:
            Exception: Any failure in the pipeline is logged, recorded in the
                log status and re-raised. The only error that is swallowed is
                an Elasticsearch write error whose message contains
                ``index_closed_exception``.
        """
        spark = sparkSession
        logStatus = startLogStatus(s3filePath)
        dfCount = 0
        try:
            logging.info(
                "Start batch Coptero ROD for s3confPath:" + s3confPath + "--------------------------------------")
            rawRecords = S3FilesDsl.readFileSchema(s3filePath, getClosedSchema(s3filePath), spark)
            validatedRecords = ValidationsDsl.validateTickets(s3filePath, rawRecords, spark, s3confPath)

            logging.info("validatedRecords.count().." + str(validatedRecords.count()))
            ticketToCloseDS = detailClosedColumns(validatedRecords, spark)
            logging.info("ticketToCloseDS.count().." + str(ticketToCloseDS.count()))

            esIndex = ticketToCloseDS \
                .withColumn("open", F.lit(Constants.OPEN_NO)) \
                .withColumn("file", F.lit(s3filePath)) \
                .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id"))

            logging.info("Persisting ES index..")
            dfCount = esIndex.count()
            logging.info("indexDataFrame.count.." + str(dfCount))

            try:
                ElasticDsl.writeMappedESIndex(esIndex, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id",
                                              s3confPath)
            except Exception as e:
                message = str(e)
                # str.find() returns -1 (truthy) when the text is missing and 0
                # (falsy) when it is at the start, so it must not be used as a
                # boolean; a membership test expresses the intended check.
                if "index_closed_exception" not in message:
                    raise
                # TODO saveToEs {partitioned} works fine but ends with exception ?
                logging.info("catched index_closed_exception: " + message)

            logStatus = copy.deepcopy(logStatus)
            logStatus.success = True
            logStatus.count = dfCount
            logStatus.exception = ""
            logStatus.end_date = ""
            logging.info("End batch Coptero ROD ----------------------------------------------------")
        except Exception as e:
            logStatus = copy.deepcopy(logStatus)
            logStatus.success = False
            logStatus.count = dfCount
            logStatus.exception = str(e)
            logStatus.end_date = ""
            logging.error("catched: " + str(e))
            raise
        finally:
            # Always persist the run log, on success and on failure alike.
            sqlContext = SQLContext(spark)
            logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
            logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success, logStatus.exception,
                                              logStatus.start_date, logStatus.end_date)
            logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
            ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", s3confPath)
예제 #2
0
    def runJob(sparkSession, s3confPath, s3filePath):
        """Run the work-info batch for one input file and record the outcome.

        The file at ``s3filePath`` is read with the schema returned by
        ``getWISchema`` and passed through ``ValidationsDsl.validateTickets``.
        The result then goes through ``getAgentSmcCluster`` and
        ``getRelations``, gets the ``ticket_max_value_partition``, ``file`` and
        ``work_info_category`` columns, and is written with
        ``ElasticDsl.writeMappedESIndex`` to
        ``copt-rod-wif-{ticket_max_value_partition}``. Afterwards
        ``AlertDsl.checkCount`` is called with the number of rows written.

        Whatever happens, a log record built from ``logStatus`` is written with
        ``ElasticDsl.writeESLogIndex`` in the ``finally`` clause.

        Args:
            sparkSession: Spark session used for every read and write.
            s3confPath: Configuration path forwarded to the validation,
                enrichment, alerting and Elasticsearch helpers.
            s3filePath: Path of the input file to process.

        Raises:
            Exception: Any failure in the pipeline is logged, recorded in the
                log status and re-raised. The only error that is swallowed is
                an Elasticsearch write error whose message contains
                ``index_closed_exception``.
        """
        spark = sparkSession
        logStatus = startLogStatus(s3filePath)
        dfCount = 0
        try:
            logging.info("Start batch Coptero ROD for s3confPath: " +
                         s3confPath + "--------------------------------------")
            rawRecords = S3FilesDsl.readFileSchema(s3filePath,
                                                   getWISchema(s3filePath),
                                                   spark)
            validatedRecords = ValidationsDsl.validateTickets(
                s3filePath, rawRecords, spark, s3confPath)

            indexAgentSmcCluster = getAgentSmcCluster(validatedRecords,
                                                      s3confPath, spark)

            indexWithRelations = getRelations(indexAgentSmcCluster, s3confPath,
                                              spark)

            partitioned = indexWithRelations \
                .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
                .withColumn("file", F.lit(s3filePath)) \
                .withColumn("work_info_category", Utils.getWorkInfoCategory("work_info_notes"))

            dfCount = partitioned.count()
            logging.info("Persisting ES index..")

            logging.info("indexWorkInfoDataFrame.count().." + str(dfCount))
            try:
                ElasticDsl.writeMappedESIndex(
                    partitioned, "copt-rod-wif-{ticket_max_value_partition}",
                    "instanceid", s3confPath)
            except Exception as e:
                message = str(e)
                # str.find() returns -1 (truthy) when the text is missing and 0
                # (falsy) when it is at the start, so it must not be used as a
                # boolean; a membership test expresses the intended check.
                if "index_closed_exception" not in message:
                    raise
                # TODO saveToEs {partitioned} works fine but ends with exception ?
                logging.info("catched index_closed_exception: " + message)

            AlertDsl.checkCount("copt-rod-wif*", s3filePath, dfCount, spark,
                                s3confPath)

            logStatus = copy.deepcopy(logStatus)
            logStatus.success = True
            logStatus.count = dfCount
            logStatus.exception = ""
            logStatus.end_date = ""
            logging.info(
                "End batch Coptero ROD ----------------------------------------------------"
            )
        except Exception as e:
            logStatus = copy.deepcopy(logStatus)
            logStatus.success = False
            logStatus.count = dfCount
            logStatus.exception = str(e)
            logStatus.end_date = ""
            logging.error("catched: " + str(e))
            raise
        finally:
            # Always persist the run log, on success and on failure alike.
            sqlContext = SQLContext(spark)
            logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
            logStatus_data = logESindexSchema(logStatus.file, logStatus.count,
                                              logStatus.success,
                                              logStatus.exception,
                                              logStatus.start_date,
                                              logStatus.end_date)
            logDataFrame = sqlContext.createDataFrame(
                copy.deepcopy(logStatus_data))
            # The log DataFrame was previously built and then discarded.
            # NOTE(review): the "copt-rod-log-" index name is taken from the
            # other ROD jobs -- confirm it is the right one for this job.
            ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-",
                                       s3confPath)
예제 #3
0
    def runJob(sparkSession, s3confPath, s3filePath):
        """Run the ticket-relations batch for one input file and record the outcome.

        The file at ``s3filePath`` is read with the schema returned by
        ``getRelationSchema`` and passed through
        ``ValidationsDsl.validateTickets``. The result of ``relationColumns``
        gets a ``relation_id`` column (``ticket_id`` + ``'-'`` +
        ``related_ticket_id``), is narrowed to the indexed columns, extended
        with ``ticket_max_value_partition`` and ``file``, and written with
        ``ElasticDsl.writeMappedESIndex`` to
        ``copt-rod-rel-{ticket_max_value_partition}``. Afterwards
        ``AlertDsl.checkCount`` and ``persistRelations`` are called.

        Whatever happens, a log record built from ``logStatus`` is written with
        ``ElasticDsl.writeESLogIndex`` in the ``finally`` clause.

        Args:
            sparkSession: Spark session used for every read and write.
            s3confPath: Configuration path forwarded to the validation,
                alerting, persistence and Elasticsearch helpers.
            s3filePath: Path of the input file to process.

        Raises:
            Exception: Any failure in the pipeline is logged, recorded in the
                log status and re-raised. The only error that is swallowed is
                an Elasticsearch write error whose message contains
                ``index_closed_exception``.
        """
        spark = sparkSession
        conf = s3confPath
        logStatus = startLogStatus(s3filePath)
        dfCountRelation = 0

        try:
            logging.info("Start batch Coptero ROD for s3confPath: " +
                         s3confPath + " -------------------------------------")
            rawRecords = S3FilesDsl.readFileSchema(s3filePath,
                                                   getRelationSchema(s3filePath),
                                                   spark)
            validatedRecords = ValidationsDsl.validateTickets(
                s3filePath, rawRecords, spark, conf)

            rodTicketRelation1 = relationColumns(validatedRecords, spark)
            rodTicketRelation = rodTicketRelation1.withColumn(
                "relation_id",
                F.concat(rodTicketRelation1["ticket_id"], F.lit('-'),
                         rodTicketRelation1["related_ticket_id"]))

            esIndexRel = rodTicketRelation.select(
                'relation_id', 'ticket_id', 'ticket_type', 'related_ticket_id',
                'related_ticket_type', 'association_type', 'submit_date',
                'relation_summary', 'status', 'submitter', 'instanceid')
            partitioned = esIndexRel \
                .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
                .withColumn("file", F.lit(s3filePath))

            dfCountRelation = partitioned.count()

            logging.info("Persisting ES index..")
            logging.info("indexRelationDataFrame.count().." +
                         str(dfCountRelation))

            try:
                ElasticDsl.writeMappedESIndex(
                    partitioned, "copt-rod-rel-{ticket_max_value_partition}",
                    "relation_id", conf)
            except Exception as ex:
                message = str(ex)
                # str.find() returns -1 (truthy) when the text is missing, so
                # using it as a boolean swallowed every unrelated write error.
                # Only a genuine index_closed_exception may be ignored.
                if "index_closed_exception" not in message:
                    raise
                logging.info("catched index_closed_exception: " + message)

            AlertDsl.checkCount("copt-rod-rel-*", s3filePath, dfCountRelation,
                                spark, conf)

            logStatus = copy.deepcopy(logStatus)
            logStatus.success = True
            logStatus.count = dfCountRelation
            logStatus.exception = ""
            logStatus.end_date = ""

            persistRelations(esIndexRel, conf, spark)
            # TODO: writeMappedESIndex for CRQ and PBI. The disabled inline
            # indexing of incident relations into copt-rod-closed-* that used
            # to sit here as a string literal was dead code and was removed.

            logging.info(
                "End batch Coptero ROD ----------------------------------------------------"
            )
        except Exception as ex:
            message = str(ex)
            logStatus.success = False
            logStatus.count = 0
            logStatus.exception = message
            logStatus.end_date = ""
            logging.info("catched: " + message)
            raise
        finally:
            # Always persist the run log, on success and on failure alike.
            sqlContext = SQLContext(spark)
            logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
            logStatus_data = logESindexSchema(logStatus.file, logStatus.count,
                                              logStatus.success,
                                              logStatus.exception,
                                              logStatus.start_date,
                                              logStatus.end_date)
            logDataFrame = sqlContext.createDataFrame(
                copy.deepcopy(logStatus_data))
            ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
예제 #4
0
    def buildESIndex(detailType, detail, s3confPath, s3filePath, spark):
        """Build the DataFrame to be indexed for one ticket detail type.

        ``detail`` is passed through ``joinMasterEntities`` and left-joined on
        ``admin_number`` with the parquet data and admin-number tags whose
        locations come from the configuration read from ``s3confPath``. Columns
        shared by all detail types are then derived (``networkinfo``, ``open``,
        ``ticket_max_value_partition``, ``admin_number_escaped``, ``file``, ...),
        followed by the joins and columns specific to ``detailType``. Finally
        the ``*_desc`` master columns are renamed to ``*_id``.

        Args:
            detailType: One of ``"helpdesk"``, ``"problems"`` or ``"changes"``.
            detail: Input DataFrame handed to ``joinMasterEntities``.
            s3confPath: Path handed to ``S3FilesDsl.readConfigJson``.
            s3filePath: Value stored in the ``file`` column of every row.
            spark: Spark session used for the reads and helper calls.

        Returns:
            The enriched DataFrame with ``status_desc``, ``substatus_desc``,
            ``urgency_desc``, ``priority_desc`` and ``impact_desc`` renamed to
            their ``_id`` counterparts.

        Raises:
            ValueError: If ``detailType`` is not one of the supported values.
        """
        # Fail fast: an unknown detail type used to surface only at the very
        # end as an UnboundLocalError on ``index``, after all the reads and
        # joins had already been set up.
        if detailType not in ("helpdesk", "problems", "changes"):
            raise ValueError("Unsupported detailType: " + repr(detailType))

        sqlContext = SQLContext(spark)
        # TODO: use a .conf file instead of .json (original note was garbled -- confirm)
        confJson = S3FilesDsl.readConfigJson(s3confPath)

        rodTicketANTags = AdminNumberTags.antagsColumns(
            S3FilesDsl.readFile(confJson.tags_admin_path, spark), spark)

        parquetPath = confJson.fast_parquet_path
        rodPostgreAdminNumber = sqlContext.read.parquet(parquetPath)

        logging.info("FAST joins..")
        networkFast = sqlContext.read.parquet(confJson.fast_network_parquet_path)

        logging.info("common joins..")

        # TODO: add the import of utils.constantes
        # TODO: check the parameters passed to the Utils methods
        common3 = joinMasterEntities(detail, spark)

        common2 = common3.join(rodPostgreAdminNumber, ["admin_number"], "left")

        common1 = Utils.fillEmptyFastColumns(common2)

        # "open" is OPEN_YES / OPEN_NO depending on which status list contains
        # status_desc, and an empty string when it is in neither.
        openFlag = F.when(common1.status_desc.isin(Constants.openStatus), Constants.OPEN_YES) \
            .otherwise(F.when(common1.status_desc.isin(Constants.notOpenStatus), Constants.OPEN_NO)
                       .otherwise(Constants.EMPTY_STRING))

        common = common1.join(networkFast, ["admin_number"], "left") \
            .withColumn("networkinfo", Utils.networkNestedObject("fast_customer", "fast_end_customer",
                                                                 "router_interface_vendor_type_set")) \
            .drop("router_interface_vendor_type_set") \
            .join(rodTicketANTags, ["admin_number"], "left") \
            .withColumn("open", openFlag) \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
            .withColumn("admin_number_escaped", Utils.urlWhitespaces("admin_number")) \
            .withColumn("fast_max_resolution_time", Utils.validateNumeric("fast_max_resolution_time")) \
            .withColumn("file", F.lit(s3filePath)) \
            .fillna(Constants.EMPTY_STRING, ["assigned_agent"])

        if detailType == "helpdesk":
            rodTicketReportedSource = getReportedSource(spark)
            operationalManager = getOperationalManager(confJson.operational_path, spark)
            opTags = OperatingTags.operatingTagsColumns(S3FilesDsl.readFile(confJson.tags_operating_path, spark))
            customer = Customer.customerColumns(S3FilesDsl.readFile(confJson.customer_path, spark), spark)
            endCustomer = EndCustomer.endCustomerColumns(S3FilesDsl.readFile(confJson.end_customer_path, spark), spark)

            index1 = common \
                .join(rodTicketReportedSource, ["reported_source_id"], "left") \
                .drop("reported_source_id") \
                .join(operationalManager, ["operating_company_name", "operating_le"], "left") \
                .na.fill(Constants.EMPTY_STRING, ["operational_manager"]) \
                .join(opTags, ["operating_company_name", "operating_le"], "left") \
                .withColumn("tags", Utils.mergeArrays("tags", "operating_tags")) \
                .drop("operating_tags") \
                .join(customer, ["operating_company_name"], "left") \
                .fillna(Constants.EMPTY_STRING, ["customer_correct"]) \
                .join(endCustomer, ["operating_le"], "left") \
                .fillna(Constants.EMPTY_STRING, ["end_customer_correct"]) \
                .withColumn("end_customer_correct",
                            Utils.emptyEndCustomerCorrect("customer_correct", "end_customer_correct")) \
                .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
                .withColumn("end_user_country", Utils.kibanaCountry("end_user_country")) \
                .withColumn("smc_cluster", Utils.smcClusterFromGroup("assigned_support_group")) \
                .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name")) \
                .withColumn("product_categorization_all_tiers",
                            Utils.concat3Columns("product_categorization_tier_1", "product_categorization_tier_2",
                                                 "product_categorization_tier_3")) \
                .withColumn("closure_categorization_all_tiers",
                            Utils.concat3Columns("closure_categorization_tier_1", "closure_categorization_tier_2",
                                                 "closure_categorization_tier_3")) \
                .withColumn("operational_categorization_all_tiers",
                            Utils.concat3Columns("operational_categorization_tier_1",
                                                 "operational_categorization_tier_2",
                                                 "operational_categorization_tier_3")) \
                .withColumnRenamed("reported_source_desc", "reported_source_id")

            index = FastDsl.fastCircuitFields(index1, confJson, spark)

        elif detailType == "problems":
            index1 = common \
                .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
                .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name"))
            index = FastDsl.fastCircuitFields(index1, confJson, spark)

        else:  # "changes" -- the guard at the top rules out anything else
            rodTicketReportedSource = getReportedSource(spark)
            index = common \
                .join(rodTicketReportedSource, ["reported_source_id"], "left") \
                .drop("reported_source_id") \
                .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
                .withColumn("company_country", Utils.kibanaCountry("company_country")) \
                .withColumnRenamed("reported_source_desc", "reported_source_id")

        # The user asked for the master-table descriptions to be renamed as _id.
        indexRenamed = index \
            .withColumnRenamed("status_desc", "status_id") \
            .withColumnRenamed("substatus_desc", "substatus_id") \
            .withColumnRenamed("urgency_desc", "urgency_id") \
            .withColumnRenamed("priority_desc", "priority_id") \
            .withColumnRenamed("impact_desc", "impact_id")

        return indexRenamed