def joinMasterEntities(df, spark):
    fileStatus = S3FilesDsl.readFile("C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_STATUS.txt", spark)
    fileSubstatus = S3FilesDsl.readFile("C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190923_TICKET_SUBSTATUS.txt", spark)
    fileUrgency = S3FilesDsl.readFile("C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_URGENCY.txt", spark)
    filePriority = S3FilesDsl.readFile("C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_PRIORITY.txt", spark)
    fileImpact = S3FilesDsl.readFile("C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_IMPACT.txt", spark)

    rodTicketStatus = TicketStatus.statusColumns(fileStatus)
    rodTicketSubstatus = TicketSubstatus.substatusColumns(fileSubstatus)
    rodTicketUrgency = TicketUrgency.urgencyColumns(fileUrgency)
    rodTicketPriority = TicketPriority.priorityColumns(filePriority)
    rodTicketImpact = TicketImpact.impactColumns(fileImpact)

    df2 = df.join(rodTicketStatus, ["status_id"], "left") \
        .join(rodTicketSubstatus, ["substatus_id", "status_id"], "left") \
        .drop("status_id") \
        .drop("substatus_id") \
        .join(rodTicketUrgency, ["urgency_id"], "left") \
        .drop("urgency_id") \
        .join(rodTicketPriority, ["priority_id"], "left") \
        .drop("priority_id") \
        .join(rodTicketImpact, ["impact_id"], "left") \
        .drop("impact_id")
    return df2
def persistRelations(esIndexRel, s3confPath, spark):
    sqlContext = SQLContext(spark)
    relationsDF = esIndexRel.select("ticket_id", "related_ticket_id")
    total = None
    try:
        total = sqlContext.read.parquet(S3FilesDsl.readConfigJson(s3confPath).rod_relations_parquet_path)
    except Exception as e:
        message = str(e)
        if "Path does not exist" not in message:
            raise e
        else:
            logging.info("catched Path does not exist (first job execution): " + str(e))
            relationsDF \
                .repartition(1) \
                .write \
                .mode("overwrite") \
                .parquet(S3FilesDsl.readConfigJson(s3confPath).rod_relations_parquet_path)
            total = sqlContext.read.parquet(S3FilesDsl.readConfigJson(s3confPath).rod_relations_parquet_path)
    # Materialize before overwriting the same path below.
    total.cache()
    logging.info("total.count---------------------------------------------" + str(total.count()))
    result = total \
        .unionByName(relationsDF) \
        .distinct() \
        .repartition(1) \
        .write \
        .mode("overwrite") \
        .parquet(S3FilesDsl.readConfigJson(s3confPath).rod_relations_parquet_path)
    return result
def removeClosedAgentSmc(esIndex, s3confPath, spark):
    sqlContext = SQLContext(spark)
    closed = esIndex.select("ticket_id", "assigned_agent", "smc_cluster", "reported_source_id").distinct()
    auxOuterJoin = closed.select("ticket_id")
    total = sqlContext.read.parquet(S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)
    total.cache()
    logging.info("total.count---------------------------------------------" + str(total.count()))
    # Keep only the tickets that are not in the closed set.
    totalWithoutClosed = total \
        .join(auxOuterJoin, ["ticket_id"], "left") \
        .where(auxOuterJoin["ticket_id"].isNull())
    result = totalWithoutClosed \
        .repartition(1) \
        .write \
        .mode("overwrite") \
        .parquet(S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)
    logging.info("totalWithoutClosed.count---------------------------------------------" + str(totalWithoutClosed.count()))
    return result
def adminNumberTags(tagsAdminPath, spark):
    # Assumes S3FilesDsl.readFile(path, spark) as used elsewhere in this module;
    # the original call passed no arguments.
    rodTicketANTags = AdminNumberTags.antagsColumns(S3FilesDsl.readFile(tagsAdminPath, spark))
    ticketANTags = rodTicketANTags \
        .groupBy("admin_number") \
        .agg(F.concat_ws(",", F.collect_set("tags")).alias("tags")) \
        .withColumn("tags", Utils.stringToArray("tags"))
    return ticketANTags
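# Usage sketch (not part of the job): the path below is hypothetical; the real
# location comes from the tags_admin_path entry of the job's JSON config, as in
# RemedyDsl.buildESIndex. Assuming Utils.stringToArray splits the comma-separated
# string, the result has one row per admin_number with an array-typed "tags" column.
#
# ticketANTags = adminNumberTags("s3://copt-rod/resources/RD_TR_TAGS_ADMIN.txt", spark)
# ticketANTags.select("admin_number", "tags").show(5, truncate=False)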
def runJob(sparkSession, s3confPath, s3filePath):
    spark = sparkSession
    logStatus = startLogStatus(s3filePath)
    dfCount = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath:" + s3confPath + "--------------------------------------")
        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getClosedSchema(s3filePath), spark),
            spark, s3confPath)
        logging.info("validatedRecords.count().." + str(validatedRecords.count()))
        ticketToCloseDS = detailClosedColumns(validatedRecords, spark)
        logging.info("ticketToCloseDS.count().." + str(ticketToCloseDS.count()))
        esIndex = ticketToCloseDS \
            .withColumn("open", F.lit(Constants.OPEN_NO)) \
            .withColumn("file", F.lit(s3filePath)) \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id"))
        logging.info("Persisting ES index..")
        dfCount = esIndex.count()
        logging.info("indexDataFrame.count.." + str(dfCount))
        try:
            ElasticDsl.writeMappedESIndex(esIndex, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id", s3confPath)
        except Exception as e:
            message = str(e)
            if "index_closed_exception" not in message:
                raise e
            else:
                # TODO saveToEs {partitioned} works fine but ends with exception ?¿
                logging.info("catched index_closed_exception: " + str(e))
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCount
        logStatus.exception = ""
        logStatus.end_date = ""
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as e:
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = False
        logStatus.count = dfCount
        logStatus.exception = str(e)
        logStatus.end_date = ""
        logging.error("catched: " + str(e))
        raise e
    finally:
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success,
                                          logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", s3confPath)
def writeESCorruptRecordsIndex(index, name, conf):
    prefix = S3FilesDsl.readConfigJson(conf).elastic_env_index_prefix
    config = {
        "elastic_nodes": "127.0.0.1",
        "elastic_port": "9200",
        "elastic_user": "******",
        "elastic_pass": "******"
    }
    toElastic(config, index, addId, prefix + name + datetime.now().strftime("%Y"))
def fullPersistAgentSmc(esIndex, s3confPath):
    preload = esIndex.select("ticket_id", "assigned_agent", "smc_cluster", "reported_source_id")
    result = preload \
        .repartition(1) \
        .write \
        .mode("overwrite") \
        .parquet(S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)
    logging.info("total.count---------------------------------------------" + str(preload.count()))
    return result
def persistAgentSmc(esIndex, s3confPath, spark):
    sqlContext = SQLContext(spark)
    newOrUpdated = esIndex.select("ticket_id", "assigned_agent", "smc_cluster", "reported_source_id").distinct()
    auxOuterJoin = newOrUpdated.select("ticket_id")
    total = None
    try:
        total = sqlContext.read.parquet(S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)
    except Exception as e:
        message = str(e)
        if "Path does not exist" not in message:
            raise e
        else:
            logging.info("catched Path does not exist (first job execution): " + str(e))
            newOrUpdated \
                .repartition(1) \
                .write \
                .mode("overwrite") \
                .parquet(S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)
            total = sqlContext.read.parquet(S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)
    # Materialize before overwriting the same path below.
    total.cache()
    logging.info("total.count---------------------------------------------" + str(total.count()))
    totalWithoutNewOrUpdated = total \
        .join(auxOuterJoin, ["ticket_id"], "left") \
        .where(auxOuterJoin["ticket_id"].isNull())
    updatedToParquet = totalWithoutNewOrUpdated.unionByName(newOrUpdated)
    result = updatedToParquet \
        .repartition(1) \
        .write \
        .mode("overwrite") \
        .parquet(S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)
    return result
def getRelations(esIndex, s3confPath, spark):
    sqlContext = SQLContext(spark)
    parquet = sqlContext.read.parquet(S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)
    agents = parquet \
        .filter(parquet.reported_source_id != "Vendor") \
        .withColumnRenamed("ticket_id", "agent_ticket_id")
    relations = sqlContext.read.parquet(S3FilesDsl.readConfigJson(s3confPath).rod_relations_parquet_path)
    relatedAgents = relations \
        .join(agents, relations.related_ticket_id == agents.agent_ticket_id, "inner") \
        .groupBy("ticket_id") \
        .agg(F.collect_set("assigned_agent").alias("assignee"),
             F.collect_set("smc_cluster").alias("smc"))
    result = esIndex \
        .join(relatedAgents, ["ticket_id"], "left") \
        .withColumn("assignee", Utils.addToArray("assigned_agent", "assignee")) \
        .withColumn("smc", Utils.addToArray("smc_cluster", "smc")) \
        .drop("assigned_agent", "smc_cluster")
    return result
def checkCount(indexName, fileName, dfCount, spark, conf):
    prefix = S3FilesDsl.readConfigJson(conf).elastic_env_index_prefix
    sqlContext = SQLContext(spark)
    logging.info('dfCount.. ' + str(dfCount))
    path = fileName.replace(':', '\\:').replace("/", "\\/")
    # Equivalent of qResultDF = spark.esDF("${indexName}", "?q=file:\"" + path + "\"").select("ticket_id") ?
    qResultDF1 = sqlContext.read \
        .option("es.resource", prefix + indexName) \
        .option("es.query", "?q=file:\"" + path + "\"") \
        .format("org.elasticsearch.spark.sql") \
        .load()
    qResultDF = qResultDF1.select("ticket_id")
    qResultDF.cache()
    queryCount = qResultDF.count()
    qResultDF.unpersist()
    logging.info("queryCount.. " + str(queryCount))
    if dfCount != queryCount:
        alertDataFrame = sqlContext.createDataFrame(
            [(fileName, dfCount, queryCount, datetime.now().strftime("%Y%m%d%H%M%S"))],
            ["file", "expected_count", "result_count", "date"])
        ElasticDsl.writeESAlertsIndex(alertDataFrame, conf)
def runJob(sparkSession, s3confPath, s3filePath):
    spark = sparkSession
    conf = s3confPath
    logStatus = startLogStatus(s3filePath)
    dfCount = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath:" + s3confPath + "--------------------------------------")
        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getIncidSchema(s3filePath), spark),
            spark, conf)
        logging.info("fileDetailHelpdesk.count().." + str(validatedRecords.count()))
        rodTicketDetailHelpdesk = TicketDetailHelpdesk.detailHPDColumns(validatedRecords)
        logging.info("rodTicketDetailHelpdesk.count.." + str(rodTicketDetailHelpdesk.count()))
        calendar = datetime.now().strftime("%Y%m%d%H%M%S")
        filtered = rodTicketDetailHelpdesk \
            .filter((rodTicketDetailHelpdesk.status_id == "5") | (rodTicketDetailHelpdesk.status_id == "6")) \
            .filter(rodTicketDetailHelpdesk.last_modification_date < calendar)
        # TODO (Scala reference implementation):
        # val fileStatus: DataFrame = readFile(getAuxTablePath("TICKET_STATUS"))
        # val rodTicketStatus: Dataset[TicketStatus] = statusColumns(fileStatus)
        # rodTicketDetailHelpdesk
        #   .join(rodTicketStatus, Seq("status_id"), "left")
        #   .filter($"status_desc" === "Closed" || $"status_desc" === "Cancelled")
        #   .drop("status_desc")
        #   .filter($"last_modification_date" < new SimpleDateFormat("yyyyMMddHHmmss").format(calendar.getTime))
        # val cisClosedDates = getCIsLastClosedDates(rodTicketDetailHelpdesk)
        esIndex = RemedyDsl.buildESIndex("helpdesk", filtered, s3confPath, s3filePath, spark)
        # TODO ? esIndex.as[IncidESIndex] with Option[String] = None
        logging.info("Persisting ES index..")
        # dfCount = esIndex.count()
        # logging.info("indexDataFrame.count.." + str(dfCount))
        try:
            ElasticDsl.writeMappedESIndex(esIndex, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id", conf)
        except Exception as e:
            message = str(e)
            if "index_closed_exception" not in message:
                raise e
            else:
                # TODO saveToEs {partitioned} works fine but ends with exception ?¿
                logging.info("catched index_closed_exception: " + str(e))
        removeClosedAgentSmc(esIndex, s3confPath, spark)
        AlertDsl.checkCount("copt-rod-closed-*", s3filePath, dfCount, spark, s3confPath)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCount
        logStatus.exception = ""
        logStatus.end_date = ""
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as e:
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = False
        logStatus.count = dfCount
        logStatus.exception = str(e)
        logStatus.end_date = ""
        logging.error("catched: " + str(e))
        raise e
    finally:
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success,
                                          logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        logDataFrame.show(5)
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def runJob(sparkSession, s3confPath, s3filePath):
    spark = sparkSession
    conf = s3confPath
    logStatus = startLogStatus(s3filePath)
    dfCountRelation = 0
    dfCountIncid = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath: " + s3confPath + " -------------------------------------")
        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getRelationSchema(s3filePath), spark),
            spark, conf)
        rodTicketRelation1 = relationColumns(validatedRecords, spark)
        rodTicketRelation = rodTicketRelation1.withColumn(
            "relation_id",
            F.concat(rodTicketRelation1["ticket_id"], F.lit('-'), rodTicketRelation1["related_ticket_id"]))
        esIndexRel = rodTicketRelation.select(
            'relation_id', 'ticket_id', 'ticket_type', 'related_ticket_id', 'related_ticket_type',
            'association_type', 'submit_date', 'relation_summary', 'status', 'submitter', 'instanceid')
        partitioned = esIndexRel \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
            .withColumn("file", F.lit(s3filePath))
        dfCountRelation = partitioned.count()
        logging.info("Persisting ES index..")
        logging.info("indexRelationDataFrame.count().." + str(dfCountRelation))
        try:
            ElasticDsl.writeMappedESIndex(partitioned, "copt-rod-rel-{ticket_max_value_partition}", "relation_id", conf)
        except Exception as ex:
            e = str(ex)
            if "index_closed_exception" in e:
                logging.info("catched index_closed_exception: " + e)
            else:
                raise ex
        AlertDsl.checkCount("copt-rod-rel-*", s3filePath, dfCountRelation, spark, conf)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCountRelation
        logStatus.exception = ""
        logStatus.end_date = ""
        persistRelations(esIndexRel, conf, spark)
        '''relationsDF = esIndexRel \
            .filter(esIndexRel.ticket_type == "Incident") \
            .groupBy("ticket_id") \
            .agg(F.collect_list("related_ticket_id").alias("relations")) \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
            .withColumn("file", F.lit(s3filePath))
        # TODO writeMappedESIndex CRQ and PBI
        dfCountIncid = relationsDF.count()
        logging.info("Persisting ES index..")
        logging.info("relationsDF.count().." + str(dfCountIncid))
        try:
            ElasticDsl.writeMappedESIndex(
                relationsDF, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id", conf)
        except Exception as ex:
            e = str(ex)
            if "index_closed_exception" in e:
                logging.info("catched index_closed_exception: " + e)
            else:
                raise ex
        AlertDsl.checkCount("copt-rod-closed-*", s3filePath, dfCountIncid, spark, conf)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCountIncid
        logStatus.exception = ""
        logStatus.end_date = ""'''
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as ex:
        e = str(ex)
        logStatus.success = False
        logStatus.count = 0
        logStatus.exception = e
        logStatus.end_date = ""
        logging.info("catched: " + e)
        raise ex
    finally:
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success,
                                          logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def writeESAlertsIndex(index, config):
    prefix = S3FilesDsl.readConfigJson(config).elastic_env_index_prefix
    index.write.format('org.elasticsearch.spark.sql') \
        .mode('append') \
        .option('es.write.operation', 'index') \
        .option('es.resource', prefix + 'copt-rod-alerts') \
        .save()
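# Usage sketch, mirroring AlertDsl.checkCount: an alert row is a (file, expected_count,
# result_count, date) tuple and "config" is the S3 path of the job's JSON config.
# The file name and config path below are hypothetical examples.
#
# alertDataFrame = sqlContext.createDataFrame(
#     [("RD_TR_20190909_HPD.txt", 100, 98, datetime.now().strftime("%Y%m%d%H%M%S"))],
#     ["file", "expected_count", "result_count", "date"])
# writeESAlertsIndex(alertDataFrame, "s3://copt-rod/conf/rod_conf.json")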
def buildESIndex(detailType, detail, s3confPath, s3filePath, spark):
    sqlContext = SQLContext(spark)
    # TODO: .conf instead of .json
    confJson = S3FilesDsl.readConfigJson(s3confPath)
    rodTicketANTags = AdminNumberTags.antagsColumns(S3FilesDsl.readFile(confJson.tags_admin_path, spark), spark)
    parquetPath = confJson.fast_parquet_path
    rodPostgreAdminNumber = sqlContext.read.parquet(parquetPath)
    logging.info("FAST joins..")
    networkFast = sqlContext.read.parquet(confJson.fast_network_parquet_path)
    logging.info("common joins..")
    # TODO: add the utils.constants import
    # TODO: check the parameters passed to the Utils methods
    common3 = joinMasterEntities(detail, spark)
    common2 = common3.join(rodPostgreAdminNumber, ["admin_number"], "left")
    common1 = Utils.fillEmptyFastColumns(common2)
    common = common1.join(networkFast, ["admin_number"], "left") \
        .withColumn("networkinfo", Utils.networkNestedObject("fast_customer", "fast_end_customer",
                                                             "router_interface_vendor_type_set")) \
        .drop("router_interface_vendor_type_set") \
        .join(rodTicketANTags, ["admin_number"], "left") \
        .withColumn("open", F.when(common1.status_desc.isin(Constants.openStatus), Constants.OPEN_YES)
                    .otherwise(F.when(common1.status_desc.isin(Constants.notOpenStatus), Constants.OPEN_NO)
                               .otherwise(Constants.EMPTY_STRING))) \
        .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
        .withColumn("admin_number_escaped", Utils.urlWhitespaces("admin_number")) \
        .withColumn("fast_max_resolution_time", Utils.validateNumeric("fast_max_resolution_time")) \
        .withColumn("file", F.lit(s3filePath)) \
        .fillna(Constants.EMPTY_STRING, ["assigned_agent"])

    if detailType == "helpdesk":
        rodTicketReportedSource = getReportedSource(spark)
        operationalManager = getOperationalManager(confJson.operational_path, spark)
        opTags = OperatingTags.operatingTagsColumns(S3FilesDsl.readFile(confJson.tags_operating_path, spark))
        customer = Customer.customerColumns(S3FilesDsl.readFile(confJson.customer_path, spark), spark)
        endCustomer = EndCustomer.endCustomerColumns(S3FilesDsl.readFile(confJson.end_customer_path, spark), spark)
        index1 = common \
            .join(rodTicketReportedSource, ["reported_source_id"], "left") \
            .drop("reported_source_id") \
            .join(operationalManager, ["operating_company_name", "operating_le"], "left") \
            .na.fill(Constants.EMPTY_STRING, ["operational_manager"]) \
            .join(opTags, ["operating_company_name", "operating_le"], "left") \
            .withColumn("tags", Utils.mergeArrays("tags", "operating_tags")) \
            .drop("operating_tags") \
            .join(customer, ["operating_company_name"], "left") \
            .fillna(Constants.EMPTY_STRING, ["customer_correct"]) \
            .join(endCustomer, ["operating_le"], "left") \
            .fillna(Constants.EMPTY_STRING, ["end_customer_correct"]) \
            .withColumn("end_customer_correct", Utils.emptyEndCustomerCorrect("customer_correct", "end_customer_correct")) \
            .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
            .withColumn("end_user_country", Utils.kibanaCountry("end_user_country")) \
            .withColumn("smc_cluster", Utils.smcClusterFromGroup("assigned_support_group")) \
            .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name")) \
            .withColumn("product_categorization_all_tiers",
                        Utils.concat3Columns("product_categorization_tier_1", "product_categorization_tier_2",
                                             "product_categorization_tier_3")) \
            .withColumn("closure_categorization_all_tiers",
                        Utils.concat3Columns("closure_categorization_tier_1", "closure_categorization_tier_2",
                                             "closure_categorization_tier_3")) \
            .withColumn("operational_categorization_all_tiers",
                        Utils.concat3Columns("operational_categorization_tier_1", "operational_categorization_tier_2",
                                             "operational_categorization_tier_3")) \
            .withColumnRenamed("reported_source_desc", "reported_source_id")
        index = FastDsl.fastCircuitFields(index1, confJson, spark)
    elif detailType == "problems":
        index1 = common \
            .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
            .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name"))
        index = FastDsl.fastCircuitFields(index1, confJson, spark)
    elif detailType == "changes":
        rodTicketReportedSource = getReportedSource(spark)
        index = common \
            .join(rodTicketReportedSource, ["reported_source_id"], "left") \
            .drop("reported_source_id") \
            .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
            .withColumn("company_country", Utils.kibanaCountry("company_country")) \
            .withColumnRenamed("reported_source_desc", "reported_source_id")

    # The user requests that the master-table descriptions be renamed as _id.
    indexRenamed = index \
        .withColumnRenamed("status_desc", "status_id") \
        .withColumnRenamed("substatus_desc", "substatus_id") \
        .withColumnRenamed("urgency_desc", "urgency_id") \
        .withColumnRenamed("priority_desc", "priority_id") \
        .withColumnRenamed("impact_desc", "impact_id")
    return indexRenamed
def runJob(sparkSession, s3confPath, s3filePath):
    spark = sparkSession
    conf = s3confPath
    logStatus = startLogStatus(s3filePath)
    dfCount = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath: " + s3confPath + "-------------------------------------")
        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getPBISchema(s3filePath), spark),
            spark, conf)
        rodTicketDetailProblems = detailPBMColumns(validatedRecords, spark)
        esIndexPBM = RemedyDsl.buildESIndex("problems", rodTicketDetailProblems, s3confPath, s3filePath, spark)
        dfCount = esIndexPBM.count()
        logging.info("Persisting ES indexes..")
        logging.info("indexProblemDataFrame.count().." + str(dfCount))
        try:
            ElasticDsl.writeMappedESIndex(esIndexPBM, "copt-rod-pbi-{ticket_max_value_partition}", "ticket_id", conf)
        except Exception as ex:
            e = str(ex)
            if "index_closed_exception" in e:
                logging.info("catched index_closed_exception: " + e)
            else:
                raise ex
        AlertDsl.checkCount("copt-rod-pbi-*", s3filePath, dfCount, spark, conf)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCount
        logStatus.exception = ""
        logStatus.end_date = ""
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as ex:
        e = str(ex)
        logStatus.success = False
        logStatus.count = dfCount
        logStatus.exception = e
        logStatus.end_date = ""
        logging.info("catched: " + e)
        raise ex
    finally:
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success,
                                          logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def writeMappedESIndex(index, name, mapId, config):
    prefix = S3FilesDsl.readConfigJson(config).elastic_env_index_prefix
    index.write.format('org.elasticsearch.spark.sql') \
        .option('es.mapping.id', mapId) \
        .option('es.resource', prefix + name) \
        .save()
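# Usage sketch: the runJob callers add a "ticket_max_value_partition" column and pass an
# index name containing "{ticket_max_value_partition}"; the elasticsearch-hadoop connector
# resolves that placeholder per row, so each document is routed to its partitioned index.
# The config path below is a hypothetical example.
#
# esIndex = esIndex.withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id"))
# writeMappedESIndex(esIndex, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id",
#                    "s3://copt-rod/conf/rod_conf.json")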
def getOperationalManager(s3path, spark):
    fileOperationalManager = S3FilesDsl.readFile(s3path, spark)
    return OperationalManager.operationalManagerColumns(fileOperationalManager)
def getReportedSource(spark):
    fileReportedSource = S3FilesDsl.readFile(
        "C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_REPORTED_SOURCE.txt", spark)
    return TicketReportedSource.reportedSourceColumns(fileReportedSource)
def runJob(sparkSession, s3confPath, s3filePath):
    spark = sparkSession
    conf = s3confPath
    logStatus = startLogStatus(s3filePath)
    dfCount = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath:" + s3confPath + "--------------------------------------")
        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getCRQSchema(s3filePath), spark),
            spark, conf)
        rodTicketDetailChanges = detailCHGColumns(validatedRecords)
        esIndexCHG = RemedyDsl.buildESIndex("changes", rodTicketDetailChanges, s3confPath, s3filePath, spark)
        logging.info("Persisting ES index..")
        dfCount = esIndexCHG.count()
        logging.info("indexDataFrame.count.." + str(dfCount))
        try:
            ElasticDsl.writeMappedESIndex(esIndexCHG, "copt-rod-crq-{ticket_max_value_partition}", "ticket_id", conf)
        except Exception as e:
            ex = str(e)
            if "index_closed_exception" not in ex:
                raise e
            else:
                # TODO saveToEs {partitioned} works fine but ends with exception ?¿
                logging.info("catched index_closed_exception: " + ex)
        AlertDsl.checkCount("copt-rod-crq-*", s3filePath, dfCount, spark, s3confPath)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCount
        logStatus.exception = ""
        logStatus.end_date = ""
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as e:
        ex = str(e)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = False
        logStatus.count = dfCount
        logStatus.exception = ex
        logStatus.end_date = ""
        logging.error("catched: " + ex)
        raise e
    finally:
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success,
                                          logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def getAgentSmcCluster(esIndex, s3confPath, spark):
    sqlContext = SQLContext(spark)
    parquet = sqlContext.read.parquet(S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)
    return esIndex.join(parquet.select("ticket_id", "smc_cluster", "assigned_agent"), ["ticket_id"], "left")
def runJob(sparkSession, s3confPath, s3filePath):
    spark = sparkSession
    conf = s3confPath
    logStatus = startLogStatus(s3filePath)
    dfCount = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath:" + s3confPath + "--------------------------------------")
        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getIncidSchema(s3filePath), spark),
            spark, conf)
        logging.info("fileDetailHelpdesk.count().." + str(validatedRecords.count()))
        rodTicketDetailHelpdesk = TicketDetailHelpdesk.detailHPDColumns(validatedRecords)
        logging.info("rodTicketDetailHelpdesk.count.." + str(rodTicketDetailHelpdesk.count()))
        # val cisClosedDates = getCIsLastClosedDates(rodTicketDetailHelpdesk)
        esIndex = RemedyDsl.buildESIndex("helpdesk", rodTicketDetailHelpdesk, s3confPath, s3filePath, spark)
        print("OUR DATAFRAME")
        esIndex.show()
        print("YOUR DATAFRAME")
        # TODO ? esIndex.as[IncidESIndex] with Option[String] = None
        logging.info("Persisting ES index..")
        dfCount = esIndex.count()
        logging.info("indexDataFrame.count.." + str(dfCount))
        try:
            ElasticDsl.writeMappedESIndex(esIndex, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id", conf)
        except Exception as e:
            message = str(e)
            if "index_closed_exception" not in message:
                raise e
            else:
                # TODO saveToEs {partitioned} works fine but ends with exception ?¿
                logging.info("catched index_closed_exception: " + str(e))
        persistAgentSmc(esIndex, s3confPath, spark)
        AlertDsl.checkCount("copt-rod-closed-*", s3filePath, dfCount, spark, conf)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCount
        logStatus.exception = ""
        logStatus.end_date = ""
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as e:
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = False
        logStatus.count = dfCount
        logStatus.exception = str(e)
        logStatus.end_date = ""
        logging.error("catched: " + str(e))
        raise e
    finally:
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success,
                                          logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def runJob(sparkSession, s3confPath, s3filePath):
    spark = sparkSession
    logStatus = startLogStatus(s3filePath)
    dfCount = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath: " + s3confPath + "--------------------------------------")
        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getWISchema(s3filePath), spark),
            spark, s3confPath)
        indexAgentSmcCluster = getAgentSmcCluster(validatedRecords, s3confPath, spark)
        indexWithRelations = getRelations(indexAgentSmcCluster, s3confPath, spark)
        partitioned = indexWithRelations \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
            .withColumn("file", F.lit(s3filePath)) \
            .withColumn("work_info_category", Utils.getWorkInfoCategory("work_info_notes"))
        dfCount = partitioned.count()
        logging.info("Persisting ES index..")
        logging.info("indexWorkInfoDataFrame.count().." + str(dfCount))
        try:
            ElasticDsl.writeMappedESIndex(partitioned, "copt-rod-wif-{ticket_max_value_partition}", "instanceid", s3confPath)
        except Exception as e:
            message = str(e)
            if "index_closed_exception" not in message:
                raise e
            else:
                # TODO saveToEs {partitioned} works fine but ends with exception ?¿
                logging.info("catched index_closed_exception: " + str(e))
        AlertDsl.checkCount("copt-rod-wif*", s3filePath, dfCount, spark, s3confPath)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCount
        logStatus.exception = ""
        logStatus.end_date = ""
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as e:
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = False
        logStatus.count = dfCount
        logStatus.exception = str(e)
        logStatus.end_date = ""
        logging.error("catched: " + str(e))
        raise e
    finally:
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success,
                                          logStatus.exception, logStatus.start_date, logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        # The other runJob variants persist the log index here as well.
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", s3confPath)
def writeESLogIndex(index, name, config):
    prefix = S3FilesDsl.readConfigJson(config).elastic_env_index_prefix
    index.write.format('org.elasticsearch.spark.sql') \
        .mode('append') \
        .option('es.write.operation', 'index') \
        .option('es.resource', prefix + name + datetime.now().strftime("%Y")) \
        .save()