Пример #1
0
def joinMasterEntities(df, spark):
    fileStatus = S3FilesDsl.readFile("C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_STATUS.txt",
                                     spark)
    fileSubstatus = S3FilesDsl.readFile(
        "C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190923_TICKET_SUBSTATUS.txt", spark)
    fileUrgency = S3FilesDsl.readFile("C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_URGENCY.txt",
                                      spark)
    filePriority = S3FilesDsl.readFile(
        "C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_PRIORITY.txt", spark)
    fileImpact = S3FilesDsl.readFile("C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_IMPACT.txt",
                                     spark)
    rodTicketStatus = TicketStatus.statusColumns(fileStatus)
    rodTicketSubstatus = TicketSubstatus.substatusColumns(fileSubstatus)
    rodTicketUrgency = TicketUrgency.urgencyColumns(fileUrgency)
    rodTicketPriority = TicketPriority.priorityColumns(filePriority)
    rodTicketImpact = TicketImpact.impactColumns(fileImpact)

    df2 = df.join(rodTicketStatus, ["status_id"], "left"). \
        join(rodTicketSubstatus, ["substatus_id", "status_id"], "left"). \
        drop("status_id"). \
        drop("substatus_id"). \
        join(rodTicketUrgency, ["urgency_id"], "left"). \
        drop("urgency_id"). \
        join(rodTicketPriority, ["priority_id"], "left"). \
        drop("priority_id"). \
        join(rodTicketImpact, ["impact_id"], "left"). \
        drop("impact_id")

    return df2
Пример #2
0
    def adminNumberTags(tagsAdminPath):
        rodTicketANTags = AdminNumberTags.antagsColumns(S3FilesDsl.readFile())

        ticketANTags = rodTicketANTags \
            .groupBy("admin_number") \
            .agg(F.concat_ws(",", F.collect_set("tags").alias("tags"))) \
            .withColumn("tags", Utils.stringToArray("tags"))
        return ticketANTags
Пример #3
0
    def buildESIndex(detailType, detail, s3confPath, s3filePath, spark):
        sqlContext = SQLContext(spark)
        # TODO.confinsteadof.json        val
        confJson = S3FilesDsl.readConfigJson(s3confPath)

        rodTicketANTags = AdminNumberTags.antagsColumns(
            S3FilesDsl.readFile(confJson.tags_admin_path, spark), spark)

        parquetPath = confJson.fast_parquet_path
        rodPostgreAdminNumber = sqlContext.read.parquet(parquetPath)

        logging.info("FAST joins..")
        networkFast = sqlContext.read.parquet(confJson.fast_network_parquet_path)

        logging.info("common joins..")

        # TODO: añadir import de utils.constantes
        # TODO: comprobar parametros que se pasan a los metodos de Utils
        common3 = joinMasterEntities(detail, spark)

        common2 = common3.join(rodPostgreAdminNumber, ["admin_number"], "left")

        common1 = Utils.fillEmptyFastColumns(common2)

        common = common1.join(networkFast, ["admin_number"], "left"). \
            withColumn("networkinfo", Utils.networkNestedObject("fast_customer", "fast_end_customer",
                                                                "router_interface_vendor_type_set")). \
            drop("router_interface_vendor_type_set"). \
            join(rodTicketANTags, ["admin_number"], "left"). \
            withColumn("open", F.when(common1.status_desc.isin(Constants.openStatus), Constants.OPEN_YES).
                       otherwise(F.when(common1.status_desc.isin(Constants.notOpenStatus), Constants.OPEN_NO).
                                 otherwise(Constants.EMPTY_STRING))). \
            withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")). \
            withColumn("admin_number_escaped", Utils.urlWhitespaces("admin_number")). \
            withColumn("fast_max_resolution_time", Utils.validateNumeric("fast_max_resolution_time")). \
            withColumn("file", F.lit(s3filePath)). \
            fillna(Constants.EMPTY_STRING, ["assigned_agent"])

        if detailType == "helpdesk":
            rodTicketReportedSource = getReportedSource(spark)
            operationalManager = getOperationalManager(confJson.operational_path, spark)
            opTags = OperatingTags.operatingTagsColumns(S3FilesDsl.readFile(confJson.tags_operating_path, spark))
            customer = Customer.customerColumns(S3FilesDsl.readFile(confJson.customer_path, spark), spark)
            endCustomer = EndCustomer.endCustomerColumns(S3FilesDsl.readFile(confJson.end_customer_path, spark), spark)

            index1 = common \
                .join(rodTicketReportedSource, ["reported_source_id"], "left") \
                .drop("reported_source_id") \
                .join(operationalManager, ["operating_company_name", "operating_le"], "left") \
                .na.fill(Constants.EMPTY_STRING, ["operational_manager"]) \
                .join(opTags, ["operating_company_name", "operating_le"], "left") \
                .withColumn("tags", Utils.mergeArrays("tags", "operating_tags")) \
                .drop("operating_tags") \
                .join(customer, ["operating_company_name"], "left") \
                .fillna(Constants.EMPTY_STRING, ["customer_correct"]) \
                .join(endCustomer, ["operating_le"], "left") \
                .fillna(Constants.EMPTY_STRING, ["end_customer_correct"]) \
                .withColumn("end_customer_correct",
                            Utils.emptyEndCustomerCorrect("customer_correct", "end_customer_correct")) \
                .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
                .withColumn("end_user_country", Utils.kibanaCountry("end_user_country")) \
                .withColumn("smc_cluster", Utils.smcClusterFromGroup("assigned_support_group")) \
                .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name")) \
                .withColumn("product_categorization_all_tiers",
                            Utils.concat3Columns("product_categorization_tier_1", "product_categorization_tier_2",
                                                 "product_categorization_tier_3")) \
                .withColumn("closure_categorization_all_tiers",
                            Utils.concat3Columns("closure_categorization_tier_1", "closure_categorization_tier_2",
                                                 "closure_categorization_tier_3")) \
                .withColumn("operational_categorization_all_tiers",
                            Utils.concat3Columns("operational_categorization_tier_1",
                                                 "operational_categorization_tier_2",
                                                 "operational_categorization_tier_3")) \
                .withColumnRenamed("reported_source_desc", "reported_source_id")

            index = FastDsl.fastCircuitFields(index1, confJson, spark)

        elif detailType == "problems":
            index1 = common \
                .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
                .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name"))
            index = FastDsl.fastCircuitFields(index1, confJson, spark)

        elif detailType == "changes":
            rodTicketReportedSource = getReportedSource(spark)
            index = common \
                .join(rodTicketReportedSource, ["reported_source_id"], "left") \
                .drop("reported_source_id") \
                .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
                .withColumn("company_country", Utils.kibanaCountry("company_country")) \
                .withColumnRenamed("reported_source_desc", "reported_source_id")

        # EL USUARIO SOLICITA QUE LAS DESCRIPCIONES DE LOS MAESTROS SE RENOMBREN COMO _id
        indexRenamed = index \
            .withColumnRenamed("status_desc", "status_id") \
            .withColumnRenamed("substatus_desc", "substatus_id") \
            .withColumnRenamed("urgency_desc", "urgency_id") \
            .withColumnRenamed("priority_desc", "priority_id") \
            .withColumnRenamed("impact_desc", "impact_id")

        return indexRenamed
Пример #4
0
def getOperationalManager(s3path, spark):
    fileOperationalManager = S3FilesDsl.readFile(s3path, spark)
    return OperationalManager.operationalManagerColumns(fileOperationalManager)
Пример #5
0
def getReportedSource(spark):
    fileReportedSource = S3FilesDsl.readFile(
        "C:/Users/gonza/Downloads/PruebasLAB/resources/RD_TR_20190909_TICKET_REPORTED_SOURCE.txt", spark)
    return TicketReportedSource.reportedSourceColumns(fileReportedSource)