Example #1
    sc = SparkContext(appName="TEST")

    java_import(sc._jvm, "edu.isi.karma")

    inputFilename = argv[1]
    outputFilename = argv[2]

    fileUtil = FileUtil(sc)
    workflow = Workflow(sc)
    contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

    #1. Read the input

    #test big file
    # inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1000)

    #test small file
    inputRDD = workflow.batch_read_csv(inputFilename)

    #2. Apply the karma Model
    outputRDD = workflow.run_karma(
        inputRDD,
        "https://raw.githubusercontent.com/american-art/npg/master/NPGConstituents/NPGConstituents-model.ttl",
        "http://dig.isi.edu/npgConstituents/",
        "http://www.cidoc-crm.org/cidoc-crm/E39_Actor1",
        "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json",
        data_type="csv",
        additional_settings={"karma.input.delimiter": ","})

    #3. Save the output
    # fileUtil.save_file(outputRDD, outputFilename, "text", "json")

    reducedRDD = workflow.reduce_rdds(outputRDD)
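
Example #1 ends after the reduce step without writing anything out. Assuming the commented-out fileUtil.save_file call above also accepts the reduced RDD, step 3 could be finished with:

    # Save the reduced RDD as JSON text; applying the commented-out call to
    # reducedRDD is an assumption, not part of the original snippet
    fileUtil.save_file(reducedRDD, outputFilename, "text", "json")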
Example #2
    sc = SparkContext(appName="TEST")

    java_import(sc._jvm, "edu.isi.karma")

    inputFilename = argv[1]
    outputFilename = argv[2]
    numPartitions = 1000
    numFramerPartitions = max(10, numPartitions / 10)

    fileUtil = FileUtil(sc)
    workflow = Workflow(sc)
    contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

    #1. Read the input
    inputRDD = workflow.batch_read_csv(inputFilename)

    #2. Apply the karma Model
    outputRDD = workflow.run_karma(inputRDD,
                                   "https://raw.githubusercontent.com/american-art/npg/master/NPGConstituents/NPGConstituents-model.ttl",
                                   "http://americanartcollaborative.org/npg/",
                                   "http://www.cidoc-crm.org/cidoc-crm/E39_Actor1",
                                   "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json",
                                   num_partitions=numPartitions,
                                   data_type="csv",
                                   additional_settings={"karma.input.delimiter":","})

    #3. Save the output
    # fileUtil.save_file(outputRDD, outputFilename, "text", "json")

    #4. Reduce rdds
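
The snippet breaks off at the reduce comment; Example #1 shows the corresponding call, which would complete step 4 here as well (the save step would then proceed as sketched under Example #1):

    # Reduce the Karma output as in Example #1 (a sketch, not original code)
    reducedRDD = workflow.reduce_rdds(outputRDD)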
Example #3
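            # Snippet from inside a loop over a tab-separated batch file;
            # sc, workflow, and outputFilename are presumably defined
            # earlier in the script (not shown here)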
            line = line.rstrip()
            params = line.split("\t")
            data_file_URL = str(params[0])
            num_partitions = int(params[1])
            model_file_URL = str(params[2])
            base = str(params[3])
            root = str(params[4])
            context = str(params[5])
            output_folder = str(params[6])
            output_zip_path = str(params[7])

            #0. Download data file
            dataFileName = download_file(data_file_URL)

            #1. Read the input
            inputRDD = workflow.batch_read_csv(dataFileName).partitionBy(num_partitions)

            #2. Apply the karma Model
            outputRDD = workflow.run_karma(inputRDD,
                                           model_file_URL,
                                           base,
                                           root,
                                           context,
                                           data_type="csv",
                                           additional_settings={"karma.input.delimiter": ",", "karma.output.format": "n3"})

            #3. Save the output
            outputPath = outputFilename + "/" + output_folder
            outputRDD.map(lambda x: x[1]).saveAsTextFile(outputPath)
            print "Successfully applied Karma!"
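
The download_file helper from step 0 is not part of the snippet. A minimal sketch of what it presumably does (fetch the URL into the working directory and return the local filename), written for the same Python 2 environment as the snippets:

    import os
    import urllib

    def download_file(url):
        # Hypothetical stand-in for the helper used above: fetch the remote
        # file into the current directory and return its local filename
        local_name = os.path.basename(url)
        urllib.urlretrieve(url, local_name)
        return local_name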
Example #4
    sc = SparkContext(appName="TEST")

    java_import(sc._jvm, "edu.isi.karma")

    inputFilename = argv[1]
    outputFilename = argv[2]

    fileUtil = FileUtil(sc)
    workflow = Workflow(sc)
    contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

    #1. Read the input

    #test big file
    inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1)

    #test small file
    # inputRDD = workflow.batch_read_csv(inputFilename)

    #2. Apply the karma Model
    outputRDD = workflow.run_karma(inputRDD,
                                   "https://raw.githubusercontent.com/american-art/autry/master/AutryMakers/AutryMakers-model.ttl",
                                   "http://dig.isi.edu/AutryMakers/",
                                   "http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object1",
                                   "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json",
                                   data_type="csv",
                                   additional_settings={"karma.input.delimiter":","})

    #3. Save the output
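
The snippet stops at the save comment. Following the pattern from Example #3, and assuming the same (key, value) record shape in the output RDD, step 3 could be finished like this:

    # Drop the keys and write the serialized records out as text files;
    # a sketch following Example #3, not part of the original snippet
    outputRDD.map(lambda x: x[1]).saveAsTextFile(outputFilename)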