workflow = Workflow(sc) contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json" #1. Read the input #test big file # inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1000) #test small file inputRDD = workflow.batch_read_csv(inputFilename) #2. Apply the karma Model outputRDD = workflow.run_karma( inputRDD, "https://raw.githubusercontent.com/american-art/npg/master/NPGConstituents/NPGConstituents-model.ttl", "http://dig.isi.edu/npgConstituents/", "http://www.cidoc-crm.org/cidoc-crm/E39_Actor1", "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json", data_type="csv", additional_settings={"karma.input.delimiter": ","}) #3. Save the output # fileUtil.save_file(outputRDD, outputFilename, "text", "json") reducedRDD = workflow.reduce_rdds(outputRDD) reducedRDD.persist() types = [{ "name": "E39_Actor", "uri": "http://www.cidoc-crm.org/cidoc-crm/E39_Actor" }, { "name": "E82_Actor_Appellation", "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation"
outputFilename = argv[2] numPartitions = 1000 numFramerPartitions = max(10, numPartitions / 10) fileUtil = FileUtil(sc) workflow = Workflow(sc) contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json" #1. Read the input inputRDD = workflow.batch_read_csv(inputFilename) #2. Apply the karma Model outputRDD = workflow.run_karma(inputRDD, "https://raw.githubusercontent.com/american-art/npg/master/NPGConstituents/NPGConstituents-model.ttl", "http://americanartcollaborative.org/npg/", "http://www.cidoc-crm.org/cidoc-crm/E39_Actor1", "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json", num_partitions=numPartitions, data_type="csv", additional_settings={"karma.input.delimiter":","}) #3. Save the output # fileUtil.save_file(outputRDD, outputFilename, "text", "json") #4. Reduce rdds reducedRDD = workflow.reduce_rdds(numFramerPartitions, outputRDD) reducedRDD.persist() types = [ {"name": "E39_Actor", "uri": "http://www.cidoc-crm.org/cidoc-crm/E39_Actor"}, {"name": "E82_Actor_Appellation", "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation"}, {"name": "E67_Birth", "uri": "http://www.cidoc-crm.org/cidoc-crm/E67_Birth"},
# Parameterized Karma job: download a remote CSV, apply the given model, and
# publish the mapped output as a single concatenated N3 file.
# NOTE(review): `params`, `workflow`, `outputFilename`, `num_partitions`,
# `data_file_URL`, `model_file_URL`, `base`, `download_file`, and
# `concate_file` are defined earlier in the file — confirm against the full
# script.
root = str(params[4])               # root class URI for the Karma model
context = str(params[5])            # JSON-LD context URL
output_folder = str(params[6])      # subfolder for per-partition part files
output_zip_path = str(params[7])    # presumably consumed later in the file

#0. Download data file
dataFileName = download_file(data_file_URL)

#1. Read the input
inputRDD = workflow.batch_read_csv(dataFileName).partitionBy(num_partitions)

#2. Apply the karma Model (emit N3 triples rather than JSON-LD)
outputRDD = workflow.run_karma(
    inputRDD,
    model_file_URL,
    base,
    root,
    context,
    data_type="csv",
    additional_settings={"karma.input.delimiter": ",",
                         "karma.output.format": "n3"})

#3. Save the output (one part file per partition; x is a (key, value) pair,
# keep only the serialized triples)
outputPath = outputFilename + "/" + output_folder
outputRDD.map(lambda x: x[1]).saveAsTextFile(outputPath)
# Fix: use the parenthesized print form — with a single argument it prints
# identically under Python 2 and is valid under Python 3.
print("Successfully apply karma!")

#4. Concatenate data files into one .n3
input_sum_file = outputFilename + "/" + output_folder + "/"
output_sum_file = outputFilename + "/" + output_folder + ".n3"
concate_file(input_sum_file, output_sum_file)
print("Successfully generate whole data file!")
contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json" #1. Read the input #test big file inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1) #test small file # inputRDD = workflow.batch_read_csv(inputFilename) #2. Apply the karma Model outputRDD = workflow.run_karma(inputRDD, "https://raw.githubusercontent.com/american-art/autry/master/AutryMakers/AutryMakers-model.ttl", "http://dig.isi.edu/AutryMakers/", "http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object1", "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json", data_type="csv", additional_settings={"karma.input.delimiter":","}) #3. Save the output # fileUtil.save_file(outputRDD, outputFilename, "text", "json") reducedRDD = workflow.reduce_rdds(outputRDD) reducedRDD.persist() types = [ {"name": "E82_Actor_Appellation", "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation"} ] frames = [ {"name": "AutryMakers", "url": "https://raw.githubusercontent.com/american-art/aac-alignment/master/frames/autryMakers.json-ld"}
workflow = Workflow(sc) contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json" #1. Read the input #test big file inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1) #test small file # inputRDD = workflow.batch_read_csv(inputFilename) #2. Apply the karma Model outputRDD = workflow.run_karma( inputRDD, "https://raw.githubusercontent.com/american-art/autry/master/AutryMakers/AutryMakers-model.ttl", "http://dig.isi.edu/AutryMakers/", "http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object1", "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json", data_type="csv", additional_settings={"karma.input.delimiter": ","}) #3. Save the output # fileUtil.save_file(outputRDD, outputFilename, "text", "json") reducedRDD = workflow.reduce_rdds(outputRDD) reducedRDD.persist() types = [{ "name": "E82_Actor_Appellation", "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation" }] frames = [{