def test_set_output_pass():
    """Assigning to `output` replaces the output sets with the new specs."""
    attrs = [Attribute("att1", AttributeType.TEXT)]
    initial_sets = [Set("set1", SetType.OUTPUT, attrs)]
    replacement = [Set("set4", SetType.OUTPUT, attrs)]
    expected = [member.get_specification() for member in replacement]
    tf = Transformation("tf1", sets=initial_sets)
    tf.output = replacement
    assert tf.output == expected
def test_set_sets_pass():
    """Assigning to `sets` swaps in the new collection of set specs."""
    attrs = [Attribute("att1", AttributeType.TEXT)]
    original = [Set("set1", SetType.INPUT, attrs)]
    replacement = [
        Set("set1", SetType.INPUT, attrs),
        Set("set2", SetType.OUTPUT, attrs),
    ]
    expected = [member.get_specification() for member in replacement]
    tf = Transformation("tf1", sets=original)
    tf.sets = replacement
    assert tf.sets == expected
# --- Example 3 ---
def test_get_dependency_pass():
    """A Set constructed with a dependency exposes it via `dependency`."""
    tag = "set1"
    # Renamed from `type`/`set` to avoid shadowing Python builtins.
    set_type = SetType.INPUT
    dependency = "dependency"
    attributes = [Attribute("att1", AttributeType.TEXT)]
    dependent_set = Set(tag, set_type, attributes, dependency=dependency)
    assert dependent_set.dependency == dependency
# --- Example 4 ---
def test_get_attributes_pass():
    """`attributes` returns the specifications of the constructor attributes."""
    tag = "set1"
    # Renamed from `type`/`set` to avoid shadowing Python builtins.
    set_type = SetType.INPUT
    attributes = [Attribute("att1", AttributeType.TEXT)]
    expected_result = [attr.get_specification() for attr in attributes]
    attr_set = Set(tag, set_type, attributes)
    assert attr_set.attributes == expected_result
# --- Example 5 ---
def test_set_type_pass():
    """Assigning a SetType to `type` stores that enum member's value."""
    tag = "set1"
    # Renamed from `type`/`set` to avoid shadowing Python builtins.
    initial_type = SetType.INPUT
    new_type = SetType.OUTPUT
    attributes = [Attribute("att1", AttributeType.TEXT)]
    typed_set = Set(tag, initial_type, attributes)
    typed_set.type = new_type
    assert typed_set.type == new_type.value
def test_get_specification_pass():
    """Transformation.get_specification returns its tag plus member-set specs."""
    tag = "tf1"
    member_sets = [
        Set("set1", SetType.INPUT, [Attribute("att1", AttributeType.TEXT)])
    ]
    expected = {
        "sets": [member.get_specification() for member in member_sets],
        "tag": tag,
    }
    assert Transformation(tag, member_sets).get_specification() == expected
# --- Example 7 ---
def test_get_specification_pass():
    """Set.get_specification returns its attributes, tag, and type value."""
    tag = "set1"
    # Renamed from `type`/`set` to avoid shadowing Python builtins.
    set_type = SetType.INPUT
    attributes = [Attribute("att1", AttributeType.TEXT)]
    expected_result = {
        "attributes": [attributes[0].get_specification()],
        "tag": tag,
        "type": set_type.value
    }
    spec_set = Set(tag, set_type, attributes)
    assert spec_set.get_specification() == expected_result
def add_transformation(df, label, input_attr, output_attr, prev_out=None):
    """
    Build a transformation with one input and one output set and register
    it on the given dataflow.

    :param df: Dataflow the transformation is added to
    :param label: Label for the transformation
    :param input_attr: list of Attribute(label, type) for the input set
    :param output_attr: list of Attribute(label, type) for the output set
    :param prev_out: tf_output object of the previous step, chained in as
        an extra set when provided
    :return: tf, tf_input, tf_output
    """
    tf = Transformation(label)
    tf_input = Set(f"i{label}", SetType.INPUT, input_attr)
    tf_output = Set(f"o{label}", SetType.OUTPUT, output_attr)

    # Chain the previous step's output ahead of this step's own sets.
    linked_sets = [tf_input, tf_output]
    if prev_out:
        linked_sets.insert(0, prev_out)
    tf.set_sets(linked_sets)
    df.add_transformation(tf)

    return tf, tf_input, tf_output
def test_get_input_pass():
    """`input` exposes the specifications of the INPUT sets."""
    attrs = [Attribute("att1", AttributeType.TEXT)]
    input_sets = [Set("set1", SetType.INPUT, attrs)]
    expected = [input_sets[0].get_specification()]
    tf = Transformation("tf1", sets=input_sets)
    assert tf.input == expected
# --- Example 10 ---
    #PROVENIÊNCIA
    ############################

    dataflow_tag = "prov-df-{}".format(aggreg_unit)
    df = Dataflow(dataflow_tag)

    logger.info('Inicializando o processador Spark')
    processador = ProcessadorSparkClass(logger, spark, df, dataflow_tag)

    ##PROVENIÊNCIA PROSPECTIVA
    #Transformação para extrair o primeiro stats: ExtrairStats1
    tf1 = Transformation('load_data') ## Usando o nome da task spark
    tf1_input = Set("i{}1".format('load_data'), SetType.INPUT,
        [
            Attribute("datafiles", AttributeType.TEXT),
            Attribute("tables", AttributeType.TEXT),
            Attribute("currenttime", AttributeType.TEXT),
            Attribute("aggregationunit", AttributeType.TEXT),
            Attribute("csvseparator", AttributeType.TEXT)
        ])

    tf1_output = Set("o{}1".format('load_data'), SetType.OUTPUT,
      [
            Attribute("currenttime", AttributeType.TEXT),
            Attribute("elapsedtime", AttributeType.NUMERIC)
      ])

    tf1.set_sets([tf1_input, tf1_output])
    df.add_transformation(tf1)
    tf2 = Transformation('initial_data_stats') ## Usando o nome da task spark
    tf2_input = Set("i{}1".format('initial_data_stats'), SetType.INPUT,
        [
# --- Example 11 ---
# Commented-out CLI-argument experiments kept from the original author.
#dirin_do_ficheiro = sys.argv[0]
#dirin_arg_pas = sys.argv[0:]
###print "O nome do diretorio de entrada do ficheiro e: " + dirin_do_ficheiro 
###print "E os argumentos passados sao: " + str(dirin_arg_pas)


############################
# PROVENANCE
############################
# Tag identifying this dataflow in the provenance store.
dataflow_tag = "mafft-df"
df = Dataflow(dataflow_tag)

## PROSPECTIVE PROVENANCE
# Transformation that extracts the file names: ExtrairNome
tf1 = Transformation("ExtrairNome")
tf1_input = Set("iExtrairNome", SetType.INPUT,
  [Attribute("DIRIN_FILE", AttributeType.FILE)])
tf1_output = Set("oExtrairNome", SetType.OUTPUT,
  [Attribute("FASTA_FILE", AttributeType.FILE),
  Attribute("MAFFT_FILE", AttributeType.FILE)])
tf1.set_sets([tf1_input, tf1_output])
df.add_transformation(tf1)

# Transformation that reads the file and counts the number of sequences:
# ContarSequencias
tf2 = Transformation("ContarSequencias")
tf2_input = Set("iContarSequencias", SetType.INPUT,
  [Attribute("FASTA_FILE", AttributeType.FILE)])# reader: fasta file / text-file attribute
tf2_output = Set("oContarSequencias", SetType.OUTPUT,
  [Attribute("NUMERO_SEQUENCIAS", AttributeType.NUMERIC)])
tf2.set_sets([tf2_input, tf2_output])
df.add_transformation(tf2)
# --- Example 12 ---
def test_get_type_pass():
    """`type` returns the enum value of the SetType given at construction."""
    tag = "set1"
    # Renamed from `type`/`set` to avoid shadowing Python builtins.
    set_type = SetType.INPUT
    attributes = [Attribute("att1", AttributeType.TEXT)]
    typed_set = Set(tag, set_type, attributes)
    assert typed_set.type == set_type.value