示例#1
0
def create_structmap(workspace, divs, structmap, filegrp, path=''):
    """Populate ``structmap`` recursively from a directory-like mapping.

    Every key of ``divs`` that ends with ``-techmd.xml`` is handled as a
    file: the suffix is stripped, the file is registered with
    ``add_file_to_filesec`` and an fptr element is created for it. Every
    other key is handled as a directory: a div element is created and this
    function is called again with ``divs[key]`` and the new div as parent.

    :param workspace: passed on to the metadata lookup helpers
    :param divs: mapping whose keys are entry names; the values of
                 directory entries are nested mappings of the same kind
    :param structmap: parent element that receives the created elements
    :param filegrp: fileGrp element passed on to ``add_file_to_filesec``
    :param path: path of the directory being processed (it is run through
                 ``decode_path`` before joining, so presumably encoded)
    :returns: ``None``, ``structmap`` is modified in place
    """
    techmd_suffix = '-techmd.xml'
    file_pointers = []
    directory_divs = []

    for name in divs:
        if name.endswith(techmd_suffix):
            # File entry: the real file name is the key without the suffix
            entry_path = encode_path(
                os.path.join(decode_path(path), name[:-len(techmd_suffix)]))
            admids = get_links_event_agent(workspace, entry_path)
            file_id = add_file_to_filesec(workspace, entry_path, filegrp,
                                          admids)
            file_pointers.append(mets.fptr(file_id))
            continue

        # Directory entry: create a div and descend into it
        entry_path = encode_path(os.path.join(decode_path(path), name))
        admids = get_links_event_agent(workspace, entry_path)
        _, dmdsec_id = ids_for_files(workspace, entry_path, 'dmdsec.xml')
        child_div = mets.div(type_attr=name, dmdid=dmdsec_id, admid=admids)
        directory_divs.append(child_div)

        create_structmap(workspace, divs[name], child_div, filegrp,
                         entry_path)

    # File pointers go before the sub-divs in the parent element
    for element in file_pointers + directory_divs:
        structmap.append(element)
示例#2
0
def test_encode_path():
    """Check ``utils.encode_path`` with plain, affixed and non-ASCII paths."""

    # A slash is percent-encoded when no safe characters are given
    assert utils.encode_path('tests/testpath') == 'tests%2Ftestpath'

    # Prefix and suffix are placed around the encoded path
    with_affixes = utils.encode_path(
        'tests/testpath', prefix='testprefix-', suffix='-testsuffix'
    )
    assert with_affixes == 'testprefix-tests%2Ftestpath-testsuffix'

    # Non-ASCII characters are encoded as UTF-8 percent escapes
    non_ascii = utils.encode_path('tästs/tøstpath')
    assert non_ascii == 't%C3%A4sts%2Ft%C3%B8stpath'
示例#3
0
def add_file_to_filesec(workspace, path, filegrp):
    """Create a file element for ``path`` and append it to ``filegrp``.

    The file element links to the administrative metadata IDs returned by
    ``get_md_references`` and gets one stream element for every entry
    returned by ``get_objectlist``.

    :param workspace: Workspace directory from which administrative
                      metadata references are looked up
    :param path: url encoded path of the file
    :param lxml.etree.Element filegrp: fileGrp element
    :returns: unique identifier of the added file element
    """
    file_id = '_{}'.format(uuid4())

    # IDs of the administrative metadata sections linked to the file
    admin_ids = set(get_md_references(workspace, path=path))
    location = 'file://%s' % encode_path(path, safe='/')

    file_element = mets.file_elem(file_id,
                                  admid_elements=admin_ids,
                                  loctype='URL',
                                  xlink_href=location,
                                  xlink_type='simple',
                                  groupid=None)

    # A falsy stream list (e.g. None) simply produces no stream elements
    for stream in get_objectlist(workspace, path) or []:
        stream_admin_ids = get_md_references(workspace,
                                             path=path,
                                             stream=stream)
        file_element.append(mets.stream(admid_elements=stream_admin_ids))

    filegrp.append(file_element)

    return file_id
def add_file_to_filesec(all_amd_refs, object_refs, path, filegrp):
    """Build a file element for ``path`` and attach it to ``filegrp``.

    :all_amd_refs: XML element tree of administrative metadata references
    :object_refs: XML tree of object references
    :path: url encoded path of the file
    :filegrp: fileGrp element that receives the new file element
    :returns: unique identifier of file element
    """
    new_id = '_{}'.format(uuid4())

    # Administrative metadata IDs that refer to the whole file
    file_admids = get_md_references(all_amd_refs, path=path)

    new_file = mets.file_elem(
        new_id,
        admid_elements=set(file_admids),
        loctype='URL',
        xlink_href='file://%s' % encode_path(path, safe='/'),
        xlink_type='simple',
        groupid=None)

    # Add a stream element with its own metadata links for each stream
    stream_list = get_objectlist(object_refs, path)
    if stream_list:
        for one_stream in stream_list:
            new_file.append(
                mets.stream(admid_elements=get_md_references(
                    all_amd_refs, path=path, stream=one_stream)))

    filegrp.append(new_file)

    return new_id
示例#5
0
def create_premis_agent_file(workspace,
                             event_type,
                             agent_name,
                             agent_type,
                             agent_identifier,
                             event_target=None):
    """Creates `<event_type>-agent-amd.xml` file. If `event_target` is
    given, it is put in front of the filename as `<event_target>-`. The
    whole filename is then URL-encoded with `encode_path`. The PREMIS agent
    element is wrapped with `_create_mets` and written with `_write_mets`.
    The metadata ID is derived from the encoded filename with `encode_id`.

    :param workspace: path to directory where file is created
    :param event_type: event type (for filename)
    :param agent_name: PREMIS agentName
    :param agent_type: PREMIS agentType
    :param agent_identifier: PREMIS agentIdentifierValue
    :param event_target: event target file (for filename)
    :returns: output file path and METS XML element object
    """
    basename = '%s-agent-amd.xml' % (event_type)
    if event_target:
        raw_name = '%s-%s' % (event_target, basename)
    else:
        raw_name = basename
    encoded_name = encode_path(raw_name)
    output_path = os.path.join(workspace, encoded_name)

    agent_id = encode_id(encoded_name)
    agent_element = create_premis_agent(agent_name, agent_type,
                                        agent_identifier)

    agent_mets = _create_mets(agent_element, agent_id, 'PREMIS:AGENT')
    _write_mets(agent_mets, output_path)

    return (output_path, agent_mets)
示例#6
0
    def write_md(self,
                 metadata,
                 mdtype,
                 mdtypeversion,
                 othermdtype=None,
                 section=None,
                 stdout=False):
        """
        Wraps XML metadata into a METS document (xmlData inside mdWrap
        inside techMD or digiprovMD inside amdSec) and writes it to the
        workspace, unless the target file already exists. The output
        filename is the URL-encoded form of
            <hash>-<type>-amd.xml,
        where <hash> is the digest generated from the metadata and <type>
        is `othermdtype` when it is given, otherwise `mdtype`.

        Serializing and hashing the root xml element can be rather time
        consuming and as such this method should not be called for each file
        unless more efficient way of separating files by the metadata can't
        be easily implemented. This implementation should be done by the
        subclasses of metadata_creator.

        :metadata (Element): metadata XML element
        :mdtype (string): Value of mdWrap MDTYPE attribute
        :mdtypeversion (string): Value of mdWrap MDTYPEVERSION attribute
        :othermdtype (string): Value of mdWrap OTHERMDTYPE attribute
        :section (string): 'digiprovmd' creates a digiprovMD section, any
                           other value creates a techMD section
        :stdout (boolean): Print also to stdout
        :returns: md_id, filename - Metadata id and filename
        """
        digest = generate_digest(metadata)
        md_id = '_{}'.format(digest)
        type_part = othermdtype or mdtype
        filename = os.path.join(
            self.workspace,
            encode_path("%s-%s-amd.xml" % (digest, type_part)))

        # Identical metadata has been written already, nothing to do
        if os.path.exists(filename):
            return md_id, filename

        xmldata = mets.xmldata()
        xmldata.append(metadata)
        mdwrap = mets.mdwrap(mdtype, mdtypeversion, othermdtype)
        mdwrap.append(xmldata)

        make_section = (mets.digiprovmd if section == 'digiprovmd'
                        else mets.techmd)
        md_section = make_section(md_id)
        md_section.append(mdwrap)

        amdsec = mets.amdsec()
        amdsec.append(md_section)
        mets_root = mets.mets()
        mets_root.append(amdsec)

        with open(filename, 'wb+') as outfile:
            serialized = xml_helpers.utils.serialize(mets_root)
            outfile.write(serialized)
            if stdout:
                print(serialized.decode("utf-8"))
            print("Wrote lxml.etree %s administrative metadata to file "
                  "%s" % (mdtype, outfile.name))

        return md_id, filename
def main(arguments=None):
    """The main method for import_description.

    Reads the XML file given as ``dmdsec_location``, wraps it (or, when
    ``desc_root`` is 'remove', the children of its root element) into
    xmlData, mdWrap and dmdSec elements inside a METS root element and
    writes the result into the workspace.

    :arguments: arguments handed over to ``parse_arguments``
    :returns: 0
    :raises TypeError: if the namespace of the first wrapped element is
                       not found in ``METS_MDTYPES``
    """
    args = parse_arguments(arguments)

    # Output file name, also used as the source of the dmdSec identifier
    target_name = 'dmdsec.xml'
    if args.dmdsec_target:
        target_name = encode_path(args.dmdsec_target, suffix='-dmdsec.xml')

    with open(args.dmdsec_location, 'r') as source:
        raw_xml = source.read()

    mets_root = mets.mets()

    description = lxml.etree.fromstring(raw_xml)

    # With 'remove' the root element is dropped and its children are used
    if args.desc_root == 'remove':
        elements = description.findall('*')
    else:
        elements = [description]
    xmldata = mets.xmldata(child_elements=elements)
    namespace = h.get_namespace(elements[0])

    if namespace not in METS_MDTYPES:
        raise TypeError("Invalid namespace: %s" % namespace)

    type_info = METS_MDTYPES[namespace]
    md_type = type_info['mdtype']
    other_md_type = (type_info['othermdtype']
                     if 'othermdtype' in type_info else None)
    md_version = type_info['version']

    mdwrap = mets.mdwrap(mdtype=md_type,
                         othermdtype=other_md_type,
                         mdtypeversion=md_version,
                         child_elements=[xmldata])
    dmdsec = mets.dmdsec(encode_id(target_name), child_elements=[mdwrap])

    mets_root.append(dmdsec)

    if args.stdout:
        print(h.serialize(mets_root))

    output_file = os.path.join(args.workspace, target_name)
    output_dir = os.path.dirname(output_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_file, 'w+') as outfile:
        outfile.write(h.serialize(mets_root))

    print("import_description created file: %s" % output_file)

    return 0
示例#8
0
def ead3_c_div(parent, structmap, filegrp, workspace, cnum=None):
    """Create div elements based on ead3 c elements. Fptr elements are
    created based on ead dao elements. The Ead3 elements tags are put
    into @type and the @level or @otherlevel attributes from ead3 will
    be put into @label.

    The function calls itself for every child element of ``parent`` whose
    local tag name is one of c, c01 ... c12. The created div is appended
    to ``structmap`` after its sub-divs and fptr elements have been added.

    :param parent: ead3 c element that is processed
    :param structmap: parent element that receives the created div
    :param filegrp: fileGrp element passed on to ``add_file_to_filesec``
    :param workspace: passed on to the metadata lookup helpers
    :param cnum: number part of a numbered c element (e.g. '01'), or
                 ``None`` for unnumbered c elements
    :returns: ``None``, ``structmap`` is modified in place
    :raises IndexError: if ``parent`` has neither @otherlevel nor @level,
                        or a dao element has no @href
    """
    allowed_c_subs = (
        'c', 'c01', 'c02', 'c03', 'c04', 'c05', 'c06', 'c07', 'c08', 'c09',
        'c10', 'c11', 'c12'
    )

    # @otherlevel takes precedence over @level
    other_level = parent.xpath("./@otherlevel")
    if other_level:
        level = other_level[0]
    else:
        level = parent.xpath("./@level")[0]

    if cnum:
        c_div = mets.div(type_attr=('c' + str(cnum)), label=level)
        # Zero-pad to two digits. Plain '0' + number would turn level 10
        # into '010' instead of '10'.
        cnum_sub = '%02d' % (int(cnum) + 1)
    else:
        c_div = mets.div(type_attr='c', label=level)
        cnum_sub = None

    # Nested c elements become nested divs
    for elem in parent.findall("./*"):
        if ET.QName(elem.tag).localname in allowed_c_subs:
            ead3_c_div(elem, c_div, filegrp, workspace, cnum=cnum_sub)

    # Digital objects referenced from the did element become fptr elements
    for files in parent.xpath("./ead3:did/*", namespaces=NAMESPACES):
        tag_name = ET.QName(files.tag).localname
        if tag_name == 'daoset':
            # Only the first dao of a daoset is used
            href = files.xpath("./ead3:dao/@href", namespaces=NAMESPACES)[0]
        elif tag_name == 'dao':
            href = files.xpath("./@href")[0]
        else:
            continue

        tech_file = encode_path(href)
        amdids = get_links_event_agent(workspace, tech_file)
        fileid = add_file_to_filesec(workspace, tech_file, filegrp, amdids)
        c_div.append(mets.fptr(fileid=fileid))

    structmap.append(c_div)
示例#9
0
def main(arguments=None):
    """The main method for argparser.

    Creates a PREMIS object for every collected file, wraps it into
    mdWrap, techMD and amdSec elements inside a METS root element and
    writes one ``<encoded path>-techmd.xml`` file per input file into the
    workspace.

    :arguments: arguments handed over to ``parse_arguments``
    :returns: 0
    """
    args = parse_arguments(arguments)

    for filepath in collect_filepaths(dirs=args.files, base=args.base_path):
        # IDs and output names are based on the path relative to base_path
        if args.base_path != '':
            relative_path = os.path.relpath(filepath, args.base_path)
        else:
            relative_path = filepath

        # create_premis_object fills the xmlData element given to it
        xmldata = mets.xmldata()
        create_premis_object(
            xmldata, filepath, args.skip_inspection, args.format_name,
            args.format_version, args.digest_algorithm, args.message_digest,
            args.date_created, args.charset)

        techmd_name = encode_path(relative_path, suffix="-techmd.xml")

        mdwrap = mets.mdwrap('PREMIS:OBJECT', '2.3', child_elements=[xmldata])
        techmd = mets.techmd(encode_id(techmd_name),
                             child_elements=[mdwrap])
        amdsec = mets.amdsec(child_elements=[techmd])
        mets_root = mets.mets(child_elements=[amdsec])

        if args.stdout:
            print(h.serialize(mets_root))

        if not os.path.exists(args.workspace):
            os.makedirs(args.workspace)

        output_path = os.path.join(args.workspace, techmd_name)
        with open(output_path, 'w+') as outfile:
            outfile.write(h.serialize(mets_root))
            print("Wrote METS technical metadata to file %s" % outfile.name)

    return 0
def test_import_object_ok(input_file, testpath):
    """Run import_object for one file and check that the written METS
    file contains exactly one techMD element."""

    return_code = import_object.main(['--workspace', testpath, input_file])

    techmd_path = os.path.join(
        testpath, encode_path(input_file, suffix='-techmd.xml'))
    root = ET.parse(techmd_path).getroot()

    techmd_elements = root.xpath('/mets:mets/mets:amdSec/mets:techMD',
                                 namespaces=NAMESPACES)
    assert len(techmd_elements) == 1

    assert return_code == 0
示例#11
0
    def write_dict(self, file_metadata_dict, premis_amd_id):
        """
        Dump file metadata as JSON to `<digest>-scraper.json` in the
        workspace for further scripts. The digest is `premis_amd_id`
        without its first character. An existing file is left untouched.

        :file_metadata_dict: File metadata dict
        :premis_amd_id: The AMDID of corresponding premis FILE object
        """
        json_name = encode_path("%s-scraper.json" % premis_amd_id[1:])
        json_path = os.path.join(self.workspace, json_name)

        if os.path.exists(json_path):
            return

        with open(json_path, 'wt') as outfile:
            json.dump(file_metadata_dict, outfile)
        print("Wrote technical data to: %s" % (outfile.name))
示例#12
0
def get_fileid(filesec, path):
    """Look up the file element whose FLocat points to `path` and return
    its ID attribute.

    :param path: path of the file
    :param lxml.etree Element filesec: fileSec element
    :returns: file element identifier
    :raises IndexError: if no matching file element is found
    """
    # FLocat hrefs keep slashes unencoded, so the path is encoded likewise
    query = (
        '//mets:fileGrp/mets:file/mets:FLocat[@xlink:href="file://%s"]/..' %
        encode_path(path, safe='/'))
    matches = filesec.xpath(query, namespaces=NAMESPACES)

    return matches[0].attrib['ID']
def test_import_object_skip_inspection_nodate_ok(input_file, testpath):
    """Run import_object with --skip_inspection and explicit format and
    digest values, without a creation date, and check the output file."""

    arguments = [
        '--workspace', testpath, input_file,
        '--skip_inspection',
        '--format_name', 'image/dpx',
        '--format_version', '1.0',
        '--digest_algorithm', 'MD5',
        '--message_digest', '1qw87geiewgwe9',
    ]
    return_code = import_object.main(arguments)

    techmd_path = os.path.join(
        testpath, encode_path(input_file, suffix='-techmd.xml'))
    root = ET.parse(techmd_path).getroot()

    techmd_elements = root.xpath('/mets:mets/mets:amdSec/mets:techMD',
                                 namespaces=NAMESPACES)
    assert len(techmd_elements) == 1
    assert return_code == 0
    def get_provenance_ids(self):
        """List identifiers of provenance events.

        Gets the dataset metadata from Metax, reads the event type from
        every `<id>-PREMIS%3AEVENT-amd.xml` file that exists in the SIP
        creation directory, and returns the event ID matching the
        localized event type of each provenance entry of the dataset.

        :returns: list of provenance IDs
        :raises KeyError: if a provenance entry has an event type for
                          which no event file was found
        """
        config = Configuration(self.config)
        client = Metax(
            config.get('metax_url'),
            config.get('metax_user'),
            config.get('metax_password'),
            verify=config.getboolean('metax_ssl_verification'))
        dataset = client.get_dataset(self.dataset_id)
        languages = get_dataset_languages(dataset)

        # The Luigi task input path already contains the workspace path,
        # so only its base name is handed to read_md_references.
        reference_file = os.path.basename(
            self.input()['create_provenance_information'].path)
        event_ids = get_md_references(
            read_md_references(self.workspace, reference_file))

        # Map event type -> event ID for all event files that exist
        ids_by_event_type = {}
        for event_id in event_ids:
            event_path = os.path.join(
                self.sip_creation_path,
                event_id[1:] + "-PREMIS%3AEVENT-amd.xml")
            if os.path.exists(event_path):
                # NOTE(review): the path is URL-encoded for parsing although
                # the existence check uses the raw path - presumably this
                # compensates for the XML parser unescaping file names;
                # confirm before changing.
                root = ET.parse(encode_path(event_path)).getroot()
                type_element = root.xpath("//premis:eventType",
                                          namespaces=NAMESPACES)[0]
                ids_by_event_type[type_element.text] = event_id

        return [
            ids_by_event_type[get_localized_value(
                provenance["preservation_event"]["pref_label"],
                languages=languages)]
            for provenance in dataset["research_dataset"]["provenance"]
        ]
def test_import_object_validate_pdf_ok(input_file, testpath):
    """Run import_object for the test PDF and check the detected format
    name and version in the written METS file."""

    def first_text(root, query):
        """Return the first result of an XPath query."""
        return root.xpath(query, namespaces=NAMESPACES)[0]

    return_code = import_object.main(
        ['--workspace', testpath, 'tests/data/test_import.pdf'])

    techmd_path = os.path.join(
        testpath, encode_path(input_file, suffix='-techmd.xml'))
    root = ET.parse(techmd_path).getroot()

    techmd_elements = root.xpath('/mets:mets/mets:amdSec/mets:techMD',
                                 namespaces=NAMESPACES)
    assert len(techmd_elements) == 1
    assert first_text(root, '//premis:formatName/text()') == 'application/pdf'
    assert first_text(root, '//premis:formatVersion/text()') == '1.4'

    assert return_code == 0
def test_import_object_structured_ok(testpath):
    """Run import_object separately for every file of the structured test
    data and check each written METS file."""

    workspace = os.path.abspath(testpath)
    data_dir = os.path.abspath(
        os.path.join(os.curdir, 'tests/data/structured'))

    for data_file in iterate_files(data_dir):
        return_code = import_object.main(
            ['--workspace', workspace,
             os.path.relpath(data_file, os.curdir)])

        # The output name is based on the path relative to the current dir
        relative_file = os.path.relpath(data_file, os.curdir)
        techmd_path = os.path.join(
            testpath, encode_path(relative_file, suffix='-techmd.xml'))

        root = ET.parse(techmd_path).getroot()
        techmd_elements = root.xpath('/mets:mets/mets:amdSec/mets:techMD',
                                     namespaces=NAMESPACES)

        assert len(techmd_elements) == 1
        assert return_code == 0
示例#17
0
def create_premis_event_file(workspace,
                             event_type,
                             event_datetime,
                             event_detail,
                             event_outcome,
                             event_outcome_detail,
                             event_target=None,
                             agent_identifier=None):
    """Creates `<event_type>-event-amd.xml` file. If `event_target` is
    given, it is put in front of the filename as `<event_target>-`. The
    whole filename is then URL-encoded with `encode_path`. The PREMIS event
    element is wrapped with `_create_mets` and written with `_write_mets`.
    The metadata ID is derived from the encoded filename with `encode_id`.

    :param workspace: path to directory where file is created
    :param event_type: PREMIS eventType
    :param event_datetime: PREMIS eventDateTime
    :param event_detail: PREMIS eventDetail
    :param event_outcome: PREMIS eventOutcome
    :param event_outcome_detail: PREMIS eventOutcomeDetail
    :param event_target: event target file (for filename)
    :param agent_identifier: PREMIS linkingAgentIdentifierValue
    :returns: output file path and METS XML element object
    """
    basename = '%s-event-amd.xml' % event_type
    if event_target:
        raw_name = '%s-%s' % (event_target, basename)
    else:
        raw_name = basename
    encoded_name = encode_path(raw_name)
    output_path = os.path.join(workspace, encoded_name)

    event_id = encode_id(encoded_name)
    event_element = create_premis_event(event_type, event_datetime,
                                        event_detail, event_outcome,
                                        event_outcome_detail,
                                        agent_identifier)

    event_mets = _create_mets(event_element, event_id, 'PREMIS:EVENT')
    _write_mets(event_mets, output_path)

    return (output_path, event_mets)
示例#18
0
def _find_event(workspace,
                event_type,
                event_datetime,
                event_detail,
                event_outcome,
                event_outcome_detail):
    """Helper function that tells whether a similar event has been written
    already. A PREMIS event is built from the given values, and the digest
    of that event gives the file name to look for in the workspace.

    :returns: True if `<digest>-PREMIS:EVENT-amd.xml` (URL-encoded) exists
              in the workspace, otherwise False
    """
    digest = generate_digest(
        create_premis_event(
            event_type=event_type,
            event_datetime=event_datetime,
            event_detail=event_detail,
            event_outcome=event_outcome,
            event_outcome_detail=event_outcome_detail))

    candidate = os.path.join(
        workspace, encode_path("%s-PREMIS:EVENT-amd.xml" % digest))

    return os.path.exists(candidate)
def get_fileid(filesec, path, file_ids=None):
    """Returns the ID for a file. When a non-empty `file_ids` dict is
    given, the ID is read from it with `path` as the key. Otherwise the
    file element whose FLocat points to `path` is searched from fileSec
    and its ID attribute is returned.

    :filesec: fileSec element
    :path: path of the file
    :file_ids: Dict of file paths and file IDs
    :returns: file identifier
    """
    if file_ids:
        return file_ids[path]

    # No usable dict: fall back to searching the fileSec
    query = (
        '//mets:fileGrp/mets:file/mets:FLocat[@xlink:href="file://%s"]/..'
        % encode_path(path, safe='/'))
    file_element = filesec.xpath(query, namespaces=NAMESPACES)[0]

    return file_element.attrib['ID']
示例#20
0
    def write(self,
              mdtype="OTHER",
              mdtypeversion="8.3",
              othermdtype="ADDML",
              filerel=None,
              section=None,
              stdout=False,
              file_metadata_dict=None):
        """ Write all the METS XML files and md-reference file.
        Base class write is overwritten to handle the references
        correctly and add flatFile fields to METS XML files.

        For every collected metadata tree one METS XML file is written
        with `write_md`, a reference is added for each file that uses the
        metadata, and flatFile lines for those files are added to the
        written METS XML file. Finally the references are written and the
        object is re-initialized. `section`, `stdout` and
        `file_metadata_dict` are not used by this method.

        :returns: None
        """

        for key in self.etrees:
            users = self.filenames[key]

            # Create METS XML file
            amd_id, amd_fname = self.write_md(
                self.etrees[key], mdtype, mdtypeversion, othermdtype)

            # Add all the files to references; a given filerel replaces
            # every file name
            for filename in users:
                self.add_reference(amd_id, filerel or filename)

            # Append all the flatFile elements to the METS XML file
            flat_file_lines = []
            for filename in users:
                flat_file_lines.append(
                    flat_file_str(encode_path(filename), "ref001"))
            append_lines(amd_fname, "<addml:flatFiles>", flat_file_lines)

        # Write md-references
        self.write_references()

        # Clear filenames and etrees
        self.__init__(self.workspace)
    def run(self):
        """Create a METS document that contains logical structural map.

        The map has one wrapper div of type 'logical' that links to the
        dmdSec ID read from the physical structmap file and to the
        provenance IDs. Inside it there is one div per file category, each
        with an fptr element for every file of the category.

        :returns: ``None``
        """
        # The DMDID is taken from the first div of the physical structmap
        structmap_path = os.path.join(self.sip_creation_path,
                                      'structmap.xml')
        physical_root = ET.parse(structmap_path).getroot()
        dmdsec_id = physical_root[0][0].attrib['DMDID']

        provenance_ids = self.get_provenance_ids()

        # Empty logical structmap inside a METS root element
        logical_structmap = mets.structmap(type_attr='Fairdata-logical')
        mets_structmap = mets.mets(child_elements=[logical_structmap])

        # One div per category, all collected under the wrapper div
        categories = self.find_file_categories()
        wrapper_div = mets.div(type_attr='logical',
                               dmdid=[dmdsec_id],
                               admid=provenance_ids)
        for category in categories:
            category_div = mets.div(type_attr=category)
            for filename in categories.get(category):
                encoded_name = encode_path(filename, safe='/')
                category_div.append(
                    mets.fptr(self.get_fileid(encoded_name)))
            wrapper_div.append(category_div)
        logical_structmap.append(wrapper_div)

        with self.output().open('wb') as output:
            output.write(h.serialize(mets_structmap))
示例#22
0
def main(arguments=None):
    """The main method for premis_event.

    Writes a PREMIS event METS file into the workspace. When an agent name
    is given, a PREMIS agent METS file is written first and the value
    returned by ``create_premis_agent`` is passed on to the event.

    File names are ``<event_type>-agent.xml`` and
    ``<event_type>-event.xml``, prefixed with ``<event_target>-`` when an
    event target is given, and URL-encoded with ``encode_path``.

    :arguments: arguments handed over to ``parse_arguments``
    :returns: 0
    """
    args = parse_arguments(arguments)

    linking_agent_identifier = None
    if args.agent_name:

        agent_mets = mets.mets()
        agent_amdsec = mets.amdsec()
        agent_mets.append(agent_amdsec)

        if args.event_target:
            agent_basename = '%s-%s-agent.xml' % (args.event_target,
                                                  args.event_type)
        else:
            agent_basename = '%s-agent.xml' % (args.event_type)
        encoded_agent_name = encode_path(agent_basename)

        # The same encoded name gives both the ID and the file name
        agent_id = encode_id(encoded_agent_name)
        agent_file = os.path.join(args.workspace, encoded_agent_name)

        linking_agent_identifier = create_premis_agent(agent_amdsec,
                                                       agent_id,
                                                       args.agent_name,
                                                       args.agent_type)

        if args.stdout:
            print(h.serialize(agent_mets))

        agent_dir = os.path.dirname(agent_file)
        if not os.path.exists(agent_dir):
            os.makedirs(agent_dir)

        with open(agent_file, 'w+') as outfile:
            outfile.write(h.serialize(agent_mets))

        print("premis_event created file: %s" % agent_file)

    # Create event
    event_mets = mets.mets()
    event_amdsec = mets.amdsec()
    event_mets.append(event_amdsec)

    if args.event_target:
        event_basename = '%s-%s-event.xml' % (args.event_target,
                                              args.event_type)
    else:
        event_basename = '%s-event.xml' % (args.event_type)
    encoded_event_name = encode_path(event_basename)

    event_id = encode_id(encoded_event_name)
    event_file = os.path.join(args.workspace, encoded_event_name)

    create_premis_event(event_amdsec, args.event_type, args.event_datetime,
                        args.event_detail, args.event_outcome,
                        args.event_outcome_detail, linking_agent_identifier,
                        event_id)

    if args.stdout:
        print(h.serialize(event_mets))

    event_dir = os.path.dirname(event_file)
    if not os.path.exists(event_dir):
        os.makedirs(event_dir)

    with open(event_file, 'w+') as outfile:
        outfile.write(h.serialize(event_mets))

    print("premis_event created file: %s" % event_file)

    return 0
示例#23
0
def create_addml_metadata(csv_file,
                          delimiter,
                          isheader,
                          charset,
                          record_separator,
                          quoting_char,
                          flatfile_name=None):
    """Creates ADDML metadata for a CSV file. By default no flatFile
    element is included. According to the original author it is added
    later by the write() method of the AddmlCreator class, so that the
    same metadata does not get a different hash only because the file
    name differs.

    A flatFile element is included if the optional parameter
    flatfile_name is provided. Its name attribute is the URL-encoded
    flatfile_name. The csv_file parameter is not used for the name, since
    it can differ from the original filepath, e.g. when it is a tmpfile
    downloaded from IDA.

    One fieldDefinition of type 'String' is created for each column name
    obtained by splitting the CSV header with the delimiter.

    :csv_file: Path to the CSV file
    :delimiter: Delimiter used in the CSV file
    :isheader: True if CSV has a header else False
    :charset: Charset used in the CSV file
    :record_separator: Record separator given to the delimFileFormat
                       element
    :quoting_char: Quotation char used in the CSV file
    :flatfile_name: flatFile elements name attribute

    :returns: ADDML metadata XML element
    """

    column_names = csv_header(csv_file, delimiter, isheader).split(delimiter)

    description = ET.Element(addml.addml_ns('description'))
    reference = ET.Element(addml.addml_ns('reference'))

    # flatFileDefinitions: one field definition for each CSV column
    field_definitions = addml.wrapper_elems('fieldDefinitions')
    for column in column_names:
        field_definitions.append(
            addml.definition_elems('fieldDefinition', column, 'String'))

    record_definitions = addml.wrapper_elems(
        'recordDefinitions',
        [addml.definition_elems('recordDefinition', 'record', 'rdef001',
                                [field_definitions])])

    flat_file_definitions = addml.wrapper_elems(
        'flatFileDefinitions',
        [addml.definition_elems('flatFileDefinition', 'ref001', 'rec001',
                                [record_definitions])])

    # structureTypes: field, record and flat file types
    field_types = addml.wrapper_elems(
        'fieldTypes',
        [addml.definition_elems(
            'fieldType',
            'String',
            child_elements=[addml.addml_basic_elem('dataType', 'string')])])

    record_types = addml.wrapper_elems(
        'recordTypes',
        [addml.definition_elems(
            'recordType',
            'rdef001',
            child_elements=[ET.Element(addml.addml_ns('trimmed'))])])

    delim_file_format = addml.delimfileformat(record_separator, delimiter,
                                              quoting_char)
    charset_elem = addml.addml_basic_elem('charset', charset)
    flat_file_types = addml.wrapper_elems(
        'flatFileTypes',
        [addml.definition_elems(
            'flatFileType',
            'rec001',
            child_elements=[charset_elem, delim_file_format])])

    structure_types = addml.wrapper_elems(
        'structureTypes', [flat_file_types, record_types, field_types])

    # The optional flatFile element goes before the definitions
    flat_files_children = [flat_file_definitions, structure_types]
    if flatfile_name:
        flat_files_children.insert(
            0,
            addml.definition_elems('flatFile', encode_path(flatfile_name),
                                   'ref001'))

    flatfiles = addml.wrapper_elems('flatFiles', flat_files_children)

    return addml.addml([description, reference, flatfiles])