def categorize(group, output, log):
    """Run one categorisation routine over all records and print the result.

    Args:
        group: Key into the module-level ``categories`` mapping; selects
            the routine that is run.
        output: Passed through unchanged to ``Categorize``.
        log: Key into the module-level ``loggingMap``; the mapped value
            becomes the root logger's level.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(loggingMap[log])

    digitool_xml = DigitoolXML(xml_dirname)
    categorizer = Categorize(digitool_xml, output)
    identifiers = digitool_xml.getList()

    # Dispatch to the routine registered for this group.
    run_category = categories[group]
    run_category(identifiers, digitool_xml, categorizer)

    print(categorizer)
def convert(dspace_admin_passwd, dspace_admin_username, test, run, skip):
    """Convert the first ten Digitool records and optionally upload them.

    Args:
        dspace_admin_passwd: Password handed to ``Dspace``.
        dspace_admin_username: User name handed to ``Dspace``.
        test: When true, records that were rejected during the interactive
            check are collected, and the list of problem records is printed
            at the end.
        run: When true, every converted record is submitted through
            ``ds.new_item``.
        skip: Forwarded to ``convertItem``; also selects
            ``skip_missing=True`` when ``DigitoolXML`` is built.
    """
    dt = Digitool(digitool_category)
    dt.download_list()
    # NOTE(review): neither of the two objects below is used afterwards.
    # They are still constructed in case the constructors validate their
    # input -- confirm and drop them if they do not.
    if skip:
        DigitoolXML(xml_dirname, skip_missing=True)
    else:
        DigitoolXML(xml_dirname)
    MetadataConvertor()
    ds = Dspace(dspace_admin_username, dspace_admin_passwd)

    # Always defined, so that a failed record can be recorded in any mode.
    problems = []
    try:
        # Only the first ten records are processed.
        for record in dt.list[:10]:
            oai_id = dt.get_oai_id(record)
            outcome = convertItem(oai_id, test, skip)
            # A skipped record is reported either as a bare ``False`` or
            # as a tuple without converted metadata; neither can be
            # uploaded.
            if not isinstance(outcome, tuple):
                problems.append(oai_id)
                continue
            # The returned attachments are not uploaded; see the
            # placeholder below.
            checked, convertedMetadata, _ = outcome
            if convertedMetadata is None:
                problems.append(oai_id)
                continue
            # ``checked`` is only meaningful when the interactive check
            # actually ran, i.e. in test mode.
            if test and not checked:
                problems.append(oai_id)
            if run:
                # NOTE(review): collection id 273 and the attachment entry
                # are hard-coded placeholders.
                ds.new_item(273, convertedMetadata,
                            [("lorem-ipsum.pdf", "application/pdf",
                              "Dokument")])
        if test:
            click.clear()
            print("problems", problems)
    finally:
        # Close the DSpace session even when a record fails.
        ds.logout()
def convertItem(oai_id, test, skip):
    """Convert the metadata of a single Digitool record.

    Args:
        oai_id: Identifier of the record to convert.
        test: When true, the original metadata, the converted metadata and
            the attachments are printed and the user is asked to confirm
            the conversion.
        skip: When true, a record without usable metadata is reported in
            the return value instead of raising; also selects
            ``skip_missing=True`` when ``DigitoolXML`` is built.

    Returns:
        Always a ``(checked, convertedMetadata, attachements)`` tuple.
        ``checked`` is the user's answer in test mode and ``False``
        otherwise. ``convertedMetadata`` is ``None`` when the record was
        skipped. The ``dc`` conversion is preferred; the ``record``
        conversion is used when there is no ``dc`` metadata.

    Raises:
        Exception: If the record has no usable metadata and ``skip`` is
            false.
    """
    dt = Digitool(digitool_category)
    record = dt.get_item(oai_id)
    if skip:
        dtx = DigitoolXML(xml_dirname, skip_missing=True)
    else:
        dtx = DigitoolXML(xml_dirname)
    c = MetadataConvertor()
    originalMetadata = dt.get_metadata(record)
    if originalMetadata is None:
        if skip:
            # Same tuple shape as every other return, so callers can
            # always unpack three values.
            return (False, None, [])
        raise Exception("No metadata in {}".format(oai_id))

    convertedMetadata = None
    # Original author's note: 3112 (presumably the number of such records).
    if 'dc' in originalMetadata:
        convertedMetadata = c.convertDC(originalMetadata['dc'], oai_id)
    # Original author's note: 358, no overlap (presumably with 'dc').
    if 'record' in originalMetadata:
        convertedRecord = c.convertRecord(originalMetadata['record'],
                                          oai_id)
        if convertedMetadata is None:
            convertedMetadata = convertedRecord

    attachements = list(dtx.get_attachements(str(oai_id) + ".xml"))

    if convertedMetadata is None:
        # Neither 'dc' nor 'record' metadata was present; handle it the
        # same way as a record without any metadata.
        if skip:
            return (False, None, attachements)
        raise Exception("No convertible metadata in {}".format(oai_id))

    if not test:
        return (False, convertedMetadata, attachements)

    click.clear()
    print("converting ", oai_id)
    print("originalMetadata:\n")
    for i in originalMetadata:
        print(i)
    print("convertedMetadata:\n")
    print(convertedMetadata)
    print("attachements:\n")
    print(attachements)
    checked = click.confirm("Is converting OK?", default=True)
    return (checked, convertedMetadata, attachements)
# Example #4
# 0
def test_convert():
    """Smoke test: list the attachments of every downloaded record.

    There are no assertions; the test passes when none of the calls
    raises.
    """
    identifiers = Digitool(digitool_category).download_list()
    digitool_xml = DigitoolXML(xml_dirname)
    _categorizer = Categorize(digitool_xml)
    for identifier in identifiers:
        # Materialise the result so the whole listing is actually produced.
        _listing = list(digitool_xml.get_attachements(identifier))
def categorize(group, skip):
    """Run the selected bug-categorisation routine and print its report.

    Args:
        group: ``'oai'`` or ``'forgot'``; for any other value no routine
            runs and only the categoriser is printed.
        skip: When truthy, ``DigitoolXML`` is built with
            ``skip_missing=True``; for ``'oai'`` the flag is also forwarded
            to ``bugs.oai``.
    """
    # TODO: all the other groups, see the other TODOs
    digitool_xml = (DigitoolXML(xml_dirname, skip_missing=True)
                    if skip else DigitoolXML(xml_dirname))
    categorizer = Categorize(digitool_xml)

    if group in ('oai', 'forgot'):
        digitool = Digitool(digitool_category)
        if group == 'oai':
            bugs.oai(digitool, digitool_xml, categorizer, skip=skip)
        else:
            bugs.forgot_attachements(digitool, digitool_xml, categorizer,
                                     xml_dirname + "/ls_streams.txt")

    categorizer.print()
def descriptions(skip=False):
    """Print the generated attachment description of each Digitool record.

    Args:
        skip: When true, ``DigitoolXML`` is built with
            ``skip_missing=True`` and records without attachments are
            silently skipped instead of raising. The original body read
            ``skip`` without defining it; it is now an optional parameter.

    Raises:
        Exception: If a record has no attachments and ``skip`` is false.
    """
    dt = Digitool(digitool_category)
    dt.download_list()
    if skip:
        dtx = DigitoolXML(xml_dirname, skip_missing=True)
    else:
        dtx = DigitoolXML(xml_dirname)
    c = FilenameConvertor()

    for record in dt.list:
        oai_id = dt.get_oai_id(record)
        attachements = list(dtx.get_attachements(oai_id + ".xml", full=True))
        if not attachements:
            if skip:
                continue
            raise Exception("No attachement in {}.".format(oai_id))
        description = c.generate_description(attachements)
        # Only non-list results are printed.
        # NOTE(review): presumably a list is the regular result and
        # anything else signals a problem -- confirm.
        if isinstance(description, list):
            continue
        print(description)
def convert(dspace_admin_passwd, dspace_admin_username, run, archive,
            catalogue, log):
    """Convert every Digitool record and upload, archive or catalogue it.

    Args:
        dspace_admin_passwd: Password handed to ``Dspace``.
        dspace_admin_username: User name handed to ``Dspace``.
        run: When true, each record is submitted through ``ds.new_item``.
        archive: When true, ``createArchive`` is called for each record.
        catalogue: When true, an ``"<oai_id> <collection>"`` line per record
            is written to ``'output/' + server``.
        log: Key into the module-level ``loggingMap``; ``'error'``
            additionally disables urllib3 warnings.

    Raises:
        Exception: ``'Unknown faculty'`` when no collection was determined
            for a record.
    """
    # TODO: aleph and weird_attachmement should be zero and the others
    # should stay as they are
    logging.getLogger().setLevel(loggingMap[log])
    if log == 'error':
        urllib3.disable_warnings()
    dtx = DigitoolXML(xml_dirname)
    oai_ids = dtx.getList()
    categorizer = Categorize(dtx)

    ds = None
    catalogue_file = None
    try:
        if run:
            ds = Dspace(server,
                        dspace_admin_username,
                        dspace_admin_passwd,
                        xml_dirname=xml_dirname)
        records = aleph.openAleph("dtl_2006.xml")
        if catalogue:
            catalogue_file = open('output/' + server, 'w')

        for oai_id in oai_ids:
            digitoolMetadata = dtx.get_metadata(oai_id)['marc']
            aleph_id = aleph.normalise(digitoolMetadata['001'])
            originalMetadata = records[aleph_id]
            metadataTopic = metadataConvertor.convertMarc(
                categorizer, oai_id, originalMetadata)
            convertedMetadata, collection = metadataConvertor.createDC(
                server, categorizer, oai_id, metadataTopic, originalMetadata)
            attachements = list(dtx.get_attachements(oai_id))
            fc = filenameConvertor.FilenameConvertor(categorizer)
            attachementsDescription = fc.generate_description(
                oai_id, attachements)

            if collection is None:
                raise Exception('Unknown faculty')

            if run:
                ds.new_item(collection, convertedMetadata,
                            attachementsDescription)
            if archive:
                createArchive(oai_id, xml_dirname, convertedMetadata,
                              attachementsDescription)
            if catalogue:
                catalogue_file.write("{} {}\n".format(oai_id, collection))
    finally:
        # Release both resources even when a record fails part-way; the
        # nested finally keeps a failing close() from skipping the logout.
        try:
            if catalogue_file is not None:
                catalogue_file.close()
        finally:
            if ds is not None:
                ds.logout()
# Example #8
# 0
def test_noattachement():
    dtx = DigitoolXML(xml_dirname)
    c = Categorize(dtx)
    oai_ids = Digitool(digitool_category).download_list()
    bugs.no_attachements(oai_ids, dtx, c)
    assert str(c) == '''
示例#9
0
def test_forgot():
    dtx = DigitoolXML(xml_dirname)
    c = Categorize(dtx)
    oai_ids = Digitool(digitool_category).download_list()
    bugs.forgot_attachements(oai_ids, dtx, c, xml_dirname + "/ls_streams.txt")
    assert str(c) == '''
示例#10
0
def test_oai():
    dtx = DigitoolXML(xml_dirname)
    c = Categorize(dtx)
    oai_ids = Digitool(digitool_category).download_list()
    bugs.oai(oai_ids, dtx, c)
    assert str(c) == '''