Exemplo n.º 1
0
def post_store_hook(sip_uuid):
    """
    Run the follow-up work that has to happen once an AIP is stored.

    Steps, in order:

    1. SIP arrangement: every ``SIPArrange`` row that refers to a file of
       this SIP is flagged ``aip_created=True``.  Then, for each transfer
       those rows point to, the set of arranged file UUIDs already flagged
       ``aip_created`` is compared with the set of file UUIDs belonging to
       the transfer; when the two sets are equal a deletion request for the
       transfer is sent to the storage service and the transfer's files are
       removed from Elasticsearch.
    2. ``dspace_handle_to_archivesspace`` is called for the SIP.
    3. The storage service post-store AIP callback is invoked for the SIP.

    :param sip_uuid: UUID of the SIP whose AIP was just stored.
    """
    elasticSearchFunctions.setup_reading_from_client_conf()
    client = elasticSearchFunctions.get_client()

    # SIP ARRANGEMENT

    # Flag the arrangement rows of this SIP's files as being in an AIP.
    # Both querysets are lazy; the UUID list is used as a sub-selection.
    sip_file_uuids = models.File.objects.filter(sip=sip_uuid).values_list('uuid', flat=True)
    sip_arrangements = models.SIPArrange.objects.filter(file_uuid__in=sip_file_uuids)
    sip_arrangements.update(aip_created=True)

    # Look at every transfer that contributed files and see whether all of
    # its files are now stored.
    # TODO Storage service should index AIPs, knows when to update ES
    candidate_transfers = set(sip_arrangements.values_list('transfer_uuid', flat=True))
    for transfer_uuid in candidate_transfers:
        print('Checking if transfer', transfer_uuid, 'is fully stored...')
        stored_files = set(
            models.SIPArrange.objects
            .filter(transfer_uuid=transfer_uuid)
            .filter(aip_created=True)
            .values_list('file_uuid', flat=True)
        )
        transfer_files = set(
            models.File.objects
            .filter(transfer=transfer_uuid)
            .values_list('uuid', flat=True)
        )
        # Some backlog files are still not in an AIP: leave this transfer be.
        if stored_files != transfer_files:
            continue

        print('Transfer', transfer_uuid, 'fully stored, sending delete request to storage service, deleting from transfer backlog')
        # Only a deletion *request* goes to the storage service (nothing is
        # deleted here); the Elasticsearch entries are removed right away.
        storage_service.request_file_deletion(
            uuid=transfer_uuid,
            user_id=0,
            user_email='archivematica system',
            reason_for_deletion='All files in Transfer are now in AIPs.'
        )
        elasticSearchFunctions.remove_transfer_files(client, transfer_uuid)

    # DSPACE HANDLE TO ARCHIVESSPACE
    dspace_handle_to_archivesspace(sip_uuid)

    # POST-STORE CALLBACK
    storage_service.post_store_aip_callback(sip_uuid)
# Select the Django settings module and add the archivematicaCommon directory
# to sys.path so that the elasticSearchFunctions import below can resolve.
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings.common'
sys.path.append("/usr/lib/archivematica/archivematicaCommon")
import elasticSearchFunctions

from elasticsearch import ConnectionError, TransportError

# Ask for confirmation unless "-f" appears among the command-line arguments
# (with no arguments at all the list is empty, so the prompt is shown too).
options = sys.argv[1:]
if '-f' not in options:
    answer = raw_input("Are you sure you want to erase the ElasticSearch indexes? (y/N)\n")
    if answer.lower() != 'y':
        print('Not going to erase the indexes.')
        sys.exit(0)


elasticSearchFunctions.setup_reading_from_client_conf()
client = elasticSearchFunctions.get_client()

# Probe the server first so that an unreachable Elasticsearch is reported
# with a readable message and a non-zero exit status.
try:
    client.info()
except (ConnectionError, TransportError):
    print("Connection error: Elasticsearch may not be running.")
    sys.exit(1)

# Delete the 'transfers' and 'aips' indexes, in that order.
# A 404 is ignored in case an index is missing (e.g. already deleted).
for index_name in ('transfers', 'aips'):
    client.indices.delete(index_name, ignore=404)

print("ElasticSearch indexes deleted.")
Exemplo n.º 3
0
def index_aip():
    """ Write AIP information to ElasticSearch.

    Inputs are taken from the command line:

    * ``sys.argv[1]`` -- SIP UUID (%SIPUUID%)
    * ``sys.argv[2]`` -- SIP name (%SIPName%)
    * ``sys.argv[3]`` -- SIP directory (%SIPDirectory%)
    * ``sys.argv[4]`` -- SIP type (%SIPType%)

    The AIP itself is indexed first, then its files are indexed in the
    ``aips`` index under the ``aipfile`` type.  When the SIP type contains
    ``REIN`` the existing AIP and AIP-file entries for the UUID are deleted
    before the new ones are written.

    :returns: 0 when indexing succeeded or when indexing is disabled in the
        client configuration; 1 when the storage service returned no file
        info for the AIP or when indexing the AIP files reported an error.
    """
    sip_uuid = sys.argv[1]  # %SIPUUID%
    sip_name = sys.argv[2]  # %SIPName%
    sip_path = sys.argv[3]  # %SIPDirectory%
    sip_type = sys.argv[4]  # %SIPType%

    # Check if ElasticSearch is enabled
    client_config_path = '/etc/archivematica/MCPClient/clientConfig.conf'
    config = ConfigParser.SafeConfigParser()
    config.read(client_config_path)
    elastic_search_disabled = False
    try:
        elastic_search_disabled = config.getboolean(
            'MCPClient', "disableElasticsearchIndexing")
    except ConfigParser.NoOptionError:
        # Option absent: indexing stays enabled.
        pass
    if elastic_search_disabled:
        print('Skipping indexing: indexing is currently disabled in',
              client_config_path)
        return 0

    elasticSearchFunctions.setup_reading_from_client_conf(config)
    client = elasticSearchFunctions.get_client()

    print('SIP UUID:', sip_uuid)
    aip_info = storage_service.get_file_info(uuid=sip_uuid)
    print('AIP info:', aip_info)
    # Only the first entry of the result is used below.  Without this guard
    # an empty result would surface as a bare IndexError; report it through
    # the same stderr + exit-code path used for file indexing failures.
    if not aip_info:
        print('Unable to index AIP: no file info returned for UUID',
              sip_uuid, file=sys.stderr)
        return 1
    aip_info = aip_info[0]

    mets_name = 'METS.{}.xml'.format(sip_uuid)
    mets_path = os.path.join(sip_path, mets_name)

    identifiers = get_identifiers(sip_path)

    # If this is an AIC, find the number of AIP stored in it and index that
    aips_in_aic = None
    if sip_type == "AIC":
        try:
            uv = UnitVariable.objects.get(unittype="SIP",
                                          unituuid=sip_uuid,
                                          variable="AIPsinAIC")
            aips_in_aic = uv.variablevalue
        except UnitVariable.DoesNotExist:
            # No count recorded for this AIC; index it without one.
            pass

    print('Indexing AIP info')
    # Delete ES index before creating new one if reingesting
    if 'REIN' in sip_type:
        print('Deleting outdated entry for AIP and AIP files with UUID',
              sip_uuid, 'from archival storage')
        elasticSearchFunctions.delete_aip(client, sip_uuid)
        elasticSearchFunctions.delete_aip_files(client, sip_uuid)

    # Index AIP
    elasticSearchFunctions.index_aip(client,
                                     sip_uuid,
                                     sip_name,
                                     aip_info['current_full_path'],
                                     mets_path,
                                     size=aip_info['size'],
                                     aips_in_aic=aips_in_aic,
                                     identifiers=identifiers)

    # Index AIP files
    print('Indexing AIP files')
    # Even though we treat MODS identifiers as SIP-level, we need to index them
    # here because the archival storage tab actually searches on the
    # aips/aipfile index.
    exit_code = elasticSearchFunctions.index_files(
        client,
        index='aips',
        type_='aipfile',
        uuid=sip_uuid,
        pathToArchive=sip_path,
        identifiers=identifiers,
        sipName=sip_name,
    )
    if exit_code == 1:
        print('Error indexing AIP files', file=sys.stderr)
        return 1

    return 0