示例#1
0
 def all():
     '''Return the Audio objects found in the configured pidspace.

     Runs a repository ``find_objects`` search typed as ``AudioObject``,
     limited to pids in ``settings.FEDORA_PIDSPACE`` and to objects whose
     format contains the audio content model.  The search result is
     returned as-is.
     '''
     query = dict(
         type=AudioObject,
         # only pids inside the configured pidspace
         pid__contains='%s:*' % settings.FEDORA_PIDSPACE,
         # the content model is matched against dc:format
         format__contains=AudioObject.AUDIO_CONTENT_MODEL,
     )
     return Repository().find_objects(**query)
示例#2
0
def largefile_ingest(request):
    '''Large-file ingest.  On GET, displays a form allowing user to
    select a BagIt that has been uploaded to the configured large-file
    ingest staging area for ingest and association with a collection.

    On POST the form is validated.  An invalid form is redisplayed with
    its errors.  A valid form ingests the selected bag as a disk image
    or a video (decided by the name of the bag's parent directory),
    removes the bag from the staging area, compares the stored
    checksums with the ones recorded before ingest, reports the outcome
    through the messages framework (success or error level) and
    redirects to ``/admin``.

    :param request: the current HTTP request
    :returns: a redirect after an ingest attempt, otherwise the
        rendered ``file/largefile_ingest.html`` template
    '''
    context = {}
    template_name = 'file/largefile_ingest.html'

    # on POST, process the form and ingest if valid
    if request.method == 'POST':
        form = LargeFileIngestForm(request.POST)

        if form.is_valid():
            repo = Repository(request=request)

            # Get collection & check for optional comment
            collection = repo.get_object(pid=form.cleaned_data['collection'],
                                         type=CollectionObject)
            # get user comment if any; default to a generic ingest comment
            comment = form.cleaned_data['comment'] or 'initial repository ingest'
            bag = form.cleaned_data['bag']

            # create dict with file info to add success/failure info
            file_info = {'label': os.path.basename(bag)}

            # object classes that can be ingested, keyed by the name of
            # the staging subdirectory the bag was uploaded to
            ingest_classes = {'diskimage': DiskImage, 'video': Video}

            # assuming type of ingest from subdirectory; a path without a
            # parent directory has no type and is rejected below
            path_parts = bag.split('/')
            ingest_type = path_parts[-2] if len(path_parts) > 1 else None

            try:
                obj_class = ingest_classes.get(ingest_type)
                if obj_class is None:
                    # fail with a readable message instead of an unbound
                    # variable error further down
                    raise ValueError(
                        'Unsupported large-file ingest type: %s' % ingest_type)

                obj = obj_class.init_from_bagit(bag, request)

                # set collection on ingest
                obj.collection = collection

                ## NOTE: Due to a bug in Fedora 3.4 with checksums and
                ## and file uri ingest, the content datastream checksum
                ## must be cleared before ingest; manually check it
                ## after ingest to confirm Fedora calculated what we expect.
                ## This work-around can be removed once we upgrade to Fedora 3.6

                # store datastream checksum that would be sent to fedora
                checksum = obj.content.checksum
                obj._content_checksum = checksum
                # clear it out so Fedora can ingest without erroring
                obj.content.checksum = None

                # file URIs also used for supplemental files; needs
                # to be handled the same way as content datastream
                # - look for any supplementN datastreams, store checksum, and remove
                supplemental_checksums = {}
                for i in range(20):
                    try:
                        dsid = 'supplement%d' % i
                        dsobj = getattr(obj, dsid)
                        supplemental_checksums[dsid] = dsobj.checksum
                        dsobj.checksum = None
                    except AttributeError:
                        # stop iterating - we have found last supplemental file
                        break

                # same for access copy checksum on Video files
                if ingest_type == 'video':
                    access_checksum = obj.access_copy.checksum
                    obj.access_copy.checksum = None

                # refuse to ingest when an object of the same type and
                # label is already in the repository
                existing = repo.find_objects(type=obj_class, label=obj.label)
                if any(match.pid for match in existing):
                    raise ValueError('Duplicate content detected.')
                obj.save(comment)

                # remove the ingested bag from large-file staging area
                shutil.rmtree(bag)

                # re-init to allow checking fedora-calculated checksums on
                # supplemental datastreams
                obj = repo.get_object(obj.pid, type=obj_class)

                # if save succeded (no exceptions), set summary info for display
                file_info.update({'type': ingest_type, 'success': True,
                                  'pid': obj.pid, 'url': obj.get_absolute_url(),
                                  'checksum': obj.content.checksum})
                if ingest_type == 'video':
                    file_info['access_checksum'] = obj.access_copy.checksum

                # compare checksum generated by Fedora
                # (required because of file uri bug in fedora 3.4;
                #  this can be removed once we upgrade to fedora 3.6+)
                checksum_errors = []

                if obj.content.checksum != checksum:
                    checksum_errors.append('content')

                for dsid, expected in supplemental_checksums.items():
                    dsobj = obj.getDatastreamObject(dsid)
                    if dsobj.checksum != expected:
                        checksum_errors.append(dsid)

                if ingest_type == 'video' and \
                   obj.access_copy.checksum != access_checksum:
                    checksum_errors.append('access_copy')

                if checksum_errors:
                    several = len(checksum_errors) > 1
                    message = ('Checksum mismatch%s detected on '
                               '%s datastream%s; please contact a repository administrator.')
                    file_info['message'] = message % (
                        'es' if several else '',
                        ', '.join(checksum_errors),
                        's' if several else ''
                    )

            except bagit.BagValidationError as err:
                logger.error(err)
                file_info.update({'success': False, 'message': 'BagIt error: %s' % err})

            # special case: detected as duplicate content
            except DuplicateContent as e:
                # mark as failed and generate message with links to records
                # NOTE: pid url is duplicated logic from web upload view...
                links = []
                for pid in e.pids:
                    # use fedora type-inferring logic with list of content models
                    # pulled from solr results
                    obj = repo.get_object(pid,
                        type=repo.best_subtype_for_object(pid, e.pid_cmodels[pid]))
                    # use appropriate object class to get the object url
                    links.append('<a href="%s">%s</a>' % (
                        obj.get_absolute_url(), pid)
                    )
                msg = mark_safe('%s: %s' % (unicode(e), '; '.join(links)))
                file_info.update({
                    'success': False,
                    'message': msg
                })

            except Exception as err:
                logger.error('Error: %s', err)
                file_info.update({'success': False, 'message': '%s' % err})

            # report success/failure in the same format as web-upload ingest
            # (not rendered here, because this branch ends in a redirect)
            context['ingest_results'] = [file_info]

            # use the message level that matches the outcome so a failed
            # ingest is not presented to the user as a success
            summary = 'Ingest results: %s' % file_info
            if file_info.get('success'):
                messages.success(request, summary)
            else:
                messages.error(request, summary)
            return HttpResponseRedirect("/admin")

        # form is not valid: add to context for redisplay with errors
        context['form'] = form

    # on GET display form to select item(s) for ingest
    # (a valid POST has already returned a redirect above)
    if request.method == 'GET':
        files = large_file_uploads()
        if files:
            context['form'] = LargeFileIngestForm()
        else:
            # indicator that no files are available for ingest
            context['no_files'] = True

    return TemplateResponse(request, template_name, context)
示例#3
0
class Command(BaseCommand):
    '''Migrates old-style Rushdie objects to new-style. This includes adding
    The objects to a SimpleCollection, converting old datastreams to new datastreams
    and associating each object to the main collection'''
    def get_password_option(option, opt, value, parser):
        '''Option callback for ``--password``: prompt with ``getpass``
        and store what was entered on the parser values under the
        option's ``dest``.  ``opt`` and ``value`` are not used.'''
        values, dest = parser.values, option.dest
        setattr(values, dest, getpass())

    # Command-line options added on top of BaseCommand's own option_list.
    # The dest names contain hyphens, so handle() reads the parsed values
    # by dictionary key (e.g. options['no-act']).
    option_list = BaseCommand.option_list + (
        # dry run: handle() skips saving objects and _convert_ds skips
        # purging the old datastreams when this is set
        make_option('--noact',
                    '-n',
                    action='store_true',
                    dest='no-act',
                    default=False,
                    help='Do not do anything'),
        # step selector; when neither step flag is given handle() runs both
        make_option(
            '--simple-collection-step',
            '-S',
            action='store_true',
            dest='simple-collection-step',
            default=False,
            help=
            'Only run the step to collect objects into a SimpleCollection.  \
            If Simple Collection exists it will use  the existing one'),
        # step selector for the datastream conversion
        make_option('--datastreams',
                    '-D',
                    action='store_true',
                    dest='datastreams-step',
                    default=False,
                    help='Only run the step to convert datastreams'),
        # required by handle() whenever the datastreams step runs
        make_option('--master-collection-pid',
                    '-m',
                    action='store',
                    dest='master-collection-pid',
                    default="",
                    help='Pid of the Master Collection'),
        # required by handle() whenever the SimpleCollection step runs
        make_option('--simple-collection',
                    '-s',
                    action='store',
                    dest='simple-collection',
                    default="",
                    help='Label of the SimpleCollection'),
        # credentials passed to Repository() by handle() when provided
        make_option('--username',
                    '-u',
                    dest='username',
                    action='store',
                    help='''Username to connect to fedora'''),
        # the password is prompted for by the get_password_option callback
        make_option(
            '--password',
            dest='password',
            action='callback',
            callback=get_password_option,
            help='''Prompt for password required when username used'''),
    )

    # command help text is taken from __doc__
    help = __doc__

    def _create_series_lookup(self):
        '''Build a dictionary of series information keyed by series title.

        Queries ``Series`` objects with eadid ``rushdie1000``.  Each entry
        holds a ``series_info`` dictionary (``id``, ``short_id``,
        ``base_ark``, ``uri``) and, when the series has subseries, a
        ``subseries_info`` dictionary keyed by subseries title with the
        same four fields.

        :returns: the lookup dictionary
        '''
        base_uri = "https://findingaids.library.emory.edu/documents/"
        lookup = {}

        # exist query: filter on eadid and return it with each result
        matches = Series.objects.also('eadid').filter(eadid='rushdie1000')
        for s in matches:
            entry = {
                'series_info': {
                    'id': s.id,
                    'short_id': s.short_id,
                    'base_ark': s.eadid.url,
                    'uri': "https://findingaids.library.emory.edu/documents/%s/%s" %
                           (s.eadid.value, s.short_id),
                },
            }

            # subseries are only recorded when the series has any
            if s.subseries:
                children = {}
                for sub in s.subseries:
                    children[sub.title] = {
                        'id': sub.id,
                        'short_id': sub.short_id,
                        # subseries share the ark of the parent document
                        'base_ark': s.eadid.url,
                        'uri': "https://findingaids.library.emory.edu/documents/%s/%s/%s" %
                               (s.eadid.value, s.short_id, sub.short_id),
                    }
                entry['subseries_info'] = children

            lookup[s.title] = entry
        return lookup

    def _get_unique_objects(self, args):
        '''Return the list of objects to migrate, without duplicate pids.

        When ``args`` contains pids, only those objects are loaded and
        pids that do not exist are reported and left out.  Otherwise
        every object found for each content model in ``CONTENT_MODELS``
        is collected.  Errors are reported on stdout (when verbosity is
        above ``v_none``) and the affected pid or content model is
        skipped.

        :param args: iterable of pids given on the command line; may be
            empty
        :returns: list of ``ArrangementObject`` instances
        '''
        all_objs = []
        #if pids specified only get those objects
        if args:
            for pid in set(args):
                try:
                    obj = self.repo.get_object(pid=pid, type=ArrangementObject)
                    if obj.exists:
                        all_objs.append(obj)
                    elif self.verbosity > self.v_none:
                        self.stdout.write("pid %s does not exist\n" %
                                          (pid))

                except Exception as e:
                    if self.verbosity > self.v_none:
                        self.stdout.write("Error getting pid %s : %s\n" %
                                          (pid, e))
        else:
            pids = set()
            #lookup all rushdie pids
            for cm in CONTENT_MODELS:
                try:
                    objs = self.repo.get_objects_with_cmodel(cm)
                except Exception as e:
                    if self.verbosity > self.v_none:
                        self.stdout.write(
                            "Error getting pids with ContentModle %s : %s\n" %
                            (cm, e))
                    # nothing was retrieved for this content model; without
                    # this, ``objs`` would be unbound or left over from the
                    # previous content model
                    continue
                for obj in objs:
                    try:
                        pids.add(obj.pid)
                    except Exception as e:
                        # NOTE(review): the message reads obj.pid again, which
                        # would raise outside this handler if the pid access
                        # itself is what failed
                        if self.verbosity > self.v_none:
                            self.stdout.write("Error accessing pid %s : %s\n" %
                                              (obj.pid, e))
            for pid in pids:
                try:
                    all_objs.append(
                        self.repo.get_object(pid=pid, type=ArrangementObject))
                except Exception as e:
                    if self.verbosity > self.v_none:
                        self.stdout.write("Error getting pid  %s : %s\n" %
                                          (pid, e))

        return all_objs

    #Adds pids to sc (SimpleCollection) using a hasMember relation on the SimpleCollection
    def _add_to_simple_collection(self, obj):
        '''Add a ``hasMember`` relation from the SimpleCollection to
        ``obj`` in the collection's RELS-EXT content.  Nothing is saved
        here.

        :param obj: object to add as a member
        '''
        collection = self.simple_collection
        if self.verbosity > self.v_normal:
            notice = "Adding %s to SimpleCollection %s using hasMember relation\n" % (
                obj.pid, collection.label)
            self.stdout.write(notice)
        member_of = (collection.uriref, relsextns.hasMember, obj.uriref)
        collection.rels_ext.content.add(member_of)

#   Converts the datastreams of the object to new-style

    def _convert_ds(self, obj, master, series_lookup, noact):
        '''Convert the old-style datastreams of ``obj`` to new-style and
        add its content-model and collection relations.

        * ``MARBL-MACTECH`` file entries are copied into the ``filetech``
          datastream; the dc title and the object label are set to the
          last path segment of the first file.
        * ``MARBL-ANALYSIS`` supplies the access status code for the
          ``rights`` datastream and the series / subseries stored in
          ``mods``.
        * The old datastreams are purged unless ``noact`` is set.
        * RELS-EXT gains the Arrangement content model, membership in
          ``master`` and an access allowed / restricted content model
          depending on the access status code.

        The object is modified in memory only; it is not saved here.

        :param obj: object to convert
        :param master: master collection object the object is made a
            member of
        :param series_lookup: dictionary keyed by series title, in the
            shape built by ``_create_series_lookup``
        :param noact: when true, do not purge the old datastreams
        :returns: the converted object
        '''

        def text_of(node):
            # text of a looked-up element, or "" when the element is absent
            return node.text if node is not None else ""

        def purge(dsid):
            # remove an old-style datastream, or only report it in no-act mode
            if not noact:
                obj.api.purgeDatastream(obj.pid, dsid)
                if self.verbosity > self.v_normal:
                    self.stdout.write("Removed %s\n" % dsid)
            elif self.verbosity > self.v_normal:
                self.stdout.write("TEST Removed %s\n" % dsid)

        def fill(target, title, info=None):
            # set the title and, when finding-aid info is known, the ids
            target.title = title
            if info is not None:
                target.uri = info["uri"]
                target.base_ark = info["base_ark"]
                target.full_id = info["id"]
                target.short_id = info["short_id"]

        #convert MARBL-MACTECH to FilemasterTech
        mm = obj.getDatastreamObject("MARBL-MACTECH")
        if mm:
            if self.verbosity > self.v_none:
                self.stdout.write("Converting MARBL-MACTECH\n")
            etree = ElementTree.fromstring(mm.content)
            ns = 'info:fedora/emory-control:Rushdie-MacFsData-1.0'

            # one md5 is looked up for the whole document and copied to
            # every file entry
            md5 = text_of(etree.find('.//{%s}md5' % ns))
            # element names double as the FileMasterTech field names
            file_fields = ('computer', 'path', 'rawpath', 'attributes',
                           'created', 'modified', 'type', 'creator')
            file_nodes = etree.findall('.//{%s}file' % ns)

            for i, file_node in enumerate(file_nodes):
                #Make new file section
                obj.filetech.content.file.append(FileMasterTech_Base())
                entry = obj.filetech.content.file[i]
                entry.md5 = md5
                for field in file_fields:
                    node = file_node.find('.//{%s}%s' % (ns, field))
                    setattr(entry, field, text_of(node))

            # title and label come from the first file only, so set them
            # once instead of on every pass through the loop
            if file_nodes:
                name = obj.filetech.content.file[0].path.rpartition("/")[2]
                obj.dc.content.title = name
                obj.label = name

            purge("MARBL-MACTECH")

        #convert MARBL-ANALYSIS to MODS
        ma = obj.getDatastreamObject("MARBL-ANALYSIS")
        if ma:
            if self.verbosity > self.v_none:
                self.stdout.write("Converting MARBL-ANALYSIS\n")
            etree = ElementTree.fromstring(ma.content)
            ns = 'info:fedora/emory-control:Rushdie-MarblAnalysis-1.0'
            # "or ''" covers elements that are present but empty, whose
            # text is None and would break the substring tests below
            series = text_of(etree.find('.//{%s}series' % ns)) or ""
            subseries = text_of(etree.find('.//{%s}subseries' % ns)) or ""
            verdict = text_of(etree.find('.//{%s}verdict' % ns)) or ""

            #Translate verdict to code to store in Rights
            status_code_map = {
                "META": "13",
                "VIRTUAL": "2",
                "EMULATION ONLY": "2",
                "EMULATION": "2",
                "AS IS": "2",
                "RESTRICTED": "4",
                "REDACTED": "12"
            }
            # unknown or missing verdicts are stored as an empty code
            code = status_code_map.get(verdict.upper(), "") if verdict else ""
            obj.rights.content.create_access_status()
            obj.rights.content.access_status.code = code

            #Map series and sub series

            #Special cases for identifying series info
            if series == "Writings":
                series = "Writings by Rushdie"

            if "Correspondence" in series:
                series = "Correspondence"

            if subseries == "Family papers":
                series = "Personal papers"

            if subseries == "Other Writings":
                series = "Writings by Rushdie"

            if subseries == "Non Fiction":
                subseries = "Nonfiction"

            if "Journal" in series or "Journal" in subseries:
                series = "Journals, appointment books, and notebooks"

            # finding-aid info for the series and, when listed, the subseries
            series_info = None
            subseries_info = None
            if series in series_lookup:
                known = series_lookup[series]
                series_info = known["series_info"]
                if "subseries_info" in known and \
                   subseries in known["subseries_info"]:
                    subseries_info = known["subseries_info"][subseries]

            if series in series_lookup or series:
                mods = obj.mods.content
                mods.create_series()
                if subseries_info is not None or subseries:
                    # subseries is stored as the series, with the real
                    # series nested inside it
                    mods.series.create_series()
                    fill(mods.series, subseries, subseries_info)
                    fill(mods.series.series, series, series_info)
                else:
                    fill(mods.series, series, series_info)

            #Remove Datastreams
            purge("MARBL-ANALYSIS")

        #Add Arrangement relation
        obj.rels_ext.content.add(
            (obj.uriref, model.hasModel,
             URIRef("info:fedora/emory-control:Arrangement-1.0")))

        #Add relation to master collection
        obj.rels_ext.content.add(
            (obj.uriref, relsextns.isMemberOf, master.uriref))

        #Add Content Model based on Rights
        access_code = getattr(obj.rights.content.access_status, "code", None)
        if access_code == "2":
            obj.rels_ext.content.add((
                obj.uriref, model.hasModel,
                URIRef("info:fedora/emory-control:ArrangementAccessAllowed-1.0")))
        elif access_code:
            obj.rels_ext.content.add((
                obj.uriref, model.hasModel,
                URIRef("info:fedora/emory-control:ArrangementAccessRestricted-1.0")))

        return obj

    def handle(self, *args, **options):
        '''Run the migration.

        Connects to the repository (with ``username`` / ``password``
        when given), resolves the SimpleCollection and the master
        collection required by the selected steps, then processes every
        object returned by ``_get_unique_objects``.  When neither step
        option is set, both steps run.  With ``no-act`` set nothing is
        saved.

        :param args: optional pids restricting which objects are migrated
        :param options: parsed command-line options
        :raises CommandError: when a required option is missing or a
            required collection cannot be found or loaded
        '''
        #setup verbosity
        #0 = none, 1 = normal, 2 = all
        self.v_none = 0
        self.v_normal = 1
        self.verbosity = int(options.get('verbosity', self.v_normal))

        #Create the repo
        repo_args = {}
        for key in ('username', 'password'):
            if options.get(key) is not None:
                repo_args[key] = options.get(key)
        self.repo = Repository(**repo_args)

        #Check options

        #if no steps are specified then run all steps
        if not options["simple-collection-step"] and \
           not options["datastreams-step"]:
            options["simple-collection-step"] = True
            options["datastreams-step"] = True

        #This step requires simple collection Label
        if options["simple-collection-step"]:
            label = options["simple-collection"]
            if not label:
                raise CommandError(
                    "When running SimpleCollection step SimpleCollection Label is required"
                )
            #lookup SimpleCollection
            try:
                sc_list = list(
                    self.repo.find_objects(label__exact=label,
                                           type=SimpleCollection))
                if len(sc_list) > 1:
                    # something is wrong need to investigate
                    raise CommandError(
                        "More than one SimpleCollection with Label %s exists"
                        % label)
                if sc_list:
                    # use this as the simple collection
                    self.simple_collection = sc_list[0]
                else:
                    # create new simple collection
                    self.simple_collection = self.repo.get_object(
                        type=SimpleCollection)
                    self.simple_collection.label = label
                    self.simple_collection.dc.content.title = label
                    self.simple_collection.mods.content.create_restrictions_on_access()
                    self.simple_collection.mods.content.restrictions_on_access.text = "Accessioned"
            except CommandError:
                # already a user-facing error; pass it through unchanged
                raise
            except Exception as e:
                raise CommandError(
                    "Could not obtain requested SimpleCollection %s : %s"
                    % (label, e))

        if options["datastreams-step"]:
            master_pid = options["master-collection-pid"]
            if not master_pid:
                raise CommandError(
                    "When running Datastream step Master collection pid is required"
                )
            try:
                self.master_collection = self.repo.get_object(
                    pid=master_pid, type=CollectionObject)
                master_exists = self.master_collection.exists
            except Exception as e:
                raise CommandError(
                    "Could not obtain requested Master Collection %s : %s"
                    % (master_pid, e))
            # raised outside the try so the message is not wrapped a
            # second time by the handler above
            if not master_exists:
                raise CommandError("Master Collection %s does not exist"
                                   % master_pid)

        #Create lookup for series
        series_lookup = self._create_series_lookup()

        #All objects to be migrated
        self.all_objs = self._get_unique_objects(args)

        #Process each object
        for obj in self.all_objs:
            if self.verbosity > self.v_none:
                self.stdout.write("Processing %s\n" % (obj.pid))

            if options["simple-collection-step"]:
                self._add_to_simple_collection(obj)

            if options["datastreams-step"]:
                obj = self._convert_ds(obj, self.master_collection,
                                       series_lookup, options["no-act"])
                if self.verbosity > self.v_normal:
                    self.stdout.write("===FilemasterTech===\n")
                    self.stdout.write("%s\n" %
                                      (obj.filetech.content.serialize()))
                    self.stdout.write("=== RELS-EXT===\n")
                    for entry in obj.rels_ext.content:
                        self.stdout.write("%s\n" % list(entry))
                    self.stdout.write("===Rights===\n")
                    self.stdout.write("%s\n" %
                                      (obj.rights.content.serialize()))
                    self.stdout.write("===Mods===\n")
                    self.stdout.write("%s\n" % (obj.mods.content.serialize()))

            #Save object
            obj.owner = "thekeep-project"
            if self.verbosity > self.v_normal:
                self.stdout.write("owner:%s\n" % obj.owner)
            if not options["no-act"]:
                obj.save()
                if self.verbosity > self.v_none:
                    self.stdout.write("Saving %s\n" % obj.pid)
            elif self.verbosity > self.v_none:
                self.stdout.write("TEST Saving Object\n")

        #Print RELS-EXT for SimpleCollection
        if options["simple-collection-step"]:
            if self.verbosity > self.v_normal:
                self.stdout.write("===RELS-EXT===\n")
                for entry in self.simple_collection.rels_ext.content:
                    self.stdout.write("%s\n" % list(entry))
                self.stdout.write("===DC===\n")
                self.stdout.write(
                    "%s\n" % self.simple_collection.dc.content.serialize())
                self.stdout.write("===MODS===\n")
                self.stdout.write(
                    "%s\n" % self.simple_collection.mods.content.serialize())

            #Save SimpleCollection
            if not options["no-act"]:
                self.simple_collection.save()
                if self.verbosity > self.v_none:
                    self.stdout.write("Saved %s(%s)\n" %
                                      (self.simple_collection.label,
                                       self.simple_collection.pid))
            elif self.verbosity > self.v_none:
                self.stdout.write("Test saving %s(%s)\n" %
                                  (self.simple_collection.label,
                                   self.simple_collection.pid))
class Command(BaseCommand):
    '''Migrates old-style Rushdie objects to new-style. This includes adding
    The objects to a SimpleCollection, converting old datastreams to new datastreams
    and associating each object to the main collection'''

    def get_password_option(option, opt, value, parser):
        '''optparse callback: prompt for a password with ``getpass`` and
        store what was typed on the parser values under ``option.dest``.

        ``opt`` and ``value`` are part of the callback signature but are
        not used.
        '''
        values, dest = parser.values, option.dest
        setattr(values, dest, getpass())

    # Command-line options added on top of the BaseCommand defaults.
    # Each ``dest`` below is the key that handle() reads from ``options``
    # (note the dashes, e.g. options["no-act"]).
    option_list = BaseCommand.option_list + (
        # dry run: handle() skips save() and datastream purges when set
        make_option('--noact', '-n',
            action='store_true',
            dest='no-act',
            default=False,
            help='Do not do anything'),
        # step selection: when neither step flag is given handle() runs both
        make_option('--simple-collection-step', '-S',
            action='store_true',
            dest='simple-collection-step',
            default=False,
            help='Only run the step to collect objects into a SimpleCollection.  \
            If Simple Collection exists it will use  the existing one'),
        make_option('--datastreams', '-D',
            action='store_true',
            dest='datastreams-step',
            default=False,
            help='Only run the step to convert datastreams'),
        # required by handle() whenever the datastreams step runs
        make_option('--master-collection-pid', '-m',
            action='store',
            dest='master-collection-pid',
            default="",
            help='Pid of the Master Collection'),
        # required by handle() whenever the simple-collection step runs
        make_option('--simple-collection', '-s',
            action='store',
            dest='simple-collection',
            default="",
            help='Label of the SimpleCollection'),
        # repository credentials; passed to Repository() only when supplied
        make_option('--username', '-u',
            dest='username',
            action='store',
            help='''Username to connect to fedora'''),
        # takes no value on the command line: the callback prompts via getpass
        make_option('--password',
            dest='password',
            action='callback', callback=get_password_option,
            help='''Prompt for password required when username used'''),
    )

    # inside the class body __doc__ is the class docstring, so the command
    # help text is the class description above
    help = __doc__

    def _create_series_lookup(self):
        '''Build a lookup of series (and subseries) details keyed by title.

        Queries ``Series`` objects whose eadid is ``rushdie1000`` and
        returns a dict of the form::

            {series title: {
                'series_info': {'id', 'short_id', 'base_ark', 'uri'},
                'subseries_info': {subseries title: {same four keys}},
            }}

        ``subseries_info`` is only present for series that have subseries.
        '''
        lookup = {}

        # ask for eadid as an additional return field and filter on it
        matches = Series.objects.also('eadid').filter(eadid='rushdie1000')
        for ser in matches:
            entry = {
                'series_info': {
                    'id': ser.id,
                    'short_id': ser.short_id,
                    'base_ark': ser.eadid.url,
                    'uri': "https://findingaids.library.emory.edu/documents/%s/%s" %
                           (ser.eadid.value, ser.short_id),
                },
            }

            if ser.subseries:
                children = {}
                for child in ser.subseries:
                    children[child.title] = {
                        'id': child.id,
                        'short_id': child.short_id,
                        # subseries share the ark of the parent finding aid
                        'base_ark': ser.eadid.url,
                        'uri': "https://findingaids.library.emory.edu/documents/%s/%s/%s" %
                               (ser.eadid.value, ser.short_id, child.short_id),
                    }
                entry['subseries_info'] = children

            lookup[ser.title] = entry

        return lookup

    def _get_unique_objects(self, args):
        '''Return the list of ArrangementObjects to be migrated.

        :param args: iterable of pids given on the command line; may be
            empty.  Duplicates are ignored.
        :returns: list of objects loaded as ``ArrangementObject``.  When
            ``args`` is given, only pids that exist in the repository are
            included.  When it is empty, every object having one of the
            ``CONTENT_MODELS`` is included.

        Errors on individual pids or content models are reported on
        stdout (unless verbosity is 0) and the item is skipped; they do
        not abort the run.
        '''
        all_objs = []

        #if pids specified only get those objects
        if args:
            for pid in set(args):
                try:
                    obj = self.repo.get_object(pid=pid, type=ArrangementObject)
                    if obj.exists:
                        all_objs.append(obj)
                    elif self.verbosity > self.v_none:
                        self.stdout.write("pid %s does not exist\n" % (pid))
                except Exception as e:
                    if self.verbosity > self.v_none:
                        self.stdout.write("Error getting pid %s : %s\n" % (pid, e))
            return all_objs

        #lookup all rushdie pids; a set because one object may carry
        #more than one of the content models
        pids = set()
        for cm in CONTENT_MODELS:
            try:
                objs = self.repo.get_objects_with_cmodel(cm)
            except Exception as e:
                if self.verbosity > self.v_none:
                    self.stdout.write("Error getting pids with ContentModle %s : %s\n" % (cm, e))
                # nothing was returned for this content model: skip it
                # instead of iterating an unbound (or stale) ``objs``
                continue
            for obj in objs:
                try:
                    pids.add(obj.pid)
                except Exception as e:
                    if self.verbosity > self.v_none:
                        self.stdout.write("Error accessing pid %s : %s\n" % (obj.pid, e))

        for pid in pids:
            try:
                all_objs.append(self.repo.get_object(pid=pid, type=ArrangementObject))
            except Exception as e:
                if self.verbosity > self.v_none:
                    self.stdout.write("Error getting pid  %s : %s\n" % (pid, e))

        return all_objs

    #Adds pids to sc (SimpleCollection) using a hasMember relation on the SimpleCollection
    def _add_to_simple_collection(self, obj):
        '''Record ``obj`` as a member of ``self.simple_collection``.

        A ``hasMember`` triple pointing at ``obj`` is added to the
        collection's RELS-EXT content.  Nothing is saved here.
        '''
        if self.verbosity > self.v_normal:
            self.stdout.write("Adding %s to SimpleCollection %s using hasMember relation\n" % (obj.pid, self.simple_collection.label))

        collection = self.simple_collection
        collection.rels_ext.content.add(
            (collection.uriref, relsextns.hasMember, obj.uriref))

#   Converts the datastreams of the object to new-style
    def _convert_ds(self, obj, master, series_lookup, noact):
        '''Convert the old-style datastreams of ``obj`` to new-style ones.

        In order: MARBL-MACTECH is copied into the ``filetech``
        datastream, MARBL-ANALYSIS is translated into the access status
        (``rights``) and series information (``mods``), and finally the
        Arrangement content model, membership in ``master`` and an access
        content model are added to RELS-EXT.

        :param obj: object to convert; modified in place.  This method
            does not call ``obj.save()``, but the old datastreams are
            purged through the API immediately unless ``noact`` is set.
        :param master: master collection; only its ``uriref`` is used
        :param series_lookup: dict keyed by series title with
            ``series_info`` / ``subseries_info`` entries
        :param noact: when true, the old datastreams are not purged
        :returns: ``obj``
        '''
        self._convert_mactech(obj, noact)
        self._convert_analysis(obj, series_lookup, noact)

        #Add  Arrangement Relation
        obj.rels_ext.content.add(
            (obj.uriref, model.hasModel,
             URIRef("info:fedora/emory-control:Arrangement-1.0")))

        #Add  relation to master collection
        obj.rels_ext.content.add(
            (obj.uriref, relsextns.isMemberOf, master.uriref))

        #Add Content Model based on Rights
        # access_status may be missing, hence getattr with a default.
        # Code "2" gets the access-allowed model, any other non-empty code
        # the restricted one, and no code means no access model is added.
        access_code = getattr(obj.rights.content.access_status, "code", None)
        if access_code == "2":
            obj.rels_ext.content.add(
                (obj.uriref, model.hasModel,
                 URIRef("info:fedora/emory-control:ArrangementAccessAllowed-1.0")))
        elif access_code:
            obj.rels_ext.content.add(
                (obj.uriref, model.hasModel,
                 URIRef("info:fedora/emory-control:ArrangementAccessRestricted-1.0")))

        return obj

    @staticmethod
    def _element_text(root, path):
        '''Text of the first element matching ``path`` under ``root``;
        ``""`` when the element is missing or has no text.'''
        node = root.find(path)
        if node is None:
            return ""
        # ElementTree reports the text of an empty element as None
        return node.text or ""

    def _convert_mactech(self, obj, noact):
        '''Copy the file entries of MARBL-MACTECH into ``obj.filetech``,
        derive title/label from the first file path, then purge the old
        datastream (unless ``noact``).'''
        mm = obj.getDatastreamObject("MARBL-MACTECH")
        if not mm:
            return

        if self.verbosity > self.v_none:
            self.stdout.write("Converting MARBL-MACTECH\n")
        root = ElementTree.fromstring(mm.content)
        ns = 'info:fedora/emory-control:Rushdie-MacFsData-1.0'

        # the first md5 found anywhere in the document is applied to
        # every file entry
        md5 = root.find('.//{%s}md5' % ns)
        file_nodes = root.findall('.//{%s}file' % ns)

        # source element names; each maps to the filetech field of the
        # same name
        fields = ("computer", "path", "rawpath", "attributes",
                  "created", "modified", "type", "creator")

        for i, file_node in enumerate(file_nodes):
            #Make new file section
            obj.filetech.content.file.append(FileMasterTech_Base())
            entry = obj.filetech.content.file[i]
            entry.md5 = md5.text if md5 is not None else ""
            for field in fields:
                node = file_node.find('.//{%s}%s' % (ns, field))
                setattr(entry, field, node.text if node is not None else "")

        if file_nodes:
            # title and label are the last path component of the first file
            name = obj.filetech.content.file[0].path.rpartition("/")[2]
            obj.dc.content.title = name
            obj.label = name

        if not noact:
            obj.api.purgeDatastream(obj.pid, "MARBL-MACTECH")
            if self.verbosity > self.v_normal:
                self.stdout.write("Removed MARBL-MACTECH\n")
        else:
            if self.verbosity > self.v_normal:
                self.stdout.write("TEST Removed MARBL-MACTECH\n")

    def _convert_analysis(self, obj, series_lookup, noact):
        '''Translate MARBL-ANALYSIS into an access status code and MODS
        series information, then purge the old datastream (unless
        ``noact``).'''
        ma = obj.getDatastreamObject("MARBL-ANALYSIS")
        if not ma:
            return

        if self.verbosity > self.v_none:
            self.stdout.write("Converting MARBL-ANALYSIS\n")
        root = ElementTree.fromstring(ma.content)
        ns = 'info:fedora/emory-control:Rushdie-MarblAnalysis-1.0'
        series = self._element_text(root, './/{%s}series' % ns)
        subseries = self._element_text(root, './/{%s}subseries' % ns)
        verdict = self._element_text(root, './/{%s}verdict' % ns)

        #Translate verdict to code to store in Rights
        status_code_map = {
            "META": "13",
            "VIRTUAL": "2",
            "EMULATION ONLY": "2",
            "EMULATION": "2",
            "AS IS": "2",
            "RESTRICTED": "4",
            "REDACTED": "12"
        }
        # unknown or missing verdicts store an empty code; dict.get never
        # raises KeyError so no exception handling is needed here
        code = status_code_map.get(verdict.upper(), "")
        obj.rights.content.create_access_status()
        obj.rights.content.access_status.code = code

        #Map series and sub series
        series, subseries = self._normalize_series(series, subseries)
        self._assign_series(obj.mods.content, series, subseries, series_lookup)

        #Remove Datastreams
        if not noact:
            obj.api.purgeDatastream(obj.pid, "MARBL-ANALYSIS")
            if self.verbosity > self.v_normal:
                self.stdout.write("Removed MARBL-ANALYSIS\n")
        else:
            if self.verbosity > self.v_normal:
                self.stdout.write("TEST Removed MARBL-ANALYSIS\n")

    @staticmethod
    def _normalize_series(series, subseries):
        '''Apply the special-case renames that map old series/subseries
        names onto the titles used as lookup keys.  The rules are order
        dependent: later rules see the result of earlier ones.'''
        if series == "Writings":
            series = "Writings by Rushdie"

        if "Correspondence" in series:
            series = "Correspondence"

        if subseries == "Family papers":
            series = "Personal papers"

        if subseries == "Other Writings":
            series = "Writings by Rushdie"

        if subseries == "Non Fiction":
            subseries = "Nonfiction"

        if "Journal" in series or "Journal" in subseries:
            series = "Journals, appointment books, and notebooks"

        return series, subseries

    @staticmethod
    def _apply_series_info(node, info):
        '''Copy uri, ark and ids from a lookup entry onto a MODS series node.'''
        node.uri = info["uri"]
        node.base_ark = info["base_ark"]
        node.full_id = info["id"]
        node.short_id = info["short_id"]

    def _assign_series(self, mods, series, subseries, series_lookup):
        '''Store series/subseries on ``mods``.

        With a subseries the MODS series node holds the subseries and its
        nested series node holds the series; without one the MODS series
        node holds the series itself.  Uri/ark/id details are filled in
        wherever the lookup knows the title; otherwise only titles are set.
        '''
        known = series in series_lookup
        if not known and not series:
            # nothing to record
            return

        sub_lookup = series_lookup[series].get("subseries_info", {}) if known else {}
        has_sub_info = subseries in sub_lookup
        nested = has_sub_info or bool(subseries)

        mods.create_series()
        if nested:
            mods.series.create_series()
            mods.series.title = subseries
            if has_sub_info:
                self._apply_series_info(mods.series, sub_lookup[subseries])
            target = mods.series.series
        else:
            target = mods.series

        target.title = series
        if known:
            self._apply_series_info(target, series_lookup[series]["series_info"])

    def handle(self, *args, **options):
        '''Run the migration.

        :param args: optional pids to migrate; when empty, all objects
            with one of the ``CONTENT_MODELS`` are processed
        :param options: parsed command-line options (see ``option_list``)
        :raises CommandError: when a required option for a selected step
            is missing, or the simple / master collection cannot be
            obtained

        Steps: each object is added to the SimpleCollection
        (simple-collection step) and/or has its datastreams converted
        (datastreams step), and is then saved with its owner set; finally
        the SimpleCollection itself is saved.  With ``no-act`` nothing is
        saved or purged.
        '''
        #setup verbosity
        #0 = none, 1 = normal, 2 = all
        self.v_none = 0
        self.v_normal = 1

        if 'verbosity' in options:
            self.verbosity = int(options['verbosity'])
        else:
            self.verbosity = self.v_normal

        #Create the repo, passing credentials only when they were supplied
        repo_args = {}
        for key in ('username', 'password'):
            if options.get(key) is not None:
                repo_args[key] = options.get(key)
        self.repo = Repository(**repo_args)

        #Check options

        #if no steps are specified then run all steps
        if not options["simple-collection-step"] and not options["datastreams-step"]:
            options["simple-collection-step"] = True
            options["datastreams-step"] = True

        #This step requires simple collection Label
        if options["simple-collection-step"]:
            self.simple_collection = self._find_or_init_simple_collection(
                options["simple-collection"])

        if options["datastreams-step"]:
            self.master_collection = self._load_master_collection(
                options["master-collection-pid"])

        #Create lookup for series; it is only consulted by the datastreams
        #step, so the query is skipped when that step does not run
        series_lookup = self._create_series_lookup() if options["datastreams-step"] else {}

        #All objects to be migrated
        self.all_objs = self._get_unique_objects(args)

        #Process each object
        for obj in self.all_objs:
            if self.verbosity > self.v_none:
                self.stdout.write("Processing %s\n" % (obj.pid))

            if options["simple-collection-step"]:
                self._add_to_simple_collection(obj)

            if options["datastreams-step"]:
                obj = self._convert_ds(obj, self.master_collection, series_lookup, options["no-act"])
                if self.verbosity > self.v_normal:
                    self.stdout.write("===FilemasterTech===\n")
                    self.stdout.write("%s\n" % (obj.filetech.content.serialize()))
                    self.stdout.write("=== RELS-EXT===\n")
                    for entry in obj.rels_ext.content:
                        self.stdout.write("%s\n" % list(entry))
                    self.stdout.write("===Rights===\n")
                    self.stdout.write("%s\n" % (obj.rights.content.serialize()))
                    self.stdout.write("===Mods===\n")
                    self.stdout.write("%s\n" % (obj.mods.content.serialize()))

            #Save object
            obj.owner = "thekeep-project"
            if self.verbosity > self.v_normal:
                self.stdout.write("owner:%s\n" % obj.owner)
            if not options["no-act"]:
                obj.save()
                if self.verbosity > self.v_none:
                    self.stdout.write("Saving %s\n" % obj.pid)
            else:
                if self.verbosity > self.v_none:
                    self.stdout.write("TEST Saving Object\n")

        #Print RELS-EXT for Simple Collection
        if options["simple-collection-step"]:
            if self.verbosity > self.v_normal:
                self.stdout.write("===RELS-EXT===\n")
                for entry in self.simple_collection.rels_ext.content:
                    self.stdout.write("%s\n" % list(entry))
                self.stdout.write("===DC===\n")
                self.stdout.write("%s\n" % self.simple_collection.dc.content.serialize())
                self.stdout.write("===MODS===\n")
                self.stdout.write("%s\n" % self.simple_collection.mods.content.serialize())

            #Save SimpleCollection
            if not options["no-act"]:
                self.simple_collection.save()
                if self.verbosity > self.v_none:
                    self.stdout.write("Saved %s(%s)\n" %
                        (self.simple_collection.label, self.simple_collection.pid))
            else:
                if self.verbosity > self.v_none:
                    self.stdout.write("Test saving %s(%s)\n" %
                        (self.simple_collection.label, self.simple_collection.pid))

    def _find_or_init_simple_collection(self, label):
        '''Return the SimpleCollection labelled ``label``, initializing a
        new (unsaved) one when none exists.

        :raises CommandError: when ``label`` is empty, when more than one
            collection has that label, or when the lookup fails
        '''
        if not label:
            raise CommandError("When running SimpleCollection step SimpleCollection Label is required")

        try:
            sc_list = list(self.repo.find_objects(label__exact=label, type=SimpleCollection))
            if len(sc_list) > 1:  # something is wrong need to investigate
                raise CommandError("More than one SimpleCollection with Label %s exists" % label)
            if sc_list:  # use this as the simple collection
                return sc_list[0]

            # create new simple collection
            simple_collection = self.repo.get_object(type=SimpleCollection)
            simple_collection.label = label
            simple_collection.dc.content.title = label
            simple_collection.mods.content.create_restrictions_on_access()
            simple_collection.mods.content.restrictions_on_access.text = "Accessioned"
            return simple_collection
        except CommandError:
            # our own error: pass it through with its traceback intact
            raise
        except Exception as e:
            raise CommandError("Could not obtain requested SimpleCollection %s : %s" % (label, e))

    def _load_master_collection(self, pid):
        '''Return the existing master CollectionObject with ``pid``.

        :raises CommandError: when ``pid`` is empty, the object cannot be
            retrieved, or it does not exist in the repository
        '''
        if not pid:
            raise CommandError("When running Datastream step Master collection pid is required")

        try:
            master = self.repo.get_object(pid=pid, type=CollectionObject)
            exists = master.exists
        except Exception as e:
            raise CommandError("Could not obtain requested Master Collection %s : %s"
                % (pid, e))

        # raised outside the try so this message is reported as is rather
        # than being re-wrapped by the generic handler above
        if not exists:
            raise CommandError("Master Collection %s does not exist" % pid)
        return master