Example #1
    def execute(self):
        """
            Catalog reduced data files
        """
        config = ConfigParser.RawConfigParser()
        config.read('/etc/autoreduce/icat4.cfg')

        directory = "/" + self._facilityName + "/" + self._instrumentName + "/" +  self._investigationName + "/shared/autoreduce"
        logging.info("reduction output directory: %s" % directory)

        #set dataset name
        dataset = self._factory.create("dataset")

        dsType = self._factory.create("datasetType")
        dsType.id = config.get('DatasetType', 'reduced')
        dataset.type = dsType
        dataset.name = self._runNumber
        dataset.location = directory
        datafiles = []

        pattern = '*' + self._runNumber + '*'
        for dirpath, dirnames, filenames in os.walk(directory):
            listing = glob.glob(os.path.join(dirpath, pattern))
            for filepath in listing:
                filename = os.path.basename(filepath)
                logging.info("Filename: %s" % filename)
                datafile = self._factory.create("datafile")
                datafile.location = filepath
                datafile.name = filename
                extension = os.path.splitext(filename)[1][1:]
                dfFormat = self._factory.create("datafileFormat")
                dfFormat.id = config.get('DatafileFormat', extension)
                datafile.datafileFormat = dfFormat
                modTime = os.path.getmtime(filepath)
                datafile.datafileCreateTime = epochToISO8601(modTime)
                datafile.fileSize = os.path.getsize(filepath)

                datafiles.append(datafile)

        dataset.datafiles = datafiles

        dbDatasets = self._service.search(self._sessionId, "Dataset INCLUDE Datafile [name = '" + str(dataset.name) + "'] <-> Investigation <-> Instrument [name = '" + str(self._instrumentName) + "'] <-> DatasetType [name = 'reduced']")

        if len(dbDatasets) == 0:

            dbInvestigations = self._service.search(self._sessionId, "Investigation INCLUDE Sample [name = '" + str(self._investigationName) + "'] <-> Instrument [name = '" + self._instrumentName + "'] <-> Dataset [name = '" + str(dataset.name) + "']")

            if len(dbInvestigations) == 0:
                logging.error("No investigation entry found: try cataloging the raw data first.")
                return
            else:
                investigation = dbInvestigations[0]
                if len(dbInvestigations) > 1:
                    logging.error("Multiple investigation entries found: using the first.")

            logging.debug("Creating dataset: %s" % datetime.now())
            dataset.investigation = investigation
            dataset.sample = investigation.samples[0]
            self._service.create(self._sessionId, dataset)

        elif len(dbDatasets) == 1:
            logging.debug("reduced dataset %s is already cataloged, updating reduced dataset... " % (dataset.name))

            dbDataset = dbDatasets[0]
            # update "one to many" relationships
            if hasattr(dbDataset, "datafiles"):
                dfs = getattr(dbDataset, "datafiles")
                self._service.deleteMany(self._sessionId, dfs)

            for df in datafiles:
                df.dataset = dbDataset
            self._service.createMany(self._sessionId, datafiles)

        else:
            logging.error("ERROR, there should be only one dataset per run number per type reduced")

    def execute(self):
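        """
            Catalog raw data files
        """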
        config = ConfigParser.RawConfigParser()
        config.read('/etc/autoreduce/icat4.cfg')

        investigation = self._factory.create("investigation")

        #find facility, investigation_type
        facility = self._factory.create("facility")
        facility.id = config.get('Facility', 'sns')
        investigation.facility = facility

        invType = self._factory.create("investigationType")
        invType.id = config.get('InvestigationType', 'experiment')
        investigation.type = invType

        totalCounts = 0
        protonCharge = 0
        duration = 0
        entry_count = 0

        sample = self._factory.create("sample")
        sample.name = 'NONE'

        #open nexus file
        file = nxs.open(self._infilename, 'r')
        for name, nxclass in file.entries():
            if nxclass == "NXentry" and name != "entry-VETO":
                listing = file.getentries()

                #investigation name
                if listing.has_key('experiment_identifier'):
                    file.opendata('experiment_identifier')
                    investigation.name = file.getdata()
                    file.closedata()
                else:
                    investigation.name = "IPTS-0000"

                #investigation title
                if listing.has_key('title'):
                    file.opendata('title')
                    investigation_title = unicode(file.getdata(), errors='replace')
                    investigation.title = investigation_title.encode('ascii', 'replace')
                    file.closedata()
                else:
                    investigation.title = "NONE"

                #create dataset
                dataset = self._factory.create("dataset")

                #investigation run number
                if listing.has_key('collection_identifier'):
                    file.opendata('collection_identifier')
                    investigation.visitId = str(file.getdata())
                    file.closedata()
                else:
                    investigation.visitId = "0"

                #dataset run number
                file.opendata('run_number')
                dataset.name = file.getdata()
                file.closedata()

                #dataset title
                if listing.has_key('title'):
                    file.opendata('title')
                    dataset_description = unicode(file.getdata(), errors='replace')
                    dataset.description = dataset_description.encode('ascii', 'replace')
                    file.closedata()

                dsType = self._factory.create("datasetType")
                dsType.id = config.get('DatasetType', 'experiment_raw')
                dataset.type = dsType

                #set dataset start time
                if listing.has_key('start_time'):
                    file.opendata('start_time')
                    if entry_count == 0:
                        dataset.startDate = file.getdata()
                    elif dataset.startDate > file.getdata():
                        dataset.startDate = file.getdata()
                    file.closedata()

                #set dataset end time
                if listing.has_key('end_time'):
                    file.opendata('end_time')
                    if entry_count == 0:
                        dataset.endDate = file.getdata()
                    elif dataset.endDate < file.getdata():
                        dataset.endDate = file.getdata()
                    file.closedata()

                #dataset proton_charge
                file.opendata('proton_charge')
                protonCharge = protonCharge + file.getdata()
                file.closedata()

                #dataset total_counts
                file.opendata('total_counts')
                totalCounts = totalCounts + file.getdata()
                file.closedata()

                #dataset duration
                file.opendata('duration')
                duration = duration + file.getdata()
                file.closedata()

                #investigation instrument
                file.opengroup('instrument')
                file.opendata('name')
                for attr, value in file.attrs():
                    if attr == 'short_name':
                        instrument = self._factory.create("instrument")
                        instrument.name = value
                        instrument.id = config.get('Instrument', value.lower())
                        investigation.instrument = instrument
                file.closedata()
                file.closegroup()

                if listing.has_key('sample'):
                    file.opengroup('sample')
                    listSample = file.getentries()
                    if listSample.has_key('name'):
                        file.opendata('name')
                        # Text stored in the Nexus file is XML escaped
                        # ICAT unescapes it automatically, so we need to
                        # do it here if we want to determine whether
                        # the sample is already in the DB.
                        sample_name = unicode(file.getdata(), errors='replace')
                        sample.name = saxutils.unescape(sample_name.encode('ascii', 'replace'))
                        file.closedata()
                    else:
                        sample.name = "NONE"
                    sampleParameters = []

                    #set sample nature
                    if listSample.has_key('nature'):
                        file.opendata('nature')
                        nature = file.getdata()
                        file.closedata()
                        if nature:
                            parameterType = self._factory.create("parameterType")
                            parameterType.id = config.get('ParameterType', 'nature')
                            parameterType.applicableToSample = config.getboolean('ParameterType', 'nature_applicable_to_sample')
                            sampleParameter = self._factory.create("sampleParameter")
                            sampleParameter.type = parameterType
                            sampleParameter.stringValue = nature
                            sampleParameters.append(sampleParameter)

                    if listSample.has_key('identifier'):
                        file.opendata('identifier')
                        identifier = file.getdata()
                        file.closedata()

                        if identifier:
                            parameterType = self._factory.create("parameterType")
                            parameterType.id = config.get('ParameterType', 'identifier')
                            parameterType.applicableToSample = config.getboolean('ParameterType', 'identifier_applicable_to_sample')
                            sampleParameter = self._factory.create("sampleParameter")
                            sampleParameter.type = parameterType
                            sampleParameter.stringValue = identifier
                            sampleParameters.append(sampleParameter)

                    if len(sampleParameters):
                        sample.parameters = sampleParameters

                    file.closegroup()

                # count processed NXentry groups so the start/end time
                # comparisons above see entries after the first one
                entry_count += 1

        file.close()

        #set dataset parameters
        parameters = []

        #1) parameter proton_charge
        if protonCharge:
            parameterType = self._factory.create("parameterType")
            parameterType.id = config.get('ParameterType', 'proton_charge')
            parameterType.applicableToDataset = config.getboolean('ParameterType', 'proton_charge_applicable_to_dataset')
            datasetParameter = self._factory.create("datasetParameter")
            datasetParameter.type = parameterType
            datasetParameter.numericValue = protonCharge
            parameters.append(datasetParameter)

        #2) parameter total_counts
        if totalCounts:
            parameterType = self._factory.create("parameterType")
            parameterType.id = config.get('ParameterType', 'total_counts')
            parameterType.applicableToDataset = config.getboolean('ParameterType', 'total_counts_applicable_to_dataset')
            datasetParameter = self._factory.create("datasetParameter")
            datasetParameter.type = parameterType
            datasetParameter.numericValue = totalCounts
            parameters.append(datasetParameter)

        #3) parameter duration
        if duration:
            parameterType = self._factory.create("parameterType")
            parameterType.id = config.get('ParameterType', 'duration')
            parameterType.applicableToDataset = config.getboolean('ParameterType', 'duration_applicable_to_dataset')
            datasetParameter = self._factory.create("datasetParameter")
            datasetParameter.type = parameterType
            datasetParameter.numericValue = duration
            parameters.append(datasetParameter)

        dataset.parameters = parameters
        dataset.location = self._infilename

        datafiles = []

        token=self._infilename.split("/")
        proposalDir = "/" + token[1] + "/" + token[2] + "/" + token[3]
        for dirpath, dirnames, filenames in os.walk(proposalDir):
            if dirpath.find("shared") == -1 and dirpath.find("data") == -1:
                for filename in filenames:
                    #if dataset.name in filename and os.path.islink(filename) != False:
                    if dataset.name in filename:
                        logging.info("Filename: %s" % filename)
                        datafile = self._factory.create("datafile")
                        filepath = os.path.join(dirpath, filename)
                        extension = os.path.splitext(filename)[1][1:]
                        datafile.name = filename
                        datafile.location = filepath
                        dfFormat = self._factory.create("datafileFormat")
                        dfFormat.id = config.get('DatafileFormat', extension)
                        datafile.datafileFormat = dfFormat
                        modTime = os.path.getmtime(filepath)
                        datafile.datafileCreateTime = epochToISO8601(modTime)
                        datafile.fileSize = os.path.getsize(filepath)
                        datafiles.append(datafile)

        dataset.datafiles = datafiles

        dbDatasets = self._service.search(self._sessionId, "Dataset INCLUDE Datafile [name = '" + str(dataset.name) + "'] <-> Investigation <-> Instrument [name = '" + str(instrument.name) + "'] <-> DatasetType [name = 'experiment_raw']")

        if len(dbDatasets) == 0:
            dbInvestigations = self._service.search(self._sessionId, "Investigation INCLUDE Sample [name = '" + investigation.name + "' AND visitId = '" + investigation.visitId + "'] <-> Instrument [name = '" + instrument.name + "']")

            if len(dbInvestigations) == 0:
                logging.debug("New IPTS: creating investigation, sample, run...")
                # create new investigation
                invId = self._service.create(self._sessionId, investigation)
                investigation.id = invId

                # create new sample
                sample.investigation = investigation
                sampleId = self._service.create(self._sessionId, sample)
                sample.id = sampleId
                logging.debug("  invId: %s  sampleId: %s" % (str(invId), str(sampleId)))

            elif len(dbInvestigations) == 1:
                investigation = dbInvestigations[0]
                dbSamples = investigation.samples

                newSample = True
                for dbSample in dbSamples:
                    if dbSample.name == sample.name:
                        sample.id = dbSample.id
                        newSample = False

                if newSample == True:
                    logging.debug("New run: existing investigation, creating sample and run...")
                    sample.investigation = investigation
                    sampleId = self._service.create(self._sessionId, sample)
                    sample.id = sampleId
                else:
                    logging.debug("New run: existing investigation and sample, creating run...")

            else:
                logging.error("ERROR, there should be only one investigation per instrument per investigation name")

            # create new dataset
            dataset.sample = sample
            dataset.investigation = investigation
            datasetId = self._service.create(self._sessionId, dataset)
            logging.debug("  datasetId: %s" % str(datasetId))

        elif len(dbDatasets) == 1:
            logging.debug("Run %s is already cataloged, updating catalog..." % dataset.name)

            dbDataset = dbDatasets[0]
            logging.debug("  datasetId: %s" % str(dbDataset.id))

            # update "one to many" relationships

            if hasattr(dbDataset, "datafiles"):
                dfs = getattr(dbDataset, "datafiles")
                self._service.deleteMany(self._sessionId, dfs)

            for df in datafiles:
                df.dataset = dbDataset
            self._service.createMany(self._sessionId, datafiles)

            # update "many to one" relationships

            ds = self._service.get(self._sessionId, "Dataset INCLUDE 1", dbDataset.id)
            investigation.id = ds.investigation.id

            dbSamples = self._service.search(self._sessionId, "Sample <-> Investigation [id = '" + str(ds.investigation.id) + "']")
            updateSample = True
            for sa in dbSamples:
                if sa.name == sample.name:
                    sample = sa
                    updateSample = False

            if updateSample == True:
                sample.id = ds.sample.id
                sample.investigation = investigation
                self._service.update(self._sessionId, sample)

            dataset.id = ds.id
            dataset.sample = sample
            dataset.investigation = investigation

            self._service.update(self._sessionId, dataset)
            self._service.update(self._sessionId, investigation)

        else:
            logging.error("ERROR, there should be only one dataset per run number per type experiment_raw")
Example #3
    def execute(self):
        """
            Catalog reduced data files
        """
        config = ConfigParser.RawConfigParser()
        config.read('/etc/autoreduce/icat4.cfg')

        directory = "/" + self._facilityName + "/" + self._instrumentName + "/" + self._investigationName + "/shared/autoreduce"
        logging.info("reduction output directory: %s" % directory)

        #set dataset name
        dataset = self._factory.create("dataset")

        dsType = self._factory.create("datasetType")
        dsType.id = config.get('DatasetType', 'reduced')
        dataset.type = dsType
        dataset.name = self._runNumber
        dataset.location = directory
        datafiles = []

        pattern = '*' + self._runNumber + '*'
        for dirpath, dirnames, filenames in os.walk(directory):
            listing = glob.glob(os.path.join(dirpath, pattern))
            for filepath in listing:
                filename = os.path.basename(filepath)
                logging.info("Filename: %s" % filename)
                datafile = self._factory.create("datafile")
                datafile.location = filepath
                datafile.name = filename
                extension = os.path.splitext(filename)[1][1:]
                dfFormat = self._factory.create("datafileFormat")
                dfFormat.id = config.get('DatafileFormat', extension)
                datafile.datafileFormat = dfFormat
                modTime = os.path.getmtime(filepath)
                datafile.datafileCreateTime = epochToISO8601(modTime)
                datafile.fileSize = os.path.getsize(filepath)

                datafiles.append(datafile)

        dataset.datafiles = datafiles

        dbDatasets = self._service.search(
            self._sessionId,
            "Dataset INCLUDE Datafile [name = '" + str(dataset.name) +
            "'] <-> Investigation <-> Instrument [name = '" +
            str(self._instrumentName) +
            "'] <-> DatasetType [name = 'reduced']")

        if len(dbDatasets) == 0:

            dbInvestigations = self._service.search(
                self._sessionId, "Investigation INCLUDE Sample [name = '" +
                str(self._investigationName) + "'] <-> Instrument [name = '" +
                self._instrumentName + "'] <-> Dataset [name = '" +
                str(dataset.name) + "']")

            if len(dbInvestigations) == 0:
                logging.error(
                    "No investigation entry found: try cataloging the raw data first."
                )
                return
            else:
                investigation = dbInvestigations[0]
                if len(dbInvestigations) > 1:
                    logging.error(
                        "Multiple investigation entries found: using the first."
                    )

            logging.debug("Creating dataset: %s" % datetime.now())
            dataset.investigation = investigation
            dataset.sample = investigation.samples[0]
            self._service.create(self._sessionId, dataset)

        elif len(dbDatasets) == 1:
            logging.debug(
                "reduced dataset %s is already cataloged, updating reduced dataset... "
                % (dataset.name))

            dbDataset = dbDatasets[0]
            # update "one to many" relationships
            if hasattr(dbDataset, "datafiles"):
                dfs = getattr(dbDataset, "datafiles")
                self._service.deleteMany(self._sessionId, dfs)

            for df in datafiles:
                df.dataset = dbDataset
            self._service.createMany(self._sessionId, datafiles)

        else:
            logging.error(
                "ERROR, there should be only one dataset per run number per type reduced"
            )

    def execute(self):
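        """
            Catalog raw data files
        """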
        config = ConfigParser.RawConfigParser()
        config.read('/etc/autoreduce/icat4.cfg')

        investigation = self._factory.create("investigation")

        #find facility, investigation_type
        facility = self._factory.create("facility")
        facility.id = config.get('Facility', 'sns')
        investigation.facility = facility

        invType = self._factory.create("investigationType")
        invType.id = config.get('InvestigationType', 'experiment')
        investigation.type = invType

        totalCounts = 0
        protonCharge = 0
        duration = 0
        entry_count = 0

        sample = self._factory.create("sample")
        sample.name = 'NONE'

        #open nexus file
        file = nxs.open(self._infilename, 'r')
        for name, nxclass in file.entries():
            if nxclass == "NXentry" and name != "entry-VETO":
                listing = file.getentries()

                #investigation name
                if listing.has_key('experiment_identifier'):
                    file.opendata('experiment_identifier')
                    investigation.name = file.getdata()
                    file.closedata()
                else:
                    investigation.name = "IPTS-0000"

                #investigation title
                if listing.has_key('title'):
                    file.opendata('title')
                    investigation_title = unicode(file.getdata(),
                                                  errors='replace')
                    investigation.title = investigation_title.encode(
                        'ascii', 'replace')
                    file.closedata()
                else:
                    investigation.title = "NONE"

                #create dataset
                dataset = self._factory.create("dataset")

                #investigation run number
                if listing.has_key('collection_identifier'):
                    file.opendata('collection_identifier')
                    investigation.visitId = str(file.getdata())
                    file.closedata()
                else:
                    investigation.visitId = "0"

                #dataset run number
                file.opendata('run_number')
                dataset.name = file.getdata()
                file.closedata()

                #dataset title
                if listing.has_key('title'):
                    file.opendata('title')
                    dataset_description = unicode(file.getdata(),
                                                  errors='replace')
                    dataset.description = dataset_description.encode(
                        'ascii', 'replace')
                    file.closedata()

                dsType = self._factory.create("datasetType")
                dsType.id = config.get('DatasetType', 'experiment_raw')
                dataset.type = dsType

                #set dataset start time
                if listing.has_key('start_time'):
                    file.opendata('start_time')
                    if entry_count == 0:
                        dataset.startDate = file.getdata()
                    elif dataset.startDate > file.getdata():
                        dataset.startDate = file.getdata()
                    file.closedata()

                #set dataset end time
                if listing.has_key('end_time'):
                    file.opendata('end_time')
                    if entry_count == 0:
                        dataset.endDate = file.getdata()
                    elif dataset.endDate < file.getdata():
                        dataset.endDate = file.getdata()
                    file.closedata()

                #dataset proton_charge
                file.opendata('proton_charge')
                protonCharge = protonCharge + file.getdata()
                file.closedata()

                #dataset total_counts
                file.opendata('total_counts')
                totalCounts = totalCounts + file.getdata()
                file.closedata()

                #dataset duration
                file.opendata('duration')
                duration = duration + file.getdata()
                file.closedata()

                #investigation instrument
                file.opengroup('instrument')
                file.opendata('name')
                for attr, value in file.attrs():
                    if attr == 'short_name':
                        instrument = self._factory.create("instrument")
                        instrument.name = value
                        instrument.id = config.get('Instrument', value.lower())
                        investigation.instrument = instrument
                file.closedata()
                file.closegroup()

                if listing.has_key('sample'):
                    file.opengroup('sample')
                    listSample = file.getentries()
                    if listSample.has_key('name'):
                        file.opendata('name')
                        # Text stored in the Nexus file is XML escaped
                        # ICAT unescapes it automatically, so we need to
                        # do it here if we want to determine whether
                        # the sample is already in the DB.
                        sample_name = unicode(file.getdata(), errors='replace')
                        sample.name = saxutils.unescape(
                            sample_name.encode('ascii', 'replace'))
                        file.closedata()
                    else:
                        sample.name = "NONE"
                    sampleParameters = []

                    #set sample nature
                    if listSample.has_key('nature'):
                        file.opendata('nature')
                        nature = file.getdata()
                        file.closedata()
                        if nature:
                            parameterType = self._factory.create(
                                "parameterType")
                            parameterType.id = config.get(
                                'ParameterType', 'nature')
                            parameterType.applicableToSample = config.getboolean(
                                'ParameterType', 'nature_applicable_to_sample')
                            sampleParameter = self._factory.create(
                                "sampleParameter")
                            sampleParameter.type = parameterType
                            sampleParameter.stringValue = nature
                            sampleParameters.append(sampleParameter)

                    if listSample.has_key('identifier'):
                        file.opendata('identifier')
                        identifier = file.getdata()
                        file.closedata()

                        if identifier:
                            parameterType = self._factory.create(
                                "parameterType")
                            parameterType.id = config.get(
                                'ParameterType', 'identifier')
                            parameterType.applicableToSample = config.getboolean(
                                'ParameterType',
                                'identifier_applicable_to_sample')
                            sampleParameter = self._factory.create(
                                "sampleParameter")
                            sampleParameter.type = parameterType
                            sampleParameter.stringValue = identifier
                            sampleParameters.append(sampleParameter)

                    if len(sampleParameters):
                        sample.parameters = sampleParameters

                    file.closegroup()

                # count processed NXentry groups so the start/end time
                # comparisons above see entries after the first one
                entry_count += 1

        file.close()

        #set dataset parameters
        parameters = []

        #1) parameter proton_charge
        if protonCharge:
            parameterType = self._factory.create("parameterType")
            parameterType.id = config.get('ParameterType', 'proton_charge')
            parameterType.applicableToDataset = config.getboolean(
                'ParameterType', 'proton_charge_applicable_to_dataset')
            datasetParameter = self._factory.create("datasetParameter")
            datasetParameter.type = parameterType
            datasetParameter.numericValue = protonCharge
            parameters.append(datasetParameter)

        #2) parameter total_counts
        if totalCounts:
            parameterType = self._factory.create("parameterType")
            parameterType.id = config.get('ParameterType', 'total_counts')
            parameterType.applicableToDataset = config.getboolean(
                'ParameterType', 'total_counts_applicable_to_dataset')
            datasetParameter = self._factory.create("datasetParameter")
            datasetParameter.type = parameterType
            datasetParameter.numericValue = totalCounts
            parameters.append(datasetParameter)

        #3) parameter duration
        if duration:
            parameterType = self._factory.create("parameterType")
            parameterType.id = config.get('ParameterType', 'duration')
            parameterType.applicableToDataset = config.getboolean(
                'ParameterType', 'duration_applicable_to_dataset')
            datasetParameter = self._factory.create("datasetParameter")
            datasetParameter.type = parameterType
            datasetParameter.numericValue = duration
            parameters.append(datasetParameter)

        dataset.parameters = parameters
        dataset.location = self._infilename

        datafiles = []

        token = self._infilename.split("/")
        proposalDir = "/" + token[1] + "/" + token[2] + "/" + token[3]
        for dirpath, dirnames, filenames in os.walk(proposalDir):
            if dirpath.find("shared") == -1 and dirpath.find("data") == -1:
                for filename in filenames:
                    #if dataset.name in filename and os.path.islink(filename) != False:
                    if dataset.name in filename:
                        logging.info("Filename: %s" % filename)
                        datafile = self._factory.create("datafile")
                        filepath = os.path.join(dirpath, filename)
                        extension = os.path.splitext(filename)[1][1:]
                        datafile.name = filename
                        datafile.location = filepath
                        dfFormat = self._factory.create("datafileFormat")
                        dfFormat.id = config.get('DatafileFormat', extension)
                        datafile.datafileFormat = dfFormat
                        modTime = os.path.getmtime(filepath)
                        datafile.datafileCreateTime = epochToISO8601(modTime)
                        datafile.fileSize = os.path.getsize(filepath)
                        datafiles.append(datafile)

        dataset.datafiles = datafiles

        dbDatasets = self._service.search(
            self._sessionId,
            "Dataset INCLUDE Datafile [name = '" + str(dataset.name) +
            "'] <-> Investigation <-> Instrument [name = '" +
            str(instrument.name) +
            "'] <-> DatasetType [name = 'experiment_raw']")

        if len(dbDatasets) == 0:
            dbInvestigations = self._service.search(
                self._sessionId,
                "Investigation INCLUDE Sample [name = '" + investigation.name +
                "' AND visitId = '" + investigation.visitId +
                "'] <-> Instrument [name = '" + instrument.name + "']")

            if len(dbInvestigations) == 0:
                logging.debug(
                    "New IPTS: creating investigation, sample, run...")
                # create new investigation
                invId = self._service.create(self._sessionId, investigation)
                investigation.id = invId

                # create new sample
                sample.investigation = investigation
                sampleId = self._service.create(self._sessionId, sample)
                sample.id = sampleId
                logging.debug("  invId: %s  sampleId: %s" %
                              (str(invId), str(sampleId)))

            elif len(dbInvestigations) == 1:
                investigation = dbInvestigations[0]
                dbSamples = investigation.samples

                newSample = True
                for dbSample in dbSamples:
                    if dbSample.name == sample.name:
                        sample.id = dbSample.id
                        newSample = False

                if newSample == True:
                    logging.debug(
                        "New run: existing investigation, creating sample and run..."
                    )
                    sample.investigation = investigation
                    sampleId = self._service.create(self._sessionId, sample)
                    sample.id = sampleId
                else:
                    logging.debug(
                        "New run: existing investigation and sample, creating run..."
                    )

            else:
                logging.error(
                    "ERROR, there should be only one investigation per instrument per investigation name"
                )

            # create new dataset
            dataset.sample = sample
            dataset.investigation = investigation
            datasetId = self._service.create(self._sessionId, dataset)
            logging.debug("  datasetId: %s" % str(datasetId))

        elif len(dbDatasets) == 1:
            logging.debug("Run %s is already cataloged, updating catalog..." %
                          dataset.name)

            dbDataset = dbDatasets[0]
            logging.debug("  datasetId: %s" % str(dbDataset.id))

            # update "one to many" relationships

            if hasattr(dbDataset, "datafiles"):
                dfs = getattr(dbDataset, "datafiles")
                self._service.deleteMany(self._sessionId, dfs)

            for df in datafiles:
                df.dataset = dbDataset
            self._service.createMany(self._sessionId, datafiles)

            # update "many to one" relationships

            ds = self._service.get(self._sessionId, "Dataset INCLUDE 1",
                                   dbDataset.id)
            investigation.id = ds.investigation.id

            dbSamples = self._service.search(
                self._sessionId, "Sample <-> Investigation [id = '" +
                str(ds.investigation.id) + "']")
            updateSample = True
            for sa in dbSamples:
                if sa.name == sample.name:
                    sample = sa
                    updateSample = False

            if updateSample == True:
                sample.id = ds.sample.id
                sample.investigation = investigation
                self._service.update(self._sessionId, sample)

            dataset.id = ds.id
            dataset.sample = sample
            dataset.investigation = investigation

            self._service.update(self._sessionId, dataset)
            self._service.update(self._sessionId, investigation)

        else:
            logging.error(
                "ERROR, there should be only one dataset per run number per type experiment_raw"
            )
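Neither listing shows its enclosing module. The code assumes the usual module-level imports (ConfigParser, glob, logging, os, xml.sax.saxutils, from datetime import datetime, and the nxs NeXus bindings) plus an ICAT 4 SOAP client, typically built with suds, that provides self._service (search/create/createMany/deleteMany/update/get), self._factory, and a self._sessionId from a prior login. The epochToISO8601 helper is not shown either; a minimal sketch of a plausible implementation, assuming file modification times are recorded in UTC:

    from datetime import datetime

    def epochToISO8601(epoch_seconds):
        # os.path.getmtime returns seconds since the Unix epoch;
        # ICAT's datafileCreateTime field expects an ISO 8601 timestamp.
        return datetime.utcfromtimestamp(epoch_seconds).isoformat() + 'Z'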