def _getAllTubes(self):
        """
        Get all tubes in the experiment.

        Searches for the samples of type <experimentPrefix>_TUBE that belong
        to the experiment with perm id self._experiment.permId. If a specimen
        is set (self._specimen is not the empty string), only the tubes whose
        <experimentPrefix>_SPECIMEN property equals it are returned.

        If the search finds no tubes, an error message is stored in
        self._message and logged, and the empty search result is returned.
        """

        # All tubes belong to a virtual tubeset - so the set of tubes in the
        # experiment is exactly the same as the set of tubes in the virtual
        # tubeset; searching by sample type within the experiment is enough.
        tubeType = self._experimentPrefix + "_TUBE"
        searchCriteria = SearchCriteria()
        searchCriteria.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE, tubeType))
        expCriteria = SearchCriteria()
        expCriteria.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, self._experiment.permId))
        searchCriteria.addSubCriteria(SearchSubCriteria.createExperimentCriteria(expCriteria))
        tubes = searchService.searchForSamples(searchCriteria)

        if len(tubes) == 0:
            # The separating space before "does" was missing, which glued the
            # experiment code to the rest of the sentence.
            self._message = "The experiment with code " + \
                            self._experimentCode + " does not contain tubes."
            self._logger.error(self._message)
            return tubes

        # Keep only the tubes of the requested specimen (if one is set)
        if self._specimen != "":
            specimenProperty = self._experimentPrefix + "_SPECIMEN"
            tubes = [tube for tube in tubes
                     if tube.getPropertyValue(specimenProperty) == self._specimen]

        # Return the (filtered) tubes
        return tubes
    def _getDataSetsForExperiment(self):
        """
        Look up the MICROSCOPY_IMG_CONTAINER datasets of the experiment.

        The search is always restricted to the experiment with perm id
        self._experiment.permId. When self._sample is set, it is additionally
        restricted to the sample with perm id self._sample.permId.

        If nothing is found, an error message is stored in self._message and
        logged, and [] is returned.
        """

        # Restrict the search to the current experiment
        experimentFilter = SearchCriteria()
        experimentFilter.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, self._experiment.permId))

        criteria = SearchCriteria()
        criteria.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE, "MICROSCOPY_IMG_CONTAINER"))
        criteria.addSubCriteria(SearchSubCriteria.createExperimentCriteria(experimentFilter))

        # Optionally narrow the search down to the selected sample
        if self._sample is not None:
            self._logger.info("Filter by sample " + self._sampleId)
            sampleFilter = SearchCriteria()
            sampleFilter.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, self._sample.permId))
            criteria.addSubCriteria(SearchSubCriteria.createSampleCriteria(sampleFilter))

        found = searchService.searchForDataSets(criteria)
        if len(found) != 0:
            return found

        # Nothing found: report which experiment (and sample) was queried
        self._message = "Could not retrieve datasets for experiment with id " + self._experimentId
        if self._sampleId != "":
            self._message += " and sample with id " + self._sampleId
        self._logger.error(self._message)
        return []
Пример #3
0
def createNewBarcode(project, tr):
    """Return a barcode of the project that no existing sample uses as code.

    The first candidate index is the number of project samples whose code
    does not contain 'ENTITY' plus the number of entries in the module-level
    newTestSamples. The index is then advanced by one until a search by code
    finds no sample for the barcode produced by getNextFreeBarcode.
    """
    search_service = tr.getSearchService()

    # All samples attached to experiments of the project
    projectFilter = SearchCriteria()
    projectFilter.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sampleQuery = SearchCriteria()
    sampleQuery.addSubCriteria(SearchSubCriteria.createExperimentCriteria(projectFilter))
    projectSamples = search_service.searchForSamples(sampleQuery)

    nonEntitySamples = [s for s in projectSamples if 'ENTITY' not in s.getCode()]

    offset = 0
    while True:
        candidate = getNextFreeBarcode(project, len(nonEntitySamples) + len(newTestSamples) + offset)

        # A candidate is free when no sample with that code is found
        codeQuery = SearchCriteria()
        codeQuery.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, candidate))
        if len(search_service.searchForSamples(codeQuery)) == 0:
            return candidate

        offset += 1
def _getExperimentSample(collectionPermId, expSamplePermId):
    """Return the sample with the given perm id inside the given collection.

    The sample is searched by perm id, restricted to the experiment whose
    perm id is collectionPermId. Returns None unless the search yields
    exactly one sample.
    """

    # Sub-criteria: the experiment (collection) the sample must belong to
    collectionFilter = SearchCriteria()
    collectionFilter.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, collectionPermId))

    # Main criteria: the sample itself
    sampleQuery = SearchCriteria()
    sampleQuery.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, expSamplePermId))
    sampleQuery.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(collectionFilter))

    matches = searchService.searchForSamples(sampleQuery)

    # Only an unambiguous hit counts
    if len(matches) == 1:
        return matches[0]
    return None
    def _getAllPlates(self):
        """
        Retrieve every plate of the experiment.

        Plates are the samples of type <experimentPrefix>_PLATE that belong
        to the experiment with perm id self._experiment.permId. If the search
        result is empty, a message is stored in self._message; the search
        result is returned in either case.
        """

        # Restrict the search to the current experiment
        experimentFilter = SearchCriteria()
        experimentFilter.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, self._experiment.permId))

        plateType = self._experimentPrefix + "_PLATE"
        criteria = SearchCriteria()
        criteria.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE, plateType))
        criteria.addSubCriteria(SearchSubCriteria.createExperimentCriteria(experimentFilter))

        found = searchService.searchForSamples(criteria)

        # Remember why the result is empty; the caller still gets the result
        if len(found) == 0:
            self._message = "Could not retrieve plates for experiment with code " + self._experimentCode + "."

        return found
    def _getAllPlates(self):
        """
        Retrieve every plate of the experiment.

        Plates are the samples of type <experimentPrefix>_PLATE that belong
        to the experiment with perm id self._experiment.permId. If the search
        result is empty, a message is stored in self._message and logged at
        info level; the search result is returned in either case.
        """

        # Restrict the search to the current experiment
        experimentFilter = SearchCriteria()
        experimentFilter.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, self._experiment.permId))

        plateType = self._experimentPrefix + "_PLATE"
        criteria = SearchCriteria()
        criteria.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE, plateType))
        criteria.addSubCriteria(SearchSubCriteria.createExperimentCriteria(experimentFilter))

        found = searchService.searchForSamples(criteria)

        # An experiment without plates is reported, not treated as an error
        if len(found) == 0:
            self._message = "The experiment with code " + self._experimentCode + " does not contain plates."
            self._logger.info(self._message)

        return found
Пример #7
0
def _getExperimentSample(collectionPermId, expSamplePermId):
    """Find the experiment sample by perm id within a collection.

    The search matches the sample perm id expSamplePermId and requires the
    sample to belong to the experiment with perm id collectionPermId.
    Returns the sample if exactly one is found, None otherwise.
    """

    # Criteria for the owning experiment (collection)
    ownerCriteria = SearchCriteria()
    ownerMatch = MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                                  collectionPermId)
    ownerCriteria.addMatchClause(ownerMatch)

    # Criteria for the sample, with the experiment attached as sub-criteria
    criteria = SearchCriteria()
    sampleMatch = MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                                   expSamplePermId)
    criteria.addMatchClause(sampleMatch)
    criteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(ownerCriteria))

    hits = searchService.searchForSamples(criteria)

    # Zero or several hits are both treated as "not found"
    return hits[0] if len(hits) == 1 else None
    def _getAccessoryDataSetsForExperiment(self):
        """
        Look up the MICROSCOPY_ACCESSORY_FILE datasets of the experiment.

        The search is always restricted to the experiment with perm id
        self._experiment.permId. When self._sample is set, it is additionally
        restricted to the sample with perm id self._sample.permId.

        When datasets are found, a summary is stored in self._message and
        logged at info level. The search result is returned as is, whether
        or not it is empty.
        """

        # Restrict the search to the current experiment
        experimentFilter = SearchCriteria()
        experimentFilter.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, self._experiment.permId))

        criteria = SearchCriteria()
        criteria.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE, "MICROSCOPY_ACCESSORY_FILE"))
        criteria.addSubCriteria(SearchSubCriteria.createExperimentCriteria(experimentFilter))

        # Optionally narrow the search down to the selected sample
        if self._sample is not None:
            self._logger.info("Filter by sample " + self._sampleId)
            sampleFilter = SearchCriteria()
            sampleFilter.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID, self._sample.permId))
            criteria.addSubCriteria(SearchSubCriteria.createSampleCriteria(sampleFilter))

        found = searchService.searchForDataSets(criteria)

        # Report how many accessory datasets were found (nothing is logged
        # for an empty result)
        count = len(found)
        if count != 0:
            self._message = "Found " + str(count) + " accessory datasets for experiment with id " + self._experimentId
            if self._sampleId != "":
                self._message += " and sample with id " + self._sampleId
            self._logger.info(self._message)

        return found
Пример #9
0
def aggregate(parameters, tableBuilder):
    """Fill tableBuilder with one row per project and its dataset count.

    parameters.get("codes") supplies the project codes. They are combined
    into a single space-separated string and used as the PROJECT attribute
    of an experiment sub-criteria when searching for datasets. For every
    dataset found, the project code is taken from the third "/"-separated
    field of its experiment identifier and counted in the module-level
    projectMap. One row (PROJECT, DATASETS) is then added per projectMap key.

    projectMap is not reset here, so counts accumulate onto whatever it
    already contains.
    """
    codes = parameters.get("codes")

    tableBuilder.addHeader(PROJECT)
    tableBuilder.addHeader(DATASETS)

    # Each code is followed by one space (the last one included), which is
    # exactly the query string the incremental concatenation used to build.
    allCodes = "".join([code + " " for code in codes])

    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.PROJECT, allCodes))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    found = searchService.searchForDataSets(sc)

    for ds in found:
        project = ds.getExperiment().getExperimentIdentifier().split("/")[2]
        try:
            projectMap[project] = projectMap[project] + 1
        except KeyError:
            # First dataset seen for this project. Only the missing-key case
            # is handled; any other error is no longer silently swallowed.
            projectMap[project] = 1

    for key in projectMap:
        row = tableBuilder.addRow()
        row.setCell(PROJECT, key)
        row.setCell(DATASETS, projectMap[key])
Пример #10
0
def createNewBarcode(project, tr):
    """Return a barcode of the project for which no sample exists yet.

    The first candidate index is the number of project samples whose code
    does not contain 'ENTITY' plus the number of entries in the module-level
    newTestSamples. The index is advanced by one until
    tr.getSampleForUpdate finds no sample for the candidate identifier.
    The space is taken from the first sample found for the project.
    """
    search_service = tr.getSearchService()

    # All samples attached to experiments of the project
    projectFilter = SearchCriteria()
    projectFilter.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sampleQuery = SearchCriteria()
    sampleQuery.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(projectFilter))
    projectSamples = search_service.searchForSamples(sampleQuery)
    space = projectSamples[0].getSpace()

    nonEntityCount = len(
        [s for s in projectSamples if 'ENTITY' not in s.getCode()])

    offset = 0
    while True:
        candidate = getNextFreeBarcode(
            project, nonEntityCount + len(newTestSamples) + offset)

        # Try to fetch the sample instead of searching for it: safer if it
        # is new and not indexed yet.
        if not tr.getSampleForUpdate("/" + space + "/" + candidate):
            return candidate

        offset += 1
def process(transaction):
    """Register the incoming files as a Q_VACCINE_CONSTRUCT_DATA dataset.

    The incoming name is matched against the module-level patterns `barcode`
    and `sPattern`. The sample whose code matches the sPattern hit becomes
    the parent of a new Q_VACCINE_CONSTRUCT sample with identifier
    /<space>/VAC<n><code>, using the first n >= 1 that is neither among the
    project's known sample identifiers nor fetchable through the transaction.
    A new Q_NGS_EPITOPE_SELECTION experiment <project>E<m> is created with
    the first unused m, starting at the number of existing experiments + 1.

    On disk, source_dropbox.txt and every '*origlabfilename' file are
    deleted. The remaining plain files are renamed (barcode prefix removed)
    into a new sub-folder, which is then moved into the dataset.
    """
    context = transaction.getRegistrationContext().getPersistentMap()

    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()

    # The retry counter is read but not used any further in this function
    key = context.get("RETRY_COUNT")
    if key == None:
        key = 1

    # Extract barcode and workflow sample code from the incoming name
    name = transaction.getIncoming().getName()
    foundBarcode = barcode.findall(name)[0]
    wfSample = sPattern.findall(name)[0]
    project = foundBarcode[:5]

    ss = transaction.getSearchService()

    # Locate the workflow sample; it becomes the parent of the new sample
    codeQuery = SearchCriteria()
    codeQuery.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, wfSample))
    samplehit = ss.searchForSamples(codeQuery)[0]
    space = samplehit.getSpace()
    parentIdentifier = samplehit.getSampleIdentifier()
    transaction.getSampleForUpdate(parentIdentifier)

    # Collect the identifiers of all samples already known for the project
    projectFilter = SearchCriteria()
    projectFilter.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    projectQuery = SearchCriteria()
    projectQuery.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(projectFilter))
    existingSampleIDs = [
        samp.getSampleIdentifier() for samp in ss.searchForSamples(projectQuery)
    ]

    # Search in known ids, but also try to fetch the sample in case it
    # wasn't indexed yet
    samplePrefix = '/' + space + '/' + 'VAC'
    newNumber = 1
    newSampleID = samplePrefix + str(newNumber) + wfSample
    while (newSampleID in existingSampleIDs
           or transaction.getSampleForUpdate(newSampleID)):
        newNumber += 1
        newSampleID = samplePrefix + str(newNumber) + wfSample

    newSample = transaction.createNewSample(newSampleID, "Q_VACCINE_CONSTRUCT")
    newSample.setParentSampleIdentifiers([parentIdentifier])

    # Pick the first experiment identifier that is not taken yet
    projectPath = "/" + space + "/" + project
    existingExperiments = ss.listExperiments(projectPath)
    existingExperimentIDs = [
        eexp.getExperimentIdentifier() for eexp in existingExperiments
    ]
    experimentPrefix = projectPath + '/' + project + 'E'
    numberOfExperiments = len(existingExperiments) + 1
    newExpID = experimentPrefix + str(numberOfExperiments)
    while newExpID in existingExperimentIDs:
        numberOfExperiments += 1
        newExpID = experimentPrefix + str(numberOfExperiments)

    experiment = transaction.createNewExperiment(newExpID,
                                                 "Q_NGS_EPITOPE_SELECTION")
    experiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
    newSample.setExperiment(experiment)

    # Register files
    dataSetRes = transaction.createNewDataSet('Q_VACCINE_CONSTRUCT_DATA')
    dataSetRes.setMeasuredData(False)
    dataSetRes.setSample(newSample)

    os.remove(
        os.path.realpath(os.path.join(incomingPath, 'source_dropbox.txt')))

    # The result folder is created before listing, so it shows up in the
    # listing below and is skipped there as a directory
    barcodePrefix = foundBarcode + '__'
    resultsname = name.replace(barcodePrefix, '').replace('.txt', '')
    new_folder = os.path.realpath(os.path.join(incomingPath, resultsname))
    os.mkdir(new_folder)

    for f in os.listdir(incomingPath):
        entryPath = os.path.join(incomingPath, f)
        if f.endswith('origlabfilename'):
            os.remove(os.path.realpath(entryPath))
            continue
        if os.path.isdir(entryPath):
            continue
        os.rename(os.path.realpath(entryPath),
                  os.path.join(new_folder, f.replace(barcodePrefix, '')))

    transaction.moveFile(new_folder, dataSetRes)
Пример #12
0
def find_and_register_vcf(transaction, jsonContent):
    """Create the variant-calling experiment and sample; return the sample.

    Every key of jsonContent other than "type" and "files" is read as an
    entry providing "id_genetics", "id_qbic" and "tumor". The project code
    is the first five characters of the first QBiC barcode.

    For each (barcode, genetic id) pair, the project samples are scanned for
    a sample that has /<space>/<barcode> as parent. Among that sample's
    children, those of type Q_NGS_SINGLE_SAMPLE_RUN whose Q_SECONDARY_NAME is
    contained in the genetic id become parents of the new sample.

    A new Q_NGS_VARIANT_CALLING experiment and sample are created. The
    sample's Q_ADDITIONAL_INFO and Q_SECONDARY_NAME are built from the first
    two entries, so at least two entries are required.

    Raises KeyError if jsonContent has no "type" key, and IndexError if
    fewer than two sample entries are present.
    """
    qbicBarcodes = []
    geneticIDS = []
    sampleSource = []
    for key in jsonContent.keys():
        if key in ("type", "files"):
            continue
        entry = jsonContent[key]
        geneticIDS.append(entry["id_genetics"])
        qbicBarcodes.append(entry["id_qbic"])
        sampleSource.append(entry["tumor"])

    # The value is not used, but the lookup is kept so that metadata without
    # a "type" key is still rejected with a KeyError.
    jsonContent["type"]

    project = qbicBarcodes[0][:5]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))

    foundSamples = search_service.searchForSamples(sc)

    parentIdentifiers = []
    testParentIdentifiers = []

    for barcode, geneticID in zip(qbicBarcodes, geneticIDS):
        for samp in foundSamples:
            # NOTE(review): `space` is only bound inside this loop, so an
            # empty search result leads to a NameError further down.
            space = samp.getSpace()
            qbicBarcodeID = '/' + space + '/' + barcode
            print(qbicBarcodeID)
            print(geneticID)
            if qbicBarcodeID not in samp.getParentSampleIdentifiers():
                continue
            testParentID = samp.getSampleIdentifier()
            for s in foundSamples:
                sampleType = s.getSampleType()
                secondaryName = s.getPropertyValue("Q_SECONDARY_NAME")
                print(sampleType)
                print(testParentID)
                print(s.getParentSampleIdentifiers())
                print(secondaryName)
                print(geneticID)
                # A sample without secondary name can never match; testing
                # `None in geneticID` would raise a TypeError instead.
                if (testParentID in s.getParentSampleIdentifiers()
                        and sampleType == "Q_NGS_SINGLE_SAMPLE_RUN"
                        and secondaryName is not None
                        and secondaryName in geneticID):
                    parentIdentifiers.append(s.getSampleIdentifier())
                    testParentIdentifiers.append(testParentID)

    numberOfExperiments = len(
        search_service.listExperiments("/" + space + "/" + project)) + 1
    newVCExp = transaction.createNewExperiment(
        '/' + space + '/' + project + '/' + project + 'E' +
        str(numberOfExperiments), "Q_NGS_VARIANT_CALLING")
    newVCExp.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')

    # The sample code is "VC" + the codes of the matched test samples + the
    # last "_"-separated field of every genetic id
    identString = ''.join([genID.split('_')[-1] for genID in geneticIDS])
    identString2 = ''.join(
        [tpi.split('/')[-1] for tpi in testParentIdentifiers])

    newVCSample = transaction.createNewSample(
        '/' + space + '/' + 'VC' + identString2 + identString,
        "Q_NGS_VARIANT_CALLING")
    newVCSample.setParentSampleIdentifiers(parentIdentifiers)
    newVCSample.setExperiment(newVCExp)

    additionalInfo = '%s %s Tumor: %s \n %s %s Tumor: %s' % (
        qbicBarcodes[0], geneticIDS[0], sampleSource[0], qbicBarcodes[1],
        geneticIDS[1], sampleSource[1])
    newVCSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)

    secName = '%s-%s' % (geneticIDS[0], geneticIDS[1])
    newVCSample.setPropertyValue('Q_SECONDARY_NAME', secName)

    return newVCSample
def find_and_register_vcf(transaction, jsonContent):
    """Create the variant-calling experiment and sample; return the sample.

    Every key of jsonContent other than "type" and "files" is read as an
    entry providing "id_genetics", "id_qbic" and "tumor". The project code
    is the first five characters of the first QBiC barcode, and the space is
    taken from the first sample found for the project.

    For each (barcode, genetic id) pair the parent run is resolved either
    from the module-level newNGSSamples / oldTestSamples maps (when the
    genetic id is a key of newNGSSamples) or by scanning the project
    samples: a sample with /<space>/<barcode> as parent is a test sample,
    and its children of type Q_NGS_SINGLE_SAMPLE_RUN whose Q_SECONDARY_NAME
    or Q_EXTERNALDB_ID is contained in the genetic id become parents.

    The module-level counter numberOfExperiments is incremented and used to
    number the new Q_NGS_VARIANT_CALLING experiment.

    Raises KeyError if jsonContent has no "type" key.
    """
    global numberOfExperiments

    qbicBarcodes = []
    geneticIDS = []
    sampleSource = []
    for key in jsonContent.keys():
        if key in ("type", "files"):
            continue
        entry = jsonContent[key]
        geneticIDS.append(entry["id_genetics"])
        qbicBarcodes.append(entry["id_qbic"])
        sampleSource.append(entry["tumor"])

    # The value is not used, but the lookup is kept so that metadata without
    # a "type" key is still rejected with a KeyError.
    jsonContent["type"]

    project = qbicBarcodes[0][:5]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))

    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()

    parentIdentifiers = []
    testParentIdentifiers = []

    for barcode, geneticID in zip(qbicBarcodes, geneticIDS):
        # Runs registered earlier in this session are looked up directly
        if geneticID in newNGSSamples:
            parentIdentifiers.append(newNGSSamples[geneticID])
            testParentIdentifiers.append(oldTestSamples[geneticID])
            continue

        qbicBarcodeID = '/' + space + '/' + barcode
        for samp in foundSamples:
            if qbicBarcodeID not in samp.getParentSampleIdentifiers():
                continue
            testParentID = samp.getSampleIdentifier()
            for s in foundSamples:
                sampleType = s.getSampleType()
                runSecName = s.getPropertyValue("Q_SECONDARY_NAME")
                extDB = s.getPropertyValue("Q_EXTERNALDB_ID")
                # Unset properties (None) must be skipped before the
                # substring test, which would raise a TypeError on None
                if (testParentID in s.getParentSampleIdentifiers()
                        and sampleType == "Q_NGS_SINGLE_SAMPLE_RUN"
                        and ((runSecName is not None and runSecName in geneticID)
                             or (extDB is not None and extDB in geneticID))):
                    parentIdentifiers.append(s.getSampleIdentifier())
                    testParentIdentifiers.append(testParentID)

    # The experiment number comes from the module-level counter rather than
    # from a fresh listExperiments() query
    numberOfExperiments += 1
    newVCExp = transaction.createNewExperiment('/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments), "Q_NGS_VARIANT_CALLING")

    # The sample code is "VC" + the codes of the matched test samples + the
    # last "_"-separated field of every genetic id
    identString = ''.join([genID.split('_')[-1] for genID in geneticIDS])
    identString2 = ''.join([tpi.split('/')[-1] for tpi in testParentIdentifiers])

    newVCSample = transaction.createNewSample('/' + space + '/' + 'VC' + identString2 + identString, "Q_NGS_VARIANT_CALLING")
    newVCSample.setParentSampleIdentifiers(parentIdentifiers)
    newVCSample.setExperiment(newVCExp)

    # One info line per entry; the three lists are filled in lockstep above
    additionalInfo = ''.join(
        ['%s %s Tumor: %s \n' % info
         for info in zip(qbicBarcodes, geneticIDS, sampleSource)])
    secName = ''.join(['%s ' % genID for genID in geneticIDS]).strip()

    newVCSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
    newVCSample.setPropertyValue('Q_SECONDARY_NAME', secName)

    return newVCSample
def process(transaction):
    """Register an incoming imaging file as a Q_BMI_IMAGING_DATA dataset.

    The attributes (experiment id, QBiC code, patient id, timepoint,
    modality, tracer, tissue, date) come from mangleFilenameForAttributes
    applied to the incoming name. The sample with the QBiC code becomes the
    parent of a new Q_BMI_GENERIC_IMAGING_RUN sample, which is attached to
    the existing Q_BMI_GENERIC_IMAGING experiment with the given id. The
    incoming path is then moved into a new dataset of that sample.

    Raises SampleNotFoundError if no sample has the QBiC code,
    ExperimentNotFoundError if the experiment does not exist with the
    expected type, and SampleAlreadyCreatedError if a sample with the
    computed imaging code already exists.
    """
    context = transaction.getRegistrationContext().getPersistentMap()

    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()

    # The retry counter is read but not used any further in this function
    key = context.get("RETRY_COUNT")
    if key == None:
        key = 1

    # Get the name of the incoming file and derive all attributes from it
    name = transaction.getIncoming().getName()
    propertyMap = mangleFilenameForAttributes(name)

    # we'll get qbic code and patient id
    expID = propertyMap['expID']
    code = propertyMap['qbicID']
    projectCode = code[:5]
    patientID = propertyMap['patientID']
    timepoint = propertyMap['timepoint']
    modality = propertyMap['modality']
    tracer = propertyMap['tracer']
    tissue = propertyMap['tissue']
    timestamp = propertyMap['datestr']

    search_service = transaction.getSearchService()

    # Find the patient according to code
    patientQuery = SearchCriteria()
    patientQuery.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, code))
    patientHits = search_service.searchForSamples(patientQuery)

    if len(patientHits) == 0:
        raise SampleNotFoundError(
            'openBIS query of ' + code + ' failed. Please recheck your QBiC code!')

    # The emptiness check above guarantees that index 0 exists
    patientHit = patientHits[0]
    sampleIdentifier = patientHit.getSampleIdentifier()
    space = patientHit.getSpace()
    rootSample = transaction.getSampleForUpdate(sampleIdentifier)

    # Load the imaging experiment the new data is appended to
    expType = "Q_BMI_GENERIC_IMAGING"
    projectPath = "/" + space + "/" + projectCode
    experiments = search_service.listExperiments(projectPath)
    fullExpIdentifier = projectPath + '/' + expID

    activeExperiment = None
    for exp in experiments:
        if exp.getExperimentType() == expType and exp.getExperimentIdentifier() == fullExpIdentifier:
            activeExperiment = exp

    # if expID is not found...
    if activeExperiment == None:
        raise ExperimentNotFoundError(
            'Experiment with ID ' + expID + ' could not be found! Check the ID.')

    # Count the imaging runs already attached to the experiment; the count
    # provides the running number of the new sample code
    experimentFilter = SearchCriteria()
    experimentFilter.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, expID))
    runQuery = SearchCriteria()
    runQuery.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.TYPE, "Q_BMI_GENERIC_IMAGING_RUN"))
    runQuery.addSubCriteria(SearchSubCriteria.createExperimentCriteria(experimentFilter))
    existingSamples = search_service.searchForSamples(runQuery)

    runNumber = str(len(existingSamples) + 1).zfill(3)
    imagingSampleCode = modality + '-' + tracer + '-' + tissue + '-' + \
        patientID + '-' + timepoint + '-' + runNumber

    # let's first check if such an imaging run was registered before
    duplicateQuery = SearchCriteria()
    duplicateQuery.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, imagingSampleCode))
    if len(search_service.searchForSamples(duplicateQuery)) > 0:
        raise SampleAlreadyCreatedError(
            'Sample ' + imagingSampleCode + ' has been created already. Please re-check to avoid duplicates! Offending file: ' + incomingPath)

    imagingSample = transaction.createNewSample(
        '/' + space + '/' + imagingSampleCode, "Q_BMI_GENERIC_IMAGING_RUN")
    imagingSample.setParentSampleIdentifiers(
        [rootSample.getSampleIdentifier()])
    imagingSample.setExperiment(activeExperiment)

    patientAndTime = ' (' + patientID + ', ' + timepoint + ')'
    imagingSample.setPropertyValue('Q_SECONDARY_NAME',
                                   modality + ' imaging' + patientAndTime)
    imagingSample.setPropertyValue('Q_TIMEPOINT', timepoint)

    # Only the two known tissues are translated; for any other value the
    # property is left unset
    if tissue == 'liver':
        imagingSample.setPropertyValue('Q_IMAGED_TISSUE', 'LIVER')
    elif tissue == 'tumor':
        imagingSample.setPropertyValue(
            'Q_IMAGED_TISSUE', 'HEPATOCELLULAR_CARCINOMA')

    imagingSample.setPropertyValue('Q_MSHCC_IMAGING_DATE',
                                   buildOpenBisTimestamp(timestamp))

    # create new dataset
    imagingDataset = transaction.createNewDataSet('Q_BMI_IMAGING_DATA')
    imagingDataset.setMeasuredData(False)
    imagingDataset.setSample(imagingSample)
    imagingDataset.setPropertyValue(
        'Q_SECONDARY_NAME', modality + ' data' + patientAndTime)

    # Hash computation is disabled for now: it resulted in outOfMemory
    # errors for some bigger files, so a placeholder is stored instead
    incomingFileSha256Sum = 'MISSING!'
    imagingDataset.setPropertyValue(
        'Q_TARBALL_SHA256SUM', incomingFileSha256Sum)

    # finish the transaction
    transaction.moveFile(incomingPath, imagingDataset)
Пример #15
0
def process(transaction):
    """Register incoming variant-calling results.

    The identifier is the first match of the module-level `pattern` in the
    incoming name. A Q_NGS_VARIANT_CALLING_DATA dataset is created up front.

    If the name contains a variant-calling sample code (VCQ...), that sample
    is fetched for update and nothing else happens. Otherwise a new
    Q_NGS_VARIANT_CALLING experiment (<project>E<n>, first unused n) and a
    new sample (VC<m><parentCode>, first m not among the project's known
    sample identifiers) are created below the sample whose code is the first
    ten characters of the identifier. source_dropbox.txt is read and deleted;
    when it names 'dmcegat', the '*origlabfilename' file provides the
    parent's Q_SECONDARY_NAME. All files other than '*sha256sum' / '*vcf'
    are deleted, and the incoming folder is moved into the dataset.
    """
    context = transaction.getRegistrationContext().getPersistentMap()

    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()

    # The retry counter is read but not used any further in this function
    key = context.get("RETRY_COUNT")
    if key is None:
        key = 1

    # Get the name of the incoming file
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if isExpected(identifier):
        project = identifier[:5]
        parentCode = identifier[:10]
    else:
        # NOTE(review): processing continues after this message; `project`
        # and `parentCode` are then unbound, so the sample-creation branch
        # below fails with a NameError. Confirm whether this should abort.
        print("The identifier " + identifier +
              r" did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum")

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_VARIANT_CALLING_DATA")
    dataSet.setMeasuredData(False)

    search_service = transaction.getSearchService()

    # Raw string: \w must reach the regex engine unchanged
    vcf = re.compile(r"VCQ\w{4}[0-9]{3}[A-Z]\w[A-Z]*")
    vcfCodes = vcf.findall(name)

    if len(vcfCodes) > 0:
        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, vcfCodes[0]))
        foundSamples = search_service.searchForSamples(sc)
        # NOTE(review): the sample is fetched, but in this branch the
        # dataset is neither attached to it nor are files moved - the
        # registration steps below all live in the else branch. Confirm
        # whether that is intended.
        vcSample = transaction.getSampleForUpdate(
            foundSamples[0].getSampleIdentifier())
    else:
        # vcf sample needs to be created
        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, parentCode))
        foundSamples = search_service.searchForSamples(sc)

        parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
        space = foundSamples[0].getSpace()
        sa = transaction.getSampleForUpdate(parentSampleIdentifier)

        # register new experiment: pick the first identifier not taken yet
        projectPath = "/" + space + "/" + project
        existingExperiments = search_service.listExperiments(projectPath)
        existingExperimentIDs = [
            eexp.getExperimentIdentifier() for eexp in existingExperiments
        ]

        experimentPrefix = projectPath + '/' + project + 'E'
        numberOfExperiments = len(existingExperiments) + 1
        newExpID = experimentPrefix + str(numberOfExperiments)
        while newExpID in existingExperimentIDs:
            numberOfExperiments += 1
            newExpID = experimentPrefix + str(numberOfExperiments)

        newVariantCallingExperiment = transaction.createNewExperiment(
            newExpID, "Q_NGS_VARIANT_CALLING")
        newVariantCallingExperiment.setPropertyValue('Q_CURRENT_STATUS',
                                                     'FINISHED')

        # register new sample: pick the first VC<n> identifier that is not
        # among the samples already known for the project
        sc = SearchCriteria()
        pc = SearchCriteria()
        pc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.PROJECT, project))
        sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
        existingSampleIDs = [
            samp.getSampleIdentifier()
            for samp in search_service.searchForSamples(sc)
        ]

        samplePrefix = '/' + space + '/' + 'VC'
        vcNumber = 1
        newSampleID = samplePrefix + str(vcNumber) + parentCode
        while newSampleID in existingSampleIDs:
            vcNumber += 1
            newSampleID = samplePrefix + str(vcNumber) + parentCode

        vcSample = transaction.createNewSample(newSampleID,
                                               "Q_NGS_VARIANT_CALLING")
        vcSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
        vcSample.setExperiment(newVariantCallingExperiment)

        # The first line of source_dropbox.txt names the originating lab.
        # try/finally closes the handle even if reading fails.
        sourcePath = os.path.join(incomingPath, 'source_dropbox.txt')
        sourceLabFile = open(sourcePath, 'r')
        try:
            sourceLab = sourceLabFile.readline().strip()
        finally:
            sourceLabFile.close()
        cegat = sourceLab == 'dmcegat'
        os.remove(os.path.realpath(sourcePath))

        for f in os.listdir(incomingPath):
            filePath = os.path.join(incomingPath, f)
            if f.endswith('origlabfilename') and cegat:
                # The text before the first "_" of the original lab file
                # name becomes the secondary name of the parent sample
                origName = open(filePath, 'r')
                try:
                    secondaryName = origName.readline().strip().split('_')[0]
                finally:
                    origName.close()
                sa.setPropertyValue('Q_SECONDARY_NAME', secondaryName)
                os.remove(os.path.realpath(filePath))
            elif f.endswith(('sha256sum', 'vcf')):
                # Kept in place; moved together with the incoming folder
                pass
            else:
                os.remove(os.path.realpath(filePath))

        dataSet.setSample(vcSample)
        transaction.moveFile(incomingPath, dataSet)
def process(transaction):
    """Register the fastq and vcf files announced by an incoming metadata file.

    The data files are read from the sub-folder of the incoming directory
    that carries the same name as the incoming directory itself. Every file
    in it whose name ends in ``metadata`` is parsed with
    ``parse_metadata_file``; the names under its ``"files"`` key are sorted
    by extension into fastq, vcf and GSvar files (if several metadata files
    exist, the last one listed wins).

    * The fastq files are moved into a ``raw`` sub-folder and registered as
      one ``Q_NGS_RAW_DATA`` data set attached to the sample returned by
      ``find_and_register_ngs``.
    * The vcf files, together with every GSvar file whose base name equals
      the base name of a vcf file (after dropping ``_vc_strelka``), are moved
      into a ``vcf`` sub-folder and registered as one
      ``Q_NGS_VARIANT_CALLING_DATA`` data set attached to the sample returned
      by ``find_and_register_vcf``.

    The module-level counter ``numberOfExperiments`` is reset to the number
    of experiments currently listed for the project, so that the helper
    functions can derive new experiment identifiers from it.

    Args:
        transaction: the dropbox registration transaction.

    Raises:
        ValueError: if the data folder contains no metadata file.
    """
    global numberOfExperiments

    # Absolute path and name of the incoming folder
    incomingPath = transaction.getIncoming().getAbsolutePath()
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if not isExpected(identifier):
        # A mismatch is only reported here; it does not stop the registration.
        print("The identifier "+identifier+" did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum")

    # The project code is the first five characters of the identifier
    project = identifier[:5]

    # Any sample of the project tells us which space the project lives in
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()

    numberOfExperiments = len(
        search_service.listExperiments("/" + space + "/" + project))

    # The marker file of the source dropbox must not end up in a data set
    src = os.path.realpath(os.path.join(incomingPath, 'source_dropbox.txt'))
    if os.path.isfile(src):
        os.remove(src)

    print("start registration")

    folder = os.path.join(incomingPath, name)

    # Initialised up front so that a missing metadata file is detected
    # explicitly instead of surfacing as a NameError further down.
    jsonContent = None
    vcfs = []
    fastqs = []
    gsvars = []

    for entry in os.listdir(folder):
        if not entry.endswith('metadata'):
            continue
        jsonContent = parse_metadata_file(
            os.path.realpath(os.path.join(folder, entry)))
        vcfs = []
        fastqs = []
        gsvars = []
        print("metadata read")
        for rawFile in jsonContent["files"]:
            print(rawFile)
            if rawFile.endswith("vcf") or rawFile.endswith("vcf.gz"):
                vcfs.append(rawFile)
            if rawFile.endswith("fastq") or rawFile.endswith("fastq.gz"):
                fastqs.append(rawFile)
            if rawFile.endswith("GSvar") or rawFile.endswith("GSvar.gz"):
                gsvars.append(rawFile)

    if jsonContent is None:
        raise ValueError("No metadata file found in " + folder)

    if len(fastqs) > 0:
        fastqSample = find_and_register_ngs(transaction, jsonContent)
        fastqDataSet = transaction.createNewDataSet("Q_NGS_RAW_DATA")
        fastqDataSet.setSample(fastqSample)
        fastqFolder = os.path.join(folder, "raw")
        os.mkdir(fastqFolder)
        for fastqName in fastqs:
            os.rename(os.path.join(folder, fastqName),
                      os.path.join(fastqFolder, fastqName))
        transaction.moveFile(fastqFolder, fastqDataSet)

    # All vcf files go into ONE data set. Registering once per vcf file
    # cannot work: the "vcf" folder and the files moved into it are already
    # consumed after the first pass.
    if len(vcfs) > 0:
        vcfSample = find_and_register_vcf(transaction, jsonContent)
        vcfDataSet = transaction.createNewDataSet("Q_NGS_VARIANT_CALLING_DATA")
        vcfDataSet.setSample(vcfSample)
        vcfFolder = os.path.join(folder, "vcf")
        os.mkdir(vcfFolder)
        for vcfName in vcfs:
            os.rename(os.path.join(folder, vcfName),
                      os.path.join(vcfFolder, vcfName))
            # GSvar files belong to the vcf file sharing their base name
            vcfStem = vcfName.split('.')[0].replace('_vc_strelka', '')
            for gsvarName in gsvars:
                if vcfStem == gsvarName.split('.')[0]:
                    os.rename(os.path.join(folder, gsvarName),
                              os.path.join(vcfFolder, gsvarName))
        transaction.moveFile(vcfFolder, vcfDataSet)
Пример #17
0
def process(transaction):
    """Register the incoming data as a ``Q_NGS_MAPPING_DATA`` data set.

    The barcode found in the name of the incoming item identifies the parent
    sample. The data set is attached to the mapping sample
    ``/<space>/MP<barcode>``; if the sample search does not return it, a new
    ``Q_NGS_MAPPING`` experiment and sample are created, with all
    ``Q_NGS_SINGLE_SAMPLE_RUN`` samples of the project that descend from the
    parent sample set as parents.

    Args:
        transaction: the dropbox registration transaction.

    Raises:
        ValueError: if the identifier in the incoming name is rejected by
            ``isExpected``.
    """
    # Get the incoming path and name of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if not isExpected(identifier):
        # Without a valid barcode neither project nor parent sample can be
        # derived, so stop with a clear message instead of a NameError.
        message = "The identifier "+identifier+" did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"
        print(message)
        raise ValueError(message)
    project = identifier[:5]
    parentCode = identifier[:10]

    # Look up the parent sample by its code
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, parentCode))
    foundSamples = search_service.searchForSamples(sc)

    parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
    space = foundSamples[0].getSpace()
    sa = transaction.getSampleForUpdate(parentSampleIdentifier)
    parentIdent = sa.getSampleIdentifier()

    # Find a free experiment identifier of the form <project>E<number>,
    # starting right after the current number of experiments. This also
    # works for a project that has no experiments yet.
    expType = "Q_NGS_MAPPING"
    experiments = search_service.listExperiments("/" + space + "/" + project)
    experimentIDs = [exp.getExperimentIdentifier() for exp in experiments]

    expPrefix = '/' + space + '/' + project + '/' + project + 'E'
    expNum = len(experiments) + 1
    expID = expPrefix + str(expNum)
    while expID in experimentIDs:
        expNum += 1
        expID = expPrefix + str(expNum)

    # Collect the NGS runs of the project that descend from the parent sample
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    allSamples = search_service.searchForSamples(sc)

    ngsParents = []
    for samp in allSamples:
        if samp.getSampleType() != "Q_NGS_SINGLE_SAMPLE_RUN":
            continue
        if parentIdent in samp.getParentSampleIdentifiers():
            ngsParents.append(samp.getSampleIdentifier())

    mapSampleID = '/' + space + '/' + 'MP' + parentCode

    # NOTE(review): the CODE attribute is matched against the full
    # identifier ('/<space>/MP...'), not the bare code - confirm that the
    # search really finds an existing mapping sample this way.
    sc = SearchCriteria()
    sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.CODE, mapSampleID))
    foundMapSample = search_service.searchForSamples(sc)

    if len(foundMapSample) == 0:
        # No mapping sample yet: create experiment and sample
        mapExperiment = transaction.createNewExperiment(expID, expType)
        mapExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
        mappingSample = transaction.createNewSample(mapSampleID,
                                                    "Q_NGS_MAPPING")
        mappingSample.setParentSampleIdentifiers(ngsParents)
        mappingSample.setExperiment(mapExperiment)
    else:
        mappingSample = transaction.getSampleForUpdate(
            foundMapSample[0].getSampleIdentifier())

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_MAPPING_DATA")
    dataSet.setMeasuredData(False)
    dataSet.setSample(mappingSample)

    transaction.moveFile(incomingPath, dataSet)
def process(transaction, parameters, tableBuilder):
    """Update old flow experiments that have some missing or incorrect
    information.

    The experiment is looked up by the ``expPermId`` entry of ``parameters``.
    The following is then brought up to date:

    * the experiment name property, which is replaced by the value of the
      ``EXPERIMENT NAME`` keyword of the first FCS file if they differ;
    * for every FCS data set lacking them, the acquisition date (from the
      ``$DATE`` keyword) and the parameters (stored as XML);
    * the experiment version property, set to ``EXPERIMENT_VERSION``.

    The outcome is reported as a single row with the columns ``success`` and
    ``message`` in ``tableBuilder``; on the first error the function stops
    and reports the failure.

    @param transaction: the service transaction used to make entities mutable.
    @param parameters: map of client parameters; must contain ``expPermId``.
    @param tableBuilder: builder of the result table returned to the client.
    """

    # Latest experiment version
    EXPERIMENT_VERSION = 1

    # Set up logging
    _logger = setUpLogging()

    # Prepare the return table with a single row for the results
    tableBuilder.addHeader("success")
    tableBuilder.addHeader("message")
    row = tableBuilder.addRow()

    def _report(success, message):
        """Log the outcome and store it in the result row."""
        if success:
            _logger.info(message)
        else:
            _logger.error(message)
        row.setCell("success", success)
        row.setCell("message", message)

    # Retrieve parameters from client
    expPermId = parameters.get("expPermId")

    # Log parameter info
    _logger.info("Requested update of experiment " + expPermId + ".")

    # Get the experiment
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(MatchClause.createAttributeMatch(
        MatchClauseAttribute.PERM_ID, expPermId))
    experiments = searchService.searchForExperiments(expCriteria)

    # If we did not get exactly one experiment, return here with an error
    if len(experiments) != 1:
        _report(False, "The experiment with permID " + expPermId +
                " could not be found.")
        return

    experiment = experiments[0]
    experimentType = experiment.getExperimentType()

    # Log
    _logger.info("Successfully retrieved Experiment with permId " +
                 expPermId + " and type " + experimentType + ".")

    # The part of the experiment type preceding "_EXPERIMENT" is the prefix
    # of the dataset type and of the dataset property names
    experimentPrefix = experimentType[0:experimentType.find("_EXPERIMENT")]
    dataSetType = experimentPrefix + "_FCSFILE"

    # Retrieve all FCS files contained in the experiment
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(MatchClause.createAttributeMatch(
        MatchClauseAttribute.TYPE, dataSetType))
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(MatchClause.createAttributeMatch(
        MatchClauseAttribute.PERM_ID, expPermId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(expCriteria))
    dataSets = searchService.searchForDataSets(searchCriteria)

    # An empty result must be caught as well: the first dataset is accessed
    # by index right below.
    if dataSets is None or len(dataSets) == 0:
        _report(False, "No FCS files could be found for experiment with " +
                "permID " + expPermId + ".")
        return

    # Log
    _logger.info("Retrieved " + str(len(dataSets)) +
                 " dataset(s) for experiment with permId " + expPermId + ".")

    # Get the file from the first dataset
    files = getFileForCode(dataSets[0].getDataSetCode())
    if len(files) != 1:
        _report(False, "Could not retrieve the FCS file to process!")
        return

    fcsFile = files[0]

    # Log
    _logger.info("Reading file " + fcsFile + ".")

    # Open and parse the FCS file
    # NOTE(review): the second constructor argument differs between this
    # call (False) and the per-dataset call below (True) - presumably it
    # controls whether the data section is read; confirm against FCSReader.
    reader = FCSReader(java.io.File(fcsFile), False)
    if not reader.parse():
        _report(False, "Could not process file " + os.path.basename(fcsFile))
        return

    #
    #
    # EXPERIMENT NAME
    #
    #

    # Experiment name as stored in the file and as currently registered
    expNameFromFile = reader.getCustomKeyword("EXPERIMENT NAME")
    currentExpName = experiment.getPropertyValue(experimentType + "_NAME")

    # We need the Experiment to be mutable
    mutableExperiment = transaction.makeExperimentMutable(experiment)

    if expNameFromFile == currentExpName:

        # Log
        _logger.info("Registered experiment name matches the experiment " +
                     "name from the FCS file.")

    else:

        # Update the registered Experiment name
        mutableExperiment.setPropertyValue(experimentType + "_NAME",
                                           expNameFromFile)

        # Log (formatted with %s so that a missing name does not raise)
        _logger.info("Updated registered experiment name from '%s' to '%s'."
                     % (currentExpName, expNameFromFile))

    #
    #
    # FCS FILE PARAMETERS AND ACQUISITION DATE
    #
    #

    parameterProperty = experimentPrefix + "_FCSFILE_PARAMETERS"
    acqDateProperty = experimentPrefix + "_FCSFILE_ACQ_DATE"

    # Log
    _logger.info("Checking properties of " + str(len(dataSets)) + " file(s).")

    for dataSet in dataSets:

        # Currently stored values; None means the property is missing.
        # A separate name is used so that the `parameters` argument is not
        # overwritten.
        storedParameters = dataSet.getPropertyValue(parameterProperty)
        acqDate = dataSet.getPropertyValue(acqDateProperty)

        if storedParameters is not None and acqDate is not None:
            # Nothing to update for this file
            continue

        # Make the DataSet mutable for update
        mutableDataSet = transaction.makeDataSetMutable(dataSet)

        # Get the file from the dataset
        files = getFileForCode(dataSet.getDataSetCode())
        if len(files) != 1:
            _report(False, "Could not retrieve the FCS file to process!")
            return

        fcsFile = files[0]

        # Open and parse the FCS file
        reader = FCSReader(java.io.File(fcsFile), True)
        if not reader.parse():
            _report(False,
                    "Could not process file " + os.path.basename(fcsFile))
            return

        if acqDate is None:

            # Get and format the acquisition date
            dateStr = formatExpDateForPostgreSQL(
                reader.getStandardKeyword("$DATE"))

            # Update the dataSet
            mutableDataSet.setPropertyValue(acqDateProperty, dateStr)

            # Log
            _logger.info("The acquisition date of file " + str(fcsFile) +
                         " was set to: " + dateStr + ".")

        if storedParameters is None:

            # Get the parameters
            parametersAttr = reader.parametersAttr

            if parametersAttr is None:
                _report(False, "Could not read parameters from file " +
                        os.path.basename(fcsFile))
                return

            # Convert the parameters to XML and store them in the dataSet
            parametersXML = dictToXML(parametersAttr)
            mutableDataSet.setPropertyValue(parameterProperty, parametersXML)

            # Log
            _logger.info("The parameters for file " + str(fcsFile) +
                         " were successfully stored (in XML).")

    # Update the version of the experiment
    mutableExperiment.setPropertyValue(experimentType + "_VERSION",
                                       str(EXPERIMENT_VERSION))

    _report(True, "Congratulations! The experiment was successfully " +
            "upgraded to the latest version.")
Пример #19
0
def find_and_register_ngs(transaction, jsonContent):
    """Return the NGS run sample for the sample described in the metadata.

    The function works on ``jsonContent["sample1"]`` and proceeds in steps:

    1. Among the samples of the project, look for a ``Q_TEST_SAMPLE`` that
       either has the QBiC barcode as code or as parent, has the expected
       ``Q_SAMPLE_TYPE`` and whose ``Q_SECONDARY_NAME`` or
       ``Q_EXTERNALDB_ID`` equals the first part of the genetics ID. A hit is
       remembered in ``oldTestSamples``.
    2. If none is found, reuse the test sample recorded for this genetics ID
       in ``newTestSamples``, or create a new test sample (and a new
       ``Q_SAMPLE_PREPARATION`` experiment) below the sample carrying the
       barcode and record it in ``newTestSamples``.
    3. Look for an existing ``Q_NGS_SINGLE_SAMPLE_RUN`` child of the test
       sample whose secondary name is contained in the genetics ID; its
       secondary name is updated to the full genetics ID.
    4. If there is none, create a new ``Q_NGS_MEASUREMENT`` experiment and
       NGS run sample and record the latter in ``newNGSSamples``.

    New experiment identifiers are derived from the module-level counter
    ``numberOfExperiments``, which is incremented for each new experiment.

    Args:
        transaction: the dropbox registration transaction.
        jsonContent: parsed metadata; must provide ``"type"`` and, under
            ``"sample1"``, the keys ``qc``, ``genome``, ``id_genetics``,
            ``id_qbic`` and ``processing_system``.

    Returns:
        The existing (updated) or newly created NGS run sample.

    Raises:
        ValueError: if no test sample could be found or created.
    """
    global numberOfExperiments

    sampleInfo = jsonContent["sample1"]
    qcValues = sampleInfo["qc"]
    genome = sampleInfo["genome"]
    idGenetics = sampleInfo["id_genetics"]
    qbicBarcode = sampleInfo["id_qbic"]
    system = sampleInfo["processing_system"]
    expType = jsonContent["type"]

    project = qbicBarcode[:5]
    # The part of the genetics ID before the first underscore is what is
    # stored as secondary name / external ID of the test sample
    externalID = idGenetics.split('_')[0]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)

    space = foundSamples[0].getSpace()
    sampleIdent = None
    testSampleCode = None

    # Step 1: look for an already registered test sample
    for samp in foundSamples:
        qbicBarcodeID = '/' + samp.getSpace() + '/' + qbicBarcode
        print("code: " + samp.getCode())
        isRelated = (qbicBarcodeID in samp.getParentSampleIdentifiers()
                     or qbicBarcode == samp.getCode())
        if not isRelated or samp.getSampleType() != "Q_TEST_SAMPLE":
            continue
        print("searching: " + externalID)
        print(samp.getPropertyValue("Q_EXTERNALDB_ID"))
        if (samp.getPropertyValue("Q_SAMPLE_TYPE") == typesDict[expType]) and (
                (samp.getPropertyValue("Q_SECONDARY_NAME") == externalID) or
                (samp.getPropertyValue("Q_EXTERNALDB_ID") == externalID)):
            sampleIdent = samp.getSampleIdentifier()
            testSampleCode = samp.getCode()
            oldTestSamples[idGenetics] = sampleIdent

    # Step 2: reuse a test sample created earlier, or create a new one
    if not sampleIdent:
        if idGenetics in newTestSamples:
            # The sample is known from an earlier registration but was not
            # returned by the search; its code is the last identifier part.
            sampleIdent = newTestSamples[idGenetics]
            testSampleCode = sampleIdent.split('/')[-1]
        else:
            for samp in foundSamples:
                if qbicBarcode != samp.getCode():
                    continue
                testSampleCode = createNewBarcode(project, transaction)

                sampleIdent = '/' + space + '/' + testSampleCode
                testSample = transaction.createNewSample(
                    sampleIdent, "Q_TEST_SAMPLE")
                testSample.setParentSampleIdentifiers(
                    [samp.getSampleIdentifier()])
                testSample.setPropertyValue('Q_SECONDARY_NAME', externalID)
                testSample.setPropertyValue('Q_SAMPLE_TYPE',
                                            typesDict[expType])
                numberOfExperiments += 1
                newTestSampleExperiment = transaction.createNewExperiment(
                    '/' + space + '/' + project + '/' + project + 'E' +
                    str(numberOfExperiments), "Q_SAMPLE_PREPARATION")
                testSample.setExperiment(newTestSampleExperiment)
                newTestSamples[idGenetics] = sampleIdent

    if not sampleIdent:
        # Nothing to attach the NGS run to; fail with a clear message
        # instead of a TypeError when the identifiers are assembled below.
        raise ValueError("No test sample found or created for " +
                         idGenetics + " (barcode " + qbicBarcode + ")")

    # Step 3: look for an already registered NGS run of the test sample
    datasetSample = None
    for s in foundSamples:
        if s.getSampleType() != "Q_NGS_SINGLE_SAMPLE_RUN":
            continue
        if sampleIdent not in s.getParentSampleIdentifiers():
            continue
        secName = s.getPropertyValue("Q_SECONDARY_NAME")
        # A run without secondary name cannot match (and `None in str`
        # would raise a TypeError)
        if secName is not None and secName in idGenetics:
            sa = transaction.getSampleForUpdate(s.getSampleIdentifier())
            sa.setPropertyValue("Q_SECONDARY_NAME", idGenetics)
            datasetSample = sa

    if datasetSample is not None:
        return datasetSample

    # Step 4: register new experiment and sample
    numberOfExperiments += 1
    newNGSMeasurementExp = transaction.createNewExperiment(
        '/' + space + '/' + project + '/' + project + 'E' +
        str(numberOfExperiments), "Q_NGS_MEASUREMENT")
    newNGSMeasurementExp.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
    newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_MODE', 'PAIRED_END')
    newNGSMeasurementExp.setPropertyValue('Q_SEQUENCER_DEVICE',
                                          'IMGAG_ILLUMINA_HISEQ_2500')
    newNGSMeasurementExp.setPropertyValue('Q_ADDITIONAL_INFO', system)
    newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_TYPE',
                                          typesDict[expType])

    # NGS<last part of the genetics ID><test sample code>
    newNGSID = ('/' + space + '/' + 'NGS' + idGenetics.split('_')[-1] +
                testSampleCode)
    newNGSrunSample = transaction.createNewSample(
        newNGSID, "Q_NGS_SINGLE_SAMPLE_RUN")
    newNGSrunSample.setParentSampleIdentifiers([sampleIdent])
    newNGSrunSample.setExperiment(newNGSMeasurementExp)

    newNGSSamples[idGenetics] = newNGSID

    # Genome plus one line per QC entry (braces of the entry's string
    # representation removed)
    infoLines = ['%s: %s\n' % ("Genome", genome)]
    for qc in qcValues:
        infoLines.append(
            '%s\n' % str(qc).replace('{', '').replace('}', ''))
    additionalInfo = ''.join(infoLines)

    newNGSrunSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
    newNGSrunSample.setPropertyValue('Q_SECONDARY_NAME', idGenetics)

    return newNGSrunSample
Пример #20
0
def find_and_register_vcf(transaction, jsonContent):
    """Create and return the variant calling sample for the metadata.

    Every top-level entry of ``jsonContent`` other than ``"type"`` and
    ``"files"`` describes one analysed sample. For each of them the NGS run
    sample and its test sample are determined, either from the module-level
    registries filled by ``find_and_register_ngs`` (``newNGSSamples``,
    ``oldTestSamples``, ``newTestSamples``) or by searching the samples of
    the project. A new ``Q_NGS_VARIANT_CALLING`` experiment and sample are
    then created with the NGS run samples as parents.

    The sample code is ``VC`` followed by the codes of the test samples and
    the last part of each genetics ID. The module-level counter
    ``numberOfExperiments`` is incremented for the new experiment.

    Args:
        transaction: the dropbox registration transaction.
        jsonContent: parsed metadata; each sample entry must provide
            ``id_genetics``, ``id_qbic`` and ``tumor``.

    Returns:
        The newly created variant calling sample.
    """
    global numberOfExperiments

    qbicBarcodes = []
    geneticIDS = []
    sampleSource = []
    for key in jsonContent.keys():
        if key == "type" or key == "files":
            continue
        entry = jsonContent[key]
        geneticIDS.append(entry["id_genetics"])
        qbicBarcodes.append(entry["id_qbic"])
        sampleSource.append(entry["tumor"])

    project = qbicBarcodes[0][:5]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))

    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()

    parentIdentifiers = []
    testParentIdentifiers = []

    for barcode, geneticID in zip(qbicBarcodes, geneticIDS):
        if geneticID in newNGSSamples:
            # The NGS run was created by find_and_register_ngs; its test
            # sample is recorded in oldTestSamples when it already existed
            # and in newTestSamples when it was newly created.
            parentIdentifiers.append(newNGSSamples[geneticID])
            if geneticID in oldTestSamples:
                testParentIdentifiers.append(oldTestSamples[geneticID])
            else:
                testParentIdentifiers.append(newTestSamples[geneticID])
            continue

        # Otherwise search the project: children of the barcode sample are
        # test sample candidates, and their NGS run children whose secondary
        # name or external ID is contained in the genetics ID are the parents.
        qbicBarcodeID = '/' + space + '/' + barcode
        for samp in foundSamples:
            if qbicBarcodeID not in samp.getParentSampleIdentifiers():
                continue
            testParentID = samp.getSampleIdentifier()
            for s in foundSamples:
                sampleType = s.getSampleType()
                secName = s.getPropertyValue("Q_SECONDARY_NAME")
                extDB = s.getPropertyValue("Q_EXTERNALDB_ID")
                if (testParentID in s.getParentSampleIdentifiers()) and (
                        sampleType == "Q_NGS_SINGLE_SAMPLE_RUN") and (
                        ((secName != None) and (secName in geneticID)) or
                        ((extDB != None) and (extDB in geneticID))):
                    parentIdentifiers.append(s.getSampleIdentifier())
                    testParentIdentifiers.append(testParentID)

    numberOfExperiments += 1
    newVCExp = transaction.createNewExperiment(
        '/' + space + '/' + project + '/' + project + 'E' +
        str(numberOfExperiments), "Q_NGS_VARIANT_CALLING")

    # Last part of every genetics ID / code of every test sample
    identString = ''.join([genID.split('_')[-1] for genID in geneticIDS])
    identString2 = ''.join(
        [tpi.split('/')[-1] for tpi in testParentIdentifiers])

    newVCSample = transaction.createNewSample(
        '/' + space + '/' + 'VC' + identString2 + identString,
        "Q_NGS_VARIANT_CALLING")
    newVCSample.setParentSampleIdentifiers(parentIdentifiers)
    newVCSample.setExperiment(newVCExp)

    # One line per analysed sample: barcode, genetics ID and tumor flag
    additionalInfo = ''.join(
        ['%s %s Tumor: %s \n' % (barcode, geneticID, source)
         for barcode, geneticID, source
         in zip(qbicBarcodes, geneticIDS, sampleSource)])
    # The secondary name is the blank-separated list of genetics IDs
    secName = ' '.join(['%s' % geneticID for geneticID in geneticIDS]).strip()

    newVCSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
    newVCSample.setPropertyValue('Q_SECONDARY_NAME', secName)

    return newVCSample
Пример #21
0
def process(transaction):
    """Register incoming FASTA data as a ``Q_FASTA_DATA`` data set.

    If the incoming name contains a variant calling code (``VCQ...``), the
    corresponding sample is fetched for update. Otherwise a new
    ``Q_FASTA_INFO`` experiment and a new ``Q_FASTA`` sample
    (``FASTA<n><barcode>``) are created below the sample identified by the
    barcode; the ``fasta``/``fsa``/``sha256sum`` files are collected in a new
    folder, which is moved into the data set. A file ending in
    ``origlabfilename`` provides the secondary name of the parent sample and
    is deleted, as is every other regular file.

    Args:
        transaction: the dropbox registration transaction.

    Raises:
        ValueError: if a new sample has to be created but the identifier in
            the incoming name is rejected by ``isExpected``.
    """
    # Get the incoming path and name of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    identifierMessage = None
    if not isExpected(identifier):
        identifierMessage = "The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\d{3}\w{2} or checksum"
        print(identifierMessage)

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_FASTA_DATA")
    dataSet.setMeasuredData(False)

    search_service = transaction.getSearchService()

    vcf = re.compile("VCQ\w{4}[0-9]{3}[A-Z]\w[A-Z]*")
    vcfCodes = vcf.findall(name)

    if len(vcfCodes) > 0:
        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, vcfCodes[0]))
        foundSamples = search_service.searchForSamples(sc)
        vcSample = transaction.getSampleForUpdate(
            foundSamples[0].getSampleIdentifier())
        # NOTE(review): in this branch the data set is neither attached to
        # vcSample nor are any files moved into it - confirm whether the
        # final setSample/moveFile steps should also run here.
    else:
        # A new sample needs to be created, which requires a valid barcode.
        if identifierMessage is not None:
            raise ValueError(identifierMessage)
        project = identifier[:5]
        parentCode = identifier[:10]

        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, parentCode))
        foundSamples = search_service.searchForSamples(sc)

        parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
        space = foundSamples[0].getSpace()
        sa = transaction.getSampleForUpdate(parentSampleIdentifier)

        # Find a free experiment identifier <project>E<number>, starting
        # right after the current number of experiments
        existingExperiments = search_service.listExperiments("/" + space +
                                                             "/" + project)
        existingExperimentIDs = [eexp.getExperimentIdentifier()
                                 for eexp in existingExperiments]

        expPrefix = '/' + space + '/' + project + '/' + project + 'E'
        numberOfExperiments = len(existingExperiments) + 1
        newExpID = expPrefix + str(numberOfExperiments)
        while newExpID in existingExperimentIDs:
            numberOfExperiments += 1
            newExpID = expPrefix + str(numberOfExperiments)

        newFastaExperiment = transaction.createNewExperiment(
            newExpID, "Q_FASTA_INFO")
        newFastaExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')

        # Identifiers of all samples of the project
        sc = SearchCriteria()
        pc = SearchCriteria()
        pc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.PROJECT, project))
        sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
        projectSamples = search_service.searchForSamples(sc)
        existingSampleIDs = [samp.getSampleIdentifier()
                             for samp in projectSamples]

        # search in known ids, but also try to fetch the sample in case it
        # wasn't indexed yet
        samplePrefix = '/' + space + '/' + 'FASTA'
        vcNumber = 1
        newSampleID = samplePrefix + str(vcNumber) + parentCode
        while (newSampleID in existingSampleIDs
               or transaction.getSampleForUpdate(newSampleID)):
            vcNumber += 1
            newSampleID = samplePrefix + str(vcNumber) + parentCode

        vcSample = transaction.createNewSample(newSampleID, "Q_FASTA")
        vcSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
        vcSample.setExperiment(newFastaExperiment)

        # Folder that collects the files belonging to the data set
        resultsname = name.replace('.fasta', '').replace('.fsa', '')
        new_folder = os.path.realpath(os.path.join(incomingPath, resultsname))
        os.mkdir(new_folder)

        for f in os.listdir(incomingPath):
            path = os.path.join(incomingPath, f)
            if f.endswith('origlabfilename'):
                # The part of the first line before the first underscore
                # becomes the secondary name of the parent sample
                origName = open(path, 'r')
                try:
                    secondaryName = origName.readline().strip().split('_')[0]
                finally:
                    # Close the handle even if reading fails
                    origName.close()
                sa.setPropertyValue('Q_SECONDARY_NAME', secondaryName)
                os.remove(os.path.realpath(path))
            elif f.endswith('sha256sum') or f.endswith('fasta') or f.endswith(
                    'fsa'):
                os.rename(os.path.realpath(path), os.path.join(new_folder, f))
            elif not os.path.isdir(path):
                # Any other regular file is discarded
                os.remove(os.path.realpath(path))

        dataSet.setSample(vcSample)
        transaction.moveFile(new_folder, dataSet)
Пример #22
0
def handle_BSA_Run(transaction):
    """Register an incoming BSA run together with its mzML conversion.

    The incoming vendor file is converted to mzML with ``convert_raw`` and
    the result is moved to ``DROPBOX_PATH``. A new ``Q_MS_RUN`` sample with
    the next free run number is then created in the BSA experiment, and
    both the raw file and the mzML file are attached to it.

    Raises ValueError if the extension of the incoming file is not listed
    in ``VENDOR_FORMAT_EXTENSIONS``.
    """
    incoming = transaction.getIncoming()
    name = incoming.getName()
    incomingPath = incoming.getAbsolutePath()

    stem, ext = os.path.splitext(name)

    # The conversion cannot be made part of the transaction, so it runs in
    # a scratch folder that is removed again no matter how the try block
    # ends.
    tmpdir = tempfile.mkdtemp(dir=MZML_TMP)
    try:
        convert = partial(convert_raw,
                          remote_base=REMOTE_BASE,
                          host=MSCONVERT_HOST,
                          timeout=CONVERSION_TIMEOUT,
                          user=MSCONVERT_USER)

        extension = ext.lower()
        if extension not in VENDOR_FORMAT_EXTENSIONS:
            raise ValueError("Invalid incoming file %s" % incomingPath)
        openbis_format_code = VENDOR_FORMAT_EXTENSIONS[extension]

        raw_path = os.path.join(incomingPath, name)
        mzml_path = os.path.join(tmpdir, stem + '.mzML')
        convert(raw_path, mzml_path)

        # Move the converted file out of the scratch folder before the
        # folder is deleted
        mzml_dest = os.path.join(DROPBOX_PATH, os.path.basename(mzml_path))
        os.rename(mzml_path, mzml_dest)
    finally:
        shutil.rmtree(tmpdir)

    # The MS experiment
    msExp = transaction.getExperiment(BSA_MPC_EXPERIMENT_ID)

    #TODO create new ms sample? if so, use normal qbic barcodes?
    msCode = "MS" + BSA_MPC_BARCODE

    # Collect all samples of the BSA project
    search_service = transaction.getSearchService()
    projectCriteria = SearchCriteria()
    projectCriteria.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, BSA_MPC_PROJECT))
    sampleCriteria = SearchCriteria()
    sampleCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(projectCriteria))
    foundSamples = search_service.searchForSamples(sampleCriteria)

    # The new run gets a number one above the highest registered one; the
    # run number is the part of the sample code after the last underscore
    run = 1
    for samp in foundSamples:
        if samp.getSampleType() != "Q_MS_RUN":
            continue
        run = max(run, int(samp.getCode().split("_")[-1]) + 1)

    msSampleIdentifier = '/' + BSA_MPC_SPACE + '/' + msCode + "_" + str(run)
    msSample = transaction.createNewSample(msSampleIdentifier, "Q_MS_RUN")
    # The parent sample is always the same for BSA runs
    msSample.setParentSampleIdentifiers([BSA_MPC_SAMPLE_ID])
    msSample.setExperiment(msExp)

    createRawDataSet(transaction, raw_path, msSample, openbis_format_code)
    GZipAndMoveMZMLDataSet(transaction, mzml_dest, msSample)

    # Remove the ".testorig" files left in the incoming folder
    leftovers = [entry for entry in os.listdir(incomingPath)
                 if ".testorig" in entry]
    for entry in leftovers:
        os.remove(os.path.realpath(os.path.join(incomingPath, entry)))
def process(transaction, parameters, tableBuilder):
    """Update old flow experiments that have some missing or incorrect
    information.

    The experiment with the permId passed as "expPermId" in parameters is
    retrieved together with its FCS file datasets. The registered
    experiment name is aligned with the "EXPERIMENT NAME" keyword of the
    first FCS file, and for every dataset that lacks the parameters or the
    acquisition date property the missing value is read from its FCS file
    and stored. Finally, the experiment version property is set to
    EXPERIMENT_VERSION.

    Nothing is returned: the outcome is written to a single row of
    tableBuilder with the cells "success" and "message". On any failure
    the function logs the error, fills the row and returns early.
    """

    # Latest experiment version
    EXPERIMENT_VERSION = 1

    # Set up logging
    _logger = setUpLogging()

    # Prepare the return table
    tableBuilder.addHeader("success")
    tableBuilder.addHeader("message")

    # Add a row for the results
    row = tableBuilder.addRow()

    def _finish(success, message):
        """Log the outcome and store it in the result row."""
        if success:
            _logger.info(message)
        else:
            _logger.error(message)
        row.setCell("success", success)
        row.setCell("message", message)

    # Retrieve parameters from client
    expPermId = parameters.get("expPermId")

    # Log parameter info
    _logger.info("Requested update of experiment " + expPermId + ".")

    # Get the experiment
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                         expPermId))
    experiments = searchService.searchForExperiments(expCriteria)

    # If we did not get the experiment, return here with an error
    if len(experiments) != 1:
        _finish(False, "The experiment with permID " + expPermId +
                " could not be found.")
        return

    experiment = experiments[0]

    # Get the experiment type
    experimentType = experiment.getExperimentType()

    # Log
    _logger.info("Successfully retrieved Experiment with permId " + expPermId +
                 " and type " + experimentType + ".")

    # The part of the experiment type in front of "_EXPERIMENT" prefixes
    # the dataset type and the dataset property names
    experimentPrefix = experimentType[0:experimentType.find("_EXPERIMENT")]
    dataSetType = experimentPrefix + "_FCSFILE"
    parameterProperty = experimentPrefix + "_FCSFILE_PARAMETERS"
    acqDateProperty = experimentPrefix + "_FCSFILE_ACQ_DATE"

    # Retrieve all FCS files contained in the experiment
    searchCriteria = SearchCriteria()
    searchCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE,
                                         dataSetType))
    expCriteria = SearchCriteria()
    expCriteria.addMatchClause(
        MatchClause.createAttributeMatch(MatchClauseAttribute.PERM_ID,
                                         expPermId))
    searchCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(expCriteria))
    dataSets = searchService.searchForDataSets(searchCriteria)

    # If we did not get the datasets, return here with an error. An empty
    # result must be rejected too, since the first dataset is read below.
    if dataSets is None or len(dataSets) == 0:
        _finish(False,
                "No FCS files could be found for experiment with permID " +
                expPermId + ".")
        return

    # Log
    _logger.info("Retrieved " + str(len(dataSets)) +
                 " dataset(s) for experiment with permId " + expPermId + ".")

    # Get the file from the first dataset
    files = getFileForCode(dataSets[0].getDataSetCode())
    if len(files) != 1:
        _finish(False, "Could not retrieve the FCS file to process!")
        return

    fcsFile = files[0]

    # Log
    _logger.info("Reading file " + fcsFile + ".")

    # Open and parse the FCS file
    reader = FCSReader(java.io.File(fcsFile), False)
    if not reader.parse():
        _finish(False, "Could not process file " + os.path.basename(fcsFile))
        return

    #
    #
    # EXPERIMENT NAME
    #
    #

    # Get the experiment name from the file
    expNameFromFile = reader.getCustomKeyword("EXPERIMENT NAME")

    # Get the experiment name from the registered Experiment
    currentExpName = experiment.getPropertyValue(experimentType + "_NAME")

    # We need the Experiment to be mutable
    mutableExperiment = transaction.makeExperimentMutable(experiment)

    # Are the experiment names matching?
    if expNameFromFile == currentExpName:

        # Log
        _logger.info("Registered experiment name matches the experiment " +
                     "name from the FCS file.")

    else:

        # Update the registered Experiment name
        mutableExperiment.setPropertyValue(experimentType + "_NAME",
                                           expNameFromFile)

        # Log. Either name may be None (e.g. the property was never set),
        # so format instead of concatenating.
        _logger.info("Updated registered experiment name from '%s' to '%s'."
                     % (currentExpName, expNameFromFile))

    #
    #
    # FCS FILE PARAMETERS AND ACQUISITION DATE
    #
    #

    # Log
    _logger.info("Checking properties of " + str(len(dataSets)) + " file(s).")

    for dataSet in dataSets:

        # Check whether the parameters are stored for the file
        storedParameters = dataSet.getPropertyValue(parameterProperty)

        # Check whether the acquisition date is stored for the file
        acqDate = dataSet.getPropertyValue(acqDateProperty)

        # Nothing to do if both properties are already set
        if storedParameters is not None and acqDate is not None:
            continue

        # Make the DataSet mutable for update
        mutableDataSet = transaction.makeDataSetMutable(dataSet)

        # Get the file from the dataset
        files = getFileForCode(dataSet.getDataSetCode())
        if len(files) != 1:
            _finish(False, "Could not retrieve the FCS file to process!")
            return

        fcsFile = files[0]

        # Open and parse the FCS file
        reader = FCSReader(java.io.File(fcsFile), True)
        if not reader.parse():
            _finish(False,
                    "Could not process file " + os.path.basename(fcsFile))
            return

        if acqDate is None:

            # Get and format the acquisition date
            dateStr = formatExpDateForPostgreSQL(
                reader.getStandardKeyword("$DATE"))

            # Update the dataSet
            mutableDataSet.setPropertyValue(acqDateProperty, dateStr)

            # Log
            _logger.info("The acquisition date of file " + str(fcsFile) +
                         " was set to: " + dateStr + ".")

        if storedParameters is None:

            # Get the parameters
            parametersAttr = reader.parametersAttr
            if parametersAttr is None:
                _finish(False, "Could not read parameters from file " +
                        os.path.basename(fcsFile))
                return

            # Convert the parameters to XML
            parametersXML = dictToXML(parametersAttr)

            # Now store them in the dataSet
            mutableDataSet.setPropertyValue(parameterProperty,
                                            parametersXML)

            # Log
            _logger.info("The parameters for file " + str(fcsFile) +
                         " were successfully stored (in XML).")

    # Update the version of the experiment
    mutableExperiment.setPropertyValue(experimentType + "_VERSION",
                                       str(EXPERIMENT_VERSION))

    _finish(True,
            "Congratulations! The experiment was successfully upgraded " +
            "to the latest version.")
Пример #24
0
def process(transaction):
    """Register the incoming folder as a variant calling dataset.

    If the incoming name contains the code of a variant calling sample
    (VCQ...), the dataset is attached to that existing sample. Otherwise
    the parent sample is looked up by the QBiC code found in the name, a
    new Q_NGS_VARIANT_CALLING experiment and sample are created below it,
    and the incoming folder is cleaned so that only the files ending in
    'sha256sum' or 'vcf' remain before it is moved into the dataset.

    Raises ValueError if the incoming name contains no identifier, if a
    new sample has to be created but the identifier is not a valid QBiC
    code, or if a required sample cannot be found in openBIS.
    """
    # Get the incoming path and name of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    name = transaction.getIncoming().getName()

    identifiers = pattern.findall(name)
    if not identifiers:
        raise ValueError("No identifier could be found in the incoming name "
                         + name)
    identifier = identifiers[0]

    # project and parentCode are only needed when a new variant calling
    # sample has to be created; they stay None for unexpected identifiers
    project = None
    parentCode = None
    if isExpected(identifier):
        project = identifier[:5]
        parentCode = identifier[:10]
    else:
        print("The identifier "+identifier+" did not match the pattern Q[A-Z]{4}\\d{3}\\w{2} or checksum")

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_VARIANT_CALLING_DATA")
    dataSet.setMeasuredData(False)

    search_service = transaction.getSearchService()

    vcf = re.compile(r"VCQ\w{4}[0-9]{3}[A-Z]\w[A-Z]*")
    vcfCodes = vcf.findall(name)

    if len(vcfCodes) > 0:
        # the variant calling sample is already registered
        sc = SearchCriteria()
        sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, vcfCodes[0]))
        foundSamples = search_service.searchForSamples(sc)
        if len(foundSamples) == 0:
            raise ValueError("The variant calling sample " + vcfCodes[0] +
                             " could not be found in openBIS.")
        vcSample = transaction.getSampleForUpdate(foundSamples[0].getSampleIdentifier())
    else:
        # vcf sample needs to be created
        if parentCode is None:
            raise ValueError("Cannot create a variant calling sample: " +
                             identifier + " is not a valid QBiC code.")

        sc = SearchCriteria()
        sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, parentCode))
        foundSamples = search_service.searchForSamples(sc)
        if len(foundSamples) == 0:
            raise ValueError("The parent sample " + parentCode +
                             " could not be found in openBIS.")

        parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
        space = foundSamples[0].getSpace()
        sa = transaction.getSampleForUpdate(parentSampleIdentifier)

        # register new experiment: use the first experiment number that is
        # not taken, starting at the number of existing experiments + 1
        existingExperiments = search_service.listExperiments("/" + space + "/" + project)
        existingExperimentIDs = [eexp.getExperimentIdentifier()
                                 for eexp in existingExperiments]
        experimentPrefix = '/' + space + '/' + project + '/' + project + 'E'

        numberOfExperiments = len(existingExperiments) + 1
        newExpID = experimentPrefix + str(numberOfExperiments)
        while newExpID in existingExperimentIDs:
            numberOfExperiments += 1
            newExpID = experimentPrefix + str(numberOfExperiments)

        newVariantCallingExperiment = transaction.createNewExperiment(newExpID, "Q_NGS_VARIANT_CALLING")
        newVariantCallingExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')

        # register new sample: use the first free VC<n><parentCode> among
        # the samples of the project
        sc = SearchCriteria()
        pc = SearchCriteria()
        pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.PROJECT, project))
        sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
        foundSamples2 = search_service.searchForSamples(sc)
        existingSampleIDs = [samp.getSampleIdentifier()
                             for samp in foundSamples2]

        vcNumber = 1
        newSampleID = '/' + space + '/' + 'VC' + str(vcNumber) + parentCode
        while newSampleID in existingSampleIDs:
            vcNumber += 1
            newSampleID = '/' + space + '/' + 'VC' + str(vcNumber) + parentCode

        vcSample = transaction.createNewSample(newSampleID, "Q_NGS_VARIANT_CALLING")
        vcSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
        vcSample.setExperiment(newVariantCallingExperiment)

        # The first line of source_dropbox.txt names the originating
        # dropbox; close the file even if reading fails
        sourceLabPath = os.path.join(incomingPath, 'source_dropbox.txt')
        sourceLabFile = open(sourceLabPath, 'r')
        try:
            sourceLab = sourceLabFile.readline().strip()
        finally:
            sourceLabFile.close()
        cegat = sourceLab == 'dmcegat'
        os.remove(os.path.realpath(sourceLabPath))

        for f in os.listdir(incomingPath):
            filePath = os.path.join(incomingPath, f)
            if f.endswith('origlabfilename') and cegat:
                # the part of the original lab file name in front of the
                # first underscore becomes the secondary name of the parent
                origName = open(filePath, 'r')
                try:
                    secondaryName = origName.readline().strip().split('_')[0]
                finally:
                    origName.close()
                sa.setPropertyValue('Q_SECONDARY_NAME', secondaryName)
                os.remove(os.path.realpath(filePath))
            elif f.endswith(('sha256sum', 'vcf')):
                # these files stay in the folder and become the dataset
                continue
            else:
                os.remove(os.path.realpath(filePath))

    dataSet.setSample(vcSample)
    transaction.moveFile(incomingPath, dataSet)
Пример #25
0
def process(transaction):
    """Register the incoming folder as a mapping dataset.

    The parent sample is looked up by the QBiC code found in the incoming
    name. If no mapping sample 'MP<parentCode>' exists yet, a new
    Q_NGS_MAPPING experiment and sample are created; the parents of the
    new sample are the Q_NGS_SINGLE_SAMPLE_RUN samples of the project that
    descend from the parent sample. The incoming folder is then moved into
    a new Q_NGS_MAPPING_DATA dataset attached to the mapping sample.

    Raises ValueError if the incoming name contains no valid QBiC code or
    if the parent sample cannot be found in openBIS.
    """
    # Get the incoming path and name of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    name = transaction.getIncoming().getName()

    identifiers = pattern.findall(name)
    if not identifiers:
        raise ValueError("No identifier could be found in the incoming name "
                         + name)
    identifier = identifiers[0]

    # Everything below depends on the project and parent code, so an
    # unexpected identifier is a hard error
    if not isExpected(identifier):
        raise ValueError("The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\\d{3}\\w{2} or checksum")
    project = identifier[:5]
    parentCode = identifier[:10]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, parentCode))
    foundSamples = search_service.searchForSamples(sc)
    if len(foundSamples) == 0:
        raise ValueError("The parent sample " + parentCode +
                         " could not be found in openBIS.")

    parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
    space = foundSamples[0].getSpace()
    sa = transaction.getSampleForUpdate(parentSampleIdentifier)

    # Identifier for a possible new experiment: the first number above the
    # count of existing experiments that is not taken yet
    expType = "Q_NGS_MAPPING"
    experiments = search_service.listExperiments("/" + space + "/" + project)
    experimentIDs = [exp.getExperimentIdentifier() for exp in experiments]
    experimentPrefix = '/' + space + '/' + project + '/' + project + 'E'

    expNum = len(experiments) + 1
    expID = experimentPrefix + str(expNum)
    while expID in experimentIDs:
        expNum += 1
        expID = experimentPrefix + str(expNum)

    # Collect the NGS runs of the project that descend from the parent
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    allSamples = search_service.searchForSamples(sc)

    parentIdentifier = sa.getSampleIdentifier()
    ngsParents = [samp.getSampleIdentifier() for samp in allSamples
                  if samp.getSampleType() == "Q_NGS_SINGLE_SAMPLE_RUN"
                  and parentIdentifier in samp.getParentSampleIdentifiers()]

    mapSampleCode = 'MP' + parentCode
    mapSampleID = '/' + space + '/' + mapSampleCode

    # Look for an existing mapping sample. The CODE attribute is matched
    # against the sample code, not the full '/space/code' identifier.
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, mapSampleCode))
    foundMapSample = search_service.searchForSamples(sc)

    if len(foundMapSample) == 0:
        mapExperiment = transaction.createNewExperiment(expID, expType)
        mapExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
        mappingSample = transaction.createNewSample(mapSampleID,
                                                    "Q_NGS_MAPPING")
        mappingSample.setParentSampleIdentifiers(ngsParents)
        mappingSample.setExperiment(mapExperiment)
    else:
        mappingSample = transaction.getSampleForUpdate(
            foundMapSample[0].getSampleIdentifier())

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_MAPPING_DATA")
    dataSet.setMeasuredData(False)
    dataSet.setSample(mappingSample)

    transaction.moveFile(incomingPath, dataSet)
Пример #26
0
def process(transaction):
    """Register the incoming file as a new imaging run with its dataset.

    The attributes encoded in the incoming file name are extracted with
    mangleFilenameForAttributes. The sample with the QBiC code and the
    Q_BMI_GENERIC_IMAGING experiment with the given ID must already exist.
    A new Q_BMI_GENERIC_IMAGING_RUN sample is created as child of the
    found sample, and the incoming file is moved into a new
    Q_BMI_IMAGING_DATA dataset attached to it.

    Raises SampleNotFoundError if no sample has the QBiC code,
    ExperimentNotFoundError if the imaging experiment does not exist and
    SampleAlreadyCreatedError if the imaging run code is already taken.
    """
    context = transaction.getRegistrationContext().getPersistentMap()

    # Get the incoming path of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()

    # NOTE(review): the retry counter is read here but not used below
    key = context.get("RETRY_COUNT")
    if key is None:
        key = 1

    # Get the name of the incoming file and the attributes encoded in it
    name = transaction.getIncoming().getName()
    attributes = mangleFilenameForAttributes(name)

    expID = attributes['expID']
    code = attributes['qbicID']
    projectCode = code[:5]
    patientID = attributes['patientID']
    timepoint = attributes['timepoint']
    modality = attributes['modality']
    tracer = attributes['tracer']
    tissue = attributes['tissue']
    timestamp = attributes['datestr']

    search_service = transaction.getSearchService()

    # Find the sample with the given QBiC code
    codeCriteria = SearchCriteria()
    codeCriteria.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, code))
    foundSamples = search_service.searchForSamples(codeCriteria)

    if len(foundSamples) == 0:
        raise SampleNotFoundError('openBIS query of ' + code +
                                  ' failed. Please recheck your QBiC code!')

    firstHit = foundSamples[0]
    sampleIdentifier = firstHit.getSampleIdentifier()
    space = firstHit.getSpace()
    rootSample = transaction.getSampleForUpdate(sampleIdentifier)

    # Pick the imaging experiment of the project that carries the given ID
    expType = "Q_BMI_GENERIC_IMAGING"
    projectIdentifier = "/" + space + "/" + projectCode
    experiments = search_service.listExperiments(projectIdentifier)
    fullExpIdentifier = projectIdentifier + '/' + expID

    activeExperiment = None
    for candidate in experiments:
        if candidate.getExperimentType() != expType:
            continue
        if candidate.getExperimentIdentifier() == fullExpIdentifier:
            activeExperiment = candidate

    if activeExperiment is None:
        raise ExperimentNotFoundError('Experiment with ID ' + expID +
                                      ' could not be found! Check the ID.')

    # Count the imaging runs that are already part of the experiment; the
    # new run gets the next number as three-digit suffix of its code
    runCriteria = SearchCriteria()
    runCriteria.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.TYPE,
            "Q_BMI_GENERIC_IMAGING_RUN"))
    experimentCriteria = SearchCriteria()
    experimentCriteria.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, expID))
    runCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(experimentCriteria))
    existingSamples = search_service.searchForSamples(runCriteria)

    runNumber = str(len(existingSamples) + 1).zfill(3)
    imagingSampleCode = '-'.join(
        [modality, tracer, tissue, patientID, timepoint, runNumber])

    # Refuse to register an imaging run whose code exists already
    duplicateCriteria = SearchCriteria()
    duplicateCriteria.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, imagingSampleCode))
    foundSamples = search_service.searchForSamples(duplicateCriteria)

    if len(foundSamples) > 0:
        raise SampleAlreadyCreatedError(
            'Sample ' + imagingSampleCode +
            ' has been created already. Please re-check to avoid duplicates! Offending file: '
            + incomingPath)

    # Create the imaging run below the found sample
    imagingSample = transaction.createNewSample(
        '/' + space + '/' + imagingSampleCode, "Q_BMI_GENERIC_IMAGING_RUN")
    imagingSample.setParentSampleIdentifiers(
        [rootSample.getSampleIdentifier()])
    imagingSample.setExperiment(activeExperiment)

    # Shared suffix of the sample and dataset labels
    subject = ' (' + patientID + ', ' + timepoint + ')'
    imagingSample.setPropertyValue('Q_SECONDARY_NAME',
                                   modality + ' imaging' + subject)
    imagingSample.setPropertyValue('Q_TIMEPOINT', timepoint)

    # Only 'liver' and 'tumor' are stored as imaged tissue
    imagedTissue = None
    if tissue == 'liver':
        imagedTissue = 'LIVER'
    elif tissue == 'tumor':
        imagedTissue = 'HEPATOCELLULAR_CARCINOMA'
    if imagedTissue is not None:
        imagingSample.setPropertyValue('Q_IMAGED_TISSUE', imagedTissue)

    imagingSample.setPropertyValue('Q_MSHCC_IMAGING_DATE',
                                   buildOpenBisTimestamp(timestamp))

    # Create the dataset for the incoming file
    imagingDataset = transaction.createNewDataSet('Q_BMI_IMAGING_DATA')
    imagingDataset.setMeasuredData(False)
    imagingDataset.setSample(imagingSample)
    imagingDataset.setPropertyValue('Q_SECONDARY_NAME',
                                    modality + ' data' + subject)

    # Hash computation is disabled for now: it resulted in outOfMemory
    # errors for some bigger files, so a placeholder is stored instead
    incomingFileSha256Sum = 'MISSING!'
    imagingDataset.setPropertyValue('Q_TARBALL_SHA256SUM',
                                    incomingFileSha256Sum)

    # finish the transaction
    transaction.moveFile(incomingPath, imagingDataset)
def find_and_register_ngs(transaction, jsonContent):
    """Find or register the NGS run sample described by jsonContent.

    The entry "sample1" of jsonContent provides the QBiC barcode, the
    genetics ID, the genome, the processing system and the QC values;
    "type" selects the sample type through typesDict.

    A Q_TEST_SAMPLE that belongs to the barcode and matches the genetics
    ID is searched among the samples of the project; if none exists (and
    none was created for this genetics ID before), a new one is created
    together with a Q_SAMPLE_PREPARATION experiment. Then a registered
    Q_NGS_SINGLE_SAMPLE_RUN child of the test sample is searched; if none
    exists, a new one is created together with a Q_NGS_MEASUREMENT
    experiment.

    The module-level dictionaries oldTestSamples, newTestSamples and
    newNGSSamples as well as the counter numberOfExperiments are updated.

    Returns the NGS run sample the dataset has to be attached to.
    Raises ValueError if the project contains no samples or if no test
    sample could be found or created.
    """
    global numberOfExperiments

    sampleInfo = jsonContent["sample1"]
    qcValues = sampleInfo["qc"]
    genome = sampleInfo["genome"]
    idGenetics = sampleInfo["id_genetics"]
    qbicBarcode = sampleInfo["id_qbic"]
    system = sampleInfo["processing_system"]
    expType = jsonContent["type"]

    project = qbicBarcode[:5]
    # the part of the genetics ID in front of the first underscore is what
    # test samples carry as secondary name or external ID
    shortGeneticsID = idGenetics.split('_')[0]

    # all samples of the project
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)

    if len(foundSamples) == 0:
        raise ValueError("No samples could be found for project " + project)

    datasetSample = None
    sampleFound = False
    sampleIdent = None
    space = foundSamples[0].getSpace()
    testSampleCode = None

    # Look for a registered test sample that belongs to the barcode and
    # matches the genetics ID; the last match wins
    for samp in foundSamples:
        qbicBarcodeID = '/' + samp.getSpace() + '/' + qbicBarcode
        print("code: "+samp.getCode())
        if not (qbicBarcodeID in samp.getParentSampleIdentifiers() or qbicBarcode == samp.getCode()):
            continue
        if samp.getSampleType() != "Q_TEST_SAMPLE":
            continue
        print("searching: "+shortGeneticsID)
        print(samp.getPropertyValue("Q_EXTERNALDB_ID"))
        if (samp.getPropertyValue("Q_SAMPLE_TYPE") == typesDict[expType]) and ((samp.getPropertyValue("Q_SECONDARY_NAME") == shortGeneticsID) or (samp.getPropertyValue("Q_EXTERNALDB_ID") == shortGeneticsID)):
            sampleIdent = samp.getSampleIdentifier()
            testSampleCode = samp.getCode()
            oldTestSamples[idGenetics] = sampleIdent

    # No registered test sample: create one below the sample carrying the
    # barcode, unless one was created for this genetics ID before
    if not sampleIdent and idGenetics not in newTestSamples:
        for samp in foundSamples:
            if qbicBarcode != samp.getCode():
                continue
            testSampleCode = createNewBarcode(project, transaction)

            sampleIdent = '/' + space + '/' + testSampleCode
            testSample = transaction.createNewSample(sampleIdent, "Q_TEST_SAMPLE")
            testSample.setParentSampleIdentifiers([samp.getSampleIdentifier()])
            testSample.setPropertyValue('Q_SECONDARY_NAME', shortGeneticsID)
            testSample.setPropertyValue('Q_SAMPLE_TYPE', typesDict[expType])
            numberOfExperiments += 1
            newTestSampleExperiment = transaction.createNewExperiment('/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments), "Q_SAMPLE_PREPARATION")
            testSample.setExperiment(newTestSampleExperiment)
            newTestSamples[idGenetics] = sampleIdent

    # Without a test sample the NGS run below cannot be registered; fail
    # here with a clear message instead of a TypeError further down.
    # NOTE(review): this also covers genetics IDs that are already listed
    # in newTestSamples - confirm whether that sample should be reused.
    if not sampleIdent:
        raise ValueError("No test sample could be found or created for " +
                         idGenetics + " (barcode " + qbicBarcode + ")")

    for s in foundSamples:
        if s.getSampleType() != "Q_NGS_SINGLE_SAMPLE_RUN":
            continue
        if sampleIdent not in s.getParentSampleIdentifiers():
            continue
        # a run without secondary name cannot match the genetics ID
        secondaryName = s.getPropertyValue("Q_SECONDARY_NAME")
        if secondaryName is None or secondaryName not in idGenetics:
            continue
        # There is already a registered NGS run
        sa = transaction.getSampleForUpdate(s.getSampleIdentifier())
        sa.setPropertyValue("Q_SECONDARY_NAME", idGenetics)

        datasetSample = sa
        sampleFound = True

    if not sampleFound:
        # register new experiment and sample
        numberOfExperiments += 1
        newNGSMeasurementExp = transaction.createNewExperiment('/' + space + '/' + project + '/' + project + 'E' + str(numberOfExperiments), "Q_NGS_MEASUREMENT")
        newNGSMeasurementExp.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_MODE', 'PAIRED_END')
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCER_DEVICE', 'IMGAG_ILLUMINA_HISEQ_2500')
        newNGSMeasurementExp.setPropertyValue('Q_ADDITIONAL_INFO', system)
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_TYPE', typesDict[expType])
        newNGSID = '/' + space + '/' + 'NGS' + idGenetics.split('_')[-1] + testSampleCode
        newNGSrunSample = transaction.createNewSample(newNGSID, "Q_NGS_SINGLE_SAMPLE_RUN")
        newNGSrunSample.setParentSampleIdentifiers([sampleIdent])
        newNGSrunSample.setExperiment(newNGSMeasurementExp)

        newNGSSamples[idGenetics] = newNGSID

        # genome and QC values, one per line, without the dict braces
        additionalInfo = '%s: %s\n' % ("Genome", genome)
        for qc in qcValues:
            line = str(qc)
            additionalInfo += '%s\n' % line.replace('{', '').replace('}', '')

        newNGSrunSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
        newNGSrunSample.setPropertyValue('Q_SECONDARY_NAME', idGenetics)

        datasetSample = newNGSrunSample
    return datasetSample
Пример #28
0
def handle_BSA_Run(transaction):
    """Register an incoming BSA run together with its mzML conversion.

    The incoming vendor file is converted to mzML with ``convert_raw`` and
    the result is moved to ``DROPBOX_PATH``. A new ``Q_MS_RUN`` sample with
    the next free run number is then created in the BSA experiment, and
    both the raw file and the mzML file are attached to it.

    Raises ValueError if the extension of the incoming file is not listed
    in ``VENDOR_FORMAT_EXTENSIONS``.
    """
    incoming = transaction.getIncoming()
    name = incoming.getName()
    incomingPath = incoming.getAbsolutePath()

    stem, ext = os.path.splitext(name)

    # The conversion cannot be made part of the transaction, so it runs in
    # a scratch folder that is removed again no matter how the try block
    # ends.
    tmpdir = tempfile.mkdtemp(dir=MZML_TMP)
    try:
        convert = partial(convert_raw,
                          remote_base=REMOTE_BASE,
                          host=MSCONVERT_HOST,
                          timeout=CONVERSION_TIMEOUT,
                          user=MSCONVERT_USER)

        extension = ext.lower()
        if extension not in VENDOR_FORMAT_EXTENSIONS:
            raise ValueError("Invalid incoming file %s" % incomingPath)
        openbis_format_code = VENDOR_FORMAT_EXTENSIONS[extension]

        raw_path = os.path.join(incomingPath, name)
        mzml_path = os.path.join(tmpdir, stem + '.mzML')
        convert(raw_path, mzml_path)

        # Move the converted file out of the scratch folder before the
        # folder is deleted
        mzml_dest = os.path.join(DROPBOX_PATH, os.path.basename(mzml_path))
        os.rename(mzml_path, mzml_dest)
    finally:
        shutil.rmtree(tmpdir)

    # The MS experiment
    msExp = transaction.getExperiment(BSA_MPC_EXPERIMENT_ID)

    #TODO create new ms sample? if so, use normal qbic barcodes?
    msCode = "MS" + BSA_MPC_BARCODE

    # Collect all samples of the BSA project
    search_service = transaction.getSearchService()
    projectCriteria = SearchCriteria()
    projectCriteria.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, BSA_MPC_PROJECT))
    sampleCriteria = SearchCriteria()
    sampleCriteria.addSubCriteria(
        SearchSubCriteria.createExperimentCriteria(projectCriteria))
    foundSamples = search_service.searchForSamples(sampleCriteria)

    # The new run gets a number one above the highest registered one; the
    # run number is the part of the sample code after the last underscore
    run = 1
    for samp in foundSamples:
        if samp.getSampleType() != "Q_MS_RUN":
            continue
        run = max(run, int(samp.getCode().split("_")[-1]) + 1)

    msSampleIdentifier = '/' + BSA_MPC_SPACE + '/' + msCode + "_" + str(run)
    msSample = transaction.createNewSample(msSampleIdentifier, "Q_MS_RUN")
    # The parent sample is always the same for BSA runs
    msSample.setParentSampleIdentifiers([BSA_MPC_SAMPLE_ID])
    msSample.setExperiment(msExp)

    createRawDataSet(transaction, raw_path, msSample, openbis_format_code)
    GZipAndMoveMZMLDataSet(transaction, mzml_dest, msSample)

    # Remove the ".testorig" files left in the incoming folder
    leftovers = [entry for entry in os.listdir(incomingPath)
                 if ".testorig" in entry]
    for entry in leftovers:
        os.remove(os.path.realpath(os.path.join(incomingPath, entry)))
Пример #29
0
def _find_vcf_parent_samples(foundSamples, space, barcode, geneticID, expType):
    """Find the NGS run samples (and their test samples) for one genetics ID.

    A sample counts as the test sample when either
      * its code equals ``barcode`` and its type is ``Q_TEST_SAMPLE``, or
      * ``/<space>/<barcode>`` is one of its parents, its ``Q_SAMPLE_TYPE``
        equals ``typesDict[expType]`` and the short genetics ID (the part of
        ``geneticID`` before the first ``_``) occurs in its
        ``Q_SECONDARY_NAME`` or ``Q_EXTERNALDB_ID``.

    For every such test sample, each ``Q_NGS_SINGLE_SAMPLE_RUN`` child whose
    ``Q_SECONDARY_NAME`` or ``Q_EXTERNALDB_ID`` contains the full
    ``geneticID`` is reported.

    Returns a list of ``(ngsRunIdentifier, testSampleIdentifier)`` tuples in
    the order in which they were found.
    """

    def mentions(text, needle):
        # Property values may be unset (None); treat that as "no match".
        return text is not None and needle in text

    genShortID = geneticID.split('_')[0]
    # qbic identifier from the metadata that came in (probably tissue sample)
    qbicBarcodeID = '/' + space + '/' + barcode

    matches = []
    for samp in foundSamples:
        isTestSample = (barcode == samp.getCode()
                        and samp.getSampleType() == "Q_TEST_SAMPLE")
        if not isTestSample:
            if qbicBarcodeID not in samp.getParentSampleIdentifiers():
                continue
            # typesDict is only consulted for children of the barcode sample
            if samp.getPropertyValue("Q_SAMPLE_TYPE") != typesDict[expType]:
                continue
            if not (mentions(samp.getPropertyValue("Q_SECONDARY_NAME"), genShortID)
                    or mentions(samp.getPropertyValue("Q_EXTERNALDB_ID"), genShortID)):
                continue

        testParentID = samp.getSampleIdentifier()
        # now look for the NGS single sample run attached to that test sample
        for s in foundSamples:
            if testParentID not in s.getParentSampleIdentifiers():
                continue
            if s.getSampleType() != "Q_NGS_SINGLE_SAMPLE_RUN":
                continue
            if (mentions(s.getPropertyValue("Q_SECONDARY_NAME"), geneticID)
                    or mentions(s.getPropertyValue("Q_EXTERNALDB_ID"), geneticID)):
                # every related test sample is needed later for registration
                matches.append((s.getSampleIdentifier(), testParentID))
    return matches


def find_and_register_vcf(transaction, jsonContent, varcode):
    """Create the variant calling experiment and sample for ``varcode``.

    varcode example: GS130715_03-GS130717_03 (verified in startup.log)

    Args:
        transaction: dropbox transaction; used for the search service and to
            create the new experiment and sample.
        jsonContent: parsed metadata. Besides the keys ``"type"`` and
            ``"files"`` every other key (e.g. ``"sample1"``, ``"sample2"``)
            must map to a dict with ``"id_genetics"``, ``"id_qbic"`` and
            ``"tumor"``.
        varcode: genetics ID of a single sample, or two genetics IDs joined
            by ``-``.

    Returns:
        The newly created ``Q_NGS_VARIANT_CALLING`` sample. Its parents are
        the NGS run samples found for the genetics IDs (taken from
        ``newNGSSamples`` when registered there, otherwise searched in the
        project); its code is ``VC`` followed by ``_<code>`` for every
        related test sample.

    Side effects:
        Increments the module-level ``numberOfExperiments`` until an unused
        experiment identifier is found.

    Raises:
        IndexError: if the metadata holds no sample entries or the project
            search returns no samples.
        KeyError: in the single-sample case, if no metadata entry has
            ``varcode`` as its ``id_genetics``.
    """
    global numberOfExperiments

    qbicBarcodes = []
    geneticIDS = []
    sampleSource = []
    varcodekey = ''

    # NOTE(review): the order of geneticIDS follows the dict's key order;
    # the somatic identifier below only equals varcode if that order matches.
    for key in jsonContent.keys():
        if key == "type" or key == "files":
            continue
        # keys: "sample1" and "sample2"
        sampleMeta = jsonContent[key]
        geneticIDS.append(sampleMeta["id_genetics"])  # GS130715_03 and GS130717_03
        qbicBarcodes.append(sampleMeta["id_qbic"])
        sampleSource.append(sampleMeta["tumor"])
        if sampleMeta["id_genetics"] == varcode:
            varcodekey = key

    expType = jsonContent["type"]
    project = qbicBarcodes[0][:5]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(
        SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()

    additionalInfo = ''
    secName = ''
    lookups = []  # (barcode, geneticID) pairs whose NGS runs become parents

    if len(geneticIDS) >= 2:
        # if there is more than one sample we have to concatenate the identifiers
        somaticIdent = '%s-%s' % (geneticIDS[0], geneticIDS[1])
        secName = somaticIdent
        if somaticIdent == varcode:
            for barcode, geneticID, source in zip(qbicBarcodes, geneticIDS, sampleSource):
                additionalInfo += '%s %s Tumor: %s \n' % (barcode, geneticID, source)
            lookups = list(zip(qbicBarcodes, geneticIDS))
    else:
        barcode = jsonContent[varcodekey]["id_qbic"]
        additionalInfo = '%s %s Tumor: %s \n' % (
            barcode, varcode, jsonContent[varcodekey]["tumor"])
        secName += '%s ' % varcode
        lookups = [(barcode, varcode)]

    parentIdentifiers = []
    testParentIdentifiers = []
    for barcode, geneticID in lookups:
        if geneticID in newNGSSamples:
            # the run was registered earlier in this session
            parentIdentifiers.append(newNGSSamples[geneticID])
            testParentIdentifiers.append(oldTestSamples[geneticID])
        else:
            for ngsIdent, testParentID in _find_vcf_parent_samples(
                    foundSamples, space, barcode, geneticID, expType):
                parentIdentifiers.append(ngsIdent)
                testParentIdentifiers.append(testParentID)

    # find the next experiment identifier that is not taken yet
    numberOfExperiments += 1
    existingExperimentIDs = [
        eexp.getExperimentIdentifier()
        for eexp in search_service.listExperiments("/" + space + "/" + project)
    ]
    expIDPrefix = '/' + space + '/' + project + '/' + project + 'E'
    newExpID = expIDPrefix + str(numberOfExperiments)
    while newExpID in existingExperimentIDs:
        numberOfExperiments += 1
        newExpID = expIDPrefix + str(numberOfExperiments)

    newVCExp = transaction.createNewExperiment(newExpID, "Q_NGS_VARIANT_CALLING")

    # sample code: "VC" followed by "_<code>" of every related test sample
    identString2 = ''.join('_' + tpi.split('/')[-1] for tpi in testParentIdentifiers)
    newVCSample = transaction.createNewSample(
        '/' + space + '/' + 'VC' + identString2, "Q_NGS_VARIANT_CALLING")
    newVCSample.setParentSampleIdentifiers(parentIdentifiers)
    newVCSample.setExperiment(newVCExp)

    newVCSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
    newVCSample.setPropertyValue('Q_SECONDARY_NAME', secName.strip())

    return newVCSample
Пример #30
0
def process(transaction):
    """Register an HLA typing result file as a ``Q_NGS_HLATYPING`` data set.

    The barcode is extracted from the incoming file/folder name with the
    module-level ``pattern``; its first 5 characters are the project code and
    its first 10 characters the code of the parent sample.

    Steps:
      1. Resolve the parent sample and its space (by sample code, then by
         project, finally by taking the part of the incoming name before the
         first ``_`` as space).
      2. Create a new ``Q_NGS_HLATYPING`` experiment with an unused identifier.
      3. Read the result file (the incoming file itself, or the last
         ``.alleles`` file found below the incoming folder).
      4. Create the HLA sample ``HLA<1|2><parentCode>`` or fetch it for
         update if it already exists, and store the file content in
         ``Q_HLA_TYPING``.
      5. Attach the result file as a ``Q_NGS_HLATYPING_DATA`` data set.

    Args:
        transaction: the dropbox transaction handed in by the ETL server.

    Raises:
        IndexError: if the incoming name contains no match for ``pattern``.
        ValueError: if the identifier is rejected by ``isExpected``.
        IOError: if the incoming folder contains no ``.alleles`` file.
    """
    # Get the incoming path and name of the transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if not isExpected(identifier):
        # Without a valid identifier neither project nor parent are known.
        raise ValueError(
            "The identifier " + identifier +
            " did not match the pattern Q[A-Z]{4}\\d{3}\\w{2} or checksum")
    project = identifier[:5]
    parentCode = identifier[:10]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, parentCode))
    foundSamples = search_service.searchForSamples(sc)
    if len(foundSamples) > 0:
        parentSampleIdentifier = foundSamples[0].getSampleIdentifier()
        space = foundSamples[0].getSpace()
    else:
        # parent not found by code: derive the space from any project sample
        sc = SearchCriteria()
        pc = SearchCriteria()
        pc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.PROJECT, project))
        sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
        foundSamples = search_service.searchForSamples(sc)
        if len(foundSamples) > 0:
            space = foundSamples[0].getSpace()
        else:
            # no sample found in this project, they are probably not indexed
            # yet. try parsing space from file name instead
            space = name.split("_")[0]
        parentSampleIdentifier = "/" + space + "/" + parentCode
    sa = transaction.getSampleForUpdate(parentSampleIdentifier)

    # register new experiment: start counting after the existing experiments
    # and skip identifiers that are already taken
    existingExperiments = search_service.listExperiments("/" + space + "/" +
                                                         project)
    existingExperimentIDs = [
        eexp.getExperimentIdentifier() for eexp in existingExperiments
    ]
    numberOfExperiments = len(existingExperiments) + 1
    expIDPrefix = '/' + space + '/' + project + '/' + project + 'E'
    newExpID = expIDPrefix + str(numberOfExperiments)
    while newExpID in existingExperimentIDs:
        numberOfExperiments += 1
        newExpID = expIDPrefix + str(numberOfExperiments)

    newHLATypingExperiment = transaction.createNewExperiment(
        newExpID, "Q_NGS_HLATYPING")
    newHLATypingExperiment.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')

    # locate the result file; in a folder the last .alleles file found wins
    if os.path.isdir(incomingPath):
        resultPath = None
        for root, subFolders, files in os.walk(incomingPath):
            for f in files:
                if f.endswith('.alleles'):
                    resultPath = os.path.join(root, f)
        if resultPath is None:
            raise IOError("No .alleles file found in " + incomingPath)
    else:
        resultPath = incomingPath

    # read and close the file before it is moved into the data set
    resultFile = open(resultPath, 'r')
    try:
        resultContent = resultFile.read()
    finally:
        resultFile.close()

    # check for MHC class: content mentioning 'A*' is treated as class I
    if 'A*' in resultContent:
        mhcClass = "MHC_CLASS_I"
        mhcSuffix = "1"
    else:
        mhcClass = "MHC_CLASS_II"
        mhcSuffix = "2"

    # does HLA sample of this class already exist?
    hlaCode = 'HLA' + mhcSuffix + parentCode
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, hlaCode))
    foundSamples = search_service.searchForSamples(sc)
    if len(foundSamples) < 1:
        newHLATypingSample = transaction.createNewSample(
            '/' + space + '/' + hlaCode, "Q_NGS_HLATYPING")
        newHLATypingSample.setParentSampleIdentifiers(
            [sa.getSampleIdentifier()])
        newHLATypingSample.setExperiment(newHLATypingExperiment)
        newHLATypingSample.setPropertyValue("Q_HLA_CLASS", mhcClass)
    else:
        newHLATypingSample = transaction.getSampleForUpdate(
            foundSamples[0].getSampleIdentifier())

    newHLATypingSample.setPropertyValue("Q_HLA_TYPING", resultContent)

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_HLATYPING_DATA")
    dataSet.setMeasuredData(False)
    dataSet.setSample(newHLATypingSample)

    transaction.moveFile(resultPath, dataSet)
Пример #31
0
def _reserve_ngs_experiment_id(search_service, space, project):
    """Return an unused experiment identifier ``/<space>/<project>/<project>E<n>``.

    Increments the module-level ``numberOfExperiments`` once, then keeps
    incrementing it while the resulting identifier already exists in the
    project.
    """
    global numberOfExperiments

    numberOfExperiments += 1
    existingExperimentIDs = [
        eexp.getExperimentIdentifier()
        for eexp in search_service.listExperiments("/" + space + "/" + project)
    ]
    expIDPrefix = '/' + space + '/' + project + '/' + project + 'E'
    newExpID = expIDPrefix + str(numberOfExperiments)
    while newExpID in existingExperimentIDs:
        numberOfExperiments += 1
        newExpID = expIDPrefix + str(numberOfExperiments)
    return newExpID


def find_and_register_ngs(transaction, jsonContent):
    """Register a new NGS measurement experiment and run sample for ``sample1``.

    Args:
        transaction: dropbox transaction; used for searching and for creating
            samples and experiments.
        jsonContent: parsed metadata with ``"type"`` and a ``"sample1"`` dict
            providing ``"genome"``, ``"id_genetics"``, ``"id_qbic"``,
            ``"processing_system"`` and optionally ``"qc"``.

    The test sample is resolved in this order:
      1. an existing project sample that either has the qbic barcode as code
         and type ``Q_TEST_SAMPLE``, or is a child of the barcode sample with
         ``Q_SAMPLE_TYPE == typesDict[type]`` and the short genetics ID in
         ``Q_SECONDARY_NAME`` or ``Q_EXTERNALDB_ID`` (last match wins; it is
         also recorded in ``oldTestSamples``);
      2. a test sample remembered in ``newTestSamples`` for this genetics ID;
      3. a newly created ``Q_TEST_SAMPLE`` (with its own
         ``Q_SAMPLE_PREPARATION`` experiment) below the sample whose code is
         the qbic barcode.

    Returns:
        The newly created ``Q_NGS_SINGLE_SAMPLE_RUN`` sample; its identifier
        is also stored in ``newNGSSamples[id_genetics]``.

    Raises:
        IndexError: if the project search returns no samples.
        ValueError: if no test sample could be found or created.
    """
    sampleMeta = jsonContent["sample1"]
    if "qc" in sampleMeta:
        qcValues = sampleMeta["qc"]
    else:
        qcValues = []
    genome = sampleMeta["genome"]
    idGenetics = sampleMeta["id_genetics"]
    qbicBarcode = sampleMeta["id_qbic"]
    system = sampleMeta["processing_system"]
    expType = jsonContent["type"]

    project = qbicBarcode[:5]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)

    datasetSample = None
    sampleFound = False
    sampleIdent = None
    space = foundSamples[0].getSpace()
    testSampleCode = None

    genShortID = idGenetics.split('_')[0]

    for samp in foundSamples:
        qbicBarcodeID = '/' + samp.getSpace() + '/' + qbicBarcode
        code = samp.getCode()
        sType = samp.getSampleType()
        parentIDs = samp.getParentSampleIdentifiers()
        analyte = samp.getPropertyValue("Q_SAMPLE_TYPE")
        curSecName = samp.getPropertyValue("Q_SECONDARY_NAME")
        extID = samp.getPropertyValue("Q_EXTERNALDB_ID")

        # we are looking for either the test sample with this barcode
        isTestSampleWithBarcode = (qbicBarcode == code
                                   and sType == "Q_TEST_SAMPLE")
        # OR a test sample with parent with this barcode
        correctParent = qbicBarcodeID in parentIDs
        # AND the right analyte (e.g. DNA)
        correctAnalyte = (analyte is not None
                          and analyte == typesDict[expType])
        # AND the short genetics ID in secondary name OR external ID
        hasGeneticsID = (
            (curSecName is not None and genShortID in curSecName)
            or (extID is not None and genShortID in extID))
        if isTestSampleWithBarcode or (correctParent and correctAnalyte
                                       and hasGeneticsID):
            sampleIdent = samp.getSampleIdentifier()
            testSampleCode = samp.getCode()
            oldTestSamples[idGenetics] = sampleIdent

    if not sampleIdent:
        if idGenetics in newTestSamples:
            # A test sample for this genetics ID was already created in this
            # session; reuse it. The stored identifier is
            # '/<space>/<code>', so the code is its last path element.
            sampleIdent = newTestSamples[idGenetics]
            testSampleCode = sampleIdent.split('/')[-1]
        else:
            for samp in foundSamples:
                if qbicBarcode != samp.getCode():
                    continue
                testSampleCode = createNewBarcode(project, transaction)

                sampleIdent = '/' + space + '/' + testSampleCode
                testSample = transaction.createNewSample(
                    sampleIdent, "Q_TEST_SAMPLE")
                testSample.setParentSampleIdentifiers(
                    [samp.getSampleIdentifier()])
                testSample.setPropertyValue('Q_SECONDARY_NAME', genShortID)
                testSample.setPropertyValue('Q_SAMPLE_TYPE',
                                            typesDict[expType])

                newTestSampleExperiment = transaction.createNewExperiment(
                    _reserve_ngs_experiment_id(search_service, space,
                                               project),
                    "Q_SAMPLE_PREPARATION")
                testSample.setExperiment(newTestSampleExperiment)
                newTestSamples[idGenetics] = sampleIdent

    if not sampleIdent:
        # Nothing to attach the run to; fail with a clear message instead of
        # a TypeError when building the run identifier below.
        raise ValueError(
            "No test sample found or created for %s (genetics ID %s)" %
            (qbicBarcode, idGenetics))

    for s in foundSamples:
        # There is already a registered NGS run
        if ((s.getSampleType() == "Q_NGS_SINGLE_SAMPLE_RUN")
                and (sampleIdent in s.getParentSampleIdentifiers())
                and (s.getPropertyValue("Q_SECONDARY_NAME") == idGenetics)):
            sa = transaction.getSampleForUpdate(s.getSampleIdentifier())
            sa.setPropertyValue("Q_SECONDARY_NAME", idGenetics)

            datasetSample = sa
            sampleFound = False  # TODO this negates this block, it should be true ONLY IF the found sample has no data attached (for each new ngs run a new sample is created)

    if not sampleFound:
        # register new experiment and sample
        newNGSMeasurementExp = transaction.createNewExperiment(
            _reserve_ngs_experiment_id(search_service, space, project),
            "Q_NGS_MEASUREMENT")
        newNGSMeasurementExp.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_MODE',
                                              'PAIRED_END')
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCER_DEVICE',
                                              'IMGAG_ILLUMINA_HISEQ_2500')
        newNGSMeasurementExp.setPropertyValue('Q_ADDITIONAL_INFO', system)
        newNGSMeasurementExp.setPropertyValue('Q_SEQUENCING_TYPE',
                                              typesDict[expType])

        # first candidate uses the suffix of the genetics ID; on a clash a
        # zero-padded counter (starting from "01", so "02" is tried first)
        # is used instead
        newNGSID = '/' + space + '/' + 'NGS' + idGenetics.split(
            '_')[-1] + testSampleCode
        freeID = "01"
        existingSampleIDs = [s.getSampleIdentifier() for s in foundSamples]

        found = False
        while newNGSID in existingSampleIDs or found:
            existingSampleIDs.append(newNGSID)
            freeID = str(int(freeID) + 1).zfill(len(freeID))
            newNGSID = '/' + space + '/' + 'NGS' + freeID + testSampleCode
            # also check samples the search result does not contain
            found = transaction.getSampleForUpdate(newNGSID)

        newNGSrunSample = transaction.createNewSample(
            newNGSID, "Q_NGS_SINGLE_SAMPLE_RUN")
        newNGSrunSample.setParentSampleIdentifiers([sampleIdent])
        newNGSrunSample.setExperiment(newNGSMeasurementExp)

        newNGSSamples[idGenetics] = newNGSID

        # genome line followed by one line per QC entry (braces stripped)
        additionalInfo = '%s: %s\n' % ("Genome", genome)
        for qc in qcValues:
            line = str(qc)
            additionalInfo += '%s\n' % line.replace('{', '').replace('}', '')

        newNGSrunSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
        newNGSrunSample.setPropertyValue('Q_SECONDARY_NAME', idGenetics)

        datasetSample = newNGSrunSample
    return datasetSample
Пример #32
0
def process(transaction):
    """Register the fastq and vcf files of an incoming NGS folder.

    The project code is the first 5 characters of the barcode extracted from
    the incoming name with the module-level ``pattern``. The module-level
    ``numberOfExperiments`` is initialised with the number of experiments
    currently listed for the project.

    The folder ``<incoming>/<name>`` must contain a file ending in
    ``metadata``; its ``"files"`` list is split into fastq, vcf and GSvar
    files. Then:
      * if there are fastq files, they are moved into a ``raw`` subfolder and
        registered as one ``Q_NGS_RAW_DATA`` data set on the sample returned
        by ``find_and_register_ngs``;
      * if there are vcf files, they (plus GSvar files sharing the same base
        name, ignoring ``_vc_strelka``) are moved into a ``vcf`` subfolder
        and registered as one ``Q_NGS_VARIANT_CALLING_DATA`` data set on the
        sample returned by ``find_and_register_vcf``.

    Args:
        transaction: the dropbox transaction handed in by the ETL server.

    Raises:
        IndexError: if the incoming name contains no match for ``pattern`` or
            the project search returns no samples.
        ValueError: if no metadata file is found.
    """
    global numberOfExperiments

    # Get the incoming path and the name of the incoming folder
    incomingPath = transaction.getIncoming().getAbsolutePath()
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if not isExpected(identifier):
        print("The identifier " + identifier + " did not match the pattern Q[A-Z]{4}\\d{3}\\w{2} or checksum")

    project = identifier[:5]
    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)
    space = foundSamples[0].getSpace()
    numberOfExperiments = len(
        search_service.listExperiments("/" + space + "/" + project))

    # the marker file must not end up in a data set
    src = os.path.realpath(os.path.join(incomingPath, 'source_dropbox.txt'))
    if os.path.isfile(src):
        os.remove(src)

    print("start registration")
    folder = os.path.join(incomingPath, name)
    jsonContent = None
    vcfs = []
    fastqs = []
    gsvars = []
    for f in os.listdir(folder):
        if not f.endswith('metadata'):
            continue
        # if several metadata files exist, the last one listed wins
        jsonContent = parse_metadata_file(
            os.path.realpath(os.path.join(folder, f)))
        rawFiles = jsonContent["files"]
        vcfs = []
        fastqs = []
        gsvars = []
        print("metadata read")
        for rawFile in rawFiles:
            print(rawFile)
            if rawFile.endswith("vcf") or rawFile.endswith("vcf.gz"):
                vcfs.append(rawFile)
            if rawFile.endswith("fastq") or rawFile.endswith("fastq.gz"):
                fastqs.append(rawFile)
            if rawFile.endswith("GSvar") or rawFile.endswith("GSvar.gz"):
                gsvars.append(rawFile)

    if jsonContent is None:
        raise ValueError("No metadata file found in " + folder)

    if len(fastqs) > 0:
        fastqSample = find_and_register_ngs(transaction, jsonContent)
        fastqDataSet = transaction.createNewDataSet("Q_NGS_RAW_DATA")
        fastqDataSet.setSample(fastqSample)
        fastqFolder = os.path.join(folder, "raw")
        os.mkdir(fastqFolder)
        for f in fastqs:
            os.rename(os.path.join(folder, f), os.path.join(fastqFolder, f))
        transaction.moveFile(fastqFolder, fastqDataSet)

    # All vcf files go into a single data set, so this runs once; repeating
    # it per file would fail on the second pass because the files (and the
    # folder) have already been handled.
    if len(vcfs) > 0:
        # NOTE(review): called with two arguments as before - confirm the
        # find_and_register_vcf in scope does not require a varcode argument.
        vcfSample = find_and_register_vcf(transaction, jsonContent)
        vcfDataSet = transaction.createNewDataSet("Q_NGS_VARIANT_CALLING_DATA")
        vcfDataSet.setSample(vcfSample)
        vcfFolder = os.path.join(folder, "vcf")
        os.mkdir(vcfFolder)
        for f in vcfs:
            os.rename(os.path.join(folder, f), os.path.join(vcfFolder, f))
            vcfBaseName = f.split('.')[0].replace('_vc_strelka', '')
            for g in gsvars:
                # GSvar files travel with the vcf of the same base name
                if vcfBaseName == g.split('.')[0]:
                    os.rename(os.path.join(folder, g),
                              os.path.join(vcfFolder, g))
        transaction.moveFile(vcfFolder, vcfDataSet)
Пример #33
0
def find_and_register_vcf(
    transaction, jsonContent, varcode
):  #varcode example: GS130715_03-GS130717_03 (verified in startup.log)
    qbicBarcodes = []
    geneticIDS = []
    sampleSource = []

    varcodekey = ''

    for key in jsonContent.keys():
        if key == "type" or key == "files":
            pass
        else:  #keys: "sample1" and "sample2"
            geneticIDS.append(
                jsonContent[key]["id_genetics"])  #GS130715_03 and GS130717_03
            qbicBarcodes.append(jsonContent[key]["id_qbic"])
            sampleSource.append(jsonContent[key]["tumor"])
            if jsonContent[key]["id_genetics"] == varcode:
                varcodekey = key

    # if a folder has to be registered containing somatic variant calls and germline calls
    if '-' not in varcode:
        geneticIDS = [varcode]

    expType = jsonContent["type"]
    project = qbicBarcodes[0][:5]
    search_service = transaction.getSearchService()

    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))

    foundSamples = search_service.searchForSamples(sc)
    space = get_space_from_project(transaction, project)

    datasetSample = None
    sampleFound = False

    parentIdentifiers = []
    testParentIdentifiers = []

    global numberOfExperiments
    additionalInfo = ''
    secName = ''

    if len(geneticIDS) >= 2:
        somaticIdent = '%s-%s' % (
            geneticIDS[0], geneticIDS[1]
        )  # if there is more than one sample we have to concatenate the identifiers
        secName = somaticIdent
        if somaticIdent == varcode:
            for i, parentBarcode in enumerate(qbicBarcodes):
                additionalInfo += '%s %s Tumor: %s \n' % (
                    qbicBarcodes[i], geneticIDS[i], sampleSource[i])
            for barcode, geneticID in zip(qbicBarcodes, geneticIDS):
                genShortID = geneticID.split('_')[0]
                if geneticID in newNGSSamples:
                    parentIdentifiers.append(newNGSSamples[geneticID])
                    testParentIdentifiers.append(oldTestSamples[geneticID])
                else:
                    for samp in foundSamples:
                        #some short variables to clean up the long if case
                        code = samp.getCode()
                        sType = samp.getSampleType()
                        qbicBarcodeID = '/' + space + '/' + barcode  # qbic identifier from the metadata that came in (probably tissue sample)
                        parentIDs = samp.getParentSampleIdentifiers()
                        analyte = samp.getPropertyValue("Q_SAMPLE_TYPE")
                        curSecName = samp.getPropertyValue("Q_SECONDARY_NAME")
                        extID = samp.getPropertyValue("Q_EXTERNALDB_ID")
                        # we are looking for either the test sample with this barcode OR a test sample with parent with this barcode, the right analyte (e.g. DNA) and the short genetics ID in secondary name or external ID

                        if ((barcode == code) and
                            (sType == "Q_TEST_SAMPLE")) or (
                                (qbicBarcodeID in parentIDs) and
                                (analyte == typesDict[expType]) and
                                (((curSecName != None) and
                                  (genShortID in curSecName)) or
                                 ((extID != None) and (genShortID in extID)))):
                            testParentID = samp.getSampleIdentifier()
                            print(testParentID)
                            # this time we are looking for the NGS Single Sample run attached to the test sample we just found
                            for s in foundSamples:
                                new_code = s.getCode()
                                sampleType = s.getSampleType()
                                curSecName = s.getPropertyValue(
                                    "Q_SECONDARY_NAME")
                                extDB = s.getPropertyValue("Q_EXTERNALDB_ID")

                                if (testParentID
                                        in s.getParentSampleIdentifiers()
                                    ) and (sampleType
                                           == "Q_NGS_SINGLE_SAMPLE_RUN") and (
                                               ((curSecName != None) and
                                                (geneticID in curSecName)) or
                                               ((extDB != None) and
                                                (geneticID in extDB))):
                                    sampleIdent = s.getSampleIdentifier()
                                    parentIdentifiers.append(sampleIdent)
                                    testParentIdentifiers.append(
                                        testParentID
                                    )  # if we found the right one, we append it for later, as every related test sample is needed for registration
                                    print('FOUND IT')
                                    print(sampleIdent)
                                    print(testParentID)
                                    print(testParentIdentifiers)
    else:
        geneticID = varcode
        genShortID = geneticID.split('_')[0]
        barcode = jsonContent[varcodekey]["id_qbic"]
        additionalInfo = '%s %s Tumor: %s \n' % (
            barcode, geneticID, jsonContent[varcodekey]["tumor"])
        secName += '%s ' % geneticID
        if geneticID in newNGSSamples:
            parentIdentifiers.append(newNGSSamples[geneticID])
            testParentIdentifiers.append(oldTestSamples[geneticID])
        else:
            print('I am scanning for samples now')
            for barcode in qbicBarcodes:
                print(barcode + "is in " + str(qbicBarcodes))
                for samp in foundSamples:
                    #some short variables to clean up the long if case
                    code = samp.getCode()
                    sType = samp.getSampleType()
                    qbicBarcodeID = '/' + space + '/' + barcode  # qbic identifier from the metadata that came in (probably tissue sample)
                    parentIDs = samp.getParentSampleIdentifiers()
                    analyte = samp.getPropertyValue("Q_SAMPLE_TYPE")
                    curSecName = samp.getPropertyValue("Q_SECONDARY_NAME")
                    extID = samp.getPropertyValue("Q_EXTERNALDB_ID")

                    # we are looking for either the test sample with this barcode OR a test sample with parent with this barcode, the right analyte (e.g. DNA) and the short genetics ID in secondary name or external ID
                    if ((barcode == code) and (sType == "Q_TEST_SAMPLE")) or (
                        (qbicBarcodeID in parentIDs) and
                        (analyte == typesDict[expType]) and
                        (((curSecName != None) and
                          (genShortID in curSecName)) or
                         ((extID != None) and (genShortID in extID)))):
                        testParentID = samp.getSampleIdentifier()
                        for s in foundSamples:
                            new_code = s.getCode()
                            sampleType = s.getSampleType()
                            curSecName = s.getPropertyValue("Q_SECONDARY_NAME")
                            extDB = s.getPropertyValue("Q_EXTERNALDB_ID")

                            if (testParentID in s.getParentSampleIdentifiers()
                                ) and (sampleType
                                       == "Q_NGS_SINGLE_SAMPLE_RUN") and (
                                           ((curSecName != None) and
                                            (geneticID in curSecName)) or
                                           ((extDB != None) and
                                            (geneticID in extDB))):
                                sampleIdent = s.getSampleIdentifier()
                                parentIdentifiers.append(sampleIdent)
                                testParentIdentifiers.append(testParentID)
                                print('FOUND IT')
                                print(sampleIdent)
                                print(testParentID)
                                print(testParentIdentifiers)

    numberOfExperiments += 1
    existingExperimentIDs = []
    existingExperiments = search_service.listExperiments("/" + space + "/" +
                                                         project)

    for eexp in existingExperiments:
        existingExperimentIDs.append(eexp.getExperimentIdentifier())

    newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(
        numberOfExperiments)

    while newExpID in existingExperimentIDs:
        numberOfExperiments += 1
        newExpID = '/' + space + '/' + project + '/' + project + 'E' + str(
            numberOfExperiments)

    newVCExp = transaction.createNewExperiment(newExpID,
                                               "Q_NGS_VARIANT_CALLING")
    identString = varcode  # not used atm
    #for genID in geneticIDS:
    #	identString += genID.split('_')[-1]

    identString2 = ''
    print(testParentIdentifiers)
    identString2 = '_'.join(
        [tpi.split('/')[-1] for tpi in testParentIdentifiers])

    print('identstring ' + identString2)

    existingSampleIDs = []

    for s in foundSamples:
        existingSampleIDs.append(s.getSampleIdentifier())

    found = False
    freeID = "01"  #varcode.split('_')[-1]""
    newVCFID = '/' + space + '/' + 'VC' + freeID + identString2
    while newVCFID in existingSampleIDs or found:
        existingSampleIDs.append(newVCFID)
        freeID = str(int(freeID) + 1).zfill(len(freeID))
        print('new id test: ' + newVCFID)
        newVCFID = '/' + space + '/' + 'VC' + freeID + identString2
        found = transaction.getSampleForUpdate(newVCFID)

    newVCSample = transaction.createNewSample(newVCFID,
                                              "Q_NGS_VARIANT_CALLING")
    newVCSample.setParentSampleIdentifiers(parentIdentifiers)
    newVCSample.setExperiment(newVCExp)

    #additionalInfo = ""
    #secName = ""
    #for i, parentBarcode in enumerate(qbicBarcodes):
    #		additionalInfo += '%s %s Tumor: %s \n' % (qbicBarcodes[i], geneticIDS[i], sampleSource[i])
    #		secName += '%s ' % (geneticIDS[i])

    secName = secName.strip()
    #additionalInfo = '%s %s Tumor: %s \n %s %s Tumor: %s' % (qbicBarcodes[0], geneticIDS[0], sampleSource[0], qbicBarcodes[1], geneticIDS[1], sampleSource[1])

    newVCSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)
    #secName = '%s-%s' % (geneticIDS[0], geneticIDS[1])
    newVCSample.setPropertyValue('Q_SECONDARY_NAME', secName)

    datasetSample = newVCSample
    return datasetSample
Пример #34
0
def process(transaction):
    """Register the incoming folder as a ``Q_NGS_RAW_DATA`` dataset.

    The sample code is taken from the first match of the module-level
    ``pattern`` in the incoming item's name and checked with ``isExpected``.
    The dataset is then attached to the first of the following that applies:

    * the sample carrying that code, if its type is
      ``Q_NGS_SINGLE_SAMPLE_RUN``;
    * an existing sample with the code ``NGS<code>``;
    * a newly created ``NGS<code>`` sample (child of the sample carrying the
      code) placed in a newly created ``Q_NGS_MEASUREMENT`` experiment.

    Args:
        transaction: Registration transaction supplying the incoming item,
            the search service and the create/update calls used below
            (presumably an openBIS dropbox transaction -- confirm).

    Raises:
        ValueError: If the project code is required to locate or create the
            NGS sample but the identifier did not pass ``isExpected``.

    Indexing errors propagate unchanged when the name contains no match for
    ``pattern`` or when a sample search returns no result.
    """
    # Path and name of the incoming item of this transaction
    incomingPath = transaction.getIncoming().getAbsolutePath()
    name = transaction.getIncoming().getName()

    identifier = pattern.findall(name)[0]
    if isExpected(identifier):
        project = identifier[:5]
    else:
        # Registration may still succeed if the project code is never
        # needed, so only warn here and fail later at the point of use.
        project = None
        print("The identifier " + identifier +
              " did not match the pattern Q[A-Z]{4}\\d{3}\\w{2} or checksum")
    missingProjectMessage = (
        "Cannot derive the project code: identifier " + identifier +
        " did not pass validation")

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    sc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.CODE, identifier))
    foundSamples = search_service.searchForSamples(sc)

    sampleIdentifier = foundSamples[0].getSampleIdentifier()
    space = foundSamples[0].getSpace()
    sa = transaction.getSampleForUpdate(sampleIdentifier)

    sampleType = "Q_NGS_SINGLE_SAMPLE_RUN"
    if sa.getSampleType() != sampleType:
        ngsCode = "NGS" + identifier

        # Look for an already registered NGS sample with the prefixed code
        sc = SearchCriteria()
        sc.addMatchClause(
            SearchCriteria.MatchClause.createAttributeMatch(
                SearchCriteria.MatchClauseAttribute.CODE, ngsCode))
        foundSamples = search_service.searchForSamples(sc)
        if len(foundSamples) > 0:
            sampleIdentifier = foundSamples[0].getSampleIdentifier()
        else:
            if project is None:
                raise ValueError(missingProjectMessage)
            # Take the space from any sample of the project and build the
            # identifier the NGS sample would have.
            sc = SearchCriteria()
            pc = SearchCriteria()
            pc.addMatchClause(
                SearchCriteria.MatchClause.createAttributeMatch(
                    SearchCriteria.MatchClauseAttribute.PROJECT, project))
            sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
            foundSamples = search_service.searchForSamples(sc)
            space = foundSamples[0].getSpace()
            sampleIdentifier = "/" + space + "/" + ngsCode

        existingSample = transaction.getSampleForUpdate(sampleIdentifier)
        if existingSample:
            sa = existingSample
        else:
            if project is None:
                raise ValueError(missingProjectMessage)
            # create NGS-specific experiment/sample and
            # attach to the test sample
            expType = "Q_NGS_MEASUREMENT"
            projectPath = "/" + space + "/" + project
            experiments = search_service.listExperiments(projectPath)
            experimentIDs = [
                exp.getExperimentIdentifier() for exp in experiments
            ]

            # First candidate is E<count + 1>; keep counting up until the
            # identifier is not taken. Also works for an empty project.
            expNum = len(experiments) + 1
            expID = projectPath + '/' + project + 'E' + str(expNum)
            while expID in experimentIDs:
                expNum += 1
                expID = projectPath + '/' + project + 'E' + str(expNum)

            ngsExperiment = transaction.createNewExperiment(expID, expType)
            ngsExperiment.setPropertyValue(
                'Q_SEQUENCER_DEVICE',
                "UNSPECIFIED_ILLUMINA_HISEQ_2500")  #change this
            ngsSample = transaction.createNewSample(
                '/' + space + '/' + ngsCode, sampleType)
            ngsSample.setParentSampleIdentifiers([sa.getSampleIdentifier()])
            ngsSample.setExperiment(ngsExperiment)
            sa = ngsSample

    # create new dataset
    dataSet = transaction.createNewDataSet("Q_NGS_RAW_DATA")
    dataSet.setMeasuredData(False)
    dataSet.setSample(sa)

    for f in os.listdir(incomingPath):
        if ".testorig" in f:
            os.remove(os.path.realpath(os.path.join(incomingPath, f)))
        if ".origlabfilename" in f:
            nameFile = open(os.path.join(incomingPath, f))
            try:
                # NOTE(review): origName is read but not used anywhere in
                # this function -- confirm whether it should be stored.
                origName = nameFile.readline().strip()
            finally:
                # Close the handle even if reading fails
                nameFile.close()
    transaction.moveFile(incomingPath, dataSet)
Пример #35
0
def find_and_register_vcf(transaction, jsonContent):
    """Create the variant-calling experiment and sample for a VCF delivery.

    Every key of ``jsonContent`` other than ``"type"`` and ``"files"`` is
    read as one sample entry with the fields ``"id_genetics"``, ``"id_qbic"``
    and ``"tumor"``. For each entry, the project's samples are scanned for a
    sample whose parents include ``/<space>/<id_qbic>``; among that sample's
    children, every ``Q_NGS_SINGLE_SAMPLE_RUN`` whose ``Q_SECONDARY_NAME`` is
    contained in the entry's genetics id becomes a parent of the new
    ``Q_NGS_VARIANT_CALLING`` sample.

    Args:
        transaction: Registration transaction supplying the search service
            and the experiment/sample creation calls (presumably an openBIS
            dropbox transaction -- confirm).
        jsonContent: Mapping with the layout described above.

    Returns:
        The newly created variant-calling sample.

    Raises:
        ValueError: If the project search returns no samples, so that no
            space can be determined.
    """
    qbicBarcodes = []
    geneticIDS = []
    sampleSource = []
    for key in jsonContent:
        if key in ("type", "files"):
            continue
        entry = jsonContent[key]
        geneticIDS.append(entry["id_genetics"])
        qbicBarcodes.append(entry["id_qbic"])
        sampleSource.append(entry["tumor"])

    # The project code is the first five characters of a barcode
    project = qbicBarcodes[0][:5]

    search_service = transaction.getSearchService()
    sc = SearchCriteria()
    pc = SearchCriteria()
    pc.addMatchClause(
        SearchCriteria.MatchClause.createAttributeMatch(
            SearchCriteria.MatchClauseAttribute.PROJECT, project))
    sc.addSubCriteria(SearchSubCriteria.createExperimentCriteria(pc))
    foundSamples = search_service.searchForSamples(sc)

    # The space is taken from the scanned samples; without any sample the
    # identifiers below cannot be built.
    if len(foundSamples) == 0:
        raise ValueError("No samples found for project " + project)

    parentIdentifiers = []
    testParentIdentifiers = []

    for barcode, geneticID in zip(qbicBarcodes, geneticIDS):
        for samp in foundSamples:
            space = samp.getSpace()
            qbicBarcodeID = '/' + space + '/' + barcode
            print(qbicBarcodeID)
            print(geneticID)
            if qbicBarcodeID not in samp.getParentSampleIdentifiers():
                continue

            testParentID = samp.getSampleIdentifier()
            for s in foundSamples:
                sampleType = s.getSampleType()
                parentIDs = s.getParentSampleIdentifiers()
                secondaryName = s.getPropertyValue("Q_SECONDARY_NAME")
                print(sampleType)
                print(testParentID)
                print(parentIDs)
                print(secondaryName)
                print(geneticID)
                # A sample without secondary name can never match; testing
                # ``None in geneticID`` would raise a TypeError.
                if (testParentID in parentIDs
                        and sampleType == "Q_NGS_SINGLE_SAMPLE_RUN"
                        and secondaryName is not None
                        and secondaryName in geneticID):
                    parentIdentifiers.append(s.getSampleIdentifier())
                    testParentIdentifiers.append(testParentID)

    # Start at E<count + 1> and count up until the identifier is free, so
    # that gaps in the existing numbering cannot cause a collision.
    projectPath = '/' + space + '/' + project
    existingExperimentIDs = [
        exp.getExperimentIdentifier()
        for exp in search_service.listExperiments(projectPath)
    ]
    numberOfExperiments = len(existingExperimentIDs) + 1
    newExpID = projectPath + '/' + project + 'E' + str(numberOfExperiments)
    while newExpID in existingExperimentIDs:
        numberOfExperiments += 1
        newExpID = projectPath + '/' + project + 'E' + str(
            numberOfExperiments)

    newVCExp = transaction.createNewExperiment(newExpID,
                                               "Q_NGS_VARIANT_CALLING")
    newVCExp.setPropertyValue('Q_CURRENT_STATUS', 'FINISHED')

    # Sample code: "VC" + codes of the matched test samples + the last
    # "_"-separated part of every genetics id
    identString = ''.join([genID.split('_')[-1] for genID in geneticIDS])
    identString2 = ''.join(
        [tpi.split('/')[-1] for tpi in testParentIdentifiers])

    newVCSample = transaction.createNewSample(
        '/' + space + '/' + 'VC' + identString2 + identString,
        "Q_NGS_VARIANT_CALLING")
    newVCSample.setParentSampleIdentifiers(parentIdentifiers)
    newVCSample.setExperiment(newVCExp)

    # One "<barcode> <genetics id> Tumor: <source>" part per entry; for two
    # entries this yields exactly the former fixed two-entry text.
    additionalInfo = ' \n '.join([
        '%s %s Tumor: %s' % (barcode, geneticID, source)
        for barcode, geneticID, source in zip(qbicBarcodes, geneticIDS,
                                              sampleSource)
    ])
    newVCSample.setPropertyValue('Q_ADDITIONAL_INFO', additionalInfo)

    secName = '-'.join(geneticIDS)
    newVCSample.setPropertyValue('Q_SECONDARY_NAME', secName)

    return newVCSample