Exemplo n.º 1
0
def process_overhangs(repo, project, overhangs_file, instanceid, authorid,
                      date):
    """Load an overhangs manifest into ``repo`` as a new collection.

    The first line of ``overhangs_file`` is handed to
    ``validate_input_file``; a file with no further lines is ignored.
    Otherwise a collection named ``'overhang-' + instanceid`` is appended to
    ``repo['collections']`` and registered inside the project's collection.
    Every following line that contains a comma produces one ``'overhang'``
    feature (name from the first column, lower-cased sequence from the
    second) which is added to the new collection.

    Args:
        repo: Repository mapping; ``repo['collections']`` must be a list.
        project: Mapping providing the parent collection id under
            ``'idcollection'``.
        overhangs_file: Path of the comma-separated overhangs manifest.
        instanceid: Identifier used in the collection name and description.
        authorid: Author recorded on the collection and its memberships.
        date: Creation date recorded on everything that is created.
    """
    with open(overhangs_file) as file:
        inputlines = file.readlines()

    validate_input_file(inputlines[0], overhangs_file)

    # Only the header is present: nothing to import.
    if len(inputlines) <= 1:
        return

    collectionid = uuid.uuid4()
    collection = {
        'authorid': authorid,
        'datecreated': date,
        'description': 'Overhangs described in ' + instanceid,
        'name': 'overhang-' + instanceid,
        'idcollection': collectionid,
    }
    repo['collections'].append(collection)

    # Container id first, member id second -- the argument order used by
    # every other add_object_to_collection call: the new collection becomes
    # a member of the project's collection, not the other way round.
    repository.add_object_to_collection(repo, project['idcollection'],
                                        collectionid, 'COLLECTION',
                                        authorid, date)

    # Skip the already-validated header line so it is never imported as an
    # overhang.
    for line in inputlines[1:]:
        if ',' not in line:
            continue
        # Strip each token: readlines() keeps the trailing newline, which
        # would otherwise end up inside the stored sequence.
        tokens = [token.strip() for token in line.split(',')]
        featurename = 'overhang-' + tokens[0]
        featuresequence = tokens[1].lower()
        feature = repository.create_feature(repo, featurename,
                                            featuresequence, 'overhang',
                                            date)
        # create_feature returns the feature mapping; the collection
        # membership is keyed on its id.
        repository.add_object_to_collection(repo, collectionid,
                                            feature['idfeature'], 'FEATURE',
                                            authorid, date)
Exemplo n.º 2
0
def persist_part_feature(repo, vectorname, part, familyname, authorid, date):
    """Create the feature for a part's insert and annotate the part with it.

    The part sequence is stripped and lower-cased, then trimmed by the
    lengths of the five-prime and three-prime overhang sequences obtained
    from ``get_nucseq_annotations(repo, vectorname)``. The remaining
    sub-sequence is stored as a feature named ``'Feature-' + part['name']``
    in family ``familyname`` and attached to ``part['nucseq']`` at the
    offset where the sub-sequence starts.

    Args:
        repo: Repository mapping passed through to the ``repository`` helpers.
        vectorname: Name of the vector whose overhang annotations are used.
        part: Mapping with ``'name'`` and ``'nucseq'`` (which holds
            ``'sequence'``).
        familyname: Feature family for the created feature.
        authorid: Author recorded on the annotation.
        date: Creation date recorded on the feature and the annotation.
    """
    _, fiveprimeoverhang, threeprimeoverhang = get_nucseq_annotations(
        repo, vectorname)

    fiveprimeseq = fiveprimeoverhang['feature']['nucseq']['sequence'].strip()
    threeprimeseq = (
        threeprimeoverhang['feature']['nucseq']['sequence'].strip())

    partseq = part['nucseq']['sequence'].strip().lower()
    start = len(fiveprimeseq)
    end = len(partseq) - len(threeprimeseq)
    partfeatureseq = partseq[start:end]

    featurename = 'Feature-' + part['name']
    partfeature = repository.create_feature(repo, featurename,
                                            partfeatureseq, familyname, date)

    # Annotate at the same offset that was used for slicing. The unstripped
    # overhang length could differ from `start` when the stored sequence
    # carries surrounding whitespace.
    repository.add_feature_to_nucseq(repo, featurename, part['nucseq'],
                                     partfeature, start, authorid, date)
Exemplo n.º 3
0
def process_vectors(repo, project, vectorsfiles, directories, instanceid,
                    authorid, date):
    """Import the vectors listed in each manifest file into ``repo``.

    For every manifest the first line is handed to ``validate_input_file``
    and a manifest with no further lines is skipped. Otherwise a new
    collection is appended to ``repo['collections']`` and registered inside
    the project's collection. Each data row needs at least five non-empty,
    comma-separated tokens: vector file name, vector name, resistance name,
    five-prime overhang name and three-prime overhang name; an optional
    sixth token is used as the description.

    For each row the vector file is looked up in ``directories`` and read
    with ``read_genbank_file``. The vector and its nucleotide sequence are
    appended to ``repo['vectors']`` and ``repo['nucseq']``, and the two
    named overhang features are annotated on the sequence. A resistance
    feature with the given name is annotated if one exists; otherwise one is
    created from the sequence that follows the three-prime overhang. Finally
    the vector is added to the new collection.

    Args:
        repo: Repository mapping with ``'collections'``, ``'nucseq'`` and
            ``'vectors'`` lists.
        project: Mapping providing the parent collection id under
            ``'idcollection'``.
        vectorsfiles: Iterable of manifest file paths.
        directories: Candidate directories holding the vector files; entries
            that are not existing directories are ignored.
        instanceid: Identifier used in the collection description.
        authorid: Author recorded on created objects.
        date: Creation date recorded on created objects.

    Raises:
        ValueError: If a row has fewer than five tokens, its vector file is
            not found in any directory, one of its overhangs is not a known
            ``'overhang'`` feature, or the three-prime overhang cannot be
            located in the vector when a resistance feature has to be
            derived.
    """
    # Loop-invariant: only existing directories can contain vector files.
    directories = [d for d in directories if os.path.isdir(d)]

    for vectorsfile in vectorsfiles:
        with open(vectorsfile) as file:
            lines = file.readlines()
        validate_input_file(lines[0], vectorsfile)

        # Only the header is present: nothing to import from this manifest.
        if len(lines) <= 1:
            continue

        vectors = {}
        vectors['authorid'] = authorid
        vectors['datecreated'] = date
        vectors['description'] = 'Vectors described in ' + instanceid
        collectionid = uuid.uuid4()
        vectors['idcollection'] = collectionid
        repo['collections'].append(vectors)
        repository.add_object_to_collection(repo, project['idcollection'],
                                            vectors['idcollection'],
                                            'COLLECTION', authorid, date)

        # Start at 2 so that `lineno` is the 1-based line number in the
        # file (line 1 is the header).
        for lineno, line in enumerate(lines[1:], start=2):
            tokens = line.split(',')
            tokens = [t.strip() for t in tokens if len(t.strip()) > 0]
            if not tokens:
                # A blank row carries no data; do not abort the import.
                continue
            if len(tokens) < 5:
                # str() is required: concatenating the int directly raised
                # TypeError instead of this ValueError.
                raise ValueError(
                    'The Values.csv file does not have the required number of tokens on line '
                    + str(lineno))

            vectorfilename = tokens[0]
            vectorname = 'Vector-' + tokens[1]
            resistancename = 'Resistance-' + tokens[2]
            fiveprimeoverhangname = 'Overhang-' + tokens[3]
            threeprimeoverhangname = 'Overhang-' + tokens[4]

            if len(tokens) > 5:
                description = tokens[5]
            else:
                description = 'From ' + vectorfilename + ': ' + \
                              fiveprimeoverhangname + ', ' + \
                              threeprimeoverhangname + ', ' + resistancename

            # Take the sequence from the first directory that contains the
            # file. Failing loudly here prevents a missing file from
            # silently reusing the previous row's sequence.
            for directory in directories:
                if vectorfilename in os.listdir(directory):
                    vectorsequence = read_genbank_file(
                        os.path.join(directory, vectorfilename))
                    break
            else:
                raise ValueError('The vector file ' + vectorfilename +
                                 ' could not be found in any of the input '
                                 'directories.')

            vector = {}
            vector['authorid'] = authorid
            vector['datecreated'] = date
            vector['description'] = description
            vector['name'] = vectorname
            vectorid = uuid.uuid4()
            vector['idvector'] = vectorid

            # The nucleotide sequence record shares the vector's id.
            nucseq = {}
            nucseq['datecreated'] = date
            nucseq['idnucseq'] = vectorid
            nucseq['sequence'] = vectorsequence

            vector['nucseq'] = nucseq
            vector['iscircular'] = True

            repo['nucseq'].append(nucseq)
            repo['vectors'].append(vector)

            # ft2 remembers the three-prime overhang feature; it anchors the
            # derived resistance feature below.
            ft2 = {}
            foundfeature1 = False
            foundfeature2 = False

            overhangfeatures = repository.get_features_by_family_name(
                repo, 'overhang')

            # Names are compared case-insensitively; only the first match
            # for each overhang is annotated.
            for feature in overhangfeatures:

                if not foundfeature1 and feature['name'].upper(
                ) == fiveprimeoverhangname.upper():
                    position = repository.get_overhang_position_in_vector(
                        vectorsequence, feature['nucseq']['sequence'])
                    repository.add_feature_to_nucseq(repo, vectorname, nucseq,
                                                     feature, position,
                                                     authorid, date)
                    foundfeature1 = True

                if not foundfeature2 and feature['name'].upper(
                ) == threeprimeoverhangname.upper():
                    position = repository.get_overhang_position_in_vector(
                        vectorsequence, feature['nucseq']['sequence'])
                    repository.add_feature_to_nucseq(repo, vectorname, nucseq,
                                                     feature, position,
                                                     authorid, date)
                    ft2 = feature
                    foundfeature2 = True

            if not foundfeature1 or not foundfeature2:
                raise ValueError(
                    'The overhangs caused by vector ' + vectorname +
                    ' were not defined in the overhangs manifest.')

            foundfeature = False

            # Annotate every existing resistance feature with this name.
            for feature in repository.get_features_by_family_name(
                    repo, 'resistance'):
                if feature['name'].upper() == resistancename.upper():
                    position = nucseq['sequence'].find(
                        feature['nucseq']['sequence'])
                    repository.add_feature_to_nucseq(repo, vectorname, nucseq,
                                                     feature, position,
                                                     authorid, date)
                    foundfeature = True

            if not foundfeature:
                # No such resistance feature yet: derive one from the
                # sequence that follows the three-prime overhang.
                overhangpos = repository.get_overhang_position_in_vector(
                    vectorsequence, ft2['nucseq']['sequence'])
                if overhangpos < 0:
                    raise ValueError('The overhang ' + ft2['name'] +
                                     ' could not be found in the vector ' +
                                     vectorname)
                startpos = overhangpos + len(ft2['nucseq']['sequence']) + 1
                resistancesequence = nucseq['sequence'][
                    startpos:len(nucseq['sequence'])]
                f = repository.create_feature(repo, resistancename,
                                              resistancesequence, 'resistance',
                                              date)
                position = nucseq['sequence'].find(f['nucseq']['sequence'])
                repository.add_feature_to_nucseq(repo, vectorname, nucseq, f,
                                                 position, authorid, date)
                repository.add_object_to_collection(repo, collectionid,
                                                    f['idfeature'], 'FEATURE',
                                                    authorid, date)

            repository.add_object_to_collection(repo, collectionid, vectorid,
                                                'VECTOR', authorid, date)