示例#1
0
def load_all_sequences(filename, file_format):
    """Parse every record in a sequence file and persist each one.

    For each record yielded by ``SeqIO.parse`` a ``SequenceDocument`` is
    built from the upper-cased sequence and saved. When the record carries
    one or more features of type ``'source'``, the last such feature
    supplies the ``location`` (``[start, end]``) and the ``region`` (first
    entry of its ``country`` qualifier, or ``'undefined: UND'`` when the
    qualifier is absent). Records without a ``'source'`` feature are stored
    with ``location=[]`` and ``region=''``.

    After all records are processed, the elapsed wall-clock time is saved
    as an ``ExecutionTime`` named ``"Sequence Load Time"``.

    Args:
        filename: Path or handle passed straight through to ``SeqIO.parse``.
        file_format: Format name passed straight through to ``SeqIO.parse``.
    """
    began = time.time()

    for record in SeqIO.parse(filename, file_format):
        upper_seq = record.seq.upper()

        # Defaults used when the record has no 'source' feature.
        span, origin = [], ''
        for feat in record.features:
            if feat.type != 'source':
                continue
            # NOTE(review): nofuzzy_start / nofuzzy_end appear to have been
            # deprecated in recent Biopython releases -- confirm against the
            # installed version.
            lo = feat.location.nofuzzy_start
            hi = feat.location.nofuzzy_end
            countries = feat.qualifiers.get('country', ['undefined: UND'])
            span = [lo, hi]
            origin = countries[0]

        # Per-sequence nucleotide statistics are not computed at load time.
        SequenceDocument(
            version=str(record.id),
            length=len(upper_seq),
            fasta=record.format('fasta'),
            sequence=str(upper_seq),
            location=span,
            region=origin,
        ).save()

    elapsed = time.time() - began
    ExecutionTime(name="Sequence Load Time", seconds=elapsed).save()
示例#2
0
def parce_base_sequence(filePath, file_format):
    """Store the first record of a sequence file and return its document id.

    Only the first record produced by ``SeqIO.parse`` is consumed; any
    further records in the file are ignored.

    Args:
        filePath: Path or handle passed straight through to ``SeqIO.parse``.
        file_format: Format name passed straight through to ``SeqIO.parse``.

    Returns:
        The ``id`` of the saved ``SequenceDocument``, or ``None`` when the
        file yields no records.
    """
    first = next(iter(SeqIO.parse(filePath, file_format)), None)
    if first is None:
        return None

    identifier = str(first.id)
    bases = str(first.seq.upper())
    stored = SequenceDocument(
        version=identifier,
        length=len(bases),
        fasta=first.format('fasta'),
        sequence=bases,
        name=first.name,
    )
    stored.save()
    return stored.id
示例#3
0
def query_not_base_sequences():
    """Return every ``SequenceDocument`` whose length differs from 16569.

    Returns:
        A list of the matching documents, in the order the query yields
        them.
    """
    # presumably 16569 is the length of a full base sequence -- TODO confirm
    base_length = 16569
    return [doc for doc in SequenceDocument.objects(length__ne=base_length)]
示例#4
0
def query_normal_length():
    """Return every ``SequenceDocument`` whose length is exactly 377.

    Returns:
        A list of the matching documents, in the order the query yields
        them.
    """
    # 377 equals the width of the 16023:16400 window (16400 - 16023).
    expected_length = 377
    return [doc for doc in SequenceDocument.objects(length=expected_length)]
示例#5
0
def save_stats_db(sequence):
    """Compute and store a sequence's distances to the two base sequences.

    The base sequences named ``'NC_012920'`` and ``'RSRS'`` are looked up,
    each is sliced to ``[16023:16400]``, and ``count_distance`` is applied
    between ``sequence.sequence`` and that slice. The results are written
    to ``distance_to_rCRS`` and ``distance_to_RSRS`` respectively on the
    documents whose ``version`` equals ``sequence.version``.

    Args:
        sequence: Object exposing ``version`` and ``sequence`` attributes.
    """
    # Both references are fetched up front, before any update is issued.
    references = (
        ('distance_to_rCRS', query_base_sequence('NC_012920')),
        ('distance_to_RSRS', query_base_sequence('RSRS')),
    )
    matching = SequenceDocument.objects(version=sequence.version)

    for field_name, reference in references:
        window = reference.sequence[16023:16400]
        distance = count_distance(sequence.sequence, window)
        # One update per field, rCRS first and RSRS second.
        matching.update(**{field_name: distance})
示例#6
0
def query_base_sequence(name):
    """Return the first ``SequenceDocument`` whose ``name`` equals *name*.

    Args:
        name: Value matched against the document's ``name`` field.

    Returns:
        The first matching document.

    Note:
        The result set is indexed with ``[0]`` without a prior emptiness
        check, so a name with no match is expected to raise rather than
        return ``None``.
    """
    matches = SequenceDocument.objects(name=name)
    return matches[0]
示例#7
0
def reset_sequence_documents_regions():
    """Delete every ``SequenceDocument`` whose length is 377.

    NOTE(review): despite the name, this removes whole documents rather
    than resetting a region field -- confirm this is the intended effect.
    """
    doomed = SequenceDocument.objects(length=377)
    for document in doomed:
        # Each document is deleted individually, one call per document.
        document.delete()
示例#8
0
def get_sequences_by_region(region):
    """Query the documents whose ``region`` contains *region*.

    Args:
        region: Value used with the ``region__contains`` filter.

    Returns:
        The query result restricted via ``.only('sequence')``. Presumably
        only the ``sequence`` field is loaded on each returned document --
        verify against the document/queryset library in use.
    """
    in_region = SequenceDocument.objects(region__contains=region)
    return in_region.only('sequence')