def minimal_data():
    """Create a minimal two-gene case/control fixture for tests."""
    genes = {name: Gene(name) for name in ('TP53', 'MAP2K1')}
    tp53, map2k1 = genes['TP53'], genes['MAP2K1']
    case = SampleCollection('case', [Sample('1', {tp53: 2, map2k1: 1})])
    control = SampleCollection('control', [Sample('1', {tp53: 1, map2k1: 1})])
    return tp53, map2k1, case, control
def test_init():
    """SampleCollection keeps its name and holds Sample instances."""
    expression_a = {Gene('BAD'): 1.2345, Gene('FUCA2'): 6.5432}
    expression_b = {Gene('BAD'): 2.3456, Gene('FUCA2'): 7.6543}
    collection = SampleCollection(
        'Tumour',
        [Sample('Tumour_1', expression_a), Sample('Tumour_2', expression_b)],
    )
    assert collection.name == 'Tumour'
    for member in collection.samples:
        assert isinstance(member, Sample)
def post(self, id):
    """Create a new sample for user *id* from the request payload.

    Returns a 404 error payload when the user does not exist, otherwise
    persists the sample and returns it serialized with status 201.
    """
    data = api.payload
    # Renamed from `type`/`value` to avoid shadowing the `type` builtin.
    sample_type = data.get('type')
    sample_value = data.get('value')
    if User.query.get(id) is None:
        # Fixed French spelling: "n'exists" -> "n'existe".
        return {'message': "L'utilisateur n'existe pas!"}, 404
    sample = Sample(type=sample_type, value=sample_value, user_id=id)
    db.session.add(sample)
    db.session.commit()
    return {'element': sample.to_json()}, 201
def new_sample(request):
    """Render the new-sample page; on a valid POST, save and redirect."""
    if request.method == 'POST':
        sample_form = SampleForm(request.POST, instance=Sample())
        if sample_form.is_valid():
            sample_form.save()
            return HttpResponseRedirect('/viewer/new_sample/')
    else:
        sample_form = SampleForm(instance=Sample())
    context = {
        'sample_form': sample_form,
        'samples': Sample.objects.all(),
    }
    context.update(csrf(request))
    return render_to_response('viewer/new_sample.html', context,
                              context_instance=RequestContext(request))
def add(request):
    """Create a Sample via SampleForm, stamping audit fields first.

    On valid POST redirects to the by-subject sample list; otherwise renders
    the add form with the subject choices restricted to the user's
    subscribed projects.

    NOTE(review): the template context is `locals()`, so every local name
    here (user, form, subscriptions, s) is a template variable — do not
    rename locals without checking samples/add.html.
    """
    user = request.user
    if request.method == 'POST':
        s = Sample()
        # Audit fields are set before binding so form.save() persists them.
        s.date_time_last_updated = datetime.datetime.now()
        s.last_updated_by = request.user
        form = forms.SampleForm(request.POST, instance=s)
        if form.is_valid():
            form.save()
            return HttpResponseRedirect('/samples/list/bysubject/%s/1' % form.instance.subject.id)
    else:
        form = forms.SampleForm()
    # Limit selectable subjects to projects the user subscribes to.
    subscriptions = user.project_subscriptions.all().order_by('project_code')
    form.fields['subject'].queryset = Subject.objects.filter(project__in=subscriptions)
    return render_to_response('samples/add.html', locals())
def gen_corpus_feature(corpus_path, corpus_feature_path, emb_path):
    """Extract a feature vector per sentence of *corpus_path* and pickle them.

    The output written to *corpus_feature_path* is a dict with parallel
    'index' and 'feature' lists.

    NOTE(review): relies on a module-level `dic_path` and `pkl` — confirm
    both are defined in this module.
    """
    sample = Sample(dic_path, corpus_path, emb_path)
    with open(corpus_path, 'r', encoding='utf-8', errors='ignore') as f:
        sentences = f.readlines()
    corpus_feature = {'index': [], 'feature': []}
    # enumerate replaces the manual idx counter of the original version.
    for idx, sentence in enumerate(sentences):
        corpus_feature['feature'].append(sample.get_feature(sentence))
        corpus_feature['index'].append(idx)
    with open(corpus_feature_path, 'wb') as f:
        pkl.dump(corpus_feature, f)
def list_samples(farm_id, lot_id):
    """List a lot's samples; on POST also register a new produce sample.

    GET: loads the farm and lot from the URL parameters.
    POST: creates a GrownProduce for the selected produce, attaches it to
    the farm and a new Sample to the lot, commits, then renders the list.

    NOTE(review): the POST branch re-reads farm_id/lot_id from the form,
    shadowing the URL parameters — confirm this is intended.
    """
    if request.method == 'GET':
        farm = Farm.query.get(farm_id)
        lot = SampleLot.query.get(lot_id)
    elif request.method == 'POST':
        farm_id = request.form.get('farm_id')
        lot_id = request.form.get('lot_id')
        produce_id = request.form.get('produce_id')
        p = Produce.query.get(int(produce_id))
        grown_produce = GrownProduce(produce=p)
        farm = Farm.query.get(int(farm_id))
        lot = SampleLot.query.get(int(lot_id))
        farm.produce.append(grown_produce)
        lot.samples.append(Sample(produce=grown_produce))
        db.session.add(grown_produce)
        db.session.add(farm)
        db.session.add(lot)
        db.session.commit()
    produces = Produce.query.all()
    samples = []
    # Build a plain dict view of each sample for the template.
    for s in lot.samples:
        grown_produce = GrownProduce.query.get(s.produce_id)
        samples.append({
            'id': s.id,
            'produce': grown_produce.produce.name
        })
    return render_template('food/samples.html', farm=farm, lot=lot,
                           samples=samples, produces=produces)
def add_sample(farm_id, lot_id):
    """Add sample to the sample lot.

    On POST with a produce selected, wraps it in a GrownProduce, appends a
    new Sample to the lot and redirects to the sample list; otherwise
    renders the add-sample form with all available produce.

    NOTE(review): *errors* is collected but not passed to render_template —
    validation messages are never shown; confirm the template expects them.
    """
    errors = []
    if request.method == 'POST':
        produce_id = request.form.get('produce', '')
        if not produce_id:
            # Thai: "please select the type of produce"
            errors.append(u'กรุณาเลือกชนิดของผลผลิต')
        else:
            lot = SampleLot.query.get(lot_id)
            p = Produce.query.get(produce_id)
            gp = GrownProduce(produce=p)
            sample = Sample(produce=gp)
            lot.samples.append(sample)
            db.session.add(lot)
            db.session.commit()
            return redirect(url_for('food.list_samples', farm_id=farm_id, lot_id=lot_id))
    farm = Farm.query.get(farm_id)
    lot = SampleLot.query.get(lot_id)
    produces = []
    for p in Produce.query.all():
        produces.append({
            'id': p.id,
            'name': p.name
        })
    return render_template('food/add_sample.html', farm=farm, lot=lot, produces=produces)
def test_ttest():
    """ttest on a two-collection experiment yields a Series over all genes."""
    tumour = SampleCollection('Tumour', [
        Sample.from_names('Tumour_1', {'BAD': 1.2345, 'FUCA2': 6.5432}),
        Sample.from_names('Tumour_2', {'BAD': 2.3456, 'FUCA2': 7.6543}),
    ])
    normal = SampleCollection('Normal', [
        Sample.from_names('Normal_1', {'BAD': 6.3456, 'FUCA2': 11.6543}),
        Sample.from_names('Normal_2', {'BAD': 7.1111, 'FUCA2': 9.9711}),
    ])
    experiment = Experiment(case=tumour, control=normal)
    result = ttest(experiment)
    assert isinstance(result, pd.Series)
    for gene in experiment.get_all().genes:
        assert gene in list(result.keys())
def add_sample(al1, al2, device):
    """Persist one analog sample for *device* and confirm via JSON.

    NOTE(review): this mutates the database on a GET request, which breaks
    HTTP safety semantics — consider requiring POST. For non-GET methods
    the function implicitly returns None.
    """
    if request.method == "GET":
        sample = Sample(analog1=al1, analog2=al2, device_id=device)
        db.session.add(sample)
        db.session.commit()
        result = 'Dato añadido'
        message = {
            'result': result
        }
        return jsonify(message)
def test_sample_init():
    """Sample keeps its name and the exact Gene->value mapping given."""
    expression = {Gene('BAD'): 1.2345, Gene('FUCA2'): 6.5432}
    sample = Sample('Tumour_1', expression)
    assert sample.name == 'Tumour_1'
    for key in sample.data.keys():
        assert isinstance(key, Gene)
    assert sample.data == expression
def test_sample_from_names():
    """from_names turns plain gene names into Gene keys with float values."""
    raw = {'BAD': 1.2345, 'FUCA2': 6.5432}
    sample = Sample.from_names('Tumour_1', raw)
    assert sample.name == 'Tumour_1'
    keys = list(sample.data.keys())
    values = list(sample.data.values())
    assert all(isinstance(k, Gene) for k in keys)
    assert [k.name for k in keys] == ['BAD', 'FUCA2']
    assert all(isinstance(v, float) for v in values)
    assert values == [1.2345, 6.5432]
def test_get_all():
    """Experiment.get_all merges case and control into one SampleCollection."""
    tumour = SampleCollection('Tumour', [
        Sample.from_names('Tumour_1', {'BAD': 1.2345, 'FUCA2': 6.5432}),
        Sample.from_names('Tumour_2', {'BAD': 2.3456, 'FUCA2': 7.6543}),
    ])
    normal = SampleCollection('Normal', [
        Sample.from_names('Normal_1', {'BAD': 3.4567}),
    ])
    merged = Experiment(case=tumour, control=normal).get_all()
    assert isinstance(merged, SampleCollection)
    for label in tumour.labels + normal.labels:
        assert label in merged.labels
def test_init():
    """Experiment exposes its case and control collections unchanged."""
    tumour = SampleCollection('Tumour', [
        Sample.from_names('Tumour_1', {'BAD': 1.2345, 'FUCA2': 6.5432}),
        Sample.from_names('Tumour_2', {'BAD': 2.3456, 'FUCA2': 7.6543}),
    ])
    normal = SampleCollection('Normal', [
        Sample.from_names('Normal_1', {'BAD': 3.4567}),
    ])
    experiment = Experiment(case=tumour, control=normal)
    assert isinstance(experiment.case, SampleCollection)
    assert isinstance(experiment.control, SampleCollection)
    assert experiment.case == tumour
    assert experiment.control == normal
def collate(self, data):
    """
    Write sample data to Cassandra.

    :param data: list or dict of sample data; a single dict is treated
        as a one-element list
    :raise DataValidationError: raised by self.validate on bad input
    """
    self.validate(data)
    # Normalise a single sample dict to a list so the loop below works.
    data = [data] if isinstance(data, dict) else data
    for sample in data:
        s = Sample(partition=1,
                   collect_at=sample['timestamp'],
                   resource=sample['resource'],
                   metric_name=sample['name'],
                   type=sample['type'],
                   # optional field — may be absent from the payload
                   attribute=sample.get('attribute'),
                   value=sample['value']
                   )
        s.save()
def insert_samples(self, df, power_source, should_commit=True):
    """Bulk-insert measurement rows from *df* for *power_source*.

    One Sample is created per (timestamp, integer-named column) pair;
    accumulated objects are flushed every 100k via bulk_save_objects.

    NOTE(review): mutates *df* in place (drop_duplicates inplace=True) and
    assigns to a column-sliced frame, which can raise pandas
    SettingWithCopy warnings — confirm callers don't rely on df unchanged.
    """
    df.drop_duplicates(subset='timestamp', inplace=True)
    for col in Loader.filter_int_named_columns(df):
        columns = ['timestamp', col]
        if 'is_on' in df.columns:
            columns = columns + ['is_on']
        samples = df[columns]
        samples['power_source_id'] = power_source.id
        samples['physical_quantity_id'] = col
        samples.rename(index=str, columns={col: 'measurement'}, inplace=True)
        sample_objs = []
        i = 1
        for sample in samples.iterrows():
            sample_obj = Sample(timestamp=sample[1]['timestamp'],
                                measurement=sample[1]['measurement'],
                                physical_quantity_id=col,
                                power_source_id=power_source.id)
            if 'is_on' in columns:
                # Map 1/0 to True/False; any other value leaves is_on unset.
                if sample[1]['is_on'] == 1:
                    sample_obj.is_on = True
                elif sample[1]['is_on'] == 0:
                    sample_obj.is_on = False
            sample_objs.append(sample_obj)
            # Flush every 100k accumulated objects to bound memory use.
            if i % 100000 == 0:
                try:
                    self.session.bulk_save_objects(sample_objs)
                    if should_commit:
                        self.session.commit()
                except psycopg2.IntegrityError as ex:
                    print(ex)
                sample_objs = []
            i = i + 1
        # Save the remaining partial batch for this column.
        self.session.bulk_save_objects(sample_objs, return_defaults=True)
        if should_commit:
            self.session.commit()
def post(self):
    """Register analog readings for a device, refusing duplicates.

    Returns an error message when a sample already exists for the device;
    otherwise stores the new sample and confirms.
    """
    data = request.get_json()
    analog1 = data['analog1']
    analog2 = data['analog2']
    device_id = data['device_id']
    # Bug fix: filter_by() returns a Query object, which is always truthy,
    # so the duplicate check fired on every request. .first() fetches an
    # actual row (or None) so the branch works as intended.
    sample = Sample.query.filter_by(device_id=device_id).first()
    if sample:
        result = 'No se han podido guardar los datos'
        message = {'result': result}
    else:
        sample_data = Sample(analog1=analog1, analog2=analog2, device_id=device_id)
        db.session.add(sample_data)
        db.session.commit()
        result = 'Datos registrados'
        message = {'result': result}
    return jsonify(message)
def add_samplelot(farm_id):
    """Create a sample lot for *farm_id*, seeded with one sample per grown
    produce; on success redirects to the food index, otherwise re-renders
    the form with validation errors."""
    errors = []
    if request.method == 'POST':
        collected_date = request.form.get('collected_date', '')
        if not collected_date:
            # Thai: "please enter the produce collection date"
            errors.append(u'โปรดกรอกข้อมูลวันที่เก็บผลผลิต')
        else:
            lot = SampleLot(
                collected_at=collected_date,
                farm_id=farm_id)
            farm = Farm.query.get(farm_id)
            for grown_produce in farm.produce:
                # automatically adds samples from all grown produce
                sample = Sample(produce=grown_produce)
                lot.samples.append(sample)
            db.session.add(lot)
            db.session.commit()
            return redirect(url_for('food.index'))
    farm = Farm.query.get(farm_id)
    return render_template('food/add_samplelot.html', farm=farm, errors=errors)
def main(session):
    """Insert one random weather sample per second until a kill is signalled."""
    killer = GracefulKiller()
    while True:
        sample = Sample()
        # Populate all four measurements with random percentages.
        for field in ('humidity', 'pressure', 'temperature', 'windspeed'):
            setattr(sample, field, random.randint(0, 100))
        session.add(sample)
        session.commit()
        print("Add new sample: %s %s %s %s" % (sample.humidity, sample.pressure, sample.temperature, sample.windspeed))
        sys.stdout.flush()
        time.sleep(1)
        if killer.kill_now:
            session.close()
            break
def test_sample_timeplace(self):
    """Samples and TimePlaces reference each other bidirectionally."""
    sample_set = SampleSet("first_sampleset")
    sample1 = Sample("P1993_101", sample_set, None)
    time_place1 = TimePlace(datetime.datetime.now(), "52.3820818", "18.0233369")
    sample1.timeplace = time_place1
    self.session.add(sample1)
    self.session.add(time_place1)
    # The sample should have the correct time place
    assert Sample.query.first().timeplace == time_place1
    # One can also add it after sample creation
    sample2 = Sample("P1993_102", sample_set, None)
    sample2.timeplace = time_place1
    self.session.add(sample2)
    self.session.commit()
    # The only existing time place should contain exactly sample1 and sample2
    assert len(TimePlace.query.first().samples) == 2
    assert sample1 in TimePlace.query.first().samples
    assert sample2 in TimePlace.query.first().samples
    # A second, distinct time place with an earlier date.
    time_place2 = TimePlace(datetime.date(1999, 3, 21), "42.3820818", "108.0233369")
    sample3 = Sample("P1993_103", sample_set, None)
    sample3.timeplace = time_place2
    self.session.add(sample3)
    self.session.add(time_place2)
    self.session.commit()
    assert len(TimePlace.query.all()) == 2
    # Coordinate and time comparisons between the two time places.
    assert sample3.timeplace.latitude < sample2.timeplace.latitude
    assert sample3.timeplace.longitude > sample2.timeplace.longitude
    assert sample3.timeplace.time < sample2.timeplace.time
    assert sample3.timeplace.date_formatted() == '1999-03-21'
# NOTE(review): this `finally` closes a try-block that starts outside this
# view of the file — kept byte-identical.
finally:
    reporter.onFinish()

if __name__ == '__main__':
    ts = time.time()
    # NOTE(review): hard-coded Windows absolute path — parameterize for
    # portability before reuse.
    sample = Sample(
        file='D:\\Users\\wanghe\\Desktop\\haley_data\\SBS_G91_291-19.txt',
        ID=0.0,
        OD=0.250,
        E=161000.00,  # elastic modulus, units per project convention — confirm
        initiation=2511,
        final=2719,
        loadingtime=1.20,
        holdingtime=600.00,
        stress_relax_count=20,
        noise_count=200,
        # NOTE(review): 'COMPEESSION' looks misspelled but must match the
        # constant's actual name on the Sample class.
        holdtype=Sample.COMPEESSION,
        fitting_num=600,
        fitting_windowsize=131,
        fitting_polyorder=2)
    # CycleAnalyst().analysis(sample)
    # CycleAnalyst().analysis(sample, reporter = ChartReporter(sample))  # chart only
    CycleAnalyst().analysis(sample, reporter=ResultFileReporter(sample))  # file only
def functional_table():
    """Render the functional annotation RPKM table, optionally as a CSV download.

    Reads filter options (function class, row limit, type identifiers or a
    free-text annotation search, sample groups) from FunctionClassFilterForm,
    queries Annotation.rpkm_table, and either renders the HTML table or — if
    the download button was pressed with 'Gene List' selected — returns a
    CSV attachment of contributing genes.
    """
    form = FunctionClassFilterForm()
    form.function_class.choices = [('cog', 'Cog'),
                                   ('pfam', 'Pfam'),
                                   ('tigrfam', 'TigrFam'),
                                   ('all', 'All')
                                   ]
    form.select_sample_groups.choices = [(sample_set.name, sample_set.name)
                                         for sample_set in SampleSet.query.all()]
    type_identifiers = []
    if form.validate_on_submit():
        function_class = form.function_class.data
        if function_class == 'all':
            function_class = None  # None means "no class filter" downstream
        limit = form.limit.data
        if limit == 'all':
            limit = None
        else:
            limit = int(limit)
        filter_alternative = form.filter_alternative.data
        if filter_alternative == 'filter_with_type_identifiers':
            # Collect the non-empty identifier entries from the form.
            for type_identifier in form.type_identifiers.entries:
                if type_identifier.data != '':
                    type_identifiers.append(type_identifier.data)
        elif filter_alternative == 'filter_with_search':
            search_string = form.search_annotations
            if search_string.data != '':
                q = _search_query(search_string.data)
                type_identifiers = [a.type_identifier for a in q.all()]
        sample_sets = form.select_sample_groups.data
        if len(sample_sets) > 0:
            samples = [sample.scilifelab_code
                       for sample in Sample.all_from_sample_sets(sample_sets)]
        else:
            samples = None
        download_action = False
        if form.submit_download.data:
            download_action = True
            download_select = form.download_select.data
    else:
        # Defaults for a plain GET / failed validation.
        function_class = None
        limit = 20
        samples = None
        download_action = False
    if len(form.type_identifiers) == 0:
        form.type_identifiers.append_entry()
    if type_identifiers == []:
        type_identifiers = None
    samples, table = Annotation.rpkm_table(limit=limit, samples=samples,
                                           function_class=function_class,
                                           type_identifiers=type_identifiers)
    samples = sorted(samples, key=lambda x: x.scilifelab_code)
    sample_scilifelab_codes = [sample.scilifelab_code for sample in samples]
    if download_action:
        if download_select == 'Gene List':
            # Fetch all contributing genes for all the annotations in the table
            annotation_ids = [annotation.id for annotation, sample in table.items()]
            genes_per_annotation = Annotation.genes_per_annotation(annotation_ids)
            csv_output = '\n'.join(
                [','.join([gene.name, annotation.type_identifier])
                 for gene, annotation in genes_per_annotation])
            r = make_response(csv_output)
            r.headers["Content-Disposition"] = "attachment; filename=gene_list.csv"
            r.headers["Content-Type"] = "text/csv"
            return r
    return render_template('functional_table.html',
                           table=table,
                           samples=samples,
                           sample_scilifelab_codes=sample_scilifelab_codes,
                           form=form
                           )
# Parse VCF-style header lines from every */header.txt under INPUTDIR,
# building a Sample object per sample column (tagged with phenodata when
# available), then write all samples to CSV and read the chromosome list.
for header_file in sorted(glob.glob(INPUTDIR + "/*/header.txt")):
    with open(header_file, "r") as file:
        for line in file:
            line = line.rstrip()
            if line.startswith("##"):  # meta lines carry no column info
                continue
            if line.startswith("#"):  # the #CHROM header row — strip the '#'
                line = line[1:]
            cols = line.split("\t")
            col_names = cols[0:9]  # fixed VCF columns — presumably unused here
            sample_names = cols[9:]
            for sample_name in sample_names:
                if sample_name in phenodata_map:
                    genotype = Sample(ID=sample_name, tag=phenodata_map[sample_name])
                else:
                    genotype = Sample(ID=sample_name)
                # Deduplicate across header files.
                if sample_name not in samples_ids:
                    samples_ids.add(sample_name)
                    sample_list.append(genotype)

# NOTE(review): csv_files is keyed by the Sample *class* itself — confirm
# this matches how csv_files was populated.
for sample in sample_list:
    csv_files[Sample].writerow(sample.get_all())

#print("[MASTER] Generating chromosome CSV...")
with open(INPUTDIR + "chromosomes.txt", "r") as file:
    for line in file:
        chromosome = line.rstrip()
        chrom = Chromosome(ID=chromosome)
def do_sandbox():
    """Seed the database with mock data: users (+roles), samples, instruments,
    harmonies (+notes, scales), sounds, melodies, rhythms, patterns, scores,
    staves, voices, and their join tables.

    Idempotent: each section first queries by a natural key and reuses the
    existing row when found, so re-running creates nothing twice.
    """
    #USER
    users_by_email = {}
    for (user_index, user_mock) in enumerate(user_mocks):
        query = User.query.filter_by(email=user_mock['email'])
        if query.count() == 0:
            user = User(from_dict=user_mock)
            user.validationToken = None  # sandbox users skip validation
            Wrapper.check_and_save(user)
            set_from_mock("thumbs", user, user_index)
            print("CREATED user")
            pprint(vars(user))
            if 'role' in user_mock:
                role = Role()
                role.type = user_mock['role']
                role.user = user
                Wrapper.check_and_save(role)
                print("CREATED role")
                pprint(vars(role))
        else:
            user = query.first()
        users_by_email[user.email] = user
    #SAMPLE
    samples_by_name = {}
    for (sample_index, sample_mock) in enumerate(sample_mocks):
        query = Sample.query.filter_by(name=sample_mock['name'])
        if query.count() == 0:
            sample = Sample(from_dict=sample_mock)
            Wrapper.check_and_save(sample)
            set_from_mock("audios", sample, sample_index)
            print("CREATED sample")
            pprint(vars(sample))
        else:
            sample = query.first()
        samples_by_name[sample.name] = sample
    #INSTRUMENT
    instruments_by_name = {}
    for (instrument_index, instrument_mock) in enumerate(instrument_mocks):
        query = Instrument.query.filter_by(name=instrument_mock['name'])
        if query.count() == 0:
            instrument = Instrument(from_dict=instrument_mock)
            Wrapper.check_and_save(instrument)
            print("CREATED instrument")
            pprint(vars(instrument))
        else:
            instrument = query.first()
        instruments_by_name[instrument.name] = instrument
    #HARMONY
    harmonies_by_name = {}
    for (harmony_index, harmony_mock) in enumerate(harmony_mocks):
        query = Harmony.query.filter_by(name=harmony_mock['name'])
        if query.count() == 0:
            harmony = Harmony(from_dict=harmony_mock)
            Wrapper.check_and_save(harmony)
            print("CREATED harmony")
            pprint(vars(harmony))
        else:
            harmony = query.first()
        harmonies_by_name[harmony.name] = harmony
        #NOTE — one Note row per index up to the harmony's notesMax
        notes = []
        for note_index in range(harmony.notesMax):
            query = Note.query.filter_by(index=note_index)
            if query.count() == 0:
                note = Note(from_dict={
                    "index": note_index
                })
                if "noteNames" in harmony_mock and note_index in harmony_mock['noteNames']:
                    note.name = harmony_mock['noteNames'][note_index]
                Wrapper.check_and_save(note)
                print("CREATED note")
                pprint(vars(note))
            else:
                note = query.first()
            notes.append(note)
        #SCALE — currently disabled (empty iterable); original range kept below
        #for scaleSize in range(1, harmony.scaleMaxSize):
        for scaleSize in []:
            scale_note_indexes_combinations = harmony.get_scale_note_indexes_combinations(scaleSize)
            scale_note_indexes_combinations_length = str(len(scale_note_indexes_combinations))
            for (scale_index, scale_note_indexes) in enumerate(scale_note_indexes_combinations):
                query = Scale.query.filter_by(
                    combinationIndex=scale_index,
                    harmonyId=harmony.id,
                    size=scaleSize
                )
                if query.count() == 0:
                    scale = Scale()
                    scale.combinationIndex = scale_index
                    scale.harmony = harmony
                    scale.name = harmony_mock['get_scale_name'](scale_note_indexes)
                    scale.tags = harmony_mock['get_scale_tags'](scale_note_indexes)
                    scale.size = scaleSize
                    Wrapper.check_and_save(scale)
                    print("CREATED scale " + str(scale_index) + " / " + scale_note_indexes_combinations_length + "(" + str(scaleSize) + ")")
                    #pprint(vars(scale))
                    for scale_note_index in scale_note_indexes:
                        note = notes[scale_note_index]
                        scale_note = ScaleNote()
                        scale_note.scale = scale
                        scale_note.note = note
                        Wrapper.check_and_save(scale_note)
                        #print("CREATED scale_note")
                        #pprint(vars(scale_note))
    #SOUND
    for sound_mock in sound_mocks:
        instrument = instruments_by_name[sound_mock['instrumentName']]
        sample = samples_by_name[sound_mock['sampleName']]
        query = Sound.query.filter_by(
            instrumentId=instrument.id,
            pitch=sound_mock['pitch'],
            sampleId=sample.id
        )
        if query.count() == 0:
            sound = Sound(from_dict=sound_mock)
            sound.instrument = instrument
            sound.sample = sample
            Wrapper.check_and_save(sound)
            print("CREATED sound")
            pprint(vars(sound))
    #MELODY
    melodies_by_name = {}
    for melody_mock in melody_mocks:
        query = Melody.query.filter_by(name=melody_mock['name'])
        if query.count() == 0:
            melody = Melody(from_dict=melody_mock)
            Wrapper.check_and_save(melody)
            print("CREATED melody")
            pprint(vars(melody))
        else:
            melody = query.first()
        melodies_by_name[melody.name] = melody
    #RHYTHM
    rhythms_by_name = {}
    for rhythm_mock in rhythm_mocks:
        query = Rhythm.query.filter_by(name=rhythm_mock['name'])
        if query.count() == 0:
            rhythm = Rhythm(from_dict=rhythm_mock)
            Wrapper.check_and_save(rhythm)
            print("CREATED rhythm")
            pprint(vars(rhythm))
        else:
            rhythm = query.first()
        rhythms_by_name[rhythm.name] = rhythm
    #PATTERN — keyed by (melody, rhythm) pair
    patterns_by_name = {}
    for pattern_mock in pattern_mocks:
        melody = melodies_by_name[pattern_mock['melodyName']]
        rhythm = rhythms_by_name[pattern_mock['rhythmName']]
        query = Pattern.query.filter_by(
            melodyId=melody.id,
            rhythmId=rhythm.id
        )
        if query.count() == 0:
            pattern = Pattern(from_dict=pattern_mock)
            pattern.melody = melody
            pattern.rhythm = rhythm
            Wrapper.check_and_save(pattern)
            print("CREATED pattern")
            pprint(vars(pattern))
        else:
            pattern = query.first()
        patterns_by_name[pattern.name] = pattern
    #SCORE
    scores_by_name = {}
    for score_mock in score_mocks:
        query = Score.query.filter_by(name=score_mock['name'])
        if query.count() == 0:
            score = Score(from_dict=score_mock)
            user = users_by_email[score_mock['userEmail']]
            score.user = user
            Wrapper.check_and_save(score)
            print("CREATED score")
            pprint(vars(score))
        else:
            score = query.first()
        scores_by_name[score.name] = score
    #STAFF
    staves_by_name = {}
    for staff_mock in staff_mocks:
        query = Staff.query.filter_by(
            name=staff_mock['name']
        )
        if query.count() == 0:
            staff = Staff(from_dict=staff_mock)
            Wrapper.check_and_save(staff)
            print("CREATED staff")
            pprint(vars(staff))
        else:
            staff = query.first()
        staves_by_name[staff.name] = staff
    #SCORE STAFF — join rows between scores and staves
    for score_staff_mock in score_staff_mocks:
        score = scores_by_name[score_staff_mock['scoreName']]
        staff = staves_by_name[score_staff_mock['staffName']]
        query = ScoreStaff.query.filter_by(
            positionIndex=score_staff_mock['positionIndex'],
            scoreId=score.id,
            staffId=staff.id
        )
        if query.count() == 0:
            score_staff = ScoreStaff(from_dict=score_staff_mock)
            score_staff.score = score
            score_staff.staff = staff
            Wrapper.check_and_save(score_staff)
            print("CREATED score_staff")
            pprint(vars(score_staff))
    #SCORE INSTRUMENT — join rows between scores and instruments
    for score_instrument_mock in score_instrument_mocks:
        score = scores_by_name[score_instrument_mock['scoreName']]
        instrument = instruments_by_name[score_instrument_mock['instrumentName']]
        query = ScoreInstrument.query.filter_by(
            positionIndex=score_instrument_mock['positionIndex'],
            scoreId=score.id,
            instrumentId=instrument.id
        )
        if query.count() == 0:
            score_instrument = ScoreInstrument(from_dict=score_instrument_mock)
            score_instrument.score = score
            score_instrument.instrument = instrument
            Wrapper.check_and_save(score_instrument)
            print("CREATED score_instrument")
            pprint(vars(score_instrument))
    #VOICE
    voices_by_name = {}
    for voice_mock in voice_mocks:
        query = Voice.query.filter_by(name=voice_mock['name'])
        if query.count() == 0:
            voice = Voice(from_dict=voice_mock)
            Wrapper.check_and_save(voice)
            print("CREATED voice")
            pprint(vars(voice))
        else:
            voice = query.first()
        voices_by_name[voice.name] = voice
    #VOICE PATTERNS — join rows between voices and patterns
    for voice_pattern_mock in voice_pattern_mocks:
        voice = voices_by_name[voice_pattern_mock['voiceName']]
        pattern = patterns_by_name[voice_pattern_mock['patternName']]
        query = VoicePattern.query.filter_by(
            voiceId=voice.id,
            patternId=pattern.id
        )
        if query.count() == 0:
            voice_pattern = VoicePattern(from_dict=voice_pattern_mock)
            voice_pattern.voice = voice
            voice_pattern.pattern = pattern
            Wrapper.check_and_save(voice_pattern)
            print("CREATED voice_pattern")
            pprint(vars(voice_pattern))
    #STAFF VOICE — join rows between staves and voices
    for staff_voice_mock in staff_voice_mocks:
        staff = staves_by_name[staff_voice_mock['staffName']]
        voice = voices_by_name[staff_voice_mock['voiceName']]
        query = StaffVoice.query.filter_by(
            staffId=staff.id,
            voiceId=voice.id
        )
        if query.count() == 0:
            staff_voice = StaffVoice(from_dict=staff_voice_mock)
            staff_voice.staff = staff
            staff_voice.voice = voice
            Wrapper.check_and_save(staff_voice)
            print("CREATED staff_voice")
            pprint(vars(staff_voice))
SAMPLE = [ { "l_value": 2.0, "harmful": True, "photo": "photo1.jpg" }, { "l_value": 1.5, "harmful": False, "photo": "photo2.jpg" }, { "l_value": 10.5, "harmful": True, "photo": "photo3.jpg" }, ] # Create the database db.create_all() # iterate over the PEOPLE structure and populate the database for sample in SAMPLE: s = Sample(l_value=sample.get("l_value"), harmful=sample.get("harmful"), photo=sample.get("photo")) db.session.add(s) db.session.commit()
def test_sample_sampleset(self):
    """Test that sample and sample set references each other properly."""
    sample_set = SampleSet("first_sampleset")
    sample1 = Sample("P1993_101", sample_set, None)
    self.session.add(sample1)
    self.session.add(sample_set)
    # Sample sets relations are created
    assert Sample.query.filter_by(scilifelab_code='P1993_101').first().sample_set is sample_set
    sample_set2 = SampleSet("second_sampleset")
    sample2 = Sample("P1993_102", sample_set2, None)
    sample3 = Sample("P1993_103", sample_set, None)
    self.session.add(sample2)
    self.session.add(sample3)
    self.session.commit()
    # Sample2 should have sample set 2
    assert Sample.query.filter_by(scilifelab_code='P1993_102').first().sample_set is sample_set2
    # There should be 2 sample sets and 3 samples in the db
    assert len(SampleSet.query.all()) == 2
    assert len(Sample.query.all()) == 3
    # The reverse relationship for sample set 2 should have sample1 and sample3
    assert len(sample_set.samples) == 2
    assert sample2 not in sample_set.samples
    assert sample1 in sample_set.samples
    assert sample3 in sample_set.samples
    # all_from_sample_sets filters samples by sample-set name(s).
    assert len(Sample.all_from_sample_sets(['first_sampleset'])) == 2
    assert sample1 in Sample.all_from_sample_sets(['first_sampleset'])
    assert sample3 in Sample.all_from_sample_sets(['first_sampleset'])
    assert sample2 not in Sample.all_from_sample_sets(['first_sampleset'])
    assert len(Sample.all_from_sample_sets(['second_sampleset'])) == 1
    assert sample1 not in Sample.all_from_sample_sets(['second_sampleset'])
    assert sample3 not in Sample.all_from_sample_sets(['second_sampleset'])
    assert sample2 in Sample.all_from_sample_sets(['second_sampleset'])
    # Passing both names returns the union of the two sets.
    assert len(Sample.all_from_sample_sets(['second_sampleset', 'first_sampleset'])) == 3
    assert sample1 in Sample.all_from_sample_sets(['second_sampleset', 'first_sampleset'])
    assert sample3 in Sample.all_from_sample_sets(['second_sampleset', 'first_sampleset'])
    assert sample2 in Sample.all_from_sample_sets(['second_sampleset', 'first_sampleset'])
def main():
    """Load experiment/sample/run XML records from the TAR_PATH archive into
    the database.

    Drops and recreates the three tables, then streams each XML member of
    the gzipped tar and bulk-inserts parsed rows in batches of 1000.
    """
    n_experiments, n_samples, n_runs = 0, 0, 0
    db = get_database()
    # clear pre-existing data
    print("--- dropping existing data")
    db.drop_tables([Sample, Experiment, Run])
    print("--- migrating up!")
    db.create_tables([Experiment, Sample, Run])
    batch_size = 1000
    with tarfile.open(TAR_PATH, "r:gz") as tar:
        for n, member in enumerate(tar):
            print(f"--- loading {member.name}")
            # Probe the member: directories/specials yield None.
            h = tar.extractfile(member)
            experiments_batch = []
            samples_batch = []
            runs_batch = []
            if h is not None:
                if "experiment" in member.name:
                    # experiment alias -> biosample identifier
                    with tar.extractfile(member) as handle:
                        root = xml.etree.ElementTree.parse(handle)
                        for _id, experiment_data in load_experiment_data(root):
                            experiments_batch.append({
                                "id": _id,
                                **experiment_data
                            })
                            n_experiments += 1
                            if len(experiments_batch) >= batch_size:
                                print("--- inserting batch of experiments")
                                print(len(experiments_batch))
                                Experiment.insert_many(experiments_batch).execute()
                                print(
                                    f"--- n_experiments={n_experiments}; row count={Experiment.select().count()}"
                                )
                                experiments_batch = []
                    # Flush the trailing partial batch.
                    if len(experiments_batch) > 0:
                        try:
                            Experiment.insert_many(experiments_batch).execute()
                        except peewee.DataError:
                            pprint(experiments_batch)
                            quit(-1)
                if "sample" in member.name:
                    with tar.extractfile(member) as handle:
                        root = xml.etree.ElementTree.parse(handle)
                        for _id, sample_data in load_sample_data(root):
                            row = {"id": _id, **sample_data}
                            samples_batch.append(row)
                            n_samples += 1
                            if len(samples_batch) >= batch_size:
                                print(len(samples_batch))
                                print("--- inserting batch of samples")
                                Sample.insert_many(samples_batch).execute()
                                print(
                                    f"--- n_samples={n_samples}; row count={Sample.select().count()}"
                                )
                                samples_batch = []
                    if len(samples_batch) > 0:
                        Sample.insert_many(samples_batch).execute()
                if "run" in member.name:
                    with tar.extractfile(member) as handle:
                        root = xml.etree.ElementTree.parse(handle)
                        for _id, run_data in load_run_data(root):
                            runs_batch.append({"id": _id, **run_data})
                            n_runs += 1
                            if len(runs_batch) >= batch_size:
                                print(len(runs_batch))
                                print("--- inserting batch of runs")
                                Run.insert_many(runs_batch).execute()
                                print(
                                    f"--- n_runs={n_runs}; row count={Run.select().count()}"
                                )
                                runs_batch = []
                    if len(runs_batch) > 0:
                        Run.insert_many(runs_batch).execute()
            else:
                pass
def parse_sample(Document, p_uuid):
    """Parse a sample CSV and persist one row per sample plus its related
    metadata records (Collect, Climate, Soil_class, Soil_nutrient,
    Management, Microbial, User) under project *p_uuid*.

    NOTE(review): Document.close() is called before the DictReader is
    iterated — on a regular file object that raises "I/O operation on
    closed file". Confirm Document's type tolerates reads after close.
    """
    f = csv.DictReader(Document, delimiter=',')
    Document.close()
    for row in f:
        s_uuid = uuid4().hex  # fresh identifier per sample row
        rowDict = row
        project = Project.objects.get(projectid=p_uuid)
        # Core sample attributes — only keys present in the row are used.
        wanted_keys = ['sample_name', 'organism', 'title', 'seq_method', 'collection_date', 'biome', 'feature', 'geo_loc_country', 'geo_loc_state', 'geo_loc_city', 'geo_loc_farm', 'geo_loc_plot', 'latitude', 'longitude', 'material', 'elevation']
        sampleDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Sample(projectid=project, sampleid=s_uuid, **sampleDict)
        m.save()
        sample = Sample.objects.get(sampleid=s_uuid)
        # Collection metadata
        wanted_keys = ['depth', 'pool_dna_extracts', 'samp_size', 'samp_collection_device', 'samp_weight_dna_ext', 'sieving', 'storage_cond']
        collectDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Collect(projectid=project, sampleid=sample, **collectDict)
        m.save()
        # Climate metadata
        wanted_keys = ['annual_season_precpt', 'annual_season_temp']
        climateDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Climate(projectid=project, sampleid=sample, **climateDict)
        m.save()
        # Soil classification metadata
        wanted_keys = ['bulk_density', 'drainage_class', 'fao_class', 'horizon', 'local_class', 'porosity', 'profile_position', 'slope_aspect', 'slope_gradient', 'soil_type', 'texture_class', 'water_content_soil']
        soil_classDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Soil_class(projectid=project, sampleid=sample, **soil_classDict)
        m.save()
        # Soil nutrient measurements
        wanted_keys = ['pH', 'EC', 'tot_C', 'tot_OM', 'tot_N', 'NO3_N', 'NH4_N', 'P', 'K', 'S', 'Zn', 'Fe', 'Cu', 'Mn', 'Ca', 'Mg', 'Na', 'B']
        soil_nutrDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Soil_nutrient(projectid=project, sampleid=sample, **soil_nutrDict)
        m.save()
        # Land management metadata
        wanted_keys = ['agrochem_addition', 'biological_amendment', 'cover_crop', 'crop_rotation', 'cur_land_use', 'cur_vegetation', 'cur_crop', 'cur_cultivar', 'organic', 'previous_land_use', 'soil_amendments', 'tillage']
        mgtDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Management(projectid=project, sampleid=sample, **mgtDict)
        m.save()
        # Microbial measurements
        wanted_keys = ['rRNA_copies', 'microbial_biomass_C', 'microbial_biomass_N', 'microbial_respiration']
        microbeDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Microbial(projectid=project, sampleid=sample, **microbeDict)
        m.save()
        # User-defined categorical/quantitative fields
        wanted_keys = ['usr_cat1', 'usr_cat2', 'usr_cat3', 'usr_cat4', 'usr_cat5', 'usr_cat6', 'usr_quant1', 'usr_quant2', 'usr_quant3', 'usr_quant4', 'usr_quant5', 'usr_quant6']
        userDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = User(projectid=project, sampleid=sample, **userDict)
        m.save()
def object_sample():
    """Serialize a demo Sample instance to JSON."""
    demo = Sample(0, 'Object Sample')
    # vars() returns the instance's __dict__, same payload as before.
    return jsonify(vars(demo))
def parse_sample(Document, p_uuid):
    """Parse a sample CSV and persist one row per sample plus its related
    metadata records (Collect, Climate, Soil_class, Soil_nutrient,
    Management, Microbial, User) under project *p_uuid*.

    NOTE(review): Document.close() is called before the DictReader is
    iterated — on a regular file object that raises "I/O operation on
    closed file". Confirm Document's type tolerates reads after close.
    NOTE(review): near-duplicate of the other parse_sample in this file —
    consider consolidating.
    """
    f = csv.DictReader(Document, delimiter=',')
    Document.close()
    for row in f:
        s_uuid = uuid4().hex  # fresh identifier per sample row
        rowDict = row
        project = Project.objects.get(projectid=p_uuid)
        # Core sample attributes — only keys present in the row are used.
        wanted_keys = [
            'sample_name', 'organism', 'title', 'seq_method',
            'collection_date', 'biome', 'feature', 'geo_loc_country',
            'geo_loc_state', 'geo_loc_city', 'geo_loc_farm', 'geo_loc_plot',
            'latitude', 'longitude', 'material', 'elevation'
        ]
        sampleDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Sample(projectid=project, sampleid=s_uuid, **sampleDict)
        m.save()
        sample = Sample.objects.get(sampleid=s_uuid)
        # Collection metadata
        wanted_keys = [
            'depth', 'pool_dna_extracts', 'samp_size',
            'samp_collection_device', 'samp_weight_dna_ext', 'sieving',
            'storage_cond'
        ]
        collectDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Collect(projectid=project, sampleid=sample, **collectDict)
        m.save()
        # Climate metadata
        wanted_keys = ['annual_season_precpt', 'annual_season_temp']
        climateDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Climate(projectid=project, sampleid=sample, **climateDict)
        m.save()
        # Soil classification metadata
        wanted_keys = [
            'bulk_density', 'drainage_class', 'fao_class', 'horizon',
            'local_class', 'porosity', 'profile_position', 'slope_aspect',
            'slope_gradient', 'soil_type', 'texture_class',
            'water_content_soil'
        ]
        soil_classDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Soil_class(projectid=project, sampleid=sample, **soil_classDict)
        m.save()
        # Soil nutrient measurements
        wanted_keys = [
            'pH', 'EC', 'tot_C', 'tot_OM', 'tot_N', 'NO3_N', 'NH4_N', 'P',
            'K', 'S', 'Zn', 'Fe', 'Cu', 'Mn', 'Ca', 'Mg', 'Na', 'B'
        ]
        soil_nutrDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Soil_nutrient(projectid=project, sampleid=sample, **soil_nutrDict)
        m.save()
        # Land management metadata
        wanted_keys = [
            'agrochem_addition', 'biological_amendment', 'cover_crop',
            'crop_rotation', 'cur_land_use', 'cur_vegetation', 'cur_crop',
            'cur_cultivar', 'organic', 'previous_land_use',
            'soil_amendments', 'tillage'
        ]
        mgtDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Management(projectid=project, sampleid=sample, **mgtDict)
        m.save()
        # Microbial measurements
        wanted_keys = [
            'rRNA_copies', 'microbial_biomass_C', 'microbial_biomass_N',
            'microbial_respiration'
        ]
        microbeDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = Microbial(projectid=project, sampleid=sample, **microbeDict)
        m.save()
        # User-defined categorical/quantitative fields
        wanted_keys = [
            'usr_cat1', 'usr_cat2', 'usr_cat3', 'usr_cat4', 'usr_cat5',
            'usr_cat6', 'usr_quant1', 'usr_quant2', 'usr_quant3',
            'usr_quant4', 'usr_quant5', 'usr_quant6'
        ]
        userDict = {x: rowDict[x] for x in wanted_keys if x in rowDict}
        m = User(projectid=project, sampleid=sample, **userDict)
        m.save()
def make_samples(samples_dict):
    """Create samples from dict representation.

    Each (name, values) item becomes one Sample via Sample.from_names.
    """
    samples = []
    for name, values in samples_dict.items():
        samples.append(Sample.from_names(name, values))
    return samples