# NOTE(review): `sys` is used here before the `import sys` further down;
# presumably an earlier import sits above this excerpt — confirm.
# Add the parent and current directories to the module search path so the
# project imports below can be resolved.
sys.path.append("..")
sys.path.append(".")
from config import *
from helpers.basics import load_config
from helpers.logger import Logger
from helpers.db_helpers import *

## Script specific
import sys
import logging
import collections
import datetime

# Create the module-level logger only when no global named `log` exists yet.
if "log" not in globals():
    log = Logger.init_logger('STATS_%s'%(cfg.language_code), load_config())

def main():
    # Builds `db_stats`, a dict of labelled report entries: the report date,
    # document counts for the samples, measurements and species collections,
    # and a per-"type" tally of measurements.
    # mongodb stats can be obtained with cfg.db.command("collstats","dict")
    db_stats={
        'A/ report_date':datetime.datetime.now(),
        'B/ Number of samples':samples_col.count(),
        'C/ Number of normalized measures':measurements_col.count(),
        # Groups measurements by their "type" field and counts each group.
        # NOTE(review): indexing the aggregate() return value with ['result']
        # assumes it is a dict — verify against the installed pymongo version.
        'C_a/ Tally of normalized measures':measurements_col.aggregate([{"$group":{"_id":"$type", "count": { "$sum": 1 }}}])['result'],
        'D/ Number of species':species_col.count(),
#!/usr/bin/env python # encoding: utf-8 import sys sys.path.append("..") sys.path.append(".") from config import * from helpers.basics import load_config from helpers.logger import Logger from helpers.db_helpers import * # Script supposed to be run in the background to populate the DB with available datasets if "log" not in globals(): logger = Logger.init_logger('FLATTEN_%s'%(cfg.language_code), load_config()) logger.info("Running %s",sys.argv[0]) logger.info("Flattening and normalizing experimental results") already_existing_xp=measurements_col.distinct("xp") samples_with_results=samples_col.find({"experimental_results":{"$elemMatch":{"values":{"$exists":True}}}}) # a_sample=samples_with_results[0] n_op=0 measurements_to_insert=measurements_col.initialize_unordered_bulk_op() for a_sample in samples_with_results: # i,experimental_results=enumerate(a_sample['experimental_results']).next() for i,experimental_results in enumerate(a_sample['experimental_results']):
#!/usr/bin/env python
# encoding: utf-8
import sys

# Extend the module search path with the parent and current directories so
# the project imports below can be resolved.
sys.path.append("..")
sys.path.append(".")

from config import *
from helpers.basics import load_config
from helpers.logger import Logger
from helpers.db_helpers import *

# Script
import datetime

# Set up the module-level logger unless a global named `log` already exists.
if "log" not in globals():
    _logger_tag = 'SAMPLE_DATA_%s'%(cfg.language_code)
    log = Logger.init_logger(_logger_tag, load_config())

# clear db: empty each collection, in the same order as before
for _collection in (species_col, publications_col, samples_col, mappings_col, measurements_col):
    _collection.remove()

#### Melon
# species
# NOTE(review): `sys` is used below (sys.path, sys.argv) but no `import sys`
# is visible in this excerpt; presumably it sits above — confirm.
# Add the parent and current directories to the module search path so the
# project imports below can be resolved.
sys.path.append("..")
sys.path.append(".")
from config import *
from helpers.basics import load_config
from helpers.logger import Logger
from helpers.db_helpers import *
from helpers.path import data_dir

# Script supposed to be run in the background to populate the DB with available datasets
## Setup
from numbers import Number
import collections
from math import log

# NOTE(review): `from math import log` just above binds `log` in the module
# globals, so this condition is false and `logger` is not assigned here. The
# `logger.info` calls below then depend on `logger` being provided elsewhere
# (e.g. by a star import) — confirm, or test for "logger" instead.
if "log" not in globals():
    logger = Logger.init_logger('DATA_PROCESSOR_%s'%(cfg.language_code), load_config())

logger.info("Running %s",sys.argv[0])
# Get available datasets and insert them in the DB
# a_sample=samples_col.find_one({"experimental_results.values":{"$exists":False}})
# Samples having at least one `experimental_results` element without a `values` field.
samples_to_process=samples_col.find({"experimental_results":{"$elemMatch":{"values":{"$exists":False}}}})
logger.info("Found %d samples to process",samples_to_process.count())
for a_sample in samples_to_process:
    logger.info("Will process dataset for experiment %s",a_sample['name'])
    # Parser settings are read from the sample's own 'xls_parsing' entry.
    parser_config=a_sample['xls_parsing']
    # Visit only the results that have no "values" key yet, keeping each one's
    # index within the sample's `experimental_results` list.
    for a_result_idx,a_result in [(i,x) for i,x in enumerate(a_sample['experimental_results']) if "values" not in x]:
        # specialize parser for the result
#!/usr/bin/env python
# encoding: utf-8
"""Select the mappings that have no ``src_to_tgt`` field and log how many were found.

Script supposed to be run in the background to populate the DB with available
datasets.
"""
import sys

# Add the parent and current directories to the module search path so the
# project imports below can be resolved.
sys.path.append("..")
sys.path.append(".")

from config import *
from helpers.basics import load_config
from helpers.logger import Logger
from helpers.db_helpers import *
from helpers.path import data_dir

# Create the logger only when none is defined yet. The guard must test the
# same name it assigns ("logger"): testing "log" would leave `logger`
# undefined whenever one of the star imports above exports a name called `log`.
# `cfg` is expected to come from the star imports above.
if "logger" not in globals():
    logger = Logger.init_logger('PROCESS_MAPPINGS_%s' % (cfg.language_code), load_config())

logger.info("Running %s", sys.argv[0])

# Get available mappings and process them: every mapping document without a
# "src_to_tgt" field is selected as still to be processed.
mappings_to_process = mappings_col.find({"src_to_tgt": {"$exists": False}})
logger.info("Found %d mappings to process", mappings_to_process.count())