def add_ds_ma(norm_exprs_file, tax_id, symbol_type, pheno_file, name,
              normalization, description, xref):
    """Create a microarray data set from an expression file and persist it.

    The data set name is resolved with ``get_or_guess_name`` from ``name``
    and ``norm_exprs_file``. The data set properties are built by
    ``roger.logic.dge.create_ds`` (using the ``MicroArrayDataSet`` type) and
    stored by ``roger.persistence.dge.add_ds`` under the folder configured as
    ``ROGER_DATA_FOLDER`` in the current Flask application. Progress messages
    are written to standard output.

    All arguments other than ``name`` are forwarded unchanged to
    ``roger.logic.dge.create_ds``.
    """
    name = get_or_guess_name(name, norm_exprs_file)
    print("Adding microarray data set '%s' ..." % name)

    # Project modules are imported lazily, only once the command really runs.
    from roger.persistence import db
    import roger.logic.dge
    import roger.persistence.dge
    from roger.persistence.schema import MicroArrayDataSet

    session = db.session()

    print("Parsing and annotating data ...")
    properties = roger.logic.dge.create_ds(
        session,
        MicroArrayDataSet,
        norm_exprs_file,
        tax_id,
        symbol_type,
        pheno_file,
        name,
        normalization,
        description,
        xref,
    )

    print("Persisting data set ...")
    roger.persistence.dge.add_ds(
        session,
        flask.current_app.config['ROGER_DATA_FOLDER'],
        properties,
    )

    print("Done - added data set with name '%s'" % name)
def add_design(session, design_file, dataset_name, name=None,
               description=None, sample_groups_file=None,
               sample_group_levels_file=None, sample_group_pheno_column=None):
    """Persist a design matrix and its sample subset for a data set.

    The data set is looked up with ``get_ds`` and the design name is resolved
    with ``get_or_guess_name`` from ``name`` and ``design_file``. The design
    file is read as a tab-separated table whose first column is the index.
    The two optional sample-group files are read with
    ``read_array(..., nullable=True)``.

    Returns:
        The resolved design name.

    Raises:
        ROGERUsageError: if ``query_design`` already finds a design with the
            resolved name for ``dataset_name``.
    """
    dataset = get_ds(session, dataset_name)
    name = get_or_guess_name(name, design_file)

    existing = query_design(session, name, dataset_name).one_or_none()
    if existing is not None:
        raise ROGERUsageError(
            "Design of data set '%s' with name '%s' already exist"
            % (dataset_name, name))

    # Gather the inputs in the same order the combined call evaluated them.
    design_matrix = read_table(design_file, sep='\t', index_col=0)
    pheno_data = dataset.pheno_data
    sample_groups = read_array(sample_groups_file, nullable=True)
    sample_group_levels = read_array(sample_group_levels_file, nullable=True)

    design_data = create_design_data(
        design_matrix,
        pheno_data,
        name,
        description,
        sample_groups,
        sample_group_levels,
        sample_group_pheno_column,
    )

    design = design_data.design
    design.DataSetID = dataset.ID
    session.add(design)
    # Flush before reading design.ID, which the sample subset rows reference.
    session.flush()

    sample_subset = design_data.sample_subset
    sample_subset["DesignID"] = design.ID
    insert_data_frame(session, sample_subset, SampleSubset.__table__)

    session.commit()
    return name
def add_contrast(session, contrast_file, design_name, dataset_name,
                 name=None, description=None):
    """Persist a contrast matrix for an existing design.

    The design is looked up with ``get_design`` and the contrast name is
    resolved with ``get_or_guess_name`` from ``name`` and ``contrast_file``.
    The contrast file is read as a tab-separated table whose first column is
    the index, and is checked against the design matrix columns with
    ``check_contrast_matrix``.

    One ``Contrast`` row is stored, plus one ``ContrastColumn`` row per column
    of the contrast matrix. Each column's name is used for both ``Name`` and
    ``Description``.

    Returns:
        The resolved contrast name.

    Raises:
        ROGERUsageError: if ``query_contrast`` already finds a contrast with
            the resolved name for the given design and data set.
        Whatever ``read_table`` or ``check_contrast_matrix`` raise for an
        unreadable or invalid contrast matrix. The session is untouched in
        that case.
    """
    design = get_design(session, design_name, dataset_name)
    name = get_or_guess_name(name, contrast_file)

    existing = query_contrast(session, name, design_name, dataset_name).one_or_none()
    if existing is not None:
        raise ROGERUsageError("Contrast '%s' already exist in '%s'" % (name, design_name))

    # Read and validate the matrix BEFORE adding anything to the session.
    # Otherwise an invalid file would raise with a flushed Contrast row still
    # pending in the caller's transaction, where a later commit on the same
    # session could persist a contrast that has no columns.
    contrast_data = read_table(contrast_file, sep='\t', index_col=0)
    check_contrast_matrix(design.design_matrix.columns, contrast_data)

    contrast = Contrast(DesignID=design.ID,
                        Name=name,
                        Description=description,
                        CreatedBy=get_current_user_name(),
                        CreationTime=get_current_datetime())
    session.add(contrast)
    # Flush before reading contrast.ID, which the column rows reference.
    session.flush()

    contrast_cols = contrast_data.columns
    contrast_table = DataFrame({
        "ContrastID": contrast.ID,
        "DesignID": design.ID,
        "Name": contrast_cols,
        "Description": contrast_cols,
        "ColumnData": [
            contrast_data[col_name].values.tolist()
            for col_name in contrast_cols
        ],
    })
    insert_data_frame(session, contrast_table, ContrastColumn.__table__)

    session.commit()
    return name
def add_contrast(contrast_matrix, design, dataset, name, description):
    """Add a contrast to a design and report progress on standard output.

    The contrast name is resolved with ``get_or_guess_name`` from ``name``
    and ``contrast_matrix``. The actual work is delegated to
    ``roger.persistence.dge.add_contrast`` with a session from
    ``roger.persistence.db``. The name that call returns is the one reported
    in the final message.
    """
    name = get_or_guess_name(name, contrast_matrix)
    print("Adding contrast '%s' to data set '%s' ..." % (name, dataset))

    # Project modules are imported lazily, only once the command really runs.
    from roger.persistence import db
    import roger.persistence.dge

    added_name = roger.persistence.dge.add_contrast(
        db.session(),
        contrast_matrix,
        design,
        dataset,
        name,
        description,
    )

    print("Done - added contrast with name '%s'" % added_name)
def add_design(design_matrix, dataset, name, description, sample_groups,
               sample_group_levels, sample_group_column):
    """Add a design to a data set and report progress on standard output.

    The design name is resolved with ``get_or_guess_name`` from ``name`` and
    ``design_matrix``. The actual work is delegated to
    ``roger.persistence.dge.add_design`` with a session from
    ``roger.persistence.db``. The name that call returns is the one reported
    in the final message.
    """
    name = get_or_guess_name(name, design_matrix)
    print("Adding design '%s' to data set '%s' ..." % (name, dataset))

    # Project modules are imported lazily, only once the command really runs.
    from roger.persistence import db
    import roger.persistence.dge

    added_name = roger.persistence.dge.add_design(
        db.session(),
        design_matrix,
        dataset,
        name,
        description,
        sample_groups,
        sample_group_levels,
        sample_group_column,
    )

    print("Done - added design with name '%s'" % added_name)
def create_ds(session, ds_type: Type[DataSet], exprs_file, tax_id,
              symbol_type, pheno_file=None, name=None,
              normalization_method=None, description=None, xref=None):
    """Parse and annotate an expression file into ``DataSetProperties``.

    The data set name is resolved with ``get_or_guess_name`` from ``name``
    and ``exprs_file``. The expression file is parsed with ``parse_gct`` and
    annotated with ``annotate``. Pheno data is read with ``read_df`` when
    ``pheno_file`` is given, otherwise an empty ``DataFrame`` is used. Either
    way it is passed through ``annotate_ds_pheno_data``.

    Nothing is added to ``session`` here; it is used for the species lookup,
    the duplicate-name query and the ``annotate`` call.

    Returns:
        A ``DataSetProperties`` instance holding the inputs and the parsed
        and annotated data.

    Raises:
        ROGERUsageError: if ``tax_id`` is not among the species returned by
            ``list_species``, or if a ``DataSet`` with the resolved name
            already exists.
    """
    name = get_or_guess_name(name, exprs_file)

    # Input checking
    species_list = list_species(session)
    matching_species = species_list[species_list.TaxID == tax_id]
    if matching_species.empty:
        raise ROGERUsageError('Unknown taxon id: %s' % tax_id)

    same_name_query = session.query(DataSet).filter(DataSet.Name == name)
    if same_name_query.one_or_none() is not None:
        raise ROGERUsageError("Data set with name '%s' already exists" % name)

    exprs_data = parse_gct(file_path=exprs_file)
    annotation_data, annotation_version = annotate(
        session, exprs_data, tax_id, symbol_type)

    # Without a pheno file, annotation starts from an empty frame.
    if pheno_file is None:
        pheno_data = pd.DataFrame()
    else:
        pheno_data = read_df(pheno_file)
    annotated_pheno_data = annotate_ds_pheno_data(exprs_data, pheno_data)

    return DataSetProperties(
        ds_type,
        tax_id,
        exprs_file,
        pheno_file,
        exprs_data,
        annotated_pheno_data,
        annotation_data,
        annotation_version,
        name,
        normalization_method,
        description,
        xref,
    )