Пример #1
0
def add_ds_ma(norm_exprs_file,
              tax_id,
              symbol_type,
              pheno_file,
              name,
              normalization,
              description,
              xref):
    """Create a microarray data set and persist it.

    The data set name is resolved with ``get_or_guess_name`` from ``name``
    and ``norm_exprs_file``. The data set properties are then built by
    ``roger.logic.dge.create_ds`` (with ``MicroArrayDataSet`` as the data
    set type) and handed to ``roger.persistence.dge.add_ds`` together with
    the ``ROGER_DATA_FOLDER`` entry of the current Flask app configuration.
    Progress messages are printed to stdout.
    """
    ds_name = get_or_guess_name(name, norm_exprs_file)
    print("Adding microarray data set '%s' ..." % ds_name)

    # These modules are imported at call time rather than at module level.
    from roger.persistence import db
    import roger.logic.dge
    import roger.persistence.dge
    from roger.persistence.schema import MicroArrayDataSet

    db_session = db.session()

    print("Parsing and annotating data ...")
    properties = roger.logic.dge.create_ds(
        db_session, MicroArrayDataSet, norm_exprs_file, tax_id, symbol_type,
        pheno_file, ds_name, normalization, description, xref)

    print("Persisting data set ...")
    data_folder = flask.current_app.config['ROGER_DATA_FOLDER']
    roger.persistence.dge.add_ds(db_session, data_folder, properties)
    print("Done - added data set with name '%s'" % ds_name)
Пример #2
0
def add_design(session,
               design_file,
               dataset_name,
               name=None,
               description=None,
               sample_groups_file=None,
               sample_group_levels_file=None,
               sample_group_pheno_column=None):
    """Store a design, read from a tab-separated file, for a data set.

    :param session: database session used for the lookups and inserts
    :param design_file: tab-separated design table; its first column is
        used as the index
    :param dataset_name: name of the data set the design is attached to
    :param name: design name; resolved via ``get_or_guess_name`` together
        with ``design_file``
    :param description: passed through to ``create_design_data``
    :param sample_groups_file: read with ``read_array(..., nullable=True)``
    :param sample_group_levels_file: read with
        ``read_array(..., nullable=True)``
    :param sample_group_pheno_column: passed through to
        ``create_design_data``
    :return: the resolved design name
    :raises ROGERUsageError: if a design with that name is already
        registered for the data set
    """
    dataset = get_ds(session, dataset_name)
    name = get_or_guess_name(name, design_file)

    existing = query_design(session, name, dataset_name).one_or_none()
    if existing is not None:
        raise ROGERUsageError(
            "Design of data set '%s' with name '%s' already exist" %
            (dataset_name, name))

    # Gather the inputs one by one, in the order they were evaluated before.
    design_matrix = read_table(design_file, sep='\t', index_col=0)
    pheno_data = dataset.pheno_data
    sample_groups = read_array(sample_groups_file, nullable=True)
    sample_group_levels = read_array(sample_group_levels_file, nullable=True)
    bundle = create_design_data(design_matrix, pheno_data, name, description,
                                sample_groups, sample_group_levels,
                                sample_group_pheno_column)

    design_row = bundle.design
    design_row.DataSetID = dataset.ID
    session.add(design_row)
    # Flush before reading design_row.ID below (presumably the ID is only
    # assigned once the row has been sent to the database).
    session.flush()

    subset_frame = bundle.sample_subset
    subset_frame["DesignID"] = design_row.ID
    insert_data_frame(session, subset_frame, SampleSubset.__table__)

    session.commit()
    return name
Пример #3
0
def add_contrast(session,
                 contrast_file,
                 design_name,
                 dataset_name,
                 name=None,
                 description=None):
    """Store a contrast matrix, read from a tab-separated file, for a design.

    The contrast file is parsed and checked against the columns of the
    design matrix *before* anything is added to the session, so a malformed
    file no longer leaves an already flushed ``Contrast`` row pending in the
    caller's transaction.

    :param session: database session used for the lookups and inserts
    :param contrast_file: tab-separated contrast table; its first column is
        used as the index and every remaining column becomes one
        ``ContrastColumn`` row
    :param design_name: name of the design the contrast belongs to
    :param dataset_name: name of the data set the design belongs to
    :param name: contrast name; resolved via ``get_or_guess_name`` together
        with ``contrast_file``
    :param description: description stored on the ``Contrast`` row
    :return: the resolved contrast name
    :raises ROGERUsageError: if a contrast with that name already exists
        for the design
    """
    design = get_design(session, design_name, dataset_name)

    name = get_or_guess_name(name, contrast_file)

    if query_contrast(session, name, design_name,
                      dataset_name).one_or_none() is not None:
        raise ROGERUsageError("Contrast '%s' already exist in '%s'" %
                              (name, design_name))

    # Validate the input first: nothing is written until the matrix is known
    # to be readable and accepted by check_contrast_matrix.
    contrast_data = read_table(contrast_file, sep='\t', index_col=0)
    check_contrast_matrix(design.design_matrix.columns, contrast_data)

    contrast = Contrast(DesignID=design.ID,
                        Name=name,
                        Description=description,
                        CreatedBy=get_current_user_name(),
                        CreationTime=get_current_datetime())
    session.add(contrast)
    # Flush before reading contrast.ID for the column rows below.
    session.flush()

    contrast_cols = contrast_data.columns
    # One row per contrast column; the column name doubles as description.
    contrast_table = DataFrame({
        "ContrastID":
        contrast.ID,
        "DesignID":
        design.ID,
        "Name":
        contrast_cols,
        "Description":
        contrast_cols,
        "ColumnData": [
            contrast_data[col_name].values.tolist()
            for col_name in contrast_cols
        ]
    })

    insert_data_frame(session, contrast_table, ContrastColumn.__table__)

    session.commit()
    return name
Пример #4
0
def add_contrast(contrast_matrix,
                 design,
                 dataset,
                 name,
                 description):
    """Add a contrast to a design and report progress on stdout.

    The contrast name is resolved with ``get_or_guess_name`` from ``name``
    and ``contrast_matrix``. The work itself is delegated to
    ``roger.persistence.dge.add_contrast`` with a session from
    ``db.session()``; the name it returns is the one printed at the end.
    """
    contrast_name = get_or_guess_name(name, contrast_matrix)
    print("Adding contrast '%s' to data set '%s' ..." %
          (contrast_name, dataset))

    # These modules are imported at call time rather than at module level.
    from roger.persistence import db
    import roger.persistence.dge

    stored_name = roger.persistence.dge.add_contrast(
        db.session(), contrast_matrix, design, dataset, contrast_name,
        description)
    print("Done - added contrast with name '%s'" % stored_name)
Пример #5
0
def add_design(design_matrix,
               dataset,
               name,
               description,
               sample_groups,
               sample_group_levels,
               sample_group_column):
    """Add a design to a data set and report progress on stdout.

    The design name is resolved with ``get_or_guess_name`` from ``name``
    and ``design_matrix``. The work itself is delegated to
    ``roger.persistence.dge.add_design`` with a session from
    ``db.session()``; the name it returns is the one printed at the end.
    """
    design_name = get_or_guess_name(name, design_matrix)
    print("Adding design '%s' to data set '%s' ..." % (design_name, dataset))

    # These modules are imported at call time rather than at module level.
    from roger.persistence import db
    import roger.persistence.dge

    stored_name = roger.persistence.dge.add_design(
        db.session(), design_matrix, dataset, design_name, description,
        sample_groups, sample_group_levels, sample_group_column)
    print("Done - added design with name '%s'" % stored_name)
Пример #6
0
def create_ds(session,
              ds_type: Type[DataSet],
              exprs_file,
              tax_id,
              symbol_type,
              pheno_file=None,
              name=None,
              normalization_method=None,
              description=None,
              xref=None):
    """Parse and annotate an expression file into ``DataSetProperties``.

    :param session: database session used for the species lookup, the
        name-uniqueness check and the annotation step
    :param ds_type: data set class, passed through to ``DataSetProperties``
    :param exprs_file: expression file, parsed with ``parse_gct``
    :param tax_id: taxon ID; must occur in the ``TaxID`` column of
        ``list_species(session)``
    :param symbol_type: passed through to ``annotate``
    :param pheno_file: optional file read with ``read_df``; an empty
        ``DataFrame`` is used when it is ``None``
    :param name: data set name; resolved via ``get_or_guess_name`` together
        with ``exprs_file``
    :param normalization_method: passed through to ``DataSetProperties``
    :param description: passed through to ``DataSetProperties``
    :param xref: passed through to ``DataSetProperties``
    :return: a ``DataSetProperties`` instance describing the new data set
    :raises ROGERUsageError: if the taxon ID is unknown or a data set with
        the resolved name already exists
    """
    name = get_or_guess_name(name, exprs_file)

    # Input checking: the taxon must be known ...
    known_species = list_species(session)
    matching_species = known_species[known_species.TaxID == tax_id]
    if matching_species.empty:
        raise ROGERUsageError('Unknown taxon id: %s' % tax_id)

    # ... and the data set name must still be free.
    same_name_query = session.query(DataSet).filter(DataSet.Name == name)
    if same_name_query.one_or_none() is not None:
        raise ROGERUsageError("Data set with name '%s' already exists" % name)

    exprs_data = parse_gct(file_path=exprs_file)
    annotation_data, annotation_version = annotate(
        session, exprs_data, tax_id, symbol_type)

    if pheno_file is None:
        pheno_data = pd.DataFrame()
    else:
        pheno_data = read_df(pheno_file)
    annotated_pheno_data = annotate_ds_pheno_data(exprs_data, pheno_data)

    return DataSetProperties(
        ds_type, tax_id, exprs_file, pheno_file, exprs_data,
        annotated_pheno_data, annotation_data, annotation_version, name,
        normalization_method, description, xref)