    def get_dataframe(
            self, dataframe, acols, nullcols, dbcol,
            globalid, siteid, sitelevels):

        acols = list(acols) if acols is not None else acols
        nullcols = list(nullcols) if nullcols is not None else nullcols
        dbcol = list(dbcol) if dbcol is not None else dbcol
        sitelevels = list(sitelevels) if sitelevels is not None else sitelevels

        # Strip trailing whitespace from user-supplied names; fall back
        # to integer indexes if the entries are not strings
        try:
            acols = [x.rstrip() for x in acols]
        except Exception as e:
            acols = [int(x) for x in acols]
            print(str(e))
        nullcols = [x.rstrip() for x in nullcols]
        dbcol = [x.rstrip() for x in dbcol]

        # Columns that will be updated later in the program
        updatedf = hlp.produce_null_df(
            len(dbcol), dbcol, len(sitelevels), 'NA')

        updatedf['siteid'] = sitelevels
        return updatedf
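
# hlp.produce_null_df is not shown in this collection. Below is a minimal
# stand-in inferred from the calls above (a DataFrame with `ncols` columns
# named `colnames`, `dflength` rows, every cell set to `nullvalue`); an
# illustrative sketch only, not the project's actual helper:
from pandas import DataFrame

def produce_null_df(ncols, colnames, dflength, nullvalue):
    # Build one constant column per name; `ncols` bounds the column count
    colnames = list(colnames)[:ncols]
    return DataFrame({name: [nullvalue] * dflength for name in colnames},
                     columns=colnames)
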
    def time_regex(data, col, form, nulls):
        '''
        Method to format the date columns in the raw data
        based on user input. Returns 3 formatted columns,
        i.e. (year, month, day), including nulls.
        '''

        fields = ['month', 'day', 'year']
        # Flatten one level of nesting if any entry in col is itself a list
        if any(isinstance(i, list) for i in col):
            col = list(chain.from_iterable(col))
        print(type(col))
        print(col)

        if len(nulls) > 0:
            nulldf = hlp.produce_null_df(
                len(nulls), nulls, len(data), 'NaN')
        else:
            nulldf = DataFrame()

        try:
            if col[0] is not None:
                time_list_re = hlp.strip_time(data, col)
            else:
                time_list_re = []
        except Exception as e:
            print(str(e))
            raise AttributeError('Could not strip time format')

        notnull = [x for x in fields if x not in nulls]

        for i, item in enumerate(form):
            try:
                time_form_list = []
                for j in time_list_re:
                    time_form_list.append(
                        [to_datetime(x, format=item) for x in j])
                if len(time_form_list) > 1:
                    timedf = DataFrame(
                        [list(x) for x in zip(*time_form_list)])
                else:
                    timedf = DataFrame(time_form_list[0])
                    if len(notnull) == 1:
                        timedf.columns = notnull
                final = {'formatted': timedf, 'null': nulldf}
                return final
            except Exception as e:
                print(str(e))
                print('Trying different format')

        # Every supplied format failed to parse
        raise ValueError('Unable to parse dates with any supplied format')
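
# hlp.strip_time is likewise external. From the way its result is consumed
# above (one list of raw values per requested column, each value then fed
# to to_datetime), a plausible stand-in is the sketch below; this is an
# assumption about its behavior, not the real implementation:
def strip_time(data, cols):
    # One list of whitespace-stripped strings per requested column
    return [[str(v).strip() for v in data[c]] for c in cols]
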
def test_site_in_project_key(MergeToUpload, site_handle_corner_case,
                             file_handle_corner_case, meta_handle_corner_case,
                             project_handle_corner_case,
                             taxa_handle_corner_case, time_handle_corner_case,
                             percent_cover_handle_corner_case,
                             covar_handle_corner_case):
    facade = face.Facade()

    facade.input_register(meta_handle_corner_case)
    facade.meta_verify()

    facade.input_register(file_handle_corner_case)
    facade.load_data()

    siteid = site_handle_corner_case.lnedentry['study_site_key']

    facade._data[siteid].replace(
        {
            'C': 'site_jrn_zone_creosotebush',
            'G': 'site_jrn_zone_grassland'
        },
        inplace=True)

    facade.input_register(site_handle_corner_case)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf

    print('study_site_table (test): ', study_site_table)

    facade.create_log_record('study_site_table')
    lter = meta_handle_corner_case.lnedentry['lter']
    ltercol = produce_null_df(1, ['lter_table_fkey'], len(study_site_table),
                              lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print('study_site_table: ', study_site_table)
    facade.push_tables['study_site_table'] = study_site_table

    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister['siteid'] = siteid

    facade.input_register(project_handle_corner_case)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')

    facade.input_register(taxa_handle_corner_case)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    facade.create_log_record('taxa_table')

    facade.input_register(time_handle_corner_case)
    timetable = tparse.TimeParse(facade._data,
                                 time_handle_corner_case.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')

    facade.input_register(percent_cover_handle_corner_case)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[percent_cover_handle_corner_case.tablename] = rawtable
    facade.create_log_record(percent_cover_handle_corner_case.tablename)

    facade.input_register(covar_handle_corner_case)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_corner_case.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')

    facade._valueregister['globalid'] = meta_handle_corner_case.lnedentry[
        'globalid']
    facade._valueregister['lter'] = meta_handle_corner_case.lnedentry['lter']
    facade._valueregister['siteid'] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)

    #    try:
    #        study_site_table.to_sql(
    #            'study_site_table',
    #            orm.conn, if_exists='append', index=False)
    #    except Exception as e:
    #        print('Sites in db: ', str(e))
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql('project_table',
                         orm.conn,
                         if_exists='append',
                         index=False)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)

    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)

    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    taxa_column_in_data = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)

    obs_columns_in_data = [
        x[1] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table)
    def get_dataframe(self, dataframe, acols, nullcols, keycols, dbcol,
                      globalid, siteid, sitelevels):

        acols = list(acols) if acols is not None else acols
        nullcols = list(nullcols) if nullcols is not None else nullcols
        keycols = list(keycols) if keycols is not None else keycols
        dbcol = list(dbcol) if dbcol is not None else dbcol
        sitelevels = list(sitelevels) if sitelevels is not None else sitelevels

        # Note: 'unitobs' entries are renamed to '<tablename>_observation'
        # in the renaming block at the end of this method

        print('obs acols: ', acols)
        print('obs nullcols: ', nullcols)

        if self._inputs.tablename == 'individual_table':
            try:
                acols.remove('')
            except Exception as e:
                print('no individual column to remove: ', str(e))
        else:
            pass

        try:
            acols = [x.rstrip() for x in acols]
        except Exception as e:
            acols = [int(x) for x in acols]
            uniquesubset = dataframe[acols]
            print(str(e))

        # Insert siteid column and remove
        # spatial rep 1 from null columns (already have data
        # in siteid column of raw data)

        acols.insert(0, siteid)
        nullcols.remove('spatial_replication_level_1')
        nullcols.remove('{}_observation'.format(
            re.sub('_table', '', self._inputs.tablename)))

        columns_to_be_added_later = ['year', 'month', 'day', 'covariates']
        [nullcols.remove(x) for x in columns_to_be_added_later]
        [nullcols.remove(x) for x in keycols]

        if self._inputs.foreignmergeddata is None:
            pass
        else:
            columns_where_data_is_from_query = [
                'taxa_{}_fkey'.format(
                    re.sub('_table', '', self._inputs.tablename)),
                'site_in_project_{}_fkey'.format(
                    re.sub('_table', '', self._inputs.tablename))
            ]
            [acols.append(x) for x in columns_where_data_is_from_query]

        uniquesubset = dataframe[acols]
        print('uniquesub: ', uniquesubset)
        nullsubset = hlp.produce_null_df(ncols=len(nullcols),
                                         colnames=nullcols,
                                         dflength=len(uniquesubset),
                                         nullvalue='NA')
        print('null subset: ', nullsubset)
        _concat = concat([uniquesubset, nullsubset], axis=1).reset_index()
        final = _concat.reset_index()

        if self._inputs.tablename == 'individual_table':
            final['individual_observation'] = 1
            print('should have added individual observation')
        else:
            pass
        print('final build class columns: ', final.columns)
        try:
            formatted_column_to_change = list(self._inputs.lnedentry.keys())
            formatted_column_to_change.append('spatial_replication_level_1')
            for index, item in enumerate(formatted_column_to_change):
                if item == 'unitobs':
                    formatted_column_to_change[index] = '{}_observation'.format(
                        re.sub('_table', '', self._inputs.tablename))

            original_column_names_to_change = list(
                self._inputs.lnedentry.values())
            original_column_names_to_change.append(siteid)
            for i, item in enumerate(formatted_column_to_change):
                final.rename(
                    columns={original_column_names_to_change[i]: item},
                    inplace=True)

            return final

        except Exception as e:
            print(str(e))
            raise AttributeError('Column renaming error')
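
# A side note on the idiom used above: list comprehensions evaluated only
# for their side effects (e.g. [nullcols.remove(x) for x in keycols]) build
# and discard a list of Nones. A plain loop states the intent directly:
nullcols_demo = ['year', 'month', 'day', 'lat']
for x in ['year', 'month', 'day']:
    nullcols_demo.remove(x)
print(nullcols_demo)   # ['lat']
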
    def get_dataframe(self, dataframe, acols, nullcols, keycols, dbcol,
                      globalid, siteid, sitelevels):

        acols = list(acols) if acols is not None else acols
        nullcols = list(nullcols) if nullcols is not None else nullcols
        keycols = list(keycols) if keycols is not None else keycols
        dbcol = list(dbcol) if dbcol is not None else dbcol
        sitelevels = list(sitelevels) if sitelevels is not None else sitelevels

        try:
            acols = [x.rstrip() for x in acols]
        except Exception as e:
            acols = [int(x) for x in acols]
            uniquesubset = dataframe[acols]
            print(str(e))

        try:
            [dbcol.remove(x) for x in keycols]
        except Exception as e:
            print(str(e))

        try:
            [nullcols.remove(x) for x in keycols]
        except Exception as e:
            print(str(e))

        print('SELF INPUTS: ', self._inputs.checks)
        print('AVAILABLE COLUMNS: ', acols)
        print('DB COLUMNS: ', dbcol)
        print('NULL COLUMNS: ', nullcols)
        print('DF COLUMNS: ', dataframe.columns.values.tolist())

        if self._inputs.checks['taxacreate'] is True:
            dfcol = dataframe.columns.values.tolist()
            columns_create = [x for x in acols if x not in dfcol]
            print('CREATE :', columns_create)
            for i in columns_create:
                dataframe.loc[:, i] = i
            print('DF COLUMNS (added): ', dataframe.columns.values.tolist())

        else:
            pass

        dbcolrevised = [x for x in dbcol if x not in nullcols]
        print('DB COLUMN REVISED: ', dbcolrevised)
        uniquesubset_site_list = []
        for i, item in enumerate(sitelevels):
            uniquesubset = dataframe[dataframe[siteid] == item].copy()
            try:
                uniquesubset = uniquesubset[acols].copy()
            except Exception as e:
                print(str(e))
            for j, rename_item in enumerate(dbcolrevised):
                uniquesubset.rename(columns={acols[j]: rename_item},
                                    inplace=True)

            unique = uniquesubset.drop_duplicates().copy()
            unique = unique.reset_index()
            sitelevel = hlp.produce_null_df(ncols=len(unique),
                                            colnames=[siteid],
                                            dflength=len(unique),
                                            nullvalue=item)
            nullsubset = hlp.produce_null_df(ncols=len(nullcols),
                                             colnames=nullcols,
                                             dflength=len(unique),
                                             nullvalue='NA')
            unique = concat([unique, nullsubset, sitelevel], axis=1)
            uniquesubset_site_list.append(unique)

        final = uniquesubset_site_list[0]
        print('final before loop: ', final)
        for i, item in enumerate(uniquesubset_site_list):
            if i > 0:
                final = concat([final, item], ignore_index=True)
            else:
                pass

        print('past subsetting sites')
        print(final.columns)
        dbcol.append(siteid)
        return final[dbcol].copy()
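
# The per-site loop above follows a subset / rename / null-pad / stack
# pattern. A compressed, self-contained illustration with invented data
# (toy column names, not from the source project):
from pandas import DataFrame, concat

toy = DataFrame({'site': ['A', 'A', 'B'], 'sp': ['x', 'y', 'x']})
pieces = []
for level in toy['site'].drop_duplicates():
    sub = toy.loc[toy['site'] == level, ['sp']].drop_duplicates().reset_index()
    sub = sub.rename(columns={'sp': 'species'})   # db-facing name
    sub['site'] = level                           # re-attach the site label
    pieces.append(sub)
stacked = concat(pieces, ignore_index=True)
print(stacked[['species', 'site']])
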
def test_site_in_project_key_number_two(
    MergeToUpload,
    site_handle2,
    file_handle2,
    meta_handle2,
    project_handle2,
    taxa_handle2,
    time_handle2,
    count_handle2,
    covar_handle2,
):
    facade = face.Facade()

    facade.input_register(meta_handle2)
    facade.meta_verify()

    facade.input_register(file_handle2)
    facade.load_data()

    facade.input_register(site_handle2)
    sitedirector = facade.make_table("siteinfo")
    study_site_table = sitedirector._availdf

    siteid = site_handle2.lnedentry["study_site_key"]
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    print("test2 sitelevels: ", sitelevels)
    facade._valueregister["siteid"] = siteid

    print("study_site_table (test): ", study_site_table)

    facade.create_log_record("study_site_table")
    lter = meta_handle2.lnedentry["lter"]
    ltercol = produce_null_df(1, ["lter_table_fkey"], len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    study_site_table_og_col = study_site_table.columns.values.tolist()

    study_site_table_single = study_site_table.iloc[0, :]

    study_site_table_single_df = DataFrame([study_site_table_single])
    study_site_table_single_df.columns = study_site_table_og_col

    print("study site single: ", study_site_table_single)

    study_site_table_single_df.loc[0, "study_site_key"] = "NULL"

    print("study_site_table: ", study_site_table_single_df)

    facade.push_tables["study_site_table"] = study_site_table_single_df

    facade.input_register(project_handle2)
    maindirector = facade.make_table("maininfo")
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables["project_table"] = project_table
    facade.create_log_record("project_table")

    facade.input_register(taxa_handle2)
    taxadirector = facade.make_table("taxainfo")

    taxa_table = taxadirector._availdf
    facade.push_tables["taxa_table"] = taxa_table
    print("taxa columns after make taxa table: ", taxa_table.columns)

    facade.create_log_record("taxa_table")

    print("taxa columns before time_table: ", taxa_table.columns)

    facade.input_register(time_handle2)
    timetable = tparse.TimeParse(facade._data, time_handle2.lnedentry).formater()
    facade.push_tables["timetable"] = timetable
    facade.create_log_record("timetable")

    print("taxa columns before count_table: ", taxa_table.columns)
    facade.input_register(count_handle2)
    rawdirector = facade.make_table("rawinfo")
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[count_handle2.tablename] = rawtable
    facade.create_log_record(count_handle2.tablename)

    print("taxa columns before covar_table: ", taxa_table.columns)
    facade.input_register(covar_handle2)
    covartable = ddf.DictionaryDataframe(facade._data, covar_handle2.lnedentry["columns"]).convert_records()
    facade.push_tables["covariates"] = covartable
    facade.create_log_record("covartable")

    facade._valueregister["globalid"] = meta_handle2.lnedentry["globalid"]
    facade._valueregister["lter"] = meta_handle2.lnedentry["lter"]
    facade._valueregister["siteid"] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + "_derived" for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    print("merge class obs_time columns: ", observation_time_df.columns)
    print("merge class project table: ", project_table)

    study_site_table.to_sql("study_site_table", orm.conn, if_exists="append", index=False)
    project_table["lter_project_fkey"] = facade._valueregister["lter"]
    project_table.to_sql("project_table", orm.conn, if_exists="append", index=False)

    print("taxa columns before site_in_proj method: ", taxa_table.columns)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister["lter"],
        studysitelabel=siteid,
        studysitelevels=sitelevels,
    )

    print("taxa columns before user taxa merge method: ", taxa_table.columns)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table, siteinprojkeydf=site_in_project_key_df, sitelabel=siteid
    )

    taxa_column_in_data = [x[0] for x in list(facade._inputs["taxainfo"].lnedentry.items())]

    taxa_column_in_push_table = [x[1] for x in list(facade._inputs["taxainfo"].lnedentry.items())]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name="{}".format(re.sub("_table", "", facade._inputs["rawinfo"].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table,
    )
def test_site_in_project_key(
        MergeToUpload, site_handle_3_biomass, file_handle_3_biomass,
        meta_handle_3_biomass, project_handle_3_biomass, taxa_handle_3_biomass,
        time_handle_3_biomass, count_handle_3_biomass, covar_handle_3_biomass):
    facade = face.Facade()

    facade.input_register(meta_handle_3_biomass)
    facade.meta_verify()

    facade.input_register(file_handle_3_biomass)
    facade.load_data()

    facade.input_register(site_handle_3_biomass)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf

    print('study_site_table (test): ', study_site_table)

    facade.create_log_record('study_site_table')
    lter = meta_handle_3_biomass.lnedentry['lter']
    ltercol = produce_null_df(1, ['lter_table_fkey'], len(study_site_table),
                              lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print('study_site_table: ', study_site_table)
    facade.push_tables['study_site_table'] = study_site_table

    siteid = site_handle_3_biomass.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister['siteid'] = siteid

    facade.input_register(project_handle_3_biomass)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')

    facade.input_register(taxa_handle_3_biomass)
    taxadirector = facade.make_table('taxainfo')
    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    facade.create_log_record('taxa_table')

    facade.input_register(time_handle_3_biomass)
    timetable = tparse.TimeParse(
        facade._data, time_handle_3_biomass.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')

    facade.input_register(count_handle_3_biomass)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print('rawtable facade tmaker: ', rawtable)
    facade.push_tables[count_handle_3_biomass.tablename] = rawtable
    facade.create_log_record(count_handle_3_biomass.tablename)

    facade.input_register(covar_handle_3_biomass)
    covartable = ddf.DictionaryDataframe(
        facade._data,
        covar_handle_3_biomass.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')

    facade._valueregister['globalid'] = meta_handle_3_biomass.lnedentry['globalid']
    facade._valueregister['lter'] = meta_handle_3_biomass.lnedentry['lter']
    facade._valueregister['siteid'] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)
    print('merge class obs_time df: ', observation_time_df)
    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)

    try:
        study_site_table.to_sql(
            'study_site_table',
            orm.conn, if_exists='append', index=False)
    except Exception as e:
        print('Sites in db: ', str(e))
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql(
        'project_table', orm.conn,
        if_exists='append', index=False
    )

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)

    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)

    taxa_column_in_data = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    taxa_column_in_push_table = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)

    obs_columns_in_data = [
        x[1] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    obs_columns_in_push_table = [
        x[0] for x in list(facade._inputs['rawinfo'].lnedentry.items())
    ]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table)
    def get_dataframe(
            self, dataframe, acols, nullcols, keycols, dbcol,
            globalid, siteid, sitelevels):
        '''
        Method to concatenate a study_site_table
        based on information supplied by the user (acols),
        expected columns in the table (dbcol),
        columns to be filled with NA (nullcols),
        and the globalid, siteid, and unique site levels.

        acols: columns returned from the GUI (i.e. line edit entries)

        dbcol: all columns within the table

        nullcols: all columns within the table that HAVE to have
        NA's generated by the table builder

        keycols: primary and foreign keys in the table
        (typically what are removed from the nullcols list)
        '''
        acols = list(acols) if acols is not None else acols
        nullcols = list(nullcols) if nullcols is not None else nullcols
        keycols = list(keycols) if keycols is not None else keycols
        dbcol = list(dbcol) if dbcol is not None else dbcol
        sitelevels = list(sitelevels) if sitelevels is not None else sitelevels

        print('acols before: ', acols)
        print('nullcols before: ', nullcols)
        print('dbcol before: ', dbcol)

        try:
            acols = [x.rstrip() for x in acols]
        except Exception as e:
            acols = [int(x) for x in acols]
            uniquesubset = dataframe[acols]
            print(str(e))

        try:
            remove_from_null = ['lter_table_fkey']
            [nullcols.remove(x) for x in remove_from_null]
        except Exception as e:
            print(str(e))
        try:
            remove_known_fkey = ['lter_table_fkey']
            [dbcol.remove(x) for x in remove_known_fkey]
        except Exception as e:
            print(str(e))
        # Defined outside the try block: lat_lng_null_list is needed
        # again below even if the removal fails
        lat_lng_null_list = ['lat_study_site', 'lng_study_site']
        try:
            [nullcols.remove(x) for x in lat_lng_null_list]
        except Exception as e:
            print(str(e))


        print('acols after: ', acols)
        print('nullcols after: ', nullcols)
        print('dbcol after: ', dbcol)

        uniquesubset = dataframe[acols]
        uniquesubset.columns = ['study_site_key']
        nullcols_non_numeric = hlp.produce_null_df(
            ncols=len(nullcols),
            colnames=nullcols,
            dflength=len(uniquesubset),
            nullvalue='NA')

        nullcols_numeric = hlp.produce_null_df(
            ncols=len(lat_lng_null_list),
            colnames=lat_lng_null_list,
            dflength=len(uniquesubset),
            nullvalue='-99999')

        _concat = concat(
            [uniquesubset, nullcols_non_numeric, nullcols_numeric],
            axis=1).reset_index(drop=True)
        final = _concat.drop_duplicates().reset_index(drop=True)

        return final
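
# The study-site flavor above reduces to: take the unique user column,
# rename it to the db key, pad text nulls with 'NA' and numeric nulls with
# '-99999', then dedupe. A toy end-to-end sketch with invented data:
from pandas import DataFrame, concat

raw = DataFrame({'site': ['A', 'A', 'B']})
unique = raw[['site']].rename(columns={'site': 'study_site_key'})
text_nulls = DataFrame({'descript': ['NA'] * len(unique)})
num_nulls = DataFrame({'lat_study_site': ['-99999'] * len(unique),
                       'lng_study_site': ['-99999'] * len(unique)})
out = concat([unique, text_nulls, num_nulls], axis=1)
out = out.drop_duplicates().reset_index(drop=True)
print(out)
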
    def get_dataframe(
            self, dataframe, acols, nullcols, dbcol,
            globalid, siteid, sitelevels):

        # Note: this variant takes no keycols parameter
        acols = list(acols)
        nullcols = list(nullcols)
        dbcol = list(dbcol)
        sitelevels = list(sitelevels)

        acols = [x.rstrip() for x in acols]
        nullcols = [x.rstrip() for x in nullcols]
        dbcol = [x.rstrip() for x in dbcol]

        col_booleans = list(self._inputs.checks.values())
        col_names = list(self._inputs.checks.keys())
        acols = [
            x.rstrip() for x, y in zip(acols, col_booleans)
            if y is False]
        acols_rename = [
            x.rstrip() for x, y in zip(col_names, col_booleans)
            if y is False]
        nullcols = [
            x.rstrip() for x, y in zip(col_names, col_booleans)
            if y is True]
        dbcol.remove('stationid')

        for i in dbcol:
            if i not in nullcols:
                nullcols.append(i)
            else:
                pass

        print('siteid: ', siteid)
        print('col bools: ', col_booleans)
        print('available cols: ', acols)
        print('null cols: ', nullcols)
        print('db cols: ', dbcol)

        print('dataframe climate build: ', dataframe)

        acols.append(siteid)
        try:
            uniquesubset = dataframe[acols]
        except Exception as e:
            acols = [int(x) for x in acols]
            uniquesubset = dataframe[acols]
            print(str(e))

        nullsubset = hlp.produce_null_df(
            ncols=len(nullcols),
            colnames=nullcols,
            dflength=len(uniquesubset),
            nullvalue='NA')
        print('uq subset build: ', uniquesubset)
        _concat = concat([uniquesubset, nullsubset], axis=1).reset_index()
        final = _concat.reset_index()

        try:
            print('build siteid: ', siteid)
            acols_rename.append('stationid')
            for i,item in enumerate(acols_rename):
                final.rename(
                    columns={acols[i]:item}, inplace=True)

            print('final build class: ', final.columns)
            return final

        except Exception as e:
            print(str(e))
            raise AttributeError('Column renaming error')
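
# The climate builder hinges on pairing checkbox booleans with column
# names via zip. In isolation the split looks like this (values invented):
col_names_demo = ['stationid', 'temp', 'precip']
col_booleans_demo = [False, False, True]   # True = field left blank by user
entered = [n for n, b in zip(col_names_demo, col_booleans_demo) if b is False]
nulled = [n for n, b in zip(col_names_demo, col_booleans_demo) if b is True]
print(entered)   # ['stationid', 'temp'] -> taken from the raw data
print(nulled)    # ['precip']            -> filled with 'NA'
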
    def save_close(self):
        self.update_data()
        session = orm.Session()
        sitecheck = session.query(orm.Sitetable.siteid).order_by(
            orm.Sitetable.siteid)
        session.close()
        sitecheckdf = read_sql(sitecheck.statement, sitecheck.session.bind)
        changed_df = self.sitetablemodel.data(None, QtCore.Qt.UserRole)
        changed_site_list = changed_df['siteid'].values.tolist()

        if sitecheckdf is not None:
            if len(sitecheckdf) == 0:
                checker = True
            else:
                records_entered = sitecheckdf['siteid'].values.tolist()
                check = [
                    x for x in list(set(records_entered))
                    if x in changed_site_list
                ]
                checker = (len(check) == 0)
        else:
            checker = True

        if checker is True:
            pass
        else:
            self._log.debug('SiteId present under different LTER')
            self.error.showMessage('Site abbreviations already in database ' +
                                   'from a different LTER. Please modify ' +
                                   'site abbreviations.')
            raise AttributeError('SiteID already present under different LTER')

        self.save_data = self.sitetablemodel.data(None, QtCore.Qt.UserRole)

        # Updating site levels
        self.facade.register_site_levels(self.facade._data[
            self.siteloc['siteid']].drop_duplicates().values.tolist())

        if len(self.save_data) == 0:
            self.save_data = self.save_data.append(
                DataFrame(
                    {
                        'siteid': 'NULL',
                        'lat': 'nan',
                        'lng': 'nan',
                        'descript': 'NULL'
                    },
                    index=[0]))
        else:
            pass

        lterid_df = hlp.produce_null_df(1, ['lterid'], len(self.save_data),
                                        self.lter)
        print(lterid_df)

        self.save_data = concat([self.save_data, lterid_df],
                                axis=1).reset_index(drop=True)
        print(self.save_data)
        self.facade.push_tables['sitetable'] = self.save_data

        hlp.write_column_to_log(self.sitelned, self._log, 'sitetable_c')

        oldsitetable = hlp.produce_null_df(
            len(self.save_data.columns),
            self.save_data.columns.values.tolist(), len(self.save_data), 'nan')
        hlp.updated_df_values(oldsitetable, self.save_data, self._log,
                              'sitetable')

        self.climatesite_unlocks.emit(self.facade._data)
        self._log.debug('facade site levels' +
                        ' '.join(self.facade._valueregister['sitelevels']))
        self.submit_change()
        self.close()
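
# The site-collision check above reduces to a set intersection; a compact
# equivalent with invented site ids:
records_entered = ['sbc', 'jrn']          # already in the database
changed_site_list = ['jrn', 'new_site']   # about to be saved
checker = len(set(records_entered) & set(changed_site_list)) == 0
print(checker)   # False: 'jrn' collides, so the save should abort
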
def test_site_in_project_key(
    MergeToUpload,
    site_handle_4_percent_cover,
    file_handle_4_percent_cover,
    meta_handle_4_percent_cover,
    project_handle_4_percent_cover,
    taxa_handle_4_percent_cover,
    time_handle_4_percent_cover,
    biomass_handle_4_percent_cover,
    covar_handle_4_percent_cover,
):
    facade = face.Facade()

    facade.input_register(meta_handle_4_percent_cover)
    facade.meta_verify()

    facade.input_register(file_handle_4_percent_cover)
    facade.load_data()

    facade.input_register(site_handle_4_percent_cover)
    sitedirector = facade.make_table("siteinfo")
    study_site_table = sitedirector._availdf

    print("study_site_table (test): ", study_site_table)

    facade.create_log_record("study_site_table")
    lter = meta_handle_4_percent_cover.lnedentry["lter"]
    ltercol = produce_null_df(1, ["lter_table_fkey"], len(study_site_table), lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    print("study_site_table: ", study_site_table)
    facade.push_tables["study_site_table"] = study_site_table

    siteid = site_handle_4_percent_cover.lnedentry["study_site_key"]
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    facade._valueregister["siteid"] = siteid

    facade.input_register(project_handle_4_percent_cover)
    maindirector = facade.make_table("maininfo")
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables["project_table"] = project_table
    facade.create_log_record("project_table")

    facade.input_register(taxa_handle_4_percent_cover)
    taxadirector = facade.make_table("taxainfo")
    taxa_table = taxadirector._availdf
    facade.push_tables["taxa_table"] = taxa_table
    facade.create_log_record("taxa_table")

    facade.input_register(time_handle_4_percent_cover)
    timetable = tparse.TimeParse(facade._data, time_handle_4_percent_cover.lnedentry).formater()
    facade.push_tables["timetable"] = timetable
    facade.create_log_record("timetable")

    facade.input_register(biomass_handle_4_percent_cover)
    rawdirector = facade.make_table("rawinfo")
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[biomass_handle_4_percent_cover.tablename] = rawtable
    facade.create_log_record(biomass_handle_4_percent_cover.tablename)

    facade.input_register(covar_handle_4_percent_cover)
    covartable = ddf.DictionaryDataframe(
        facade._data, covar_handle_4_percent_cover.lnedentry["columns"]
    ).convert_records()
    facade.push_tables["covariates"] = covartable
    facade.create_log_record("covartable")

    facade._valueregister["globalid"] = meta_handle_4_percent_cover.lnedentry["globalid"]
    facade._valueregister["lter"] = meta_handle_4_percent_cover.lnedentry["lter"]
    facade._valueregister["siteid"] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + "_derived" for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    print("merge class obs_time columns: ", observation_time_df.columns)
    print("merge class project table: ", project_table)

    try:
        study_site_table.to_sql("study_site_table", orm.conn, if_exists="append", index=False)
    except Exception as e:
        print("Sites in db: ", str(e))
    project_table["lter_project_fkey"] = facade._valueregister["lter"]
    project_table.to_sql("project_table", orm.conn, if_exists="append", index=False)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister["lter"],
        studysitelabel=siteid,
        studysitelevels=sitelevels,
    )

    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table, siteinprojkeydf=site_in_project_key_df, sitelabel=siteid
    )

    taxa_column_in_push_table = [x[0] for x in list(facade._inputs["taxainfo"].lnedentry.items())]

    taxa_column_in_data = [x[1] for x in list(facade._inputs["taxainfo"].lnedentry.items())]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name="{}".format(re.sub("_table", "", facade._inputs["rawinfo"].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table,
    )

    obs_columns_in_data = [x[1] for x in list(facade._inputs["rawinfo"].lnedentry.items())]
    obs_columns_in_push_table = [x[0] for x in list(facade._inputs["rawinfo"].lnedentry.items())]
    merge_object.update_project_table(
        spatial_rep_columns_from_og_df=obs_columns_in_data,
        spatial_rep_columns_from_formated_df=obs_columns_in_push_table,
    )
    def save_close(self):
        '''
        Method to save the study_site_table as the user sees it
        (matching sites already accepted by the user are removed
        from the table before it is pushed).
        '''
        self.updated_from_query_matches = False

        # Retrieve study_site_table data from user view
        self.save_data = self.sitetablemodel.data(None, QtCore.Qt.UserRole)

        # If there are no sites (they are already in the database),
        # add a placeholder row of NULLs so the table is not empty
        if len(self.save_data) == 0:
            self.save_data = self.save_data.append(
                DataFrame(
                    {
                        'study_site_key': 'NULL',
                        'lat_study_site': 'nan',
                        'lng_study_site': 'nan',
                        'descript': 'NULL'
                    },
                    index=[0]))
        else:
            pass

        # Append dataframe with current LTER
        lterid_df = hlp.produce_null_df(1, ['lter_table_fkey'],
                                        len(self.save_data), self.lter)
        print(lterid_df)
        self.save_data = concat([self.save_data, lterid_df],
                                axis=1).reset_index(drop=True)

        # Convert types and strip strings
        numeric_cols = ['lat_study_site', 'lng_study_site']
        non_numeric = self.save_data.columns.difference(numeric_cols)
        self.save_data[non_numeric] = self.save_data[non_numeric].applymap(str)
        self.save_data[non_numeric] = self.save_data[non_numeric].applymap(
            lambda x: x.strip())
        # to_numeric operates on 1-D data, so coerce column by column
        self.save_data[numeric_cols] = self.save_data[numeric_cols].apply(
            to_numeric, errors='coerce')

        print('Pushed dataset: ', self.save_data)
        self.facade.push_tables['study_site_table'] = self.save_data

        # Helpers to keep track of user changes to site names
        hlp.write_column_to_log(self.sitelned, self._log, 'sitetable_c')
        oldsitetable = hlp.produce_null_df(
            len(self.save_data.columns),
            self.save_data.columns.values.tolist(), len(self.save_data), 'nan')
        hlp.updated_df_values(oldsitetable, self.save_data, self._log,
                              'sitetable')

        # Signal to confirm this form has been completed and the
        # user can move on to other tables
        self.site_unlocks.emit(self.facade._data)
        self._log.debug('facade site levels' +
                        ' '.join(self.facade._valueregister['sitelevels']))
        self._log.debug(
            'sitelevels (Save Block): ' +
            ' '.join(self.save_data['study_site_key'].values.tolist()))
        self.close()
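
# Why the coercion above goes through .apply(): pandas' to_numeric accepts a
# Series (or scalar/list-like), not a whole DataFrame, so it is mapped over
# the numeric columns one at a time and unparseable strings become NaN. The
# toy frame below is illustrative only.
from pandas import DataFrame, to_numeric

coords = DataFrame({'lat_study_site': ['45.1', 'nan'],
                    'lng_study_site': ['-120.3', 'oops']})
coords = coords.apply(to_numeric, errors='coerce')
print(coords.dtypes)  # both columns are now float64; 'oops' became NaN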
    def get_dataframe(self, dataframe, acols, nullcols, keycols, dbcol,
                      globalid, siteid, sitelevels):
        '''
        Method to concatenate a study_site_table
        based on information supplied by the user (acols),
        the expected columns in the table (dbcol),
        the columns to be filled with NA (nullcols),
        and the globalid, siteid, and unique site levels.

        acols: columns returned from the GUI (i.e. line edit entries)

        dbcol: all columns within the table

        nullcols: all columns within the table that HAVE to have
        NA's generated by the table builder

        keycols: primary and foreign keys in the table
        (typically what are removed from the nullcols list)
        '''
        acols = list(acols) if acols is not None else acols
        nullcols = list(nullcols) if nullcols is not None else nullcols
        keycols = list(keycols) if keycols is not None else keycols
        dbcol = list(dbcol) if dbcol is not None else dbcol
        sitelevels = list(sitelevels) if sitelevels is not None else sitelevels

        print('acols before: ', acols)
        print('nullcols before: ', nullcols)
        print('dbcol before: ', dbcol)

        try:
            acols = [x.rstrip() for x in acols]
        except Exception as e:
            # Entries aren't strings; fall back to integer column labels
            acols = [int(x) for x in acols]
            print(str(e))

        try:
            remove_from_null = ['lter_table_fkey']
            for x in remove_from_null:
                nullcols.remove(x)
        except Exception as e:
            print(str(e))
        try:
            remove_known_fkey = ['lter_table_fkey']
            for x in remove_known_fkey:
                dbcol.remove(x)
        except Exception as e:
            print(str(e))
        try:
            lat_lng_null_list = ['lat_study_site', 'lng_study_site']
            for x in lat_lng_null_list:
                nullcols.remove(x)
        except Exception as e:
            print(str(e))

        print('acols after: ', acols)
        print('nullcols after: ', nullcols)
        print('dbcol after: ', dbcol)

        uniquesubset = dataframe[acols]
        uniquesubset.columns = ['study_site_key']
        nullcols_non_numeric = hlp.produce_null_df(ncols=len(nullcols),
                                                   colnames=nullcols,
                                                   dflength=len(uniquesubset),
                                                   nullvalue='NA')

        nullcols_numeric = hlp.produce_null_df(ncols=len(lat_lng_null_list),
                                               colnames=lat_lng_null_list,
                                               dflength=len(uniquesubset),
                                               nullvalue='-99999')

        _concat = concat(
            [uniquesubset, nullcols_non_numeric, nullcols_numeric],
            axis=1).reset_index(drop=True)
        final = _concat.drop_duplicates().reset_index(drop=True)

        return final
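
# A self-contained rerun of the assembly pattern used in get_dataframe above:
# take the user-supplied site column, attach 'NA'-filled text columns and
# sentinel-filled numeric columns, then drop duplicate site rows. The column
# names mirror study_site_table; the data are made up.
from pandas import DataFrame, concat

raw = DataFrame({'site': ['A', 'A', 'B']})
uniquesubset = raw[['site']].copy()
uniquesubset.columns = ['study_site_key']
non_numeric = DataFrame({'descript': ['NA'] * len(uniquesubset)})
numeric = DataFrame({'lat_study_site': ['-99999'] * len(uniquesubset),
                     'lng_study_site': ['-99999'] * len(uniquesubset)})
final = concat([uniquesubset, non_numeric, numeric],
               axis=1).drop_duplicates().reset_index(drop=True)
print(final)  # two rows remain: sites 'A' and 'B'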
def test_site_in_project_key_number_two(MergeToUpload, site_handle2,
                                        file_handle2, meta_handle2,
                                        project_handle2, taxa_handle2,
                                        time_handle2, count_handle2,
                                        covar_handle2):
    facade = face.Facade()

    facade.input_register(meta_handle2)
    facade.meta_verify()

    facade.input_register(file_handle2)
    facade.load_data()

    facade.input_register(site_handle2)
    sitedirector = facade.make_table('siteinfo')
    study_site_table = sitedirector._availdf

    siteid = site_handle2.lnedentry['study_site_key']
    sitelevels = facade._data[siteid].drop_duplicates().values.tolist()
    facade.register_site_levels(sitelevels)
    print('test2 sitelevels: ', sitelevels)
    facade._valueregister['siteid'] = siteid

    print('study_site_table (test): ', study_site_table)

    facade.create_log_record('study_site_table')
    lter = meta_handle2.lnedentry['lter']
    ltercol = produce_null_df(1, ['lter_table_fkey'], len(study_site_table),
                              lter)
    study_site_table = concat([study_site_table, ltercol], axis=1)
    study_site_table_og_col = study_site_table.columns.values.tolist()

    study_site_table_single = study_site_table.iloc[0, :]

    study_site_table_single_df = DataFrame([study_site_table_single])
    study_site_table_single_df.columns = study_site_table_og_col

    print('study site single: ', study_site_table_single)

    study_site_table_single_df.loc[0, 'study_site_key'] = 'NULL'

    print('study_site_table: ', study_site_table_single_df)

    facade.push_tables['study_site_table'] = study_site_table_single_df

    facade.input_register(project_handle2)
    maindirector = facade.make_table('maininfo')
    project_table = maindirector._availdf.copy().reset_index(drop=True)
    orm.convert_types(project_table, orm.project_types)

    facade.push_tables['project_table'] = project_table
    facade.create_log_record('project_table')

    facade.input_register(taxa_handle2)
    taxadirector = facade.make_table('taxainfo')

    taxa_table = taxadirector._availdf
    facade.push_tables['taxa_table'] = taxa_table
    print('taxa columns after make taxa table: ', taxa_table.columns)

    facade.create_log_record('taxa_table')

    print('taxa columns before time_table: ', taxa_table.columns)

    facade.input_register(time_handle2)
    timetable = tparse.TimeParse(facade._data,
                                 time_handle2.lnedentry).formater()
    facade.push_tables['timetable'] = timetable
    facade.create_log_record('timetable')

    print('taxa columns before count_table: ', taxa_table.columns)
    facade.input_register(count_handle2)
    rawdirector = facade.make_table('rawinfo')
    rawtable = rawdirector._availdf
    print(rawtable)
    facade.push_tables[count_handle2.tablename] = rawtable
    facade.create_log_record(count_handle2.tablename)

    print('taxa columns before covar_table: ', taxa_table.columns)
    facade.input_register(covar_handle2)
    covartable = ddf.DictionaryDataframe(
        facade._data, covar_handle2.lnedentry['columns']).convert_records()
    facade.push_tables['covariates'] = covartable
    facade.create_log_record('covartable')

    facade._valueregister['globalid'] = meta_handle2.lnedentry['globalid']
    facade._valueregister['lter'] = meta_handle2.lnedentry['lter']
    facade._valueregister['siteid'] = siteid

    timetable_og_cols = timetable.columns.values.tolist()
    timetable.columns = [x + '_derived' for x in timetable_og_cols]
    observationdf = facade._data
    observation_time_df = concat([timetable, observationdf], axis=1)

    print('merge class obs_time columns: ', observation_time_df.columns)
    print('merge class project table: ', project_table)

    study_site_table.to_sql('study_site_table',
                            orm.conn,
                            if_exists='append',
                            index=False)
    project_table['lter_project_fkey'] = facade._valueregister['lter']
    project_table.to_sql('project_table',
                         orm.conn,
                         if_exists='append',
                         index=False)

    print('taxa columns before site_in_proj method: ', taxa_table.columns)

    merge_object = MergeToUpload()
    site_in_project_key_df = merge_object.site_in_proj_key_df(
        studysitetabledf=study_site_table,
        projecttabledf=project_table,
        observationtabledf=observation_time_df,
        lterlocation=facade._valueregister['lter'],
        studysitelabel=siteid,
        studysitelevels=sitelevels)

    print('taxa columns before user taxa merge method: ', taxa_table.columns)
    merge_object.merge_for_taxa_table_upload(
        formated_taxa_table=taxa_table,
        siteinprojkeydf=site_in_project_key_df,
        sitelabel=siteid)

    taxa_column_in_data = [
        x[0] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    taxa_column_in_push_table = [
        x[1] for x in list(facade._inputs['taxainfo'].lnedentry.items())
    ]

    merge_object.merge_for_datatype_table_upload(
        raw_dataframe=observation_time_df,
        formated_dataframe=rawtable,
        formated_dataframe_name='{}'.format(
            re.sub('_table', '', facade._inputs['rawinfo'].tablename)),
        covariate_dataframe=covartable,
        siteinprojkeydf=site_in_project_key_df,
        raw_data_taxa_columns=taxa_column_in_data,
        uploaded_taxa_columns=taxa_column_in_push_table)
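
# The test above suffixes every timetable column with '_derived' before
# concatenating it to the raw observations, so derived date parts cannot
# collide with same-named raw columns. A minimal illustration with made-up
# frames:
from pandas import DataFrame, concat

timetable = DataFrame({'year': [2001], 'month': [6], 'day': [1]})
observations = DataFrame({'year': ['01'], 'count': [14]})
timetable.columns = [x + '_derived' for x in timetable.columns]
obs_time = concat([timetable, observations], axis=1)
print(obs_time.columns.tolist())
# ['year_derived', 'month_derived', 'day_derived', 'year', 'count']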
    def save_close(self):
        '''
        Method to save the study_site_table as it is seen
        by the user (matching sites that were accepted by the user
        are removed from the saved table because it will be pushed)
        '''
        update_message = QtGui.QMessageBox.question(
            self, 'Message', 'Did you update records?',
            QtGui.QMessageBox.Yes, QtGui.QMessageBox.No)
        if update_message == QtGui.QMessageBox.No:
            return

        # Retrieve study_site_table data from user view
        save_data = self.sitetablemodel.data(
            None, QtCore.Qt.UserRole)
        self.save_data = save_data.drop_duplicates()
        print('saved data (initial): ', self.save_data)
        self.facade.register_site_levels(
            self.facade._data[
                self.siteloc[
                    'study_site_key']].drop_duplicates().values.tolist())

        # If there are no sites (they are already in the database)
        # then add a single placeholder row
        if len(self.save_data) == 0:
            self.save_data = self.save_data.append(
                DataFrame(
                    {
                        'study_site_key': 'NULL',
                        'lat_study_site': 'nan',
                        'lng_study_site': 'nan',
                        'descript': 'NULL'
                    }, index=[0])
            )
        else:
            pass

        # Append dataframe with current LTER
        lterid_df = hlp.produce_null_df(
            1, ['lter_table_fkey'], len(self.save_data), self.lter)
        print(lterid_df)
        self.save_data = concat(
            [self.save_data, lterid_df],
            axis=1).reset_index(drop=True)
        # Convert types and strip strings
        numeric_cols = ['lat_study_site', 'lng_study_site']
        self.save_data[
            self.save_data.columns.difference(numeric_cols)] = self.save_data[
                self.save_data.columns.difference(numeric_cols)].applymap(str)
        self.save_data[
            self.save_data.columns.difference(numeric_cols)] = self.save_data[
                self.save_data.columns.difference(numeric_cols)].applymap(
                    lambda x: x.strip())
        # to_numeric operates on a Series, so coerce column by column
        self.save_data[numeric_cols] = self.save_data[numeric_cols].apply(
            to_numeric, errors='coerce')

        print('Pushed dataset: ', self.save_data)
        self.facade.push_tables['study_site_table'] = self.save_data

        # Helpers to keep track of user changes to site names
        hlp.write_column_to_log(
            self.sitelned, self._log, 'sitetable_c')
        oldsitetable = hlp.produce_null_df(
            len(self.save_data.columns),
            self.save_data.columns.values.tolist(),
            len(self.save_data),
            'nan'
        )
        hlp.updated_df_values(
            oldsitetable, self.save_data, self._log, 'sitetable'
        )

        # Signal to confirm this form has been completed and the
        # user can move on to other tables
        self.site_unlocks.emit('study_site_mod')
        site_unsorted = self.facade._data[
            self.siteloc[
                'study_site_key']].drop_duplicates().values.tolist()
        site_unsorted.sort()
        self.sitelevels = site_unsorted
        self._log.debug(
            'facade site levels' +
            ' '.join(self.facade._valueregister['sitelevels']))
        self._log.debug(
            'sitelevels (Save Block): ' +
            ' '.join(self.sitelevels))

        self.saved.append(1)
        self.close()
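
# A hedged sketch of the change-tracking idea behind hlp.updated_df_values,
# inferred from its call sites: oldsitetable is a same-shaped 'nan' frame, so
# every cell the user filled in shows up as a difference. The real helper may
# behave differently; the function and logger here are illustrative only.
from pandas import DataFrame

def log_changed_values(olddf, newdf, logger, tablename):
    # Walk matching columns and report every position where values disagree
    for col in newdf.columns:
        changed = olddf[col].astype(str) != newdf[col].astype(str)
        for idx in newdf.index[changed]:
            logger.debug('{} {}[{}]: {!r} -> {!r}'.format(
                tablename, col, idx, olddf.at[idx, col], newdf.at[idx, col]))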