def get_data(self, NAME=None):
    """
    Look up and return the stored data for a single data source.

    Params:
        NAME: the name of the data source to fetch. It is first checked
              against the data store frame via ensure_in_df_index.

    Returns:
        The entry stored under NAME in self.all_data.
    """
    # Validate the name against the data store frame before the lookup.
    ensure_in_df_index(NAME, self.data_store_df, "Data Store")
    source_data = self.all_data[NAME]
    return source_data
def remove_data_source(self, NAME=None):
    """
    Delete one data source (one row) from the data store frame.

    Params:
        NAME: the name (row label) of the data source to delete
    """
    ensure_in_df_index(NAME, self.data_store_df, "Data Store")
    # drop() returns a new frame without the row; rebind the attribute to it.
    # NOTE(review): self.all_data is not modified here -- confirm whether any
    # entry for NAME should also be removed.
    remaining_sources = self.data_store_df.drop(NAME)
    self.data_store_df = remaining_sources
def toggle_use_unique(self, NAME=None):
    """
    Switch whether to use only unique sentences from a data source, or
    allow repetitions.

    Params:
        NAME: the name of the data source whose USE_UNIQUE value is to be
              toggled
    """
    ensure_in_df_index(NAME, self.data_store_df, "Data Store")
    current_value = self.data_store_df.loc[NAME, "USE_UNIQUE"]
    # Assign through a single .loc[row, column] call. The chained form
    # df["USE_UNIQUE"].loc[NAME] = ... may write to a temporary copy, and
    # under pandas Copy-on-Write it never updates the frame.
    self.data_store_df.loc[NAME, "USE_UNIQUE"] = not current_value
def change_sample_size(self, NAME=None, SAMPLE_SIZE=None):
    """
    Change the number of sentences from a data source to use.

    Params:
        NAME: the name of the data source whose sample size is to be
              changed
        SAMPLE_SIZE: the new sample size
    """
    ensure_in_df_index(NAME, self.data_store_df, "Data Store")
    # Assign through a single .loc[row, column] call. The chained form
    # df["SAMPLE_SIZE"].loc[NAME] = ... may write to a temporary copy, and
    # under pandas Copy-on-Write it never updates the frame.
    self.data_store_df.loc[NAME, "SAMPLE_SIZE"] = SAMPLE_SIZE
def toggle_random(self, NAME=None):
    """
    Switch whether to randomly select the sample sentences from a data
    source, or take the sample starting from sentence index 0 until
    sentence index = sample size.

    Params:
        NAME: the name of the data source whose RANDOM value is to be
              toggled
    """
    ensure_in_df_index(NAME, self.data_store_df, "Data Store")
    current_value = self.data_store_df.loc[NAME, "RANDOM"]
    # Assign through a single .loc[row, column] call. The chained form
    # df["RANDOM"].loc[NAME] = ... may write to a temporary copy, and
    # under pandas Copy-on-Write it never updates the frame.
    self.data_store_df.loc[NAME, "RANDOM"] = not current_value
def change_path(self, NAME=None, PATH=None):
    """
    Change the location of a data source.

    Params:
        NAME: the name of the data source whose path is to be changed
        PATH: the new file path; checked with ensure_string_argument
              before it is stored
    """
    # Both arguments are validated (name first, then path) before the
    # frame is modified.
    ensure_in_df_index(NAME, self.data_store_df, "Data Store")
    ensure_string_argument(PATH, "PATH")
    # Assign through a single .loc[row, column] call. The chained form
    # df["PATH"].loc[NAME] = ... may write to a temporary copy, and under
    # pandas Copy-on-Write it never updates the frame.
    self.data_store_df.loc[NAME, "PATH"] = PATH
def data_sources(self, NAME=None):
    """
    Print the data store frame, or one row of it, to the screen.

    Params:
        NAME: Optionally, the name of a single data source whose details
              should be printed. Any falsy value (the default None
              included) prints the whole frame instead.
    """
    # No usable name given: show every data source and stop.
    if not NAME:
        print(self.data_store_df)
        return

    ensure_in_df_index(NAME, self.data_store_df, "Data Store")
    selected_row = self.data_store_df.loc[NAME]
    print(selected_row)