# --- imports required by the calls below ---
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

tqdm.pandas()  # registers DataFrame.progress_apply

all_datasets = []
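
#%%
# Assumed setup (defined elsewhere in the project, not shown in this excerpt):
# `preprocess` is the AraBERT text-preprocessing helper, `farasa_segmenter` is
# a Farasa segmenter instance, and `Dataset` is a small local container.
# The stand-ins below are a minimal sketch consistent with how these names are
# used further down; they are assumptions, not the project's actual definitions.
from farasa.segmenter import FarasaSegmenter  # from the farasapy package

farasa_segmenter = FarasaSegmenter(interactive=True)


class Dataset:
    """Minimal container matching the Dataset(name, train, test, label_list) calls below."""

    def __init__(self, name, train, test, label_list):
        self.name = name
        self.train = train
        self.test = test
        self.label_list = label_list
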
#%%
# *************HARD************
df_HARD = pd.read_csv("Datasets\\HARD\\balanced-reviews-utf8.tsv", sep="\t", header=0)

df_HARD = df_HARD[["rating", "review"]]  # we are interested in rating and review only
# encode rating as binary sentiment: 1 if rating > 3 (positive), 0 if rating < 3 (negative); the balanced set contains no 3-star reviews
df_HARD["rating"] = df_HARD["rating"].apply(lambda x: 0 if x < 3 else 1)
# rename columns to fit default constructor in fastai
df_HARD.columns = ["label", "text"]
df_HARD["text"] = df_HARD["text"].progress_apply(
    lambda x: preprocess(
        x, do_farasa_tokenization=True, farasa=farasa_segmenter, use_farasapy=True
    )
)
train_HARD, test_HARD = train_test_split(df_HARD, test_size=0.2, random_state=42)
label_list_HARD = [0, 1]

data_Hard = Dataset("HARD", train_HARD, test_HARD, label_list_HARD)
all_datasets.append(data_Hard)

#%%
# *************ASTD-Unbalanced************
df_ASTD_UN = pd.read_csv(
    "Datasets\\ASTD-master\\data\\Tweets.txt", sep="\t", header=None
)

DATA_COLUMN = "text"
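
# The rest of the ASTD-Unbalanced preparation is not part of this excerpt. The
# sketch below simply continues the same pattern used for HARD; the column
# order in Tweets.txt (tweet text first, label second) and the derived label
# list are assumptions and should be verified against the file.
LABEL_COLUMN = "label"
df_ASTD_UN.columns = [DATA_COLUMN, LABEL_COLUMN]  # assumed order: text, label
label_list_ASTD_UN = sorted(df_ASTD_UN[LABEL_COLUMN].unique())  # derive labels from the data

df_ASTD_UN[DATA_COLUMN] = df_ASTD_UN[DATA_COLUMN].progress_apply(
    lambda x: preprocess(
        x, do_farasa_tokenization=True, farasa=farasa_segmenter, use_farasapy=True
    )
)
train_ASTD_UN, test_ASTD_UN = train_test_split(
    df_ASTD_UN, test_size=0.2, random_state=42
)

data_ASTD_UN = Dataset(
    "ASTD-Unbalanced", train_ASTD_UN, test_ASTD_UN, label_list_ASTD_UN
)
all_datasets.append(data_ASTD_UN)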