def create_data(X: dt.Frame = None) -> Union[
    str, List[str],
    dt.Frame, List[dt.Frame],
    np.ndarray, List[np.ndarray],
    pd.DataFrame, List[pd.DataFrame],
    Dict[str, str],  # {data set names : paths}
    Dict[str, dt.Frame],  # {data set names : dt frames}
    Dict[str, np.ndarray],  # {data set names : np arrays}
    Dict[str, pd.DataFrame],  # {data set names : pd frames}
]:
    """Download the IMDb title dumps and build two rating datasets.

    Three gzipped TSV dumps (title basics, title ratings, title episodes) are
    downloaded into the contrib directory, parsed with ``dt.fread`` and then
    deleted again.

    :param X: unused; present only to match the recipe signature.
    :return: a dict with two frames:
        ``"imdb_title_ratings"`` - title basics joined with their ratings;
        ``"imdb_episode_ratings"`` - episodes joined with the episode rating
        and with the parent title (and its rating), sorted per series and
        extended with a running ``episodeSequence`` number.
    """
    from itertools import chain

    # Location in DAI file system where we will save the data set
    temp_path = os.path.join(user_dir(), config.contrib_relative_directory)
    os.makedirs(temp_path, exist_ok=True)

    # URLs of the IMDb dataset dumps
    link_basics = "https://datasets.imdbws.com/title.basics.tsv.gz"
    link_ratings = "https://datasets.imdbws.com/title.ratings.tsv.gz"
    link_episodes = "https://datasets.imdbws.com/title.episode.tsv.gz"

    # Download and parse the files; whatever was downloaded is removed again
    # even when a later download or a parse step fails.
    downloaded = []
    try:
        for link in (link_basics, link_ratings, link_episodes):
            downloaded.append(download(link, dest_path=temp_path))
        file_basics, file_ratings, file_episodes = downloaded

        basics = dt.fread(file_basics, fill=True)
        ratings = dt.fread(file_ratings, fill=True)
        episodes = dt.fread(file_episodes, na_strings=['\\N'], fill=True)
    finally:
        for path in downloaded:
            if os.path.isfile(path):
                os.remove(path)

    # Create Title with Ratings dataset
    # join titles with non-null ratings
    ratings = ratings[~dt.isna(dt.f.averageRating), :]
    ratings.key = "tconst"
    basics_ratings = basics[:, :, dt.join(ratings)]

    # Create Episodes dataset: keep only episodes with a known season and
    # episode number, then attach the episode's own rating.
    episodes = episodes[~dt.isna(dt.f.seasonNumber) &
                        ~dt.isna(dt.f.episodeNumber), :]
    episode_ratings = episodes[:, :, dt.join(ratings)]
    # Rename so that `tconst` now refers to the parent title; this is the
    # column the next join matches on.
    episode_ratings.names = {'tconst': 'episodeTconst',
                             'parentTconst': 'tconst',
                             'averageRating': 'episodeAverageRating',
                             'numVotes': 'episodeNumVotes'}
    basics_ratings.key = 'tconst'
    title_episode_ratings = episode_ratings[:, :, dt.join(basics_ratings)]

    # enumerate series episodes from 1 to N
    title_episode_ratings = title_episode_ratings[
        :, :, dt.sort(dt.f.tconst, dt.f.seasonNumber, dt.f.episodeNumber)]
    group_sizes = title_episode_ratings[
        :, dt.count(), dt.by(dt.f.tconst)][:, 'count'].to_list()
    cumcount = chain.from_iterable(range(1, n + 1) for n in group_sizes[0])
    title_episode_ratings['episodeSequence'] = dt.Frame(tuple(cumcount))

    # return datasets
    return {"imdb_title_ratings": basics_ratings,
            "imdb_episode_ratings": title_episode_ratings}
def create_data(folder_path: str = '/home/ubuntu/data/Kaggle/IEEEFraud'):
    """Build the IEEE fraud train/test frames from the four Kaggle CSV files.

    :param folder_path: directory holding ``train_identity.csv``,
        ``test_identity.csv``, ``train_transaction.csv`` and
        ``test_transaction.csv``. Defaults to the previously hard-coded
        location.
    :return: ``[]`` when any of the four files is missing, otherwise a dict
        ``{'IEEE.train': train, 'IEEE.test': test}`` of datatable frames with
        the target and fold column moved to the front.
    """
    train_identity_file = os.path.join(folder_path, 'train_identity.csv')
    test_identity_file = os.path.join(folder_path, 'test_identity.csv')
    train_transaction_file = os.path.join(folder_path, 'train_transaction.csv')
    test_transaction_file = os.path.join(folder_path, 'test_transaction.csv')

    # Every input file must exist. (The previous nested expression passed a
    # bool to os.path.isfile and therefore never checked the real paths.)
    required_files = (train_identity_file, test_identity_file,
                      train_transaction_file, test_transaction_file)
    if not all(os.path.isfile(path) for path in required_files):
        return []

    train_identity = dt.fread(train_identity_file)
    test_identity = dt.fread(test_identity_file)
    train_transaction = dt.fread(train_transaction_file)
    test_transaction = dt.fread(test_transaction_file)

    target = 'isFraud'
    train_identity.key = 'TransactionID'
    test_identity.key = 'TransactionID'

    # Join identity into transactions
    train = train_transaction[:, :, dt.join(train_identity)]
    test = test_transaction[:, :, dt.join(test_identity)]

    # Combine train and test for further processing
    X = dt.rbind([train, test], force=True)

    # Turn integer time column into datetime string with proper format
    startdate = datetime.datetime.strptime('2017-11-30', "%Y-%m-%d")
    pd_time = X[:, 'TransactionDT'].to_pandas()['TransactionDT'].apply(
        lambda x: (startdate + datetime.timedelta(seconds=x)))
    X[:, 'TransactionDT_str'] = dt.Frame(
        pd_time.apply(
            lambda x: datetime.datetime.strftime(x, "%Y-%m-%d %H:%M:%S")))

    # Month - to be used as fold column (that way get cross-validation
    # without shuffling future/past too much, minimize overlap between folds)
    fold_column = 'fold_column'
    X[:, fold_column] = dt.Frame(
        pd_time.dt.month + (pd_time.dt.year - 2017) * 12)

    # Create start times for Dx features (which are growing linearly over
    # time): day index of the transaction minus the Dx value.
    for i in range(1, 16):
        X[:, 'Trans_D%d_start' % i] = dt.Frame(
            np.floor(X[:, 'TransactionDT'].to_numpy().ravel() /
                     (24 * 60 * 60)) -
            X[:, 'D%d' % i].to_numpy().ravel())

    # re-order names
    first_names = [target, fold_column]
    names = first_names + [x for x in X.names if x not in first_names]
    X = X[:, names]

    # Split back into train and test
    train = X[:train_transaction.nrows, :]
    test = X[train_transaction.nrows:, :]
    return {'IEEE.train': train, 'IEEE.test': test}
def analyzeDailyAndMeldeTag(fullTable, fromDay, toDay, byCriteria,
                            criteriaValue, filter, postfix):
    """Combine the per-DatenstandTag analysis with two per-MeldeTag analyses.

    Three tables are produced with `analyzeDaily`:
      * one over `fullTable`, grouped by "DatenstandTag";
      * one over the rows of the latest DatenstandTag, grouped by "MeldeTag"
        (column prefix "MeldeTag_");
      * one over the rows of the DatenstandTag seven days before the latest,
        grouped by "MeldeTag" (column prefix "MeldeTag_Vor7Tagen_").
    The MeldeTag column of the latter two is renamed to "DatenstandTag" and
    all three are joined onto a frame holding every day that occurs in any
    of them. The returned frame is keyed by "DatenstandTag".
    """
    criteriaMatch = (byCriteria == criteriaValue)

    # Analysis by data-version day over the whole table.
    dayFilter = filter & filterByDayAndCriteria(
        fromDay, toDay, criteriaMatch, "DatenstandTag")
    byDatenstand = analyzeDaily(fullTable, dayFilter, "", postfix,
                                "DatenstandTag")

    newestDay = fullTable[:, dt.f.DatenstandTag].max().to_list()[0][0]
    print("maxDatenstandTag", newestDay)

    newestRows = fullTable[dt.f.DatenstandTag == newestDay, :]
    weekOldRows = fullTable[dt.f.DatenstandTag == newestDay - 7, :]

    # The MeldeTag range is taken from the newest snapshot only.
    firstMeldeTag = newestRows[:, dt.f.MeldeTag].min().to_list()[0][0]
    lastMeldeTag = newestRows[:, dt.f.MeldeTag].max().to_list()[0][0]
    meldeFilter = filter & filterByDayAndCriteria(
        firstMeldeTag, lastMeldeTag + 1, criteriaMatch, "MeldeTag")

    byMeldeTag = analyzeDaily(newestRows, meldeFilter, "MeldeTag_", postfix,
                              "MeldeTag")
    byMeldeTag.names = {"MeldeTag": "DatenstandTag"}
    byMeldeTag.key = "DatenstandTag"

    byMeldeTagOld = analyzeDaily(weekOldRows, meldeFilter,
                                 "MeldeTag_Vor7Tagen_", postfix, "MeldeTag")
    byMeldeTagOld.names = {"MeldeTag": "DatenstandTag"}
    byMeldeTagOld.key = "DatenstandTag"

    byDatenstand.key = "DatenstandTag"

    # Union of all days seen in any of the three tables.
    meldeDays = set(byMeldeTag[:, "DatenstandTag"].to_list()[0])
    meldeDaysOld = set(byMeldeTagOld[:, "DatenstandTag"].to_list()[0])
    dataDays = set(byDatenstand[:, "DatenstandTag"].to_list()[0])
    everyDay = sorted(meldeDays | dataDays | meldeDaysOld)

    combined = dt.Frame(everyDay)
    combined.names = ["DatenstandTag"]
    combined.key = "DatenstandTag"

    # Join the three keyed tables onto the day spine, one after another.
    for keyedTable in (byMeldeTag, byMeldeTagOld, byDatenstand):
        combined = combined[:, :, dt.join(keyedTable)]
    combined.key = "DatenstandTag"
    return combined
def test_join_errors():
    """Joining must fail for an unkeyed frame, a missing key column and a
    type mismatch, each with its specific message."""
    left = dt.Frame(A=[1, 2, 3])
    right = dt.Frame(B=range(10), stype=dt.float64)

    # 1. The right frame has no key yet.
    with pytest.raises(ValueError) as err:
        left[:, :, join(right)]
    assert "The join frame is not keyed" in str(err.value)

    # 2. Keyed on a column that the left frame does not have.
    right.key = "B"
    with pytest.raises(ValueError) as err:
        left[:, :, join(right)]
    assert "Key column `B` does not exist in the left Frame" in str(err.value)

    # 3. Same column name, but int on the left and real on the right.
    right.names = ("A", )
    with pytest.raises(TypeError) as err:
        left[:, :, join(right)]
    expected = ("Join column `A` has type int in the left Frame, and type real "
                "in the right Frame")
    assert expected in str(err.value)
def map_foreign_key_to_table(primary_df: dt.Frame,
                             fk_df: dt.Frame,
                             join_column_dict: dict) -> dt.Frame:
    """
    Performs a left join of `primary_df` to `fk_df` by reference, replacing
    the join column of `primary_df` with the matching `id` value of `fk_df`.

    Note that both frames are modified in place: the join column of `fk_df`
    is renamed to the `primary_df` column name and set as the key of `fk_df`,
    and `primary_df` has its join column overwritten.

    :primary_df: A `datatable.Frame`. This should be the larger table and
        will ideally be loaded from a .jay file with a `memory_limit`
        specified in `datatable.fread`.
    :fk_df: A `datatable.Frame`. This should be a smaller table which will be
        joined to `primary_df`. It must contain a column named `id`, since
        that is the column written into `primary_df`.
    :join_column_dict: A dictionary with keys 'primary_df' and 'fk_df'
        specifying the columns to join the tables on.
    :return: `primary_df`, the same object that was passed in, after the
        in-place update.
    :raises ValueError: if `join_column_dict` lacks either required key.
    """
    # Check for correct keys in dict
    if ('primary_df' not in join_column_dict
            or 'fk_df' not in join_column_dict):
        raise ValueError("The join_column_dict item must have keys "
                         "'primary_df' and 'fk_df'!")

    # Rename the foreign-key column so both frames share the join column name
    primary_col = join_column_dict['primary_df']
    fk_col = join_column_dict['fk_df']
    fk_df.names = {fk_col: primary_col}
    fk_df.key = primary_col

    # Join and update by reference: primary_col <- fk_df.id
    update_expr = {primary_col: g.id}
    primary_df[:, update(**update_expr), join(fk_df)]
    return primary_df
def test_join_random(seed, lt):
    """Join a random frame against a random keyed lookup frame and compare
    the result with a pure-Python lookup.

    The order of the `random` calls is significant: it fixes the test case
    that a given `seed` produces.
    """
    random.seed(seed)
    n_rows = int(random.expovariate(0.0005))
    n_keys = int(random.expovariate(0.01)) + 1
    st = random.choice(lt.stypes)

    # Generate a list of unique keys of the requested logical type.
    if lt == ltype.bool:
        keys = [True, False]
    elif lt == ltype.int:
        if st == stype.int8:
            nbits = 6
        elif st == stype.int16:
            nbits = 12
        else:
            nbits = 24
        keys = list({random.getrandbits(nbits) for _ in range(n_keys)})
    elif lt == ltype.real:
        keys = [random.random() for _ in range(n_keys)]
        if st == stype.float32:
            # Round-trip through a float32 frame before de-duplicating.
            keys = dt.Frame(keys, stype=st).topython()[0]
        keys = list(set(keys))
    else:
        length = int(random.expovariate(0.05)) + 1
        keys = list({random_string(length) for _ in range(n_keys)})
    n_keys = len(keys)

    lookup = dt.Frame(KEY=keys, VAL=range(n_keys), stypes={"KEY": st})
    lookup.key = "KEY"
    # Re-read keys/values from the frame, as setting the key may reorder rows.
    keys, vals = lookup.topython()

    main = [random.choice(keys) for _ in range(n_rows)]
    dmain = dt.Frame(KEY=main, stype=st)
    expected = [vals[keys.index(item)] for item in main]

    djoined = dmain[:, :, join(lookup)]
    djoined.internal.check()
    assert djoined.shape == (n_rows, 2)
    assert djoined.names == ("KEY", "VAL")
    assert djoined.topython() == [main, expected]
def test_join_empty_frame():
    """Joining against an empty keyed frame leaves the left frame unchanged.

    See issue #1988
    """
    left = dt.Frame(A=range(5),
                    B=['gs', 'dfk', None, 'ava;lej', 'fdsfal;k'])
    empty = dt.Frame(A=[])
    empty.key = "A"
    joined = left[:, :, dt.join(empty)]
    assert_equals(joined, left)
def analyzeDailyAltersgruppenGeschlechter(fullTable, fromDay, toDay,
                                          byCriteria, criteriaValue,
                                          Altersgruppen, Geschlechter):
    """Build the daily table for all rows, per age group, per gender and per
    gender/age-group combination.

    The unfiltered analysis (filter `True`, empty postfix) is computed first
    and extended by `analyzeDailyAltersgruppen`. Then, for every entry of
    `Geschlechter` except "unbekannt", the analysis filtered by that gender
    (postfix "_G_<gender>") is joined on and again extended per age group.
    Progress and intermediate column names are printed.
    """
    result = analyzeDailyAndMeldeTag(fullTable, fromDay, toDay, byCriteria,
                                     criteriaValue, True, "")
    result = analyzeDailyAltersgruppen(fullTable, result, fromDay, toDay,
                                       byCriteria, criteriaValue, True,
                                       Altersgruppen, Geschlechter, "")

    for g in Geschlechter:
        if g == "unbekannt":
            continue
        genderFilter = (dt.f.Geschlecht == g)
        genderPostfix = "_G_" + g

        print("Analyzing Geschlechter " + g)
        genderTable = analyzeDailyAndMeldeTag(fullTable, fromDay, toDay,
                                              byCriteria, criteriaValue,
                                              genderFilter, genderPostfix)
        print("byDayTableG", genderTable.names)

        result = result[:, :, dt.join(genderTable)]
        print("byDayTable 2", result.names)

        result = analyzeDailyAltersgruppen(fullTable, result, fromDay, toDay,
                                           byCriteria, criteriaValue,
                                           (dt.f.Geschlecht == g),
                                           Altersgruppen, Geschlechter,
                                           genderPostfix)
        print("byDayTable 3", result.names)
    return result
def test_select_from_joined():
    """Selecting unmatched cells of a joined frame must return None.

    Test that selecting unmatched elements in the joined frame does not
    lead to a crash. Selection should be done using the "fast" DT[i, j]
    syntax, where both i and j are integers.
    See issue #1917
    """
    column_stypes = {
        "A": dt.int32,
        "B": dt.bool8,
        "C1": dt.int8,
        "C2": dt.int16,
        "C3": dt.int32,
        "C4": dt.int64,
        "D1": dt.float32,
        "D2": dt.float64,
        "E1": dt.str32,
        "E2": dt.str64,
    }
    keyed = dt.Frame(A=[0], B=[True],
                     C1=[34], C2=[17], C3=[18], C4=[20],
                     D1=[5.2], D2=[-7.7],
                     E1=["foo"], E2=["bar"],
                     stypes=column_stypes)
    keyed.key = "A"

    # None of these values matches the single key 0.
    source = dt.Frame(A=[1, 3, 7], stype=dt.int32)
    joined = source[:, :, join(keyed)]
    for row in range(3):
        for col in range(1, joined.ncols):
            assert joined[row, col] is None
def __call__(self, rows=None, select=None, verbose=False, timeit=False,
             groupby=None, join=None, sort=None, engine=None):
    """DEPRECATED, use DT[i, j, ...] instead."""
    warnings.warn(
        "`DT(rows, select, ...)` is deprecated and will be removed in "
        "version 0.9.0. Please use `DT[i, j, ...]` instead",
        category=FutureWarning)
    started_at = time.time() if timeit else 0

    # Plain Python functions/lambdas are called with `f` to obtain the
    # actual row / column expression.
    plain_function = type(lambda: None)
    if isinstance(rows, plain_function):
        rows = rows(datatable.f)
    if isinstance(select, plain_function):
        select = select(datatable.f)

    join_node = datatable.join(join)
    by_node = datatable.by(groupby)
    sort_node = datatable.sort(sort)
    result = self[rows, select, join_node, by_node, sort_node]

    if timeit:
        elapsed_ms = 1000 * (time.time() - started_at)
        print("Time taken: %d ms" % elapsed_ms)
    return result
def transform(self, X: dt.Frame):
    """Map the first column of `X` (zip codes) to zipcode feature columns.

    Features are looked up once per distinct non-missing zip code via
    `self.get_zipcode_features` and joined back onto every row. On success
    `_output_feature_names` and `_feature_desc` are set and the feature
    columns (everything except the zip column) are returned. On any
    exception a warning is logged and a zero vector with one entry per row
    is returned instead.
    """
    logger = None
    if self.context and self.context.experiment_id:
        logger = make_experiment_logger(
            experiment_id=self.context.experiment_id,
            tmp_dir=self.context.tmp_dir,
            experiment_tmp_dir=self.context.experiment_tmp_dir)

    try:
        X = dt.Frame(X)
        source_column = X.names[0]
        X.names = ['zip_key']
        X = X[:, 'zip_key']

        # Look up features only for the distinct, non-missing zip codes.
        distinct_zips = dt.unique(X[~dt.isna(dt.f.zip_key), 0]).to_list()[0]
        per_zip_features = [self.get_zipcode_features(code)
                            for code in distinct_zips]

        lookup = dt.Frame({"zip_key": distinct_zips})
        lookup.cbind(dt.Frame(per_zip_features))
        lookup.key = 'zip_key'

        joined = X[:, :, dt.join(lookup)]
        feature_columns = list(joined[:, 1:].names)
        self._output_feature_names = [
            "{}.{}".format(source_column, name) for name in feature_columns
        ]
        self._feature_desc = [
            "Property '{}' of US zipcode found in '{}'".format(
                name, source_column)
            for name in feature_columns
        ]
        return joined[:, 1:]
    except Exception as ex:
        # Best-effort transformer: report the failure and fall back to zeros.
        message = "USZipcodeDatabaseTransformer got exception {}".format(
            type(ex).__name__)
        loggerwarning(logger, message)
        return np.zeros(X.shape[0])
def test_join_missing_levels():
    """A left value without a match in the keyed frame yields None."""
    left = dt.Frame(A=[1, 2, 3])
    keyed = dt.Frame(A=[1, 2], K=[True, False])
    keyed.key = "A"
    joined = left[:, :, join(keyed)]
    joined.internal.check()
    # A=3 has no counterpart in `keyed`.
    expected = [[1, 2, 3], [True, False, None]]
    assert joined.topython() == expected
def test_join_error_no_left_column():
    """Joining on a key column absent from the left frame raises ValueError."""
    left = dt.Frame(A=[1, 2, 3])
    keyed = dt.Frame(B=range(10))
    keyed.key = "B"
    with pytest.raises(ValueError) as err:
        noop(left[:, :, join(keyed)])
    message = str(err.value)
    assert "Key column `B` does not exist in the left Frame" in message
def test_join_missing_levels():
    """A left value without a match in the keyed frame yields None."""
    left = dt.Frame(A=[1, 2, 3])
    keyed = dt.Frame(A=[1, 2], K=[True, False])
    keyed.key = "A"
    joined = left[:, :, join(keyed)]
    frame_integrity_check(joined)
    # A=3 has no counterpart in `keyed`.
    expected = [[1, 2, 3], [True, False, None]]
    assert joined.to_list() == expected
def test_join_error_type_mismatch():
    """Joining an int column to a string key column raises TypeError."""
    left = dt.Frame(A=[1, 2, 3])
    keyed = dt.Frame(A=[str(n) for n in range(10)])
    keyed.key = "A"
    with pytest.raises(TypeError) as err:
        noop(left[:, :, join(keyed)])
    expected = ("Column `A` of type int32 in the left Frame cannot be joined to "
                "column `A` of incompatible type str32 in the right Frame")
    assert expected in str(err.value)
def test_write_joined_frame():
    """A joined frame with unmatched rows is written to CSV correctly.

    The joined frame will have a rowindex with some rows missing (-1).
    Check that such frame can be written correctly. See issue #1919.
    """
    keyed = dt.Frame(A=range(5), B=list('ABCDE'))
    keyed.key = "A"
    source = dt.Frame(A=[3, 7, 11, -2, 0, 1])
    joined = source[:, :, dt.join(keyed)]
    csv_text = joined.to_csv()
    # 7, 11 and -2 have no match, so their B field is empty.
    assert csv_text == 'A,B\n3,D\n7,\n11,\n-2,\n0,A\n1,B\n'
def test_issue1556():
    """String join where only the second of two left values has a match."""
    left = dt.Frame(A=['Ahoy ye matey!', 'hey'])
    keyed = dt.Frame(A=['hey'], B=['Avast'])
    keyed.key = 'A'
    joined = left[:, :, join(keyed)]
    frame_integrity_check(joined)
    assert joined.shape == (2, 2)
    expected = {"A": ["Ahoy ye matey!", "hey"],
                "B": [None, "Avast"]}
    assert joined.to_dict() == expected
def test_issue1800():
    """Assign a column of a joined frame into the left frame through a
    boolean row filter."""
    keyed = dt.Frame(A=range(5), B=[0.1, 0.2, 0.3, 0.4, 0.5])
    keyed.key = "A"
    target = dt.Frame(A=[0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5])
    joined = target[:, :, dt.join(keyed)]

    # An all-True row filter given as a frame.
    every_row = dt.Frame([True] * target.nrows)
    target[every_row, "N"] = joined[every_row, "B"]

    # Key 5 does not exist in `keyed`, hence the trailing Nones.
    expected = {
        "A": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
        "N": [0.1, 0.1, 0.2, 0.2, 0.3, 0.3, 0.4, 0.4, 0.5, 0.5, None, None],
    }
    assert target.to_dict() == expected
def test_join_view():
    """Join when the left side is a row-filtered selection of another frame.

    See issue #1540
    """
    base = dt.Frame(A=[1, 2, 3, 1, 2, 3],
                    B=[3, 6, 2, 4, 3, 1],
                    C=list("bdbbdb"))
    selection = base[f.A == 1, ['A', 'B', 'C']]
    keyed = dt.Frame(C=['b', 'z'], BB=[2, 1000])
    keyed.key = 'C'

    joined = selection[:, :, join(keyed)]
    assert joined.shape == (2, 4)
    assert joined.names == ("A", "B", "C", "BB")
    assert joined.to_list() == [[1, 1], [3, 4], ['b', 'b'], [2, 2]]
def test_join_void_to_void():
    """An all-None left column joins to a None key in the keyed frame."""
    left = dt.Frame(A=[None, None, None], B=[3, 4, 7])
    keyed = dt.Frame(A=[None], V=["nothing"])
    keyed.key = "A"
    joined = left[:, :, join(keyed)]
    expected = dt.Frame(A=[None, None, None],
                        B=[3, 4, 7],
                        V=["nothing", "nothing", "nothing"])
    assert_equals(joined, expected)
def test_join_strings():
    """Join on a string column that is the second column of the left frame."""
    left = dt.Frame([[1, 3, 2, 1, 1, 2, 0], list("cabdabb")],
                    names=("A", "B"))
    # Lookup: a -> 0, b -> 5, c -> 10, d -> 15
    keyed = dt.Frame([list("abcd"), range(0, 20, 5)], names=("B", "V"))
    keyed.key = "B"

    joined = left[:, :, join(keyed)]
    joined.internal.check()
    assert joined.shape == (7, 3)
    assert joined.names == ("A", "B", "V")
    expected = [[1, 3, 2, 1, 1, 2, 0],
                ["c", "a", "b", "d", "a", "b", "b"],
                [10, 0, 5, 15, 0, 5, 5]]
    assert joined.topython() == expected
def join_self(self):
    """Join the frame with itself and mirror the effect on the Python-side
    bookkeeping (`data`, `types`, `names`, `nkeys`).

    When the frame has no key, the join is expected to raise ValueError
    ("The join frame is not keyed") and nothing else changes.
    """
    ncols = self.ncols
    if not self.nkeys:
        with pytest.raises(ValueError, match="The join frame is not keyed"):
            self.df = self.df[:, :, join(self.df)]
        return

    self.df = self.df[:, :, join(self.df)]

    # The join appends a copy of every non-key column.
    non_key = slice(self.nkeys, ncols)
    extra_data = copy.deepcopy(self.data[non_key])
    extra_types = self.types[non_key].copy()
    extra_names = self.names[non_key].copy()
    self.data += extra_data
    self.types += extra_types
    self.names += extra_names

    self.nkeys = 0
    self.dedup_names()
def test_html_repr_joined_frame():
    """The HTML repr of a frame joined on a two-column key renders matched
    values and leaves unmatched cells empty."""
    left = dt.Frame([[5, 6, 7, 9], [7, 8, 9, 10]], names=["A", "B"])
    keyed = dt.Frame([[5, 7], [7, 9], [1, 2]], names=["A", "B", "yhat"])
    keyed.key = ["A", "B"]
    joined = left[:, :, dt.join(keyed)]

    parsed = parse_html_repr(joined._repr_html_())
    assert parsed.names == ("A", "B", "yhat")
    assert parsed.shape == (4, 3)
    expected_rows = [['5', '7', '1'],
                     ['6', '8', None],
                     ['7', '9', '2'],
                     ['9', '10', None]]
    assert parsed.data == expected_rows
def test_join_update():
    """Assign a column of the joined frame into a new column of the left
    frame."""
    values_a = [1, 2, 3, 2, 3, 1, 3, 2, 2, 1]
    frame = dt.Frame([values_a, range(10)], names=("A", "B"))

    # Per-group mean of B, keyed by A.
    group_means = frame[:, mean(f.B), f.A]
    group_means.key = "A"
    frame[:, "AA", join(group_means)] = g.V0
    assert frame.names == ("A", "B", "AA")

    mean_of_1 = 14.0 / 3
    mean_of_2 = 4.75
    assert frame.to_list() == [
        [1, 2, 3, 2, 3, 1, 3, 2, 2, 1],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [mean_of_1, mean_of_2, 4, mean_of_2, 4,
         mean_of_1, 4, mean_of_2, mean_of_2, mean_of_1],
    ]
def test_join_and_select_g_col():
    """Selecting `g.c` picks the column from the joined frame.

    Check that selecting a g-column does not confuse it with an f-column.
    See issue #1352
    """
    left = dt.Frame(a=[0, 2, 3], b=[3, 4, 2])
    keyed = dt.Frame(b=[2, 4], c=["foo", "bar"])
    keyed.key = "b"
    selected = left[:, g.c, join(keyed)]
    selected.internal.check()
    assert selected.shape == (3, 1)
    assert selected.stypes == (stype.str32, )
    # assert selected.names == ("c",)  # not working yet
    assert selected.topython() == [[None, "bar", "foo"]]
def test_dt_isna_joined():
    """`isna` applied to the non-key columns of the joined frame.

    See issue #2109
    """
    left = dt.Frame(A=[None, 4, 3, 2, 1])
    keyed = dt.Frame(A=[0, 1, 3, 7],
                     B=['a', 'b', 'c', 'd'],
                     C=[0.25, 0.5, 0.75, 1.0],
                     D=[22, 33, 44, 55],
                     E=[True, False, True, False])
    keyed.key = 'A'
    result = left[:, dt.math.isna(g[1:]), join(keyed)]
    frame_integrity_check(result)
    # Only A=3 and A=1 have a match; all four joined columns share the mask.
    missing_mask = [True, True, False, True, False]
    assert result.to_list() == [missing_mask] * 4
def analyzeDailyAltersgruppen(fullTable, byDayTable, filter, Altersgruppen, Geschlechter, postfix):
    """Join one per-age-group daily analysis onto `byDayTable` for every
    entry of `Altersgruppen`.

    For each age group `ag`, `analyzeDaily` is run on `fullTable` with
    `filter` additionally restricted to `Altersgruppe == ag` and with the
    postfix `postfix + "-AG-" + ag`; the result is joined onto `byDayTable`.
    `Geschlechter` is not used in this function. Returns the extended
    `byDayTable`.
    """
    #byDayTable = analyzeDaily(fullTable, filter, postfix)
    #print("----- analyzeDailyAltersgruppen:"+postfix)
    for ag in Altersgruppen:
        print("Analyzing Altergruppe " + ag)
        byDayTableAG = analyzeDaily(fullTable, filter & (dt.f.Altersgruppe == ag), postfix + "-AG-" + ag)
        # dt.join presumably requires byDayTableAG to be keyed already
        # (by analyzeDaily) - TODO confirm.
        byDayTable = byDayTable[:, :, dt.join(byDayTableAG)]
        # NOTE(review): re-keys the joined result after every join; confirm
        # that this assignment is meant to sit inside the loop.
        byDayTable.key = "DatenstandTag"
    return byDayTable
def test_join_simple():
    """Basic join of an integer column against a keyed lookup frame."""
    left = dt.Frame([[1, 3, 2, 1, 1, 2, 0], list("abcdefg")],
                    names=("A", "B"))
    # Lookup 0..3 -> word, created with the same stypes as `left`.
    keyed = dt.Frame([range(4), ["zero", "one", "two", "three"]],
                     names=("A", "V"),
                     stypes=left.stypes)
    keyed.key = "A"

    joined = left[:, :, join(keyed)]
    frame_integrity_check(joined)
    assert joined.shape == (7, 3)
    assert joined.names == ("A", "B", "V")
    expected = [
        [1, 3, 2, 1, 1, 2, 0],
        ["a", "b", "c", "d", "e", "f", "g"],
        ["one", "three", "two", "one", "one", "two", "zero"],
    ]
    assert joined.to_list() == expected
def analyzeDailyAltersgruppen(fullTable, byDayTable, fromDay, toDay, byCriteria, criteriaValue, filter, Altersgruppen, Geschlechter, postfix):
    """Join one per-age-group analysis onto `byDayTable` for every entry of
    `Altersgruppen` except "unbekannt".

    For each age group `ag`, `analyzeDailyAndMeldeTag` is run with `filter`
    additionally restricted to `Altersgruppe == ag` and with the postfix
    `postfix + "_AG_" + agColName(ag)`; the result is joined onto
    `byDayTable`. `Geschlechter` is not used in this function. Returns the
    extended `byDayTable`.
    """
    #byDayTable = analyzeDaily(fullTable, filter, postfix)
    #print("----- analyzeDailyAltersgruppen:"+postfix)
    for ag in Altersgruppen:
        if ag != "unbekannt":
            print("Analyzing Altergruppe "+ ag)
            fullfilter = filter & (dt.f.Altersgruppe == ag)
            byDayTableAG = analyzeDailyAndMeldeTag(fullTable, fromDay, toDay, byCriteria, criteriaValue, fullfilter, postfix+"_AG_"+agColName(ag))
            byDayTable = byDayTable[:,:,dt.join(byDayTableAG)]
            # NOTE(review): re-keys the joined result after every join;
            # confirm that this assignment is meant to sit inside the loop.
            byDayTable.key = "DatenstandTag"
    return byDayTable
def test_join_multi():
    """Join on a two-column key (A, B)."""
    keyed = dt.Frame(A=[1, 2, 1, 2],
                     B=[3, 3, 4, 4],
                     C=["goo", "blah", "zoe", "rij"])
    keyed.key = ("A", "B")
    col_a = [1, 2, 3, 2, 3, 1, 2, 1, 1]
    col_b = [3, 4, 5, 4, 3, 3, 3, 4, 3]
    left = dt.Frame([col_a, col_b], names=("A", "B"))

    joined = left[:, :, join(keyed)]
    assert joined.names == ("A", "B", "C")
    # Pairs (3, 5) and (3, 3) are not present in `keyed`.
    assert joined.to_list() == [
        [1, 2, 3, 2, 3, 1, 2, 1, 1],
        [3, 4, 5, 4, 3, 3, 3, 4, 3],
        ["goo", "rij", None, "rij", None, "goo", "blah", "zoe", "goo"],
    ]