def sampleWordIDs(numSamples=1, wordLength=5):
    """Sample word-series IDs into words and collect their metadata.

    Builds ``numSamples`` words, each being ``sample(allIDs, wordLength)``
    over every word-series ID returned by
    ``EM_DBHelp.retrieve_AllWordSeriesIDs``, then looks up metadata for the
    distinct IDs that were drawn.

    Relies on the module-level ``db_connection`` and ``db_cursor``.

    Args:
        numSamples: Number of words to draw.
        wordLength: Number of word-series IDs per word.

    Returns:
        A 5-tuple ``(words, wordSeriesIDs, dataSeriesIDs, earliestDate,
        latestDate)`` where:

        * ``words`` is a list of ``numSamples`` samples of IDs.
        * ``wordSeriesIDs`` is the set of distinct IDs across all words.
        * ``dataSeriesIDs`` is a list of the ``int_data_master_id`` value
          for each distinct ID, in the iteration order of
          ``wordSeriesIDs``.
        * ``earliestDate`` is the maximum ``dt_earliest_word`` over the
          distinct IDs.
        * ``latestDate`` is the minimum ``dt_latest_word`` over the
          distinct IDs.

    Raises:
        ValueError: If no IDs were drawn (e.g. ``numSamples`` is 0), since
            ``min``/``max`` are applied to an empty list.
    """
    # TODO: Implement limits:
    #   1) data have same periodicity
    #   2) data come from certain supertype
    #   3) data come from certain subtype
    #   4) data come from certain type
    #   5) data come from certain data_master_ids
    #   6) data come from differing data_master_ids
    #      (i.e. no way to have same data_master_id)
    allIDs = EM_DBHelp.retrieve_AllWordSeriesIDs(db_connection, db_cursor)

    # NOTE(review): `sample` is presumably random.sample (sampling without
    # replacement within a word) -- confirm against the file's imports.
    words = [sample(allIDs, wordLength) for _ in range(numSamples)]
    wordSeriesIDs = {ID for word in words for ID in word}

    def getColumn(columnName):
        # One metadata lookup per distinct series ID.  The column is passed
        # explicitly instead of being captured from a variable that is
        # reassigned between calls.
        return [
            EM_DBHelp.retrieve_WordSeriesMetaData(
                db_connection, db_cursor, columnName, seriesID=seriesID
            )
            for seriesID in wordSeriesIDs
        ]

    # min of the "latest" dates and max of the "earliest" dates: presumably
    # the date window shared by every sampled series -- confirm with callers.
    latestDate = min(getColumn("dt_latest_word"))
    earliestDate = max(getColumn("dt_earliest_word"))
    dataSeriesIDs = getColumn("int_data_master_id")

    return (words, wordSeriesIDs, dataSeriesIDs, earliestDate, latestDate)