class ParticipantFeatureVectors:
    """Feature vectors and target ratings of one participant.

    Holds a per-trial feature dictionary (``featureVectors``), a per-trial
    target dictionary (``Y``) and the train/test/validation parts produced by
    ``randomSplitSetForTraining``. Features are either computed from the raw
    signals or loaded from a previously saved CSV file.
    """

    # createYVector type code -> (getter method name, attribute name) on self.ratings.
    # Familiarity ('f') is handled separately because its getter result is checked.
    _RATING_SOURCES = {
        'a': ('getArousal', 'arousal'),
        'v': ('getValence', 'valence'),
        'l': ('getLiking', 'liking'),
        'd': ('getDominance', 'dominance'),
    }

    def __init__(self, nParticipant, from_file=False):
        """Create the containers and either compute or load the features.

        :param nParticipant: participant number, used to build file names
        :param from_file: if True, load precomputed feature vectors from CSV
                          instead of computing them from the raw signals
        """
        self.nParticipant = nParticipant
        self.events = EventLogger()
        self.ratings = ParticipantRatings(self.nParticipant)
        # {1: {'Fp1_theta': 2453476, 'Fp1_slow_alpha': 482418, ..., 'avgSkinRes': 69}}
        self.featureVectors = {}
        # Empty placeholder; filled by convertFeatureVectorsToDataFrame().
        self.featureDF = pd.DataFrame()
        self.Y = {}
        for trial in range(1, 41):
            self.featureVectors[trial] = {}
            self.Y[trial] = 0

        # seed and variables for splitting data
        self.randomSeed = random.randint(1, 1000000)
        self.X_train = {}
        self.X_test = {}
        self.X_validation = {}
        self.Y_train = {}
        self.Y_test = {}
        self.Y_validation = {}

        if not from_file:
            # No precomputed features: compute them from the raw signals.
            # The per-trial dictionaries stay empty until fillFeatureVectors() is called.
            self.physSignalsFeatures = ParticipantSignalsFeatures(self.nParticipant)
            # NOTE(review): 0 and 8063 are presumably the first/last sample index - confirm
            # against ParticipantSignalsFeatures.computeFeatures.
            self.physSignalsFeatures.computeFeatures(range(1, 41), range(1, 33), BANDS, FREQ, 0, 8063,
                                                     ASYM_ELECTRODE_PAIRS, ASYM_BANDS)
        else:
            self.loadFeatureVectorsFromCSV()
            self.convertFeatureVectorsToDataFrame()

    def fillFeatureVectors(self):
        """Copy all computed features (EEG spectral, EEG asymmetry, GSR) into featureVectors."""
        self.addEEGSpectralToFeatureVector()
        self.addEEGAsymetryToFeatureVector()
        self.addGSRToFeatureVector()

    def createYVector(self, yType='f'):
        """Rebuild self.Y from the participant ratings.

        :param yType: 'f' familiarity, 'a' arousal, 'v' valence, 'l' liking,
                      'd' dominance; 'save' builds every type in turn and writes
                      each one to CSV. Any other value prints a message and
                      leaves self.Y empty.
        """
        self.Y = {}
        if yType == 'save':
            for ratingType in ['f', 'a', 'v', 'd', 'l']:
                self.createYVector(yType=ratingType)
                self.saveYVectorToCSV(yType=ratingType)
        elif yType == 'f':
            # self.Y stays empty when the familiarity getter returns an empty list.
            if self.ratings.getFamiliarity() != []:
                self._fillYFrom(self.ratings.familiarity)
        elif yType in self._RATING_SOURCES:
            getterName, attrName = self._RATING_SOURCES[yType]
            getattr(self.ratings, getterName)()
            self._fillYFrom(getattr(self.ratings, attrName))
        else:
            print('No such Y vector type')

    def _fillYFrom(self, ratingSeries):
        """Fill self.Y with one rating per trial, taken by position (trial 1 -> position 0)."""
        # Resetting the index once makes positional access independent of the original index.
        values = ratingSeries.reset_index(drop=True)
        for trial in self.featureVectors:
            self.Y[trial] = values.iloc[trial - 1]

    def convertFeatureVectorsToDataFrame(self):
        """Store featureVectors as a DataFrame (one row per trial) in self.featureDF."""
        self.featureDF = pd.DataFrame.from_dict(self.featureVectors, orient='index')

    def addEEGSpectralToFeatureVector(self):
        """Add one '<channel>_<band>' feature per EEG channel and band to every trial."""
        for trial in self.featureVectors:
            # Electrodes are numbered from 1 in spectralEEGFeatures.
            for electrode, channelName in enumerate(EEG_CHANELS, start=1):
                for band in BANDS:
                    feature_name = channelName + '_' + band
                    self.featureVectors[trial][feature_name] = \
                        self.physSignalsFeatures.spectralEEGFeatures[trial][electrode][band]

    def addEEGAsymetryToFeatureVector(self):
        """Add one '<left>-<right>_<band>' asymmetry feature per electrode pair and band."""
        for trial in self.featureVectors:
            for leftE, rightE in ASYM_ELECTRODE_PAIRS:
                for band in ASYM_BANDS:
                    feature_name = EEG_CHANELS[leftE - 1] + '-' + EEG_CHANELS[rightE - 1] + '_' + band
                    # The asymmetry value is looked up by the left electrode of the pair.
                    self.featureVectors[trial][feature_name] = \
                        self.physSignalsFeatures.spectralEEGAsymetry[trial][leftE][band]

    def addGSRToFeatureVector(self):
        """Add the 'avgSkinRes' feature to every trial."""
        for trial in self.featureVectors:
            self.featureVectors[trial]['avgSkinRes'] = self.physSignalsFeatures.averageSkinResistance[trial]

    def randomSplitSetForTraining(self, train=70, test=30, validation=0, seed=None):
        '''
        Split self.featureVectors and self.Y in random train, test and validation parts in given proportions.
        Every call replaces all six stored parts, so nothing from an earlier split is left behind.

        :param train: proportion of train part, default 70
        :param test: proportion of test part, default 30
        :param validation: proportion of validation part, default 0
        :param seed: seed for random, if None - self.randomSeed is used
        :return: self.X_train, self.Y_train, self.X_test, self.Y_test, self.X_validation, self.Y_validation -
                 feature and target variable sets divided in the given proportions
        '''
        # remember the proportions, they are used to build the output path when saving
        self.trainPart = train
        self.testPart = test
        self.validationPart = validation

        if seed is None:
            seed = self.randomSeed
        random.seed(seed)

        # random.sample needs a sequence; dict views are rejected since Python 3.11
        all_trials = list(self.featureVectors)
        n_trials = len(all_trials)
        total = train + test + validation

        test_index = sorted(random.sample(all_trials, round(n_trials * test / total)))
        test_lookup = set(test_index)
        train_index = [item for item in all_trials if item not in test_lookup]

        # Not every model requires a validation set, so it is only drawn when requested
        validation_index = []
        if validation != 0:
            validation_index = sorted(random.sample(train_index, round(n_trials * validation / total)))
            validation_lookup = set(validation_index)
            train_index = [item for item in train_index if item not in validation_lookup]

        self.X_train = {key: self.featureVectors[key] for key in train_index}
        self.Y_train = self._selectTargets(train_index, 'self.Y_train')
        self.X_test = {key: self.featureVectors[key] for key in test_index}
        self.Y_test = self._selectTargets(test_index, 'self.Y_test')
        # Both stay empty when no validation part was requested
        self.X_validation = {key: self.featureVectors[key] for key in validation_index}
        self.Y_validation = self._selectTargets(validation_index, 'self.Y_validation')

        return self.X_train, self.Y_train, self.X_test, self.Y_test, self.X_validation, self.Y_validation

    def _selectTargets(self, index, setName):
        """Return {trial: self.Y[trial]} for the given trials.

        If a trial is missing from self.Y, the problem is logged as event 204,
        printed, and an empty dict is returned.
        """
        try:
            return {key: self.Y[key] for key in index}
        except KeyError:
            errorMsg = 'Participant {} self.Y is empty, so no data for {}'.format(str(self.nParticipant), setName)
            self.events.addEvent(204, errorMsg)
            print(errorMsg)
            return {}

    def saveSplitedSetToCSV(self, seed=None):
        """Write every non-empty split part to its own CSV file.

        Files go to 'training_data/seed=..&train=..&test=..&val=../'. The
        proportions in the path are the ones stored by randomSplitSetForTraining,
        so that method has to be called first.

        :param seed: seed used in the directory and file names, if None - self.randomSeed is used
        """
        if seed is None:
            seed = self.randomSeed
        names = ['X_train', 'X_test', 'X_validation', 'Y_train', 'Y_test', 'Y_validation']
        sets = [self.X_train, self.X_test, self.X_validation, self.Y_train, self.Y_test, self.Y_validation]
        pathname = 'training_data/seed={}&train={}&test={}&val={}/'.format(str(seed), str(self.trainPart),
                                                                           str(self.testPart),
                                                                           str(self.validationPart))
        os.makedirs(pathname, exist_ok=True)
        for name, dataset in zip(names, sets):
            if dataset:
                file_name = '{1}_{2}_{0}.csv'.format(str(seed), str(self.nParticipant), name)
                pd.DataFrame.from_dict(dataset, orient='index').to_csv(pathname + file_name)

    def saveFeatureVectorToCSV(self):
        """Write featureVectors to 'feature_vectors/FV<participant>.csv'."""
        # Create the output directory on first use so to_csv does not fail on a fresh checkout.
        os.makedirs('feature_vectors', exist_ok=True)
        filename = 'feature_vectors/FV{}.csv'.format(str(self.nParticipant))
        pd.DataFrame.from_dict(self.featureVectors, orient='index').to_csv(filename)

    def saveYVectorToCSV(self, yType):
        """Write self.Y to 'feature_vectors/YV<participant><yType>.csv'.

        :param yType: rating type code appended to the file name
        """
        os.makedirs('feature_vectors', exist_ok=True)
        filename = 'feature_vectors/YV' + str(self.nParticipant) + yType + '.csv'
        pd.DataFrame.from_dict(self.Y, orient='index').to_csv(filename)

    def loadFeatureVectorsFromCSV(self):
        """Load featureVectors from 'feature_vectors/FV<participant>.csv'.

        In most cases there is no need to recalculate the features from the raw
        data, so the previously computed and saved feature vectors are loaded.
        """
        filename = 'feature_vectors/FV{}.csv'.format(str(self.nParticipant))
        # DataFrame.from_csv was removed in pandas 1.0; the first column holds the trial index.
        self.featureVectors = pd.read_csv(filename, index_col=0).to_dict(orient='index')