def __init__(self, recipe, destination=None):
    """Build a dataset from either a folder of .csd files or a recipe dict.

    recipe: str path to a directory containing .csd files, or a dict mapping
            sequence names to .csd addresses (local paths or URLs).
    destination: forwarded to computational_sequence when recipe is a dict
                 (presumably a download target — confirm against callers).
    Raises via log.error(..., error=True) when the folder is missing or no
    sequence could be loaded.
    """
    self.computational_sequences = {}
    if type(recipe) is str:
        if os.path.isdir(recipe) is False:
            log.error("Dataset folder does not exist ...", error=True)
        from os import listdir
        from os.path import isfile, join
        # Collect every regular file ending in .csd from the folder.
        csd_fnames = []
        for fname in listdir(recipe):
            if isfile(join(recipe, fname)) and fname[-4:] == '.csd':
                csd_fnames.append(fname)
        # Key each loaded sequence by the root name stored in its metadata.
        for fname in csd_fnames:
            loaded_seq = computational_sequence(join(recipe, fname))
            self.computational_sequences[loaded_seq.metadata["root name"]] = loaded_seq
    if type(recipe) is dict:
        for entry, address in recipe.items():
            self.computational_sequences[entry] = computational_sequence(address, destination)
    if len(self.computational_sequences) == 0:
        log.error("Dataset failed to initialize ...", error=True)
    log.success("Dataset initialized successfully ... ")
def __set_computational_sequences(self, new_computational_sequences_data, metadata_copy=True):
    """Replace this dataset's computational sequences with new raw data.

    new_computational_sequences_data: dict mapping sequence name -> data dict
        accepted by computational_sequence.setData.
    metadata_copy: when True, carry over each sequence's previous metadata.

    Metadata of the sequences being replaced is snapshotted first; if this is
    a fresh dataset the snapshot is simply empty.
    """
    old_metadata = {
        m: self.computational_sequences[m].metadata
        for m in list(self.computational_sequences.keys())
    }
    self.computational_sequences = {}
    for sequence_name in list(new_computational_sequences_data.keys()):
        self.computational_sequences[sequence_name] = computational_sequence(sequence_name)
        self.computational_sequences[sequence_name].setData(
            new_computational_sequences_data[sequence_name], sequence_name)
        if metadata_copy:
            if sequence_name not in old_metadata:
                # No previous metadata for this sequence (new sequence, or the
                # old one had none) — warn and continue without copying.
                # BUGFIX: the original fell through to old_metadata[sequence_name]
                # and raised KeyError despite the non-fatal log message.
                log.error(
                    "Metadata not available to copy ..., please provide metadata before writing to disk later",
                    error=False)
            else:
                self.computational_sequences[sequence_name].setMetadata(
                    old_metadata[sequence_name], sequence_name)
        self.computational_sequences[sequence_name].rootName = sequence_name
def add_computational_sequences(self, recipe, destination):
    """Load the sequences named in *recipe* (name -> address) into the dataset.

    A name that already exists is reported via log.error and then replaced by
    the newly loaded sequence.
    """
    for seq_name, seq_address in recipe.items():
        if seq_name in self.computational_sequences:
            log.error(
                "Dataset already contains <%s> computational sequence ..." % seq_name)
        self.computational_sequences[seq_name] = computational_sequence(
            seq_address, destination)
def __init__(self, recipe, destination=None):
    """Build a dataset from a recipe dict mapping sequence names to addresses.

    recipe: dict of sequence name -> .csd address; anything else is reported
        through log.error.
    destination: forwarded to computational_sequence (presumably a download
        target — confirm against callers).
    """
    self.computational_sequences = {}
    if type(recipe) is not dict:
        log.error("Dataset recipe must be a dictionary type object ...")
    # BUGFIX: dict.iteritems() exists only in Python 2; .items() works on
    # both Python 2 and 3 with identical iteration behavior here.
    for entry, address in recipe.items():
        self.computational_sequences[entry] = computational_sequence(address, destination)
def __set_computational_sequences(self, new_computational_sequences_data):
    """Rebuild self.computational_sequences from raw per-sequence data.

    new_computational_sequences_data: dict mapping sequence name -> data dict
        accepted by computational_sequence.setData. Any previously held
    sequences are discarded; no metadata is carried over.
    """
    self.computational_sequences = {}
    for name in list(new_computational_sequences_data.keys()):
        fresh_seq = computational_sequence(name)
        self.computational_sequences[name] = fresh_seq
        fresh_seq.setData(new_computational_sequences_data[name], name)
        fresh_seq.rootName = name
def readFromCSD(self):
    """Load MOSI label/facet/COVAREP/word csd files and segment them per sentence.

    For each video's opinion-label intervals, slices every other modality into
    per-sentence chunks by comparing interval midpoints (via the module-level
    mid() helper — presumably the interval midpoint; confirm its definition),
    then maps words to ids via the module-level word2id table and materializes
    word embeddings read from gc.embed_path.

    Populates: vidList, sidList, labelList, facetList/facetInterval,
    covarepList/covarepInterval, wordList/wordInterval on self.
    """
    labelCompSeq = computational_sequence(
        self.root + 'CMU_MOSI_Opinion_Labels.csd').data
    facetCompSeq = computational_sequence(
        self.root + 'CMU_MOSI_VisualFacet_4.1.csd').data
    wordCompSeq = computational_sequence(
        self.root + 'CMU_MOSI_ModifiedTimestampedWords.csd').data
    covarepCompSeq = computational_sequence(self.root + 'CMU_MOSI_COVAREP.csd').data
    self.vidList = []
    self.sidList = []
    self.labelList = []
    self.facetList = []
    self.facetInterval = []
    self.covarepList = []
    self.covarepInterval = []
    self.wordList = []
    self.wordInterval = []
    wordids_List = []
    for i, vid in enumerate(labelCompSeq):
        if gc.debug:
            # Debug mode: process only the first video.
            if i > 0:
                break
        # Skip two hard-coded problematic video indices.
        if i == 88 or i == 66:
            continue
        print("processing video %d, uid %s" % (i, vid))
        labels = labelCompSeq[vid]['features']
        sen_intervals = labelCompSeq[vid]['intervals']
        facet = facetCompSeq[vid]['features']
        facet_intervals = facetCompSeq[vid]['intervals']
        covarep = covarepCompSeq[vid]['features']
        covarep_intervals = covarepCompSeq[vid]['intervals']
        words = wordCompSeq[vid]['features']
        word_intervals = wordCompSeq[vid]['intervals']
        # Add basic information: one (label, vid, sentence-id) triple per sentence.
        sen_num = 0
        while sen_num < len(labels):
            self.labelList.append(labels[sen_num])
            self.vidList.append(vid)
            self.sidList.append(sen_num)
            sen_num += 1
        # Add word ids: advance [start, end) over the word stream so each
        # sentence gets the words whose midpoints fall inside its interval.
        start, end = 0, 0
        sen_num = 0
        while sen_num < len(labels):
            while start < len(words) and mid(
                    word_intervals[start]) < sen_intervals[sen_num][0]:
                start += 1
            end = start
            while end < len(words) and mid(
                    word_intervals[end]) < sen_intervals[sen_num][1]:
                end += 1
            toAppend = [
                word2id[word_feat[0].decode('utf-8')]
                for word_feat in words[start:end]
            ]
            wordids_List.append(toAppend)
            self.wordInterval.append(word_intervals[start:end])
            if len(toAppend) > 50:
                # Flag unusually long sentences for inspection.
                print(len(toAppend))
            start = end
            sen_num += 1
        # Add facets: same midpoint-windowing scheme as the words.
        start, end = 0, 0
        sen_num = 0
        while sen_num < len(labels):
            while start < len(facet) and mid(
                    facet_intervals[start]) < sen_intervals[sen_num][0]:
                start += 1
            end = start
            while end < len(facet) and mid(
                    facet_intervals[end]) < sen_intervals[sen_num][1]:
                end += 1
            self.facetList.append(facet[start:end])
            self.facetInterval.append(facet_intervals[start:end])
            start = end
            sen_num += 1
        # Add covarep: same midpoint-windowing scheme.
        start, end = 0, 0
        sen_num = 0
        while sen_num < len(labels):
            while start < len(covarep) and mid(
                    covarep_intervals[start]) < sen_intervals[sen_num][0]:
                start += 1
            end = start
            while end < len(covarep) and mid(
                    covarep_intervals[end]) < sen_intervals[sen_num][1]:
                end += 1
            self.covarepList.append(covarep[start:end])
            self.covarepInterval.append(covarep_intervals[start:end])
            start = end
            sen_num += 1
    # Random-init embedding matrix; rows for words found in the embedding
    # file are overwritten below (300-d vectors, per the slicing).
    emb_mat = np.random.randn(len(word2id), 300)
    if not gc.debug:
        # BUGFIX: the file handle was opened without ever being closed;
        # use a context manager so it is released deterministically.
        with open(gc.embed_path, 'r') as f:
            found = 0
            for line in f:
                content = line.strip().split()
                # Everything before the last 300 tokens is the (possibly
                # multi-token) word itself.
                word = ' '.join(content[:-300])
                if word in word2id:
                    word_id = word2id[word]
                    vector = np.asarray([float(x) for x in content[-300:]])
                    emb_mat[word_id, :] = vector
                    found += 1
        print(f"Found {found} words in the embedding file.")
    # Materialize per-sentence embedding sequences from the collected ids.
    for list_id, word_ids in enumerate(wordids_List):
        toAppend = []
        for word_id in word_ids:
            toAppend.append(emb_mat[word_id])
        self.wordList.append(toAppend)
compseq[vid_key]["intervals"] = numpy.arange( start=0, stop=60 + 0.000001, step=60. / ((2 * num_entries) - 1)).reshape([num_entries, 2]) if __name__ == "__main__": vid_keys = [ "video1", "video2", "video3", "video4", "video5", "Hello", "World", "UG3sfZKtCQI" ] #let's assume compseq_1 is some modality with a random feature dimension compseq_1_data = {} compseq_1_feature_dim = numpy.random.randint(low=20, high=100, size=1) random_init(compseq_1_data, compseq_1_feature_dim) compseq_1 = mmdatasdk.computational_sequence("my_compseq_1") compseq_1.setData(compseq_1_data, "my_compseq_1") #let's assume compseq_1 is some other modality with a random feature dimension compseq_2_data = {} compseq_2_feature_dim = numpy.random.randint(low=20, high=100, size=1) random_init(compseq_2_data, compseq_2_feature_dim) compseq_2 = mmdatasdk.computational_sequence("my_compseq_2") compseq_2.setData(compseq_2_data, "my_compseq_2") #NOTE: if you don't want to manually input the metdata, set it by creating a metdata key-value dictionary based on mmsdk/mmdatasdk/configurations/metadataconfigs.py compseq_1.deploy("compseq_1.csd") compseq_2.deploy("compseq_2.csd") #now creating a toy dataset from the toy compseqs mydataset_recipe = { "compseq_1": "compseq_1.csd",
def readFromCSD(self):
    # Python 2 code (print statements). Loads MOSI label/facet/word-vector/
    # COVAREP csd files and segments every modality into per-sentence chunks
    # by comparing interval midpoints (via the module-level mid() helper —
    # presumably the interval midpoint; confirm its definition).
    # Populates: vidList, sidList, labelList, facetList/facetInterval,
    # covarepList/covarepInterval, wordList/wordInterval on self.
    labelCompSeq = computational_sequence(
        self.root + 'CMU_MOSI_Opinion_Labels.csd').data
    facetCompSeq = computational_sequence(
        self.root + 'CMU_MOSI_VisualFacet_4.1.csd').data
    wordCompSeq = computational_sequence(
        self.root + 'CMU_MOSI_TimestampedWordVectors.csd').data
    covarepCompSeq = computational_sequence(self.root + 'CMU_MOSI_COVAREP.csd').data
    self.vidList = []
    self.sidList = []
    self.labelList = []
    self.facetList = []
    self.facetInterval = []
    self.covarepList = []
    self.covarepInterval = []
    self.wordList = []
    self.wordInterval = []
    for i, vid in enumerate(labelCompSeq):
        if gc.debug:
            # Debug mode: only the first 6 videos.
            if i > 5:
                break
        # Skip two hard-coded problematic video indices.
        if i == 88 or i == 66:
            continue
        print "processing video %d, uid %s" % (i, vid)
        labels = labelCompSeq[vid]['features']
        sen_intervals = labelCompSeq[vid]['intervals']
        facet = facetCompSeq[vid]['features']
        facet_intervals = facetCompSeq[vid]['intervals']
        covarep = covarepCompSeq[vid]['features']
        covarep_intervals = covarepCompSeq[vid]['intervals']
        words = wordCompSeq[vid]['features']
        word_intervals = wordCompSeq[vid]['intervals']
        # Add basic information: one (label, vid, sentence-id) triple per sentence.
        sen_num = 0
        while sen_num < len(labels):
            self.labelList.append(labels[sen_num])
            self.vidList.append(vid)
            self.sidList.append(sen_num)
            sen_num += 1
        # Add word vectors: advance [start, end) over the word stream so each
        # sentence gets the words whose midpoints fall inside its interval.
        start, end = 0, 0
        sen_num = 0
        while sen_num < len(labels):
            while start < len(words) and mid(
                    word_intervals[start]) < sen_intervals[sen_num][0]:
                start += 1
            end = start
            while end < len(words) and mid(
                    word_intervals[end]) < sen_intervals[sen_num][1]:
                end += 1
            toAppend = []
            toAppendInterval = []
            for k in range(start, end):
                toAppend.append(words[k])
                toAppendInterval.append(word_intervals[k])
            self.wordList.append(toAppend)
            self.wordInterval.append(toAppendInterval)
            if len(toAppend) > 50:
                # Flag unusually long sentences for inspection.
                print len(toAppend)
            start = end
            sen_num += 1
        # Add facets: same midpoint-windowing scheme as the words.
        start, end = 0, 0
        sen_num = 0
        while sen_num < len(labels):
            while start < len(facet) and mid(
                    facet_intervals[start]) < sen_intervals[sen_num][0]:
                start += 1
            end = start
            while end < len(facet) and mid(
                    facet_intervals[end]) < sen_intervals[sen_num][1]:
                end += 1
            self.facetList.append(facet[start:end])
            self.facetInterval.append(facet_intervals[start:end])
            start = end
            sen_num += 1
        # Add covarep: same midpoint-windowing scheme.
        start, end = 0, 0
        sen_num = 0
        while sen_num < len(labels):
            while start < len(covarep) and mid(
                    covarep_intervals[start]) < sen_intervals[sen_num][0]:
                start += 1
            end = start
            while end < len(covarep) and mid(
                    covarep_intervals[end]) < sen_intervals[sen_num][1]:
                end += 1
            self.covarepList.append(covarep[start:end])
            self.covarepInterval.append(covarep_intervals[start:end])
            start = end
            sen_num += 1
def readFromCSD(self):
    """Load MOSI phone/label/facet/openface/smile/word csd files per sentence.

    Segments every modality into per-sentence chunks by comparing interval
    midpoints (via the module-level mid() helper — presumably the interval
    midpoint; confirm its definition), and dumps raw transcripts to
    raw_data.txt / raw_data_no_sp.txt.

    Populates: vidList, sidList, labelList, phoList/phoInterval,
    facetList/facetInterval, openfaceList/openfaceInterval, smileList,
    wordList/wordInterval, rawList on self.
    """
    phoCompSeq = computational_sequence(self.root + 'CMU_MOSI_TimestampedPhones.csd').data
    labelCompSeq = computational_sequence(self.root + 'CMU_MOSI_Opinion_Labels.csd').data
    facetCompSeq = computational_sequence(self.root + 'CMU_MOSI_VisualFacet_4.1.csd').data
    openfaceCompSeq = computational_sequence(self.root + 'CMU_MOSI_OpenFace_V1.csd').data
    smileCompSeq = computational_sequence(self.root + 'CMU_MOSI_OpenSmile.csd').data
    wordCompSeq = computational_sequence(self.root + 'CMU_MOSI_TimestampedWordVectors.csd').data
    rawCompSeq = computational_sequence(self.root + 'CMU_MOSI_TimestampedWords.csd').data
    self.vidList = []
    self.sidList = []
    self.labelList = []
    self.phoList = []
    self.phoInterval = []
    self.facetList = []
    self.facetInterval = []
    self.openfaceList = []
    self.openfaceInterval = []
    self.smileList = []
    self.wordList = []
    self.wordInterval = []
    self.rawList = []
    fout = open("raw_data.txt", 'w')
    fout2 = open("raw_data_no_sp.txt", 'w')
    # BUGFIX: both dump files were opened but never closed; try/finally
    # guarantees they are flushed and released even if processing raises.
    try:
        for i, vid in enumerate(labelCompSeq):
            if gc.debug:
                # Debug mode: only the first 3 videos.
                if i > 2:
                    break
            # Skip one hard-coded problematic video id.
            if vid == "c5xsKMxpXnc":
                continue
            print("processing video %d, uid %s" % (i, vid))
            labels = labelCompSeq[vid]['features']
            sen_intervals = labelCompSeq[vid]['intervals']
            phos = phoCompSeq[vid]['features']
            pho_intervals = phoCompSeq[vid]['intervals']
            facet = facetCompSeq[vid]['features']
            facet_intervals = facetCompSeq[vid]['intervals']
            openface = openfaceCompSeq[vid]['features']
            openface_intervals = openfaceCompSeq[vid]['intervals']
            smile = smileCompSeq[vid]['features']
            words = wordCompSeq[vid]['features']
            word_intervals = wordCompSeq[vid]['intervals']
            raws = rawCompSeq[vid]['features']
            # Add phonemes and basic information: advance [start, end) over
            # the phone stream so each sentence gets the phones whose
            # midpoints fall inside its interval.
            start, end = 0, 0
            sen_num = 0
            while sen_num < len(labels):
                while start < len(phos) and mid(pho_intervals[start]) < sen_intervals[sen_num][0]:
                    start += 1
                end = start
                while end < len(phos) and mid(pho_intervals[end]) < sen_intervals[sen_num][1]:
                    end += 1
                self.phoList.append([e[0] for e in phos[start:end]])
                self.phoInterval.append(pho_intervals[start:end])
                self.labelList.append(labels[sen_num])
                # Add smiles here: the OpenSmile intervals match the labels,
                # so a plain per-sentence index suffices.
                self.smileList.append(smile[sen_num])
                self.vidList.append(vid)
                self.sidList.append(sen_num)
                start = end
                sen_num += 1
            # Add word vectors: same midpoint-windowing scheme; optionally
            # drop 'sp' (silence/pause) tokens when gc.no_sp is set.
            start, end = 0, 0
            sen_num = 0
            while sen_num < len(labels):
                while start < len(words) and mid(word_intervals[start]) < sen_intervals[sen_num][0]:
                    start += 1
                end = start
                while end < len(words) and mid(word_intervals[end]) < sen_intervals[sen_num][1]:
                    end += 1
                toAppend = []
                toAppendInterval = []
                for k in range(start, end):
                    if raws[k] != 'sp' or not gc.no_sp:
                        toAppend.append(words[k])
                        toAppendInterval.append(word_intervals[k])
                self.wordList.append(toAppend)
                self.wordInterval.append(toAppendInterval)
                if len(toAppend) > 50:
                    # Flag unusually long sentences for inspection.
                    print(len(toAppend))
                # Dump the raw transcript (with and without 'sp' tokens).
                fout.write("%s: %s %f\n" % (vid, ''.join([x[0] + ' ' for x in raws[start:end]]), labels[sen_num]))
                fout2.write("%s: %s %f\n" % (vid, ''.join([x[0] + ' ' if x[0] != "sp" else '' for x in raws[start:end]]), labels[sen_num]))
                start = end
                sen_num += 1
            # Add facets: same midpoint-windowing scheme.
            start, end = 0, 0
            sen_num = 0
            while sen_num < len(labels):
                while start < len(facet) and mid(facet_intervals[start]) < sen_intervals[sen_num][0]:
                    start += 1
                end = start
                while end < len(facet) and mid(facet_intervals[end]) < sen_intervals[sen_num][1]:
                    end += 1
                self.facetList.append(facet[start:end])
                self.facetInterval.append(facet_intervals[start:end])
                start = end
                sen_num += 1
            # Add openface: same midpoint-windowing scheme.
            start, end = 0, 0
            sen_num = 0
            while sen_num < len(labels):
                while start < len(openface) and mid(openface_intervals[start]) < sen_intervals[sen_num][0]:
                    start += 1
                end = start
                while end < len(openface) and mid(openface_intervals[end]) < sen_intervals[sen_num][1]:
                    end += 1
                self.openfaceList.append(openface[start:end])
                self.openfaceInterval.append(openface_intervals[start:end])
                start = end
                sen_num += 1
    finally:
        fout.close()
        fout2.close()