def get_topic_proportions_for_every_image():
    """Load the trained CTM lambda matrix and convert it to per-document
    topic proportions.

    The landmark URLs are collected in the same (person, perform) order
    used when the corpus was built, so row i of the returned matrix
    corresponds to ``landmarks_urls_list[i]``.

    Returns:
        tuple: ``(landmarks_urls_list, final_theta)`` where
            - ``landmarks_urls_list`` (list): one URL per document/image;
            - ``final_theta`` (np.ndarray): shape (doc_num, topic_num),
              each row a normalized topic distribution.
    """
    from dir_processing import DirProcessing

    # Collect landmark URLs for every sequence; the order must match the
    # document order used when the CTM model was trained.
    landmarks_urls_list = []
    for person_id in DirProcessing.get_all_person_ids():
        for perform_id in DirProcessing.get_all_perform_ids_from_person_id(person_id):
            landmarks_urls = DirProcessing.get_all_landmarks_urls_from_sequence(
                person_id, perform_id)
            landmarks_urls_list.extend(landmarks_urls)

    doc_num = len(landmarks_urls_list)
    dt_file = '../ctm-dist/CTM46/final-lambda.dat'
    dt_vector = np.loadtxt(dt_file)
    # BUG FIX: floor division keeps topic_num an int. Plain `/` is true
    # division under Python 3 (and `from __future__ import division`),
    # yielding a float that np.reshape rejects as a shape element.
    topic_num = dt_vector.size // doc_num
    dt_matrix = np.reshape(dt_vector, (doc_num, topic_num))
    np.set_printoptions(suppress=True)
    # Lambda values are in log space: exponentiate and row-normalize to
    # obtain proper per-document topic proportions (softmax per row).
    final_theta = np.exp(dt_matrix)
    final_theta = final_theta / np.sum(final_theta, axis=1)[:, np.newaxis]
    return landmarks_urls_list, final_theta
def divide_persons_into_subsets(self):
    """Randomly partition all person ids into ``self.subset_num``
    roughly equal-sized subsets.

    Returns:
        dict: maps each person id to its subset index in
        ``[0, self.subset_num)``.
    """
    from dir_processing import DirProcessing

    all_ids = DirProcessing.get_all_person_ids()
    shuffled = np.random.permutation(all_ids)

    # Integer boundaries splitting [0, len(all_ids)] into subset_num
    # contiguous ranges of near-equal length.
    boundaries = np.linspace(0, len(all_ids), self.subset_num + 1)
    boundaries = boundaries.astype(int)

    assignment = dict.fromkeys(all_ids)
    for subset_idx in range(0, self.subset_num):
        for pos in range(boundaries[subset_idx], boundaries[subset_idx + 1]):
            assignment[shuffled[pos]] = subset_idx
    return assignment
def generate_corpus_and_write_to_file():
    """Build the LSF corpus for every expression sequence, pickle it, and
    dump it as a plain-text bag-of-words file.

    Side effects:
        - appends '../utilization/' to sys.path so dir_processing imports;
        - writes the pickled corpus to '../model/corpus.pk';
        - writes '../model/corpus.txt' with one line per LSF document in
          "<n_unique_words> <word_id>:<count> ..." format (LDA-C style).

    NOTE(review): Python 2 only — print statement, dict.iteritems, cPickle.
    """
    import os
    import sys
    lib_path = os.path.abspath('../utilization/')
    sys.path.append(lib_path)
    from dir_processing import DirProcessing

    # Build the global word<->id dictionary before extracting features,
    # since word2id is consulted when serializing documents below.
    LSF.build_dictionary()
    lsf_corpus = []
    person_ids = DirProcessing.get_all_person_ids()
    for person_id in person_ids:
        perform_ids = DirProcessing.get_all_perform_ids_from_person_id(person_id)
        for perform_id in perform_ids:
            landmarks_urls = DirProcessing.get_all_landmarks_urls_from_sequence(person_id, perform_id)
            # One ExpressionSequence (presumably) per (person, perform) pair.
            expression_sequence = LSF.lsf_from_sequence(landmarks_urls)
            print 'The feature extraction of expression person S{} and perform time {} has ' \
                'been done.'.format(person_id, perform_id)
            lsf_corpus.append(expression_sequence)

    # Binary snapshot of the full corpus objects.
    import cPickle
    with open('../model/corpus.pk', 'wb') as f:
        cPickle.dump(lsf_corpus, f)

    # Text export: each document is "<n_unique_words>" followed by
    # " <word_id>:<count>" pairs, one document per line.
    with open('../model/corpus.txt', 'w') as f:
        for expression_sequence in lsf_corpus:
            lsf_sequence = expression_sequence.lsf_sequence
            for lsf_document in lsf_sequence:
                f.write(str(len(lsf_document)))
                for word, count in lsf_document.iteritems():
                    wid = LSF.word2id[word]
                    s = " %d:%d" %(wid, count)
                    f.write(s)
                f.write("\n")