def token_contexts_to_features(token_contexts, feature_extractors, workers=1):
    """Build a numpy feature matrix for every token from its contexts.

    Every (context, extractor) pair is handed to ``map_feature_extractor``.
    For a given token, the outputs of all extractors are joined with
    ``np.hstack`` and the contexts are stacked with ``np.vstack``.
    NOTE(review): this presumably yields one row per context, assuming each
    extractor returns a flat numeric vector -- confirm against the extractors.

    Args:
        token_contexts: dict mapping a token to a sequence of its contexts.
        feature_extractors: re-iterable collection of extractors (it is
            iterated once per context / once per token).
        workers: ``1`` extracts in the calling process; any other value is
            used as the size of a ``multi.Pool`` that the work is mapped over.

    Returns:
        dict mapping each token to the stacked numpy array of its features.
    """
    # Single worker: no pool, extract everything in the calling process.
    if workers == 1:
        features = {}
        for token, contexts in token_contexts.items():
            rows = [
                np.hstack([
                    map_feature_extractor((context, extractor))
                    for extractor in feature_extractors
                ])
                for context in contexts
            ]
            features[token] = np.vstack(rows)
        return features

    # Several workers: map the contexts over a pool, one extractor at a time.
    res_dict = {}
    pool = multi.Pool(workers)
    try:
        print("Feature extractors: ", feature_extractors)
        for token, contexts in token_contexts.items():
            # Lazy %-formatting: also works when the token is not a str.
            logger.info(
                'Multithreaded - Extracting contexts for token: %s -- with %s contexts...',
                token, len(contexts))
            extractors_output = []
            for extractor in feature_extractors:
                # Each context is paired with the current extractor.
                context_list = [(cont, extractor) for cont in contexts]
                extractors_output.append(
                    np.vstack(pool.map(map_feature_extractor, context_list)))
            # Put the per-extractor blocks side by side.
            res_dict[token] = np.hstack(extractors_output)
    finally:
        # Always release the pool's workers, even if extraction fails;
        # previously the pool was never closed.
        pool.close()
        pool.join()

    return res_dict
def token_contexts_to_features_categorical(token_contexts, feature_extractors, workers=1):
    """Build plain-list (categorical) feature rows for every token.

    Every (context, extractor) pair is handed to ``map_feature_extractor``.
    For each context, the outputs of all extractors are concatenated into one
    flat list. Lists are used instead of numpy arrays because the extracted
    values may be objects of different types.

    Args:
        token_contexts: dict mapping a token to a sequence of its contexts.
        feature_extractors: re-iterable collection of extractors (it is
            iterated once per context / once per token).
        workers: ``1`` extracts in the calling process; any other value is
            used as the size of a ``multi.Pool`` that the work is mapped over.

    Returns:
        dict mapping each token to a list with one feature list per context.
    """
    # Single worker: no pool, extract everything in the calling process.
    if workers == 1:
        features = {}
        for token, contexts in token_contexts.items():
            rows = []
            for context in contexts:
                row = []
                for extractor in feature_extractors:
                    row.extend(map_feature_extractor((context, extractor)))
                rows.append(row)
            features[token] = rows
        return features

    # Several workers: map the contexts over a pool, one extractor at a time.
    res_dict = {}
    pool = multi.Pool(workers)
    try:
        print("Feature extractors: ", feature_extractors)
        for token, contexts in token_contexts.items():
            # Lazy %-formatting: also works when the token is not a str.
            logger.info(
                'Multithreaded - Extracting categorical contexts for token: %s -- with %s contexts...',
                token, len(contexts))
            # extractors_output[e][c] is the output of extractor e on context c.
            extractors_output = []
            for extractor in feature_extractors:
                context_list = [(cont, extractor) for cont in contexts]
                extractors_output.append(
                    pool.map(map_feature_extractor, context_list))

            if extractors_output:
                # Transpose to per-context groups. np.hstack and np.vstack
                # can't be used because the lists hold objects of different
                # types, so each group is merged with flatten() instead.
                intermediate = [list(group) for group in zip(*extractors_output)]
                res_dict[token] = [flatten(sl) for sl in intermediate]
            else:
                # No extractors: match the single-worker result (one empty
                # row per context) instead of failing on extractors_output[0].
                res_dict[token] = [[] for _ in contexts]
    finally:
        # Always release the pool's workers, even if extraction fails;
        # previously the pool was never closed.
        pool.close()
        pool.join()

    return res_dict