def create_feature(data_file):
    """Write a one-column CSV holding ``kurtosis`` of each question-2 vector.

    The destination path is ``feature_output_file(data_file)``. When that
    path already exists a notice is printed to stdout and the function
    returns without recomputing anything.

    Args:
        data_file: Value handed to both ``sentence2vec`` and
            ``feature_output_file``.

    Returns:
        None. The result is written to disk.
    """
    output_path = feature_output_file(data_file)
    if os.path.exists(output_path):
        print('File exists {}.'.format(output_path))
        return

    # sentence2vec yields a pair; only the second element feeds this feature.
    _, q2_vectors = sentence2vec(data_file)
    print(sys.argv[0], data_file, file=sys.stderr)

    # Column name is 'f' followed by whatever precedes the first underscore
    # in the output file's base name.
    prefix = os.path.basename(output_path).split('_')[0]
    column_name = 'f{0}'.format(prefix)

    # NaN/inf are replaced both before and after the per-vector statistic.
    cleaned = np.nan_to_num(q2_vectors)
    scores = [kurtosis(vector) for vector in cleaned]

    frame = pd.DataFrame()
    frame[column_name] = np.nan_to_num(scores)
    frame[[column_name]].to_csv(output_path, index=False, float_format='%.5f')
def create_feature(data_file, model: gensim.models.KeyedVectors):
    """Write a one-column CSV of ``jaccard`` scores for paired question vectors.

    The two vector collections returned by ``sentence2vec(data_file)`` are
    walked in lockstep and ``jaccard`` is applied to each pair. The result is
    saved to ``feature_output_file(data_file)``. If that file already exists,
    a notice is printed to stdout and nothing is recomputed.

    Args:
        data_file: Value handed to both ``sentence2vec`` and
            ``feature_output_file``.
        model: Not referenced by this function body; kept so existing
            callers that pass it keep working.

    Returns:
        None. The result is written to disk.
    """
    # Resolve the destination once instead of re-deriving it at every use.
    output_path = feature_output_file(data_file)
    if os.path.exists(output_path):
        print('File exists {}.'.format(output_path))
        return

    question1_vectors, question2_vectors = sentence2vec(data_file)
    print(sys.argv[0], data_file, file=sys.stderr)

    # Column name is 'f' followed by whatever precedes the first underscore
    # in the output file's base name.
    column_name = 'f{0}'.format(os.path.basename(output_path).split('_')[0])

    # NaN/inf are replaced in the inputs and again in the computed scores.
    # zip stops at the shorter of the two collections.
    pairs = zip(np.nan_to_num(question1_vectors),
                np.nan_to_num(question2_vectors))
    scores = np.nan_to_num([jaccard(x, y) for x, y in pairs])

    df = pd.DataFrame()
    df[column_name] = scores
    df[[column_name]].to_csv(output_path, index=False, float_format='%.5f')