max_val_score = 0

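# Greedy forward feature selection: each round, try adding every unused feature
# to the current set and keep the one that yields the best validation accuracy,
# stopping once no candidate improves on the best score so far.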
while len(chosen_features) < len(feature_list):
    best_val = 0.
    best_feature = None

    for feature in feature_list:
        if feature in chosen_features: continue

        sent_encoder.feature_names = chosen_features + [feature]
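        # AdaBoost over depth-1 decision trees (stumps); class_weight="balanced" offsets label imbalance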
        model = SimplePQModel(sent_encoder=sent_encoder,
                              clf_type=AdaBoostClassifier,
                              clf_args={
                                  'n_estimators':
                                  100,
                                  'base_estimator':
                                  DecisionTreeClassifier(
                                      max_depth=1, class_weight="balanced")
                              })
        model.fit(train_articles)

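        # score the candidate feature set on the validation and held-out test articles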
        val_accuracy = E.evaluate(model=model,
                                  articles=val_articles,
                                  verbose=0)
        test_accuracy = E.evaluate(model=model,
                                   articles=test_articles,
                                   verbose=0)

        res_str = "{}\t{:.1f}\t{:.1f}".format(
            ', '.join(chosen_features + [feature]), 100 * val_accuracy,


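# Sweep character-set groupings and quantile counts for the character-position
# encoder; each configuration is fit with a logistic-regression PQ model and
# scored on the test articles.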
for char_sets in char_sets_options:
	for quantiles in [1, 2, 5, 10, 15]:
		sent_encoder = TrueCharPositionEncoder(char_sets = char_sets, quantiles=quantiles)
		model = SimplePQModel(sent_encoder=sent_encoder, clf_type=LogisticRegression, clf_args={'class_weight':'balanced', 'max_iter':1000, 'solver':'lbfgs'})
		#model = SimpleNNModel(sent_encoder=sent_encoder, layer_sizes=layer_sizes, layer_dropouts=layer_dropouts)
		model.fit(train_articles)
		#coefs = model.model.coef_[0]
		#coef_str = "\t".join([str(round(v, 3)) for v in coefs])
		#
		#val_accuracy = E.evaluate(model=model, articles=val_articles, verbose=0)
		test_accuracy = E.evaluate(model=model, articles=test_articles, verbose=0)
		res_str = "{}\t{}\t{}\t{:.2f}".format(sent_encoder.name, char_sets, quantiles, 100*test_accuracy)
		print(res_str)
		results_file.write(res_str+"\n")
		results_file.flush()


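# The experiments below are kept for reference but disabled by wrapping them in a string literal.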
'''
for quantiles in [2, 5, 10, 20]:
	from sklearn.ensemble import AdaBoostClassifier
	from sklearn.tree import DecisionTreeClassifier
	from models.sentence_encoders import HandcraftedEncoder



	#sent_encoder = HandcraftedEncoder()
	sent_encoder = HandcraftedEncoder(precomputed_embeddings=settings.PRECOMPUTED_HANDCRAFTED_EMBEDDINGS_FNAME)
	feature_list = ["Quote_count", "Sent_position", "R_difficult", "POS_PRP", "POS_VB", "A_concreteness"] #HandcraftedEncoder._all_features + "best"
	#feature = "best"


	for feature in feature_list:
		print(feature)
		sent_encoder.set_features(feature)
		model = SimplePQModel(sent_encoder=sent_encoder, clf_type=AdaBoostClassifier, clf_args={'n_estimators':100, 'base_estimator':DecisionTreeClassifier(max_depth=1, class_weight="balanced")})
		print("training {}...".format(feature))
		model.fit(train_articles)
		print("generating...")

		combined_samples[feature] = generate_samples(model, test_articles)



elif model_name == "ngrams":
	from models.sentence_encoders import NGramEncoder

	for mode, n in [('char', 2), ('word', 1)]:
		print(mode, n)
		sent_encoder = NGramEncoder(mode=mode, n=n, store_results=False, vocab_size=1000)
		print("preparing encoder...")