def bare_command(doc):
    """
    Count sentences that open with a bare verb.

    A sentence is counted when the tag of its first token is 'VB' and
    that first word, after `prep.prep_simple` and space padding, is NOT
    one of the excluded keywords listed below.

    NOTE(review): earlier comments described the keyword test as
    "contained in" the list, but the code has always tested
    `not in keywords`; the code's behaviour is kept here.

    Input:
        doc: parsed document exposing `.sents`, where each sentence is
             indexable and its first token has a `.tag_` attribute
             (presumably a spaCy Doc - confirm against callers).

    Output: Count of matches
    """
    # Space-padded so comparison is against whole words only.
    keywords = {
        ' be ', ' do ', ' please ', ' have ', ' thank ', ' hang ', ' let '}

    # doc = nlp(text)

    count = 0
    for sent in doc.sents:
        first_token = sent[0]
        # Only the first token's tag matters; skip non-verbs early.
        if first_token.tag_ != 'VB':
            continue
        first_word = ' ' + prep.prep_simple(str(first_token)) + ' '
        if first_word not in keywords:
            count += 1

    return count
def sentence_split(doc):
    """
    Return every sentence of `doc` as a cleaned, space-padded string.

    Each sentence from `doc.sents` is converted to text, passed through
    `prep.prep_simple`, and wrapped in a single leading and trailing
    space.
    """
    # doc = nlp(text)
    raw_sentences = [str(span) for span in doc.sents]

    padded = []
    for raw in raw_sentences:
        padded.append(' ' + prep.prep_simple(raw) + ' ')

    return padded
def word_start(keywords, doc):
    """
    Find first words in text such as conjunctions and affirmations.

    For every key in `keywords`, counts how many sentences of `doc`
    start with a word found in `keywords[key]`.

    Input:
        keywords: mapping of feature name -> collection of words to
                  match (presumably space-padded strings, since the
                  first words are padded before the `in` test - confirm
                  against the saved keyword data).
        doc: parsed document exposing `.sents`.

    Output:
        DataFrame with one row per key and the columns 'Features'
        (the key) and 'Counts' (number of matching first words).
    """
    # doc = nlp(text)

    # The first words do not depend on the key, so compute them once
    # instead of on every loop iteration.
    first_words = [
        ' ' + prep.prep_simple(str(sent[0])) + ' ' for sent in doc.sents
    ]

    key_res = []
    phrase2_count = []

    for key in keywords:
        cs = [w for w in first_words if w in keywords[key]]

        key_res.append(key)
        phrase2_count.append(len(cs))

    # Two rows (names, counts) transposed into 'Features'/'Counts' columns.
    res = pd.DataFrame([key_res, phrase2_count],
                       index=['Features', 'Counts']).T
    return res
import time
import prep
import pandas as pd
#from sentiment import Sentiment
import feature_extraction as fe
import spacy
import en_core_web_sm
#from negspacy.negation import Negex

# Load the small English spaCy pipeline once for the whole script.
nlp = en_core_web_sm.load()

# time.clock() was removed in Python 3.8; perf_counter() is the
# supported high-resolution timer for measuring elapsed time.
start_time = time.perf_counter()

text = 'I don\'t understand what you mean, but for me please could you let me know how you came to this way of thinking? Would you mind?'
clean_text = prep.prep_simple(text)
doc = nlp(clean_text)

PATH = '../Data/'
UPLOAD_FOLDER = '../Data/In/'
# NOTE(review): lower-case 'data' here vs 'Data' above - confirm this is
# intended; the two differ on case-sensitive filesystems.
DOWNLOAD_FOLDER = '../data/Out/'
FOLDERS_IN = ['word_matches', 'spacy_pos', 'spacy_neg', 'word_start']
READ_TYPE = ['single', 'multiple', 'multiple', 'single']

#prep.commit_data(path, folders, words_in_line)
kw = prep.load_saved_data(UPLOAD_FOLDER, FOLDERS_IN)

# Keyword matches are counted on the raw text, not the cleaned text.
sc1 = fe.count_matches(kw['word_matches'], text)

# Includes negation handling
        key_res = [
            'highPolarity_score', 'lowPolarity_score', 'highPolarity_count',
            'lowPolarity_count', 'highSubjectivity_score',
            'lowSubjectivity_score', 'highSubjectivity_count',

        sentiment_res = [
            pol_scores[0], pol_scores[1], pol_counts[0], pol_counts[1],
            sub_scores[0], sub_scores[1], sub_counts[0], sub_counts[1]

        res = pd.DataFrame(sentiment_res, index=key_res)

        return res

if __name__ == '__main__':
    # Manual smoke run: score the sentiment of one sample message.
    text = 'I\'m not quite sure I understand for me, please could you let me know how you came to this way of thinking? Would you mind?'

    # Split into sentences first, then clean each sentence on its own.
    # clean_text = prep.phrase_split(text)
    clean_text = list(map(prep.prep_simple, prep.sentenciser(text)))

    se = Sentiment(clean_text)
    sent = se.sentiment()

    # df = pd.concat(sent, axis = 0)
def feat_counts(text, kw):
    """
    Main function for getting the features from text input.
    Calls other functions to clean text, count features and handle
    negation phrases.

    Input:
        text: Text string
        kw: Saved data of keywords and dependency pairs from pickle
            files. The keys read here are 'word_matches', 'spacy_pos',
            'spacy_noneg', 'spacy_neg_only', 'word_start' and
            'spacy_tokentag'.

    Output:
        Feature counts: DataFrame with 'Features' and 'Counts' columns.
        Rows are sorted by 'Counts' in descending order, with the
        'Token_count' row appended last.
    """
    # Make sure each of . , ! ? ( ) has a space on both sides so it is
    # separated from neighbouring words.
    text = re.sub('(?<! )(?=[.,!?()])|(?<=[.,!?()])(?! )', r' ', text)
    text = text.lstrip()
    clean_text = prep.prep_simple(text)
    doc_text = nlp(text)
    doc_clean_text = nlp(clean_text)

    # quick test to check what's being counted in Positive_Emotion
    # t1 = [token for token in doc_clean_text]
    # print(t1)
    # for t in t1:
    # 	if ' ' + str(t) + ' ' in kw['word_matches']['Positive_Emotion']:
    # 		print(t)

    # Count key words and dependency pairs with negation
    kw_matches = count_matches(kw['word_matches'], doc_text)

    dep_pairs, negations = get_dep_pairs(doc_clean_text)
    dep_pair_matches = count_spacy_matches(kw['spacy_pos'], dep_pairs)

    dep_pairs_noneg = get_dep_pairs_noneg(doc_clean_text)
    disagreement = count_spacy_matches(kw['spacy_noneg'], dep_pairs_noneg)

    # Second element of each negation entry, space-padded and de-duplicated.
    neg_dp = set([' ' + i[1] + ' ' for i in negations])
    neg_only = count_spacy_matches(kw['spacy_neg_only'], neg_dp)

    # count start word matches like conjunctions and affirmations
    start_matches = word_start(kw['word_start'], doc_text)

    # Merge all partial results and sum counts for features that appear
    # in more than one of them.
    scores = pd.concat(
        [kw_matches, dep_pair_matches, disagreement, start_matches, neg_only])
    scores = scores.groupby('Features').sum().sort_values(by='Counts',
                                                          ascending=False)
    scores = scores.reset_index()

    # add remaining features
    bc = bare_command(doc_text)
    scores.loc[len(scores)] = ['Bare_Command', bc]

    ynq, whq = Question(doc_text)

    scores.loc[len(scores)] = ['YesNo_Questions', ynq]
    scores.loc[len(scores)] = ['WH_Questions', whq]

    adl = adverb_limiter(kw['spacy_tokentag'], doc_text)
    scores.loc[len(scores)] = ['Adverb_Limiter', adl]

    scores = scores.sort_values(by='Counts', ascending=False)

    # Appended after the final sort so the token total stays in the last row.
    tokens = token_count(doc_text)
    scores.loc[len(scores)] = ['Token_count', tokens]

    return scores