Exemplo n.º 1
0
def stylistic_features():
    """Build one feature vector and one label per entry of ``speaker_pairs``.

    Each feature vector is laid out as:

    * the LIWC counts of the "x" replies,
    * the LIWC counts of the "y" replies,
    * token count of the "x" replies divided by ``len(conversation)``,
    * token count of the "y" replies divided by ``len(conversation)``.

    The label is derived from the ``is_justice`` flags of the two utterances
    referenced by the first entry of the conversation: ``high`` when only x
    has the flag set, ``low`` when only y has it, ``error`` otherwise.
    Entries labelled ``error`` are left out of the result.

    Returns:
        A ``(data, data_target)`` tuple of two parallel lists: the feature
        vectors and their labels.
    """
    vectors = []
    labels = []

    # NOTE: iteritems() is the Python 2 dict iteration API.
    for _pair, conversation in speaker_pairs.iteritems():
        x_text = get_replies(conversation, "x")
        y_text = get_replies(conversation, "y")

        # The original author's note says the file relies on a
        # `from __future__ import division` import (not visible in this
        # block) so that these int / int divisions yield floats.
        x_tokens = utils.tokenize_utterance(x_text)
        x_average = len(x_tokens) / len(conversation)
        y_tokens = utils.tokenize_utterance(y_text)
        y_average = len(y_tokens) / len(conversation)

        x_counts = utils.get_liwc_counts_from_utterance(x_text)
        y_counts = utils.get_liwc_counts_from_utterance(y_text)

        features = list(x_counts) + list(y_counts) + [x_average, y_average]

        # The first entry of the conversation holds the keys of the two
        # utterances whose `is_justice` flags decide the label.
        first_exchange = conversation[0]
        x_is_justice = all_utterances[first_exchange[0]]["is_justice"]
        y_is_justice = all_utterances[first_exchange[1]]["is_justice"]

        if x_is_justice and not y_is_justice:
            label = high
        elif y_is_justice and not x_is_justice:
            label = low
        else:
            label = error

        # Pairs where both or neither speaker has the flag are discarded.
        if label != error:
            vectors.append(features)
            labels.append(label)

    return (vectors, labels)
Exemplo n.º 2
0
def count_exhibits_feature(utterance_pair, speaker):
    """Flag which of the first eight LIWC markers occur in one utterance.

    Args:
        utterance_pair: Indexable whose ``speaker`` entry is a key into
            ``all_utterances``.
        speaker: Index into ``utterance_pair`` selecting the utterance.

    Returns:
        A list of eight floats; slot ``i`` is ``1.0`` when the LIWC count for
        marker ``i`` is greater than zero and ``0.0`` otherwise.
    """
    text = all_utterances[utterance_pair[speaker]]["utterance"]
    marker_counts = utils.get_liwc_counts_from_utterance(text)

    return [1.0 if marker_counts[marker] > 0 else 0.0 for marker in range(8)]
Exemplo n.º 3
0
def count_coordination(utterance_pair):
    """Flag the LIWC markers that occur in both utterances of a pair.

    Args:
        utterance_pair: Indexable whose entries 0 and 1 are keys into
            ``all_utterances`` (referred to here as utterance "a" and "b").

    Returns:
        ``None`` when the token counts of the two utterances differ by 20 or
        more. Otherwise a list of eight floats; slot ``i`` is ``1.0`` when
        marker ``i`` has a count greater than zero in both utterances and
        ``0.0`` otherwise.
    """
    reply_text = all_utterances[utterance_pair[1]]["utterance"]
    prompt_text = all_utterances[utterance_pair[0]]["utterance"]

    reply_counts = utils.get_liwc_counts_from_utterance(reply_text)
    prompt_counts = utils.get_liwc_counts_from_utterance(prompt_text)

    reply_tokens = utils.tokenize_utterance(reply_text)
    prompt_tokens = utils.tokenize_utterance(prompt_text)

    # Discard the pair when the utterance lengths differ by 20 tokens or more.
    length_gap = abs(len(reply_tokens) - len(prompt_tokens))
    if length_gap >= 20:
        return None

    return [
        1.0 if (prompt_counts[marker] > 0) and (reply_counts[marker] > 0) else 0.0
        for marker in range(8)
    ]
Exemplo n.º 4
0
def get_liwc_features(combined_utterances):
    """Return the LIWC counts of ``combined_utterances`` as a new list.

    Args:
        combined_utterances: Value passed unchanged to
            ``utils.get_liwc_counts_from_utterance``.

    Returns:
        A list built from the iterable that helper returns.
    """
    liwc_counts = utils.get_liwc_counts_from_utterance(combined_utterances)
    return list(liwc_counts)