示例#1
0
def documents2feature_vectors(documents):
    """Convert documents into a sparse feature matrix and binary labels.

    Every document is handed to ``PolitenessFeatureVectorizer.features`` and
    the returned feature mapping is flattened into one row. Column order is
    the sorted list of feature names, taken from the first document whose
    feature mapping is non-empty; later documents are looked up by those
    same names.

    Args:
        documents: Iterable of documents. Each one is passed to the
            vectorizer and must expose a numeric ``'score'`` entry.

    Returns:
        A ``(X, y)`` tuple: ``X`` is a ``csr_matrix`` built from the feature
        rows and ``y`` is a NumPy array holding 1 for documents whose score
        is greater than 0.0 (polite) and 0 otherwise.
    """
    vectorizer = PolitenessFeatureVectorizer()
    feature_names = None
    rows = []
    labels = []
    for doc in documents:
        features = vectorizer.features(doc)
        # Fix the column order once a non-empty name list is available.
        if not feature_names:
            feature_names = sorted(features.keys())
        row = [features[name] for name in feature_names]
        # A strictly positive politeness score marks the polite class (1).
        if doc['score'] > 0.0:
            label = 1
        else:
            label = 0
        rows.append(row)
        labels.append(label)
    return csr_matrix(np.asarray(rows)), np.asarray(labels)
def documents2feature_vectors(documents):
    """Build ``(X, y)`` training data from a collection of documents.

    Args:
        documents: Iterable of documents accepted by
            ``PolitenessFeatureVectorizer.features``; each must also provide
            a ``'score'`` entry comparable with a float.

    Returns:
        ``X``: ``csr_matrix`` with one row of feature values per document,
        columns ordered by sorted feature name.
        ``y``: NumPy array of class labels, 1 when the document's score is
        above 0.0 and 0 otherwise.
    """
    vectorizer = PolitenessFeatureVectorizer()
    columns = None
    samples = []
    for document in documents:
        feature_map = vectorizer.features(document)
        # Keep the first non-empty sorted name list as the column layout.
        columns = columns or sorted(feature_map.keys())
        values = [feature_map[column] for column in columns]
        # Score above zero means the document is polite (class 1).
        polite = 1 if document['score'] > 0.0 else 0
        samples.append((values, polite))
    X = csr_matrix(np.asarray([values for values, _ in samples]))
    y = np.asarray([polite for _, polite in samples])
    return X, y
示例#3
0
def get_features(requests):
    """Vectorize requests into a dense feature matrix and one-hot labels.

    Each request is passed to ``PolitenessFeatureVectorizer.features``, which
    (per the original author's notes) returns a ``{feature-name: value}``
    dict covering unigram, bigram and politeness-strategy features. The
    values are laid out in sorted feature-name order, using the names from
    the first request that yields a non-empty dict.

    Args:
        requests: Iterable of request documents. Each must be accepted by
            the vectorizer and provide a numeric ``'score'`` entry.

    Returns:
        A ``(X, y)`` tuple. ``X`` is a NumPy array with one row of feature
        values per request. ``y`` is a float array of shape
        ``(len(requests), 2)``: ``[0, 1]`` for requests whose score is
        greater than 0.0 (polite) and ``[1, 0]`` otherwise.
    """
    vectorizer = PolitenessFeatureVectorizer()
    feature_names = None
    rows, labels = [], []
    for req in requests:
        features = vectorizer.features(req)
        # Fix the column order once a non-empty name list is available.
        if not feature_names:
            feature_names = sorted(features.keys())
        rows.append([features[name] for name in feature_names])
        # A strictly positive politeness score marks the polite class (1).
        labels.append(1 if req['score'] > 0.0 else 0)
    X = np.asarray(rows)
    # One-hot encode the labels: row k of the 2x2 identity matrix is the
    # encoding of class k. dtype=int keeps the index array valid even when
    # there are no requests (an empty list would otherwise become float).
    y = np.eye(2)[np.asarray(labels, dtype=int)]
    return X, y
示例#4
0
    get_parses("That is weird.  Why can't you just store the \"Range\"?"))
TEST_DOCUMENTS.append(
    get_parses(
        "This was supposed to have been moved to <url> per the cfd. why wasn't it moved?"
    ))
TEST_DOCUMENTS.append(get_parses("You are wrong. But the approach is correct"))

# TEST_DOCUMENTS.append(get_parses(""))

pp = pprint.PrettyPrinter(indent=4)

fks = False
for each in TEST_DOCUMENTS:
    # print each['sentences']
    # print
    fs = vectorizer.features(each)
    for feature, score in fs.items():
        if score == 1:
            if (feature.startswith('feature_politeness')):
                # print feature
                if ('==Direct_question==' in feature):
                    each['sentences'][0] = "Sorry but " + each['sentences'][0]
                # if ('==2nd_person==' in feature):
                # 	li = []
                # 	for i in each['sentences'][0].split(" "):
                # 		y = 'we' if i.lower() == 'you' else i
                # 		li.append(y)
                # 	each['sentences'] = sent_tokenize(' '.join(li))
                if ('==Direct_start==' in feature):
                    li = each['sentences'][0].split(" ")
                    li[0] = 'Do'