Пример #1
0
def distanceParams(vectors):
    """Build the parameter grid for distance-based measures.

    Args:
        vectors: label of the vector family to build the grid for.
            'corpus' combines 'tf-idf' and 'probability' vectors with every
            measure in ``distanceMeasures``.
            'world' combines 'word2vec' and 'glove' with ``cosine``, and
            additionally 'word2vec', 'glove', 'word2vec-avg' and 'glove-avg'
            with ``l1`` and ``l2``.

    Returns:
        An ``IdList`` of the flattened grid(s) joined with
        ``docSelectParams``; its ``id`` attribute is set to
        'distance_params_<vectors>_vectors'.

    Raises:
        ValueError: if ``vectors`` is neither 'corpus' nor 'world'.
    """
    if vectors == 'corpus':
        params = {
            'type': ['variance', 'avg-dist'],
            'vectors': ['tf-idf', 'probability'],
            'distance': distanceMeasures,
            'center': 'mean',
            'exp': 1.0
        }
        p = IdList(jp(fp(params), docSelectParams))
    elif vectors == 'world':
        # First grid: only cosine distance, only the non-averaged vectors.
        params = {
            'type': ['variance', 'avg-dist'],
            'vectors': ['word2vec', 'glove'],
            'distance': cosine,
            'center': 'mean',
            'exp': 1.0
        }
        p = IdList(jp(fp(params), docSelectParams))
        # Second grid: l1/l2 distances, including the '-avg' vector variants.
        params = {
            'type': ['variance', 'avg-dist'],
            'vectors': ['word2vec', 'glove', 'word2vec-avg', 'glove-avg'],
            'distance': [l1, l2],
            'center': 'mean',
            'exp': 1.0
        }
        p += jp(fp(params), docSelectParams)
    else:
        # Without this branch an unknown label left ``p`` unassigned and the
        # function crashed below with an opaque UnboundLocalError.
        raise ValueError(
            "vectors must be 'corpus' or 'world', got %r" % (vectors,))
    p.id = 'distance_params_%s_vectors' % vectors
    return p
Пример #2
0
def distanceParams(vectors=None):
    """Build the distance-measure parameter grid and assign vectors to it.

    The grid covers both measure types ('variance', 'avg-dist') and every
    function in ``distanceMeasures``. The 'vectors' entry is None in the
    grid itself; the result is passed through ``assignVectors`` together
    with the ``vectors`` argument (presumably that helper fills it in --
    confirm against its definition).

    Args:
        vectors: forwarded unchanged to ``assignVectors``; defaults to None.

    Returns:
        Whatever ``assignVectors`` returns for the ``IdList`` with id
        'distance_params'.
    """
    flatGrid = fp({
        'type': ['variance', 'avg-dist'],
        'vectors': None,
        'distance': distanceMeasures,
        'center': 'mean',
        'exp': 1.0
    })
    # Combine the grid with the document-selection parameter sets.
    paramSets = IdList(jp(flatGrid, docSelectParams))
    paramSets.id = 'distance_params'
    return assignVectors(paramSets, vectors)
Пример #3
0
def graphParams(vectors, distance):
    """Build the parameter grid for graph-based measures.

    Args:
        vectors: name of the vector representation. 'word2vec', 'glove',
            'word2vec-avg' and 'glove-avg' are labelled 'world';
            'tf-idf' and 'probability' are labelled 'corpus'.
        distance: distance function; ``distance.__name__`` together with
            ``vectors`` forms the key looked up in ``thresholds``.

    Returns:
        An ``IdList`` of three joined grids (thresholded weighted and
        unweighted, thresholded unweighted-only, and non-thresholded), each
        joined with ``docSelectParams``; its ``id`` attribute is
        'graph_params_<label>_vectors'.

    Raises:
        ValueError: if ``vectors`` is not one of the names listed above.
        KeyError: if ``thresholds`` has no entry for
            ``(vectors, distance.__name__)``.
    """
    # Label the parameter set as either 'world' or 'corpus'. Resolved up
    # front so that an unknown name fails with a clear error instead of an
    # UnboundLocalError after the whole grid has been built.
    if vectors in ['word2vec', 'glove', 'word2vec-avg', 'glove-avg']:
        vecLabel = 'world'
    elif vectors in ['tf-idf', 'probability']:
        vecLabel = 'corpus'
    else:
        raise ValueError('unknown vectors name: %r' % (vectors,))
    basic = {'type': 'graph', 'vectors': vectors, 'distance': distance}
    # Graph building with thresholding: each threshold t becomes the
    # weight filter [0, t].
    th = thresholds[(vectors, distance.__name__)]
    weightFilter = [[0, t] for t in th]
    thresh = {
        'algorithm': ['clustering', 'closeness'],
        'weightFilter': weightFilter,
        'weighted': [True, False],
        'center': 'mean',
    }
    # These algorithms are only run on unweighted graphs.
    threshNonWeighted = {
        'algorithm': ['communicability', 'num_connected'],
        'weightFilter': weightFilter,
        'weighted': False,
        'center': 'mean'
    }
    # Graph building without thresholding
    nothresh = {
        'algorithm': ['closeness', 'mst', 'clustering'],
        'weightFilter': None,
        'weighted': True,
        'center': 'mean'
    }
    p = jp(fp(basic), fp(thresh)) + jp(fp(basic), fp(threshNonWeighted)) + jp(
        fp(basic), fp(nothresh))
    p = IdList(jp(p, docSelectParams))
    p.id = 'graph_params_%s_vectors' % vecLabel
    return p
Пример #4
0
def densityParams(vectors):
    """Build the parameter grid for density-based measures.

    Args:
        vectors: 'world' or 'corpus'; selects the candidate values for the
            'dimReduce' parameter and is forwarded to ``assignVectors``.

    Returns:
        Whatever ``assignVectors`` returns for the ``IdList`` with id
        'density_params' (the flattened grid joined with
        ``docSelectParams``).

    Raises:
        ValueError: if ``vectors`` is neither 'world' nor 'corpus'.
    """
    if vectors == 'world':
        # world vectors are max. 300 in size, so they need
        # to be reduced to a dimension << 100
        dimReduce = [None, 5, 10, 20]
    elif vectors == 'corpus':
        dimReduce = [None, 5, 10, 20, 50, 100]
    else:
        # Previously an unknown label left ``dimReduce`` unassigned and the
        # dict literal below crashed with UnboundLocalError.
        raise ValueError(
            "vectors must be 'world' or 'corpus', got %r" % (vectors,))
    basic = {
        'type': 'density',
        'covariance': ['diag', 'spherical'],
        'center': 'mean',
        'dimReduce': dimReduce
    }
    basic = IdList(jp(fp(basic), docSelectParams))
    basic.id = 'density_params'
    basic = assignVectors(basic, vectors)
    return basic
Пример #5
0
from doc_topic_coh.dataset.topic_splits import devTestSplit
from doc_topic_coh.coherence.measure_evaluation.utils import experiment, assignVectors
from doc_topic_coh.coherence.tools import flattenParams as fp, joinParams as jp, IdList
from pytopia.measure.topic_distance import cosine, l1, l2

# Document-selection thresholds, passed through flattenParams (fp) once so
# the result can be joined with every measure-specific grid.
docSelectParams = fp({'threshold': [10, 25, 50, 100]})

# Distance functions tried by the distance-based measures.
distanceMeasures = [cosine, l1, l2]

# Development / test split, computed once at import time.
dev, test = devTestSplit()


def distanceParams(vectors=None):
    """Return the distance-measure parameter sets with vectors assigned.

    Builds a grid over the measure types 'variance' and 'avg-dist' and all
    functions in ``distanceMeasures``, joins it with ``docSelectParams``,
    tags the resulting ``IdList`` with the id 'distance_params', and hands
    it to ``assignVectors`` along with ``vectors``.

    Args:
        vectors: passed as-is to ``assignVectors``; None by default.

    Returns:
        The value returned by ``assignVectors``.
    """
    # 'vectors' is None here; the grid is handed to assignVectors below
    # (presumably that is where it is filled in -- confirm in that helper).
    grid = {
        'type': ['variance', 'avg-dist'],
        'vectors': None,
        'distance': distanceMeasures,
        'center': 'mean',
        'exp': 1.0
    }
    joined = jp(fp(grid), docSelectParams)
    result = IdList(joined)
    result.id = 'distance_params'
    return assignVectors(result, vectors)


thresholds = {
    ('tf-idf', 'cosine'): [
        0.92056,
        0.94344,