Exemplo n.º 1
0
from operator import add
from itertools import chain

from sift.models.text import EntityMentions
from sift.util import ngrams
from sift.dataset import ModelBuilder, Model

from sift import logging
# Module-level logger. `logging` here is the module imported from the `sift`
# package, and getLogger() is called without a name argument.
log = logging.getLogger()


class EntitySkipGramEmbeddings(ModelBuilder, Model):
    """ Learn distributed representations for words and entities in a corpus via skip-gram embedding

    The constructor records its arguments on the instance as configuration.
    The visible code only stores these values; the meanings given below are
    inferred from the parameter names and should be confirmed against the code
    that consumes them.

    Arguments:
        dimensions: stored as ``self.dimensions`` (default 100); presumably the
            size of each embedding vector -- TODO confirm.
        min_word_count: stored as ``self.min_word_count`` (default 500);
            presumably a frequency cut-off for words -- TODO confirm.
        min_entity_count: stored as ``self.min_entity_count`` (default 10);
            presumably a frequency cut-off for entities -- TODO confirm.
        entity_prefix: stored as ``self.filter_target`` (note the different
            attribute name); defaults to ``'en.wikipedia.org/wiki/'``.
        exclude_words: stored as ``self.exclude_words`` (default False).
        exclude_entities: stored as ``self.exclude_entities`` (default False).
        workers: default 4; not used in the lines visible here.
        coalesce: default None; not used in the lines visible here.
        *args, **kwargs: accepted; not used in the lines visible here.
    """
    def __init__(self,
                 dimensions=100,
                 min_word_count=500,
                 min_entity_count=10,
                 entity_prefix='en.wikipedia.org/wiki/',
                 exclude_words=False,
                 exclude_entities=False,
                 workers=4,
                 coalesce=None,
                 *args,
                 **kwargs):

        self.dimensions = dimensions
        self.min_word_count = min_word_count
        self.min_entity_count = min_entity_count
        # The entity prefix is kept under the attribute name `filter_target`,
        # not `entity_prefix`.
        self.filter_target = entity_prefix
        self.exclude_words = exclude_words
        self.exclude_entities = exclude_entities
        # NOTE(review): workers, coalesce, *args and **kwargs are neither stored
        # nor forwarded to a base-class constructor in the lines visible here --
        # confirm they are handled elsewhere.
Exemplo n.º 2
0
from operator import add
from itertools import chain

from sift.models.text import EntityMentions
from sift.util import ngrams
from sift.dataset import ModelBuilder, Model

from sift import logging
# Module-level logger. `logging` here is the module imported from the `sift`
# package, and getLogger() is called without a name argument.
log = logging.getLogger()

class EntitySkipGramEmbeddings(ModelBuilder, Model):
    """ Learn distributed representations for words and entities in a corpus via skip-gram embedding """
    def __init__(
        self,
        dimensions=100,
        min_word_count=500,
        min_entity_count=10,
        entity_prefix='en.wikipedia.org/wiki/',
        exclude_words=False,
        exclude_entities=False,
        workers=4,
        coalesce=None,
        *args, **kwargs):

        self.dimensions = dimensions
        self.min_word_count = min_word_count
        self.min_entity_count = min_entity_count
        self.filter_target = entity_prefix
        self.exclude_words = exclude_words
        self.exclude_entities = exclude_entities
        self.workers = workers