from operator import add
from itertools import chain

from sift.models.text import EntityMentions
from sift.util import ngrams
from sift.dataset import ModelBuilder, Model
from sift import logging

# Module-level logger from sift's logging wrapper.
log = logging.getLogger()


class EntitySkipGramEmbeddings(ModelBuilder, Model):
    """
    Learn distributed representations for words and entities in a corpus
    via skip-gram embedding.
    """
    def __init__(
            self,
            dimensions=100,
            min_word_count=500,
            min_entity_count=10,
            entity_prefix='en.wikipedia.org/wiki/',
            exclude_words=False,
            exclude_entities=False,
            workers=4,
            coalesce=None,
            *args, **kwargs):
        # dimensions: size of the learned embedding vectors.
        self.dimensions = dimensions
        # Frequency floors below which words / entities are dropped.
        self.min_word_count = min_word_count
        self.min_entity_count = min_entity_count
        # Only mentions whose target starts with this prefix are kept.
        self.filter_target = entity_prefix
        # Flags to restrict the vocabulary to entities-only / words-only.
        self.exclude_words = exclude_words
        self.exclude_entities = exclude_entities
        # Number of worker threads/processes for training.
        self.workers = workers
        # NOTE(review): the visible chunk ends here — `coalesce` is accepted
        # but never stored and *args/**kwargs are unused, suggesting the
        # constructor continues (e.g. self.coalesce = ..., super().__init__)
        # beyond this chunk. Confirm against the full file before relying on
        # those parameters.