def __init__(self, text_or_tokens, max_sentence_length=399):
    """Set ``self.sentrep`` from another Sentence, a string, or tokens.

    text_or_tokens may be:

    * a ``Sentence`` -- its existing ``sentrep`` is reused as-is
      (shared, not copied);
    * a string -- wrapped in ``<s> ... </s>`` markers and handed to
      ``parser.tokenize``;
    * anything else -- passed unchanged to ``parser.SentRep``.

    max_sentence_length is only used for string input, where it is
    forwarded as the second argument of ``parser.tokenize``
    (presumably a limit on sentence length -- confirm against the
    parser module).
    """
    if isinstance(text_or_tokens, Sentence):
        # Reuse the wrapped structure of the other Sentence.
        sentrep = text_or_tokens.sentrep
    elif isinstance(text_or_tokens, basestring):
        # The tokenizer is given the text with sentence boundary markers.
        marked_text = '<s> ' + text_or_tokens + ' </s>'
        sentrep = parser.tokenize(marked_text, max_sentence_length)
    else:
        # Not a Sentence or a string: let SentRep build itself from it.
        sentrep = parser.SentRep(text_or_tokens)
    self.sentrep = sentrep
def __init__(self, text_or_tokens):
    """Set ``self.sentrep`` from a SentRep, a Sentence, a string, or tokens.

    text_or_tokens may be:

    * a ``parser.SentRep`` -- stored directly, after acquiring
      ownership of it on the Python side;
    * a ``Sentence`` -- its existing ``sentrep`` is reused as-is
      (shared, not copied);
    * a string -- wrapped in ``<s> ... </s>`` markers and handed to
      ``parser.tokenize``;
    * any other iterable -- each element is converted with ``str``,
      passed through ``parser.ptbEscape``, and the resulting list is
      given to ``parser.SentRep``.
    """
    if isinstance(text_or_tokens, parser.SentRep):
        # ensure that Python owns the pointer
        text_or_tokens.this.acquire()
        self.sentrep = text_or_tokens
        return

    if isinstance(text_or_tokens, Sentence):
        self.sentrep = text_or_tokens.sentrep
        return

    if isinstance(text_or_tokens, basestring):
        marked_text = '<s> ' + text_or_tokens + ' </s>'
        self.sentrep = parser.tokenize(marked_text)
        return

    # text_or_tokens is a sequence -- every element must be a string
    # before it reaches SentRep, to avoid crashing
    escaped_tokens = []
    for token in text_or_tokens:
        escaped_tokens.append(parser.ptbEscape(str(token)))
    self.sentrep = parser.SentRep(escaped_tokens)