Exemplo n.º 1
0
 def __init__(self, url='', site=''):
     """
     Create every per-instance attribute of the page.

     All instance state is declared here, *not* at class level.

     :param url: stored unchanged as ``self.url``
     :param site: stored unchanged as ``self.site``
     """
     self.site, self.url = site, url
     # Reuse the session object held on Manifest.
     self.session = Manifest.session
     self.title = self.description = ''
     self.keywords, self.warnings = {}, []

     # Byte translation table that turns every byte of `punctuation`
     # into a single space.
     blanks = ' ' * len(punctuation)
     self.translation = bytes.maketrans(
         punctuation.encode('utf-8'), blanks.encode('utf-8'))

     # Social counters, all starting at zero.
     facebook_fields = ('shares', 'comments', 'likes', 'clicks')
     self.social = {
         'facebook': dict.fromkeys(facebook_fields, 0),
         'stumbleupon': dict.fromkeys(('stumbles',), 0),
     }
     super(Page, self).__init__()
Exemplo n.º 2
0
    def __init__(self, url='', base_domain=''):
        """
        Create every per-instance attribute of the page.

        All instance state is declared here, *not* at class level.

        :param url: page address; kept verbatim in ``self.url`` and in
            ``urlsplit`` form in ``self.parsed_url``
        :param base_domain: kept in ``urlsplit`` form in ``self.base_domain``
        """
        self.base_domain = urlsplit(base_domain)
        self.parsed_url = urlsplit(url)
        self.url = url
        self.title = self.description = ''
        self.keywords, self.warnings = {}, []

        # Byte translation table that turns every byte of `punctuation`
        # into a single space.
        blanks = ' ' * len(punctuation)
        self.translation = bytes.maketrans(
            punctuation.encode('utf-8'), blanks.encode('utf-8'))

        # Social counters, all starting at zero.
        facebook_fields = ('shares', 'comments', 'likes', 'clicks')
        self.social = {'facebook': dict.fromkeys(facebook_fields, 0)}

        self.links = []
        self.total_word_count = 0
        # Three independent, initially empty counters.
        self.wordcount, self.bigrams, self.trigrams = (
            Counter(), Counter(), Counter())
        self.stem_to_word = {}
        self.content_hash = None
Exemplo n.º 3
0
def freq_dist(data):
    """
    Count how often each word occurs in ``data``.

    Every character listed in ``punctuation`` is deleted (not replaced by a
    space) before a line is split on whitespace, so ``"it's"`` is counted
    as ``"its"``.

    :param data: Text with sentences separated by newlines. For backward
        compatibility a UTF-8 encoded ``bytes`` object, or an iterable of
        ``str``/``bytes`` lines, is accepted as well.
    :type data: str
    :returns: A dictionary mapping each word to its number of occurrences.
    :rtype: dict
    """
    # A plain string must be split into lines explicitly: iterating over a
    # str yields single characters, not lines.
    if isinstance(data, (bytes, bytearray)):
        data = bytes(data).decode('utf-8')
    lines = data.splitlines() if isinstance(data, str) else data

    # Built once; maps every punctuation character to "delete".
    strip_punctuation = str.maketrans('', '', punctuation)

    d = {}
    for line in lines:
        # Pre-encoded lines were the only input the old code accepted.
        if isinstance(line, (bytes, bytearray)):
            line = bytes(line).decode('utf-8')
        for word in line.translate(strip_punctuation).split():
            d[word] = d.get(word, 0) + 1
    return d
Exemplo n.º 4
0
 def __init__(self, url='', site=''):
     """
     Create every per-instance attribute of the page.

     All instance state is declared here, *not* at class level.

     :param url: stored unchanged as ``self.url``
     :param site: stored unchanged as ``self.site``
     """
     self.site, self.url = site, url
     self.title = self.description = self.keywords = ''
     self.warnings, self.social = [], {}

     # Byte translation table that turns every byte of `punctuation`
     # into a single space.
     blanks = ' ' * len(punctuation)
     self.translation = bytes.maketrans(
         punctuation.encode('utf-8'), blanks.encode('utf-8'))
     super(Page, self).__init__()