def parse_gmail_log_body(dic_my_activity_gmail, gmail_logs):
    """Fill ``dic_my_activity_gmail`` from the body of one Gmail log entry.

    The pieces returned by ``TakeoutHtmlParser.find_log_body`` are read by
    position:

    * piece 0 -> ``'type'`` (``'Searched for'`` is stored as ``'Search'``,
      anything else is stored verbatim);
    * piece 1 -> ``'keyword_url'`` / ``'keyword'`` when it is an
      ``<a href="...">`` element, otherwise ignored;
    * later pieces -> ``'timestamp'`` when the text ends with ``'UTC'``.

    The dictionary is modified in place; nothing is returned.
    """
    anchor_open = '<a href="'
    pieces = TakeoutHtmlParser.find_log_body(gmail_logs)
    if pieces == []:
        return

    record = dic_my_activity_gmail
    for position, piece in enumerate(pieces):
        # Normalise the piece: plain text, trimmed, no non-breaking spaces.
        text = str(piece).strip().replace(u'\xa0', ' ')

        if position == 0:
            record['type'] = 'Search' if text == 'Searched for' else text
            continue

        if position == 1:
            if text.startswith(anchor_open):
                href_end = text.find('">')
                record['keyword_url'] = unquote(
                    text[len(anchor_open):href_end])
                link_text = text[href_end + 2:text.find('</a>')]
                record['keyword'] = TakeoutHtmlParser.remove_special_char(
                    link_text)
            continue

        if text.endswith('UTC'):
            record['timestamp'] = \
                TakeoutHtmlParser.convert_datetime_to_unixtime(text)
Пример #2
0
 def parse_voice_audio_log_body(dic_my_activity_voice_audio,
                                voice_audio_logs):
     """Fill ``dic_my_activity_voice_audio`` from one voice/audio log body.

     The pieces returned by ``TakeoutHtmlParser.find_log_body`` are read by
     position:

     * piece 0 -> ``'type'``; text starting with ``'Said'`` is stored as
       ``'Search'`` and whatever follows ``'Said'`` becomes ``'keyword'``;
     * piece 1 -> ``'keyword'`` / ``'keyword_url'`` when the type is
       ``'Search'`` and the piece is an ``<a href="...">`` element;
     * any other piece ending with ``'UTC'`` -> ``'timestamp'``.

     The dictionary is modified in place; nothing is returned.
     """
     pieces = TakeoutHtmlParser.find_log_body(voice_audio_logs)
     if pieces == []:
         return

     record = dic_my_activity_voice_audio
     for position, piece in enumerate(pieces):
         # Normalise the piece: plain text, trimmed, no non-breaking spaces.
         text = str(piece).strip().replace(u'\xa0', ' ')

         if position == 0:
             if not text.startswith('Said'):
                 record['type'] = text
             else:
                 record['type'] = 'Search'
                 if text != 'Said':
                     # Drop the leading verb and keep what was spoken.
                     record['keyword'] = text[4:].lstrip()
             continue

         if position == 1 and record['type'] == 'Search':
             if text.startswith('<a href="'):
                 href_end = text.find('">')
                 link_text = text[href_end + 2:text.find('</a>')]
                 record['keyword'] = TakeoutHtmlParser.remove_special_char(
                     link_text)
                 record['keyword_url'] = unquote(text[9:href_end])
             continue

         if text.endswith('UTC'):
             record['timestamp'] = \
                 TakeoutHtmlParser.convert_datetime_to_unixtime(text)
Пример #3
0
 def parse_youtube_log_body(dic_my_activity_youtube, youtube_logs):
     """Fill ``dic_my_activity_youtube`` from the body of one YouTube log.

     The pieces returned by ``TakeoutHtmlParser.find_log_body`` are read by
     position:

     * piece 0 -> ``'type'`` (``'Search'``, ``'Watch'``, ``'Visit'`` or the
       raw text); for Watch/Visit the whole text may also be stored as
       ``'keyword'``;
     * piece 1 -> ``'keyword'`` / ``'keyword_url'`` when it is an
       ``<a href="...">`` element;
     * later pieces -> ``'channel_name'`` / ``'channel_url'`` for anchors
       when the type is ``'Watch'``, and ``'timestamp'`` for text ending
       with ``'UTC'``.

     The dictionary is modified in place; nothing is returned.
     """

     def split_anchor(markup):
         """Return (link text, raw href) sliced out of an anchor string."""
         href_end = markup.find('">')
         return markup[href_end + 2:markup.find('</a>')], markup[9:href_end]

     pieces = TakeoutHtmlParser.find_log_body(youtube_logs)
     if pieces == []:
         return

     record = dic_my_activity_youtube
     clean = TakeoutHtmlParser.remove_special_char
     verb_to_type = (('Watched', 'Watch'), ('Visited', 'Visit'))

     for position, piece in enumerate(pieces):
         # Normalise the piece: plain text, trimmed, no non-breaking spaces.
         text = str(piece).strip().replace(u'\xa0', ' ')

         if position == 0:
             if text == 'Searched for':
                 record['type'] = 'Search'
                 continue
             for verb, activity in verb_to_type:
                 if text.startswith(verb):
                     record['type'] = activity
                     # Text that carries more than the bare verb is kept
                     # whole as the keyword.
                     if len(text) >= 8 and text.find(' ') >= 1:
                         record['keyword'] = clean(text)
                     break
             else:
                 record['type'] = text
             continue

         is_anchor = text.startswith('<a href="')

         if position == 1:
             if is_anchor:
                 link_text, href = split_anchor(text)
                 record['keyword'] = clean(link_text)
                 record['keyword_url'] = clean(unquote(href))
             continue

         if record['type'] == 'Watch' and is_anchor:
             link_text, href = split_anchor(text)
             record['channel_name'] = clean(link_text)
             record['channel_url'] = clean(unquote(href))
         if text.endswith('UTC'):
             record['timestamp'] = \
                 TakeoutHtmlParser.convert_datetime_to_unixtime(text)
    def parse_analytics_log_body(dic_my_activity_google_analytics,
                                 analytics_logs):
        """Fill ``dic_my_activity_google_analytics`` from one log body.

        The pieces returned by ``TakeoutHtmlParser.find_log_body`` are read
        by position:

        * piece 0 -> ``'type'`` (``'Used'`` -> ``'Use'``, ``'Visited'`` ->
          ``'Visit'``, anything else verbatim);
        * piece 1 -> ``'keyword'`` / ``'keyword_url'`` when it is an
          ``<a href="...">`` element. If the link's query string starts with
          ``q=``, the value of ``q`` is stored as ``'keyword_url'`` instead.
          ``'used_device'`` is set to ``'mobile'`` when the final URL's host
          starts with ``m.``;
        * later pieces -> ``'timestamp'`` when the text ends with ``'UTC'``.

        The dictionary is modified in place; nothing is returned. An empty
        or missing list of pieces leaves the dictionary untouched.
        """
        list_analytics_event_logs = TakeoutHtmlParser.find_log_body(
            analytics_logs)
        if not list_analytics_event_logs:
            return

        type_names = {'Used': 'Use', 'Visited': 'Visit'}

        for idx, content in enumerate(list_analytics_event_logs):
            # Normalise the piece: plain text, trimmed, no non-breaking spaces.
            content = str(content).strip().replace(u'\xa0', ' ')

            if idx == 0:
                dic_my_activity_google_analytics['type'] = type_names.get(
                    content, content)
            elif idx == 1:
                if not content.startswith('<a href="'):
                    continue
                idx2 = content.find('">')
                keyword = content[idx2 + 2:content.find('</a>')]
                dic_my_activity_google_analytics[
                    'keyword'] = TakeoutHtmlParser.remove_special_char(
                        keyword)
                url = unquote(content[9:idx2])
                dic_my_activity_google_analytics['keyword_url'] = url
                o = urlparse(url)
                if o.query.startswith('q='):
                    # The value of q runs up to the next '&amp;' separator.
                    # partition() keeps the whole remainder when there is no
                    # separator; slicing with find()'s -1 result would drop
                    # the last character of the URL.
                    real_url = unquote(o.query[2:].partition('&amp;')[0])
                    dic_my_activity_google_analytics[
                        'keyword_url'] = real_url
                    o = urlparse(real_url)
                # o now describes the final URL (unwrapped when applicable).
                if o.netloc.startswith('m.'):
                    dic_my_activity_google_analytics[
                        'used_device'] = 'mobile'
            elif content.endswith('UTC'):
                dic_my_activity_google_analytics[
                    'timestamp'] = TakeoutHtmlParser.convert_datetime_to_unixtime(
                        content)
 def parse_assistant_log_body(dic_my_activity_assistant, assistant_logs):
     """Fill ``dic_my_activity_assistant`` from one Assistant log body.

     The pieces returned by ``TakeoutHtmlParser.find_log_body`` are read by
     position:

     * piece 0 -> ``'type'`` and possibly ``'keyword'``, chosen by the
       leading verb (Said/Used/Trained/Selected/Listened). Text with any
       other beginning leaves both keys untouched;
     * piece 1 -> ``'keyword'`` / ``'keyword_url'`` when it is an
       ``<a href="...">`` element;
     * later pieces -> ``'timestamp'`` when the text ends with ``'UTC'``;
       otherwise, unless the piece is ``'<br/>'``, it contributes to
       ``'result'`` (an anchor replaces ``'result'`` and sets
       ``'result_url'``; plain text is appended to ``'result'``).

     The dictionary is modified in place; nothing is returned. An empty or
     missing list of pieces leaves the dictionary untouched.
     """
     list_assistant_search_logs = TakeoutHtmlParser.find_log_body(
         assistant_logs)
     if not list_assistant_search_logs:
         return

     clean = TakeoutHtmlParser.remove_special_char
     # (leading verbs, stored type, minimum length, drop the verb from keyword)
     first_piece_rules = (
         (('Said',), 'Search', 5, True),
         (('Used',), 'Use', 5, True),
         (('Trained',), 'Train', 8, True),
         (('Selected', 'Listened'), 'Use', 9, False),
     )

     for idx, content in enumerate(list_assistant_search_logs):
         # Normalise the piece: plain text, trimmed, no non-breaking spaces.
         content = str(content).strip().replace(u'\xa0', ' ')

         if idx == 0:
             # NOTE(review): unlike the sibling parsers there is no fallback
             # that stores unrecognised text as the type -- confirm intended.
             for verbs, activity, min_len, drop_verb in first_piece_rules:
                 if not content.startswith(verbs):
                     continue
                 dic_my_activity_assistant['type'] = activity
                 if len(content) >= min_len and content.find(' ') >= 1:
                     keyword = content.split(' ', 1)[1] if drop_verb \
                         else content
                     dic_my_activity_assistant['keyword'] = clean(keyword)
                 break
         elif idx == 1:
             if content.startswith('<a href="'):
                 idx2 = content.find('">')
                 keyword = content[idx2 + 2:content.find('</a>')]
                 dic_my_activity_assistant['keyword'] = clean(keyword)
                 dic_my_activity_assistant['keyword_url'] = unquote(
                     content[9:idx2])
         elif content.endswith('UTC'):
             dic_my_activity_assistant[
                 'timestamp'] = TakeoutHtmlParser.convert_datetime_to_unixtime(
                     content)
         elif content != '<br/>':
             if content.startswith('<a href="'):
                 idx2 = content.find('">')
                 keyword = content[idx2 + 2:content.find('</a>')]
                 dic_my_activity_assistant['result'] = clean(keyword)
                 dic_my_activity_assistant['result_url'] = unquote(
                     content[9:idx2])
             else:
                 # Append to any earlier result text. get() keeps this from
                 # raising KeyError when no result has been stored yet.
                 dic_my_activity_assistant['result'] = (
                     dic_my_activity_assistant.get('result', '') +
                     clean(content))
Пример #6
0
    def parse_maps_log_body(dic_my_activity_maps, maps_logs):
        """Fill ``dic_my_activity_maps`` from the body of one Maps log entry.

        The pieces returned by ``TakeoutHtmlParser.find_log_body`` are read
        by position:

        * piece 0 -> either an ``<a href="...">`` element (sets
          ``'keyword'``, ``'type'`` = ``'View'``/``'Search'``,
          ``'keyword_url'`` and, when present in the URL, coordinates) or
          plain text that selects ``'type'`` (and sometimes ``'keyword'``);
        * piece 1 -> ``'keyword'`` / ``'keyword_url'`` and coordinates when
          it is an ``<a href="...">`` element;
        * later pieces -> ``'timestamp'`` when the text ends with ``'UTC'``;
          piece 4 becomes ``'keyword'`` when the type is
          ``'1 notification'``.

        Coordinates are read from an ``@lat,lng`` segment of the URL path or
        from an ``sll=lat,lng`` query parameter and stored as strings in
        ``'keyword_latitude'`` / ``'keyword_longitude'``. A value without a
        comma is ignored.

        The dictionary is modified in place; nothing is returned. An empty
        or missing list of pieces leaves the dictionary untouched.
        """

        def _store_coordinates(raw_value, from_query=False):
            """Store the first two comma-separated fields of ``raw_value``."""
            fields = raw_value.split(',')
            # str.split never returns an empty list, so check that a second
            # field really exists before indexing it.
            if len(fields) < 2:
                return
            longitude = fields[1]
            if from_query:
                # Cut off any query parameters that follow the value.
                longitude = longitude.split('&')[0]
            dic_my_activity_maps['keyword_latitude'] = fields[0]
            dic_my_activity_maps['keyword_longitude'] = longitude

        def _store_sll_coordinates(query):
            """Store coordinates from an ``sll=`` parameter; return success."""
            # A membership test also matches a query that begins with 'sll='.
            if 'sll=' not in query:
                return False
            _store_coordinates(query.split('sll=', 1)[1], from_query=True)
            return True

        list_maps_event_logs = TakeoutHtmlParser.find_log_body(maps_logs)
        if not list_maps_event_logs:
            return

        clean = TakeoutHtmlParser.remove_special_char
        at_prefix = '/maps/@'

        for idx, content in enumerate(list_maps_event_logs):
            # Normalise the piece: plain text, trimmed, no non-breaking spaces.
            content = str(content).strip().replace(u'\xa0', ' ')

            if idx == 0:
                if content.startswith('<a href="'):
                    url = content[9:content.find('">')]
                    keyword = content.split('>')[1].split('</a')[0]
                    dic_my_activity_maps['keyword'] = keyword.replace(
                        "\"", "\'")
                    if keyword.startswith('View'):
                        dic_my_activity_maps['type'] = 'View'
                    else:
                        dic_my_activity_maps['type'] = 'Search'
                    url = unquote(url)
                    dic_my_activity_maps['keyword_url'] = clean(url)
                    o = urlparse(url)
                    if o.path.startswith(at_prefix):
                        # Slice the prefix off. str.lstrip would strip a
                        # *set of characters*, not this exact prefix.
                        _store_coordinates(o.path[len(at_prefix):])
                    elif o.path.find('@') >= 1:
                        _store_coordinates(o.path.split('@')[1])
                    else:
                        _store_sll_coordinates(o.query)
                elif content == 'Searched for':
                    dic_my_activity_maps['type'] = 'Search'
                elif content.startswith('Shared'):
                    dic_my_activity_maps['type'] = 'Share'
                elif content.startswith('Viewed'):
                    dic_my_activity_maps['type'] = 'View'
                    if content == 'Viewed For you':
                        dic_my_activity_maps['keyword'] = clean(content)
                elif content == 'Used Maps':
                    dic_my_activity_maps['type'] = 'Use'
                    dic_my_activity_maps['keyword'] = clean(content)
                elif content.startswith('Answered'):
                    dic_my_activity_maps['type'] = 'Answer'
                    dic_my_activity_maps['keyword'] = clean(content)
                else:
                    dic_my_activity_maps['type'] = content
            elif idx == 1:
                if content.startswith('<a href="'):
                    idx2 = content.find('">')
                    keyword = content[idx2 + 2:content.find('</a>')]
                    dic_my_activity_maps['keyword'] = clean(keyword)
                    url = unquote(content[9:idx2])
                    dic_my_activity_maps['keyword_url'] = clean(url)
                    o = urlparse(url)
                    if o.path.startswith('/maps/') and o.path.find('@') >= 1:
                        _store_coordinates(o.path.split('@')[1])
                    else:
                        _store_sll_coordinates(o.query)
            elif content.endswith('UTC'):
                dic_my_activity_maps[
                    'timestamp'] = TakeoutHtmlParser.convert_datetime_to_unixtime(
                        content)
            elif idx == 4 and dic_my_activity_maps['type'] == '1 notification':
                dic_my_activity_maps['keyword'] = clean(content)