Exemplo n.º 1
0
    def parse_and_save(self):
        """Fetch raw Google Analytics data and store it in the database.

        ``self.args[0]`` is the path of the token file.  When more than one
        argument is given a bulk import is run; otherwise the package and
        resource page paths are queried and the result is saved.

        Raises:
            Exception: if the token file does not exist, the service cannot
                be created, or more than two arguments are given and the
                second one is not ``internal``.
        """
        from ga_auth import (init_service, get_profile_id)

        token_path = self.args[0]
        if not os.path.exists(token_path):
            raise Exception('Cannot find the token file %s' % token_path)

        try:
            self.service = init_service(token_path, None)
        except TypeError:
            print(
                'Have you correctly run the getauthtoken task and '
                'specified the correct file here')
            raise Exception('Unable to create a service')
        self.profile_id = get_profile_id(self.service)

        arg_count = len(self.args)
        if arg_count <= 1:
            # Default mode: only the token file was supplied.
            page_filter = 'ga:pagePath=~%s,ga:pagePath=~%s' % (
                PACKAGE_URL, self.resource_url_tag)
            records = self.get_ga_data(query_filter=page_filter)
            self.save_ga_data(records)
            log.info("Saved %s records from google" % len(records))
            return

        # The second argument is only validated when a third one is present.
        if arg_count > 2 and self.args[1].lower() != 'internal':
            raise Exception('Illegal argument %s' % self.args[1])
        self.bulk_import()
Exemplo n.º 2
0
    def command(self):
        """Download Google Analytics statistics for the requested period.

        The token file path is read from the CKAN config key
        ``googleanalytics.token.filepath``.  ``self.args[0]`` selects the
        period: ``all``, ``latest`` (also used when no argument is given) or
        a month formatted as ``YYYY-MM``.  Configuration and authentication
        problems are reported on stdout and the method returns early.
        """
        self._load_config()

        from download_analytics import DownloadAnalytics
        from ga_auth import (init_service, get_profile_id)

        token_path = os.path.expanduser(
            config.get('googleanalytics.token.filepath', ''))
        if not token_path:
            print('ERROR: In the CKAN config you need to specify the filepath of the '
                  'Google Analytics token file under key: googleanalytics.token.filepath')
            return

        try:
            self.token, svc = init_service(token_path, None)
        except TypeError:
            print('Have you correctly run the getauthtoken task and '
                  'specified the correct token file in the CKAN config under '
                  '"googleanalytics.token.filepath"?')
            return

        downloader = DownloadAnalytics(
            svc,
            self.token,
            profile_id=get_profile_id(svc),
            delete_first=self.options.delete_first,
            skip_url_stats=self.options.skip_url_stats)

        period = 'latest'
        if self.args:
            period = self.args[0]

        if period == 'latest':
            downloader.latest()
        elif period == 'all':
            downloader.all_()
        else:
            # Anything else must be a month in YYYY-MM form.
            month = datetime.datetime.strptime(period, '%Y-%m')
            downloader.specific_month(month)
    def parse_and_save(self):
        """Pull raw data from Google Analytics and persist it.

        The first command argument is the token file path.  Any additional
        argument switches to a bulk import; with the token file alone, the
        package and resource page paths are queried and saved.

        Raises:
            Exception: when the token file is missing, the service cannot be
                created, or more than two arguments are given and the second
                one is not ``internal``.
        """
        from ga_auth import (init_service, get_profile_id)

        token_path = self.args[0]
        if not os.path.exists(token_path):
            raise Exception('Cannot find the token file %s' % token_path)

        try:
            self.service = init_service(token_path, None)
        except TypeError:
            print('Have you correctly run the getauthtoken task and '
                  'specified the correct file here')
            raise Exception('Unable to create a service')
        self.profile_id = get_profile_id(self.service)

        extra_args = self.args[1:]
        if not extra_args:
            # Only the token file was given: run the default page-path query.
            page_filter = 'ga:pagePath=~%s,ga:pagePath=~%s' % (
                PACKAGE_URL, self.resource_url_tag)
            records = self.get_ga_data(query_filter=page_filter)
            self.save_ga_data(records)
            log.info("Saved %s records from google" % len(records))
            return

        # The mode argument is only checked when yet another argument follows.
        if len(extra_args) > 1 and extra_args[0].lower() != 'internal':
            raise Exception('Illegal argument %s' % extra_args[0])
        self.bulk_import()
Exemplo n.º 4
0
    def parse_and_save(self, args):
        """Fetch raw Google Analytics data and store it in the database.

        Args:
            args: command arguments; ``args[1]`` is the token file path and
                the optional ``args[2]`` is a start date as ``YYYY-MM-DD``.

        Raises:
            Exception: if the token file argument is missing or the file
                does not exist, the service cannot be created, or more than
                three arguments are supplied.
        """
        from ga_auth import (init_service, get_profile_id)

        arg_count = len(args)
        if arg_count == 1:
            raise Exception("Missing token file")

        token_path = args[1]
        if not os.path.exists(token_path):
            raise Exception('Cannot find the token file %s' % token_path)

        try:
            self.service = init_service(token_path, None)
        except TypeError:
            print(
                'Have you correctly run the getauthtoken task and '
                'specified the correct file here')
            raise Exception('Unable to create a service')
        self.profile_id = get_profile_id(self.service)

        if arg_count > 3:
            raise Exception('Too many arguments')

        # An explicit start date is optional; None is passed on without one.
        if arg_count == 3:
            start = datetime.datetime.strptime(args[2], '%Y-%m-%d').date()
        else:
            start = None

        records = self.get_ga_data(start_date=start)
        self.save_ga_data(records)
        self.log.info("Saved %s records from google" % len(records))
Exemplo n.º 5
0
    def command(self):
        """Download Google Analytics statistics for the requested period.

        The token file path comes from the CKAN config key
        ``googleanalytics.token.filepath``.  ``self.args[0]`` selects the
        period: ``all``, ``latest`` (the default without arguments) or a
        month formatted as ``YYYY-MM``.  Configuration and service-creation
        problems are printed and the method returns early.
        """
        self._load_config()

        from download_analytics import DownloadAnalytics
        from ga_auth import (init_service, get_profile_id)

        token_path = os.path.expanduser(
            config.get('googleanalytics.token.filepath', ''))
        if not token_path:
            print('ERROR: In the CKAN config you need to specify the filepath of the '
                  'Google Analytics token file under key: googleanalytics.token.filepath')
            return

        try:
            svc = init_service(token_path)
        except TypeError as err:
            print('Unable to create a service: {0}'.format(err))
            return

        # NOTE(review): self.token is read here but never assigned in this
        # method - presumably it is set elsewhere; confirm.
        downloader = DownloadAnalytics(
            svc,
            self.token,
            profile_id=get_profile_id(svc),
            delete_first=self.options.delete_first,
            stat=self.options.stat,
            print_progress=True)

        period = 'latest'
        if self.args:
            period = self.args[0]

        if period == 'latest':
            downloader.latest()
        elif period == 'all':
            downloader.all_()
        else:
            # Anything else must be a month in YYYY-MM form.
            month = datetime.datetime.strptime(period, '%Y-%m')
            downloader.specific_month(month)
    def parse_and_save(self):
        """Grab raw data from Google Analytics and save to the database.

        ``self.args[0]`` is the token file path; the optional ``self.args[1]``
        is a start date formatted as ``YYYY-MM-DD``.

        Raises:
            Exception: if the token file argument is missing, the file does
                not exist, the service cannot be created, or more than two
                arguments are supplied.
        """
        if not self.args:
            raise Exception("Missing token file")
        tokenfile = self.args[0]
        if not os.path.exists(tokenfile):
            # Report the path that was actually checked.  Formatting
            # self.args[1] here raised IndexError whenever only the token
            # file was given, hiding the real problem.
            raise Exception('Cannot find the token file %s' % tokenfile)

        # Imported only once the arguments are known to be usable, so
        # argument errors surface without needing the auth module.
        from ga_auth import (init_service, get_profile_id)

        try:
            self.service = init_service(tokenfile)
        except TypeError as e:
            raise Exception('Unable to create a service: {0}'.format(e))

        self.profile_id = get_profile_id(self.service)
        if len(self.args) > 2:
            raise Exception('Too many arguments')

        given_start_date = None
        if len(self.args) == 2:
            given_start_date = datetime.datetime.strptime(
                self.args[1], '%Y-%m-%d').date()

        packages_data = self.get_ga_data(start_date=given_start_date)
        self.save_ga_data(packages_data)
        log.info("Saved %s records from google" % len(packages_data))
Exemplo n.º 7
0
    def command(self):
        """Download Google Analytics statistics for the requested period.

        The token file path is read from the CKAN config key
        ``googleanalytics.token.filepath``.  ``self.args[0]`` selects the
        mode (``latest`` when no argument is given):

        * ``all`` / ``latest`` - no further arguments.
        * ``year <year>`` and ``year-step <year>``.
        * ``year-month <year> <month>``.
        * ``only <data> <YYYY-MM>``.
        * anything else is parsed as a month in ``YYYY-MM`` form.

        Configuration and authentication problems are printed and the method
        returns early.
        """
        self._load_config()

        from download_analytics import DownloadAnalytics
        from ga_auth import (init_service, get_profile_id)

        token_path = os.path.expanduser(
            config.get('googleanalytics.token.filepath', ''))
        if not token_path:
            print('ERROR: In the CKAN config you need to specify the filepath of the '
                  'Google Analytics token file under key: googleanalytics.token.filepath')
            return

        try:
            self.token, svc = init_service(token_path, None)
        except TypeError:
            print('Have you correctly run the getauthtoken task and '
                  'specified the correct token file in the CKAN config under '
                  '"googleanalytics.token.filepath"?')
            return

        downloader = DownloadAnalytics(
            svc,
            self.token,
            profile_id=get_profile_id(svc),
            delete_first=self.options.delete_first,
            stat=self.options.stat,
            print_progress=True)

        mode = self.args[0] if self.args else 'latest'

        if mode == 'all':
            downloader.all_()
            return
        if mode == 'latest':
            downloader.latest()
            return
        if mode == 'year':
            downloader.specific_year(self.args[1])
            return
        if mode == 'year-month':
            year = self.args[1]
            month = self.args[2]
            downloader.specific_year_month(year, month)
            return
        if mode == 'year-step':
            downloader.specific_year_step(self.args[1])
            return
        if mode == 'only':
            only_data = str(self.args[1])
            month_start = datetime.datetime.strptime(self.args[2], '%Y-%m')
            downloader.specific_month_only(month_start, only_data)
            return

        # Fallback: the first argument itself is the month to download.
        month_start = datetime.datetime.strptime(mode, '%Y-%m')
        downloader.specific_month(month_start)
Exemplo n.º 8
0
    def command(self):
        """Download Google Analytics statistics for the requested period.

        The token file path is read from the CKAN config key
        ``googleanalytics.token.filepath``.  ``self.args[0]`` selects the
        period: ``all``, ``latest`` (the default without arguments) or a
        month formatted as ``YYYY-MM``.  Configuration and authentication
        problems are printed and the method returns early.
        """
        self._load_config()

        from download_analytics import DownloadAnalytics
        from ga_auth import init_service, get_profile_id

        token_path = os.path.expanduser(
            config.get("googleanalytics.token.filepath", ""))
        if not token_path:
            print("ERROR: In the CKAN config you need to specify the filepath of the "
                  "Google Analytics token file under key: googleanalytics.token.filepath")
            return

        try:
            self.token, svc = init_service(token_path, None)
        except TypeError:
            print("Have you correctly run the getauthtoken task and "
                  "specified the correct token file in the CKAN config under "
                  '"googleanalytics.token.filepath"?')
            return

        downloader = DownloadAnalytics(
            svc, self.token,
            profile_id=get_profile_id(svc),
            delete_first=self.options.delete_first,
            stat=self.options.stat,
            print_progress=True)

        period = "latest"
        if self.args:
            period = self.args[0]

        if period == "latest":
            downloader.latest()
        elif period == "all":
            downloader.all_()
        else:
            # Anything else must be a month in YYYY-MM form.
            month = datetime.datetime.strptime(period, "%Y-%m")
            downloader.specific_month(month)
Exemplo n.º 9
0
    def command(self):
        """Download Google Analytics statistics for the requested period.

        The token file path is read from the CKAN config key
        ``ds_stats.ga.token.filepath``.  ``self.args[0]`` selects the period:
        ``all``, ``latest`` (the default without arguments) or a month
        formatted as ``YYYY-MM``.  Configuration and authentication problems
        are printed and the method returns early.
        """
        self._load_config()

        from download_analytics import DownloadAnalytics
        from ga_auth import (init_service, get_profile_id)

        token_path = os.path.expanduser(
            config.get('ds_stats.ga.token.filepath', ''))
        if not token_path:
            print('ERROR: In the CKAN config you need to specify the '
                  'filepath of the  Google Analytics token file under '
                  'key: ds_stats.ga.token.filepath')
            return

        try:
            self.token, svc = init_service(token_path)
        except TypeError:
            print('Have you correctly run the getauthtoken task and '
                  'specified the correct token file in the CKAN config under '
                  '"ds_stats.ga.token.filepath"?')
            return

        downloader = DownloadAnalytics(
            svc, self.token,
            profile_id=get_profile_id(svc),
            delete_first=self.options.delete_first,
            skip_url_stats=self.options.skip_url_stats)

        period = 'latest'
        if self.args:
            period = self.args[0]

        if period == 'latest':
            downloader.latest()
        elif period == 'all':
            downloader.all_()
        else:
            # Anything else must be a month in YYYY-MM form.
            month = datetime.datetime.strptime(period, '%Y-%m')
            downloader.specific_month(month)
Exemplo n.º 10
0
    def parse_and_save(self):
        """Grab raw data from Google Analytics and save to the database.

        ``self.args[0]`` is the token file path.  When a second argument is
        given a bulk import is run, with ``downloads=True`` if that argument
        is ``downloads`` (case-insensitive).  With the token file alone, the
        package and resource page paths are queried and saved.

        Raises:
            Exception: if more than two arguments are given and the second
                is neither ``internal`` nor ``downloads``, if the token file
                does not exist, or if the service cannot be created.
        """
        # Validate the optional mode argument first so a bad invocation fails
        # before any file or network access.
        mode = self.args[1].lower() if len(self.args) > 1 else None
        # The previous test, "!= 'internal' or != 'downloads'", was true for
        # every value, so valid modes were rejected; a membership test
        # expresses the intended check.
        if len(self.args) > 2 and mode not in ('internal', 'downloads'):
            raise Exception('Illegal argument %s' % self.args[1])

        tokenfile = self.args[0]
        if not os.path.exists(tokenfile):
            raise Exception('Cannot find the token file %s' % tokenfile)

        # Imported only once the arguments are known to be usable.
        from ga_auth import (init_service, get_profile_id)

        try:
            self.service = init_service(tokenfile)
        except TypeError as e:
            raise Exception('Unable to create a service: {0}'.format(e))
        self.profile_id = get_profile_id(self.service)

        if mode is not None:
            self.bulk_import(downloads=(mode == 'downloads'))
        else:
            query = 'ga:pagePath=~%s,ga:pagePath=~%s' % \
                    (PACKAGE_URL, self.resource_url_tag)
            packages_data = self.get_ga_data(query_filter=query)
            self.save_ga_data(packages_data)
            log.info("Saved %s records from google" % len(packages_data))
Exemplo n.º 11
0
    def command(self):
        """Grab raw data from Google Analytics and save to the database.

        Positional arguments (``self.args``):

        0. ``save`` sets ``save_stats=True`` on the downloader; any other
           value (default ``print``) sets it to ``False``.
        1. Kind of statistics; must be in ``DownloadAnalytics.KIND_STATS``.
        2. Time period: ``latest`` (default), ``last_month`` or a month
           formatted as ``YYYY-MM``.

        The token file path is read from the CKAN config key
        ``ckanext-dge-ga-report.token.filepath``.  A start line and an end
        line with the elapsed time are always printed.  The process exits
        with status 1 on any error and 0 on success.
        """
        init = datetime.datetime.now()
        s_args = (self.args if self.args else '(no args)')
        print('[%s] - Init DgeGaReportLoadAnalytics command with args: %s.' % (
            init.strftime(DgeGaReportLoadAnalytics.datetime_format), s_args))
        try:
            self._load_config()
            from download_analytics import DownloadAnalytics
            from ga_auth import (init_service, get_profile_id)

            ga_token_filepath = config.get(
                'ckanext-dge-ga-report.token.filepath', '')
            if not ga_token_filepath or not os.path.exists(ga_token_filepath):
                # Name the key that is actually read above.
                print('ERROR: In the CKAN config you need to specify the filepath of the '
                      'Google Analytics token file under key: '
                      'ckanext-dge-ga-report.token.filepath')
                sys.exit(1)

            try:
                self.token, svc = init_service(ga_token_filepath, None)
            except TypeError:
                print(
                    'Unable to create a service. Have you correctly run the getauthtoken task and '
                    'specified the correct token file in the CKAN config under '
                    '"ckanext-dge-ga-report.token.filepath"?')
                sys.exit(1)

            # Each optional argument is looked up only if it was supplied, so
            # omitting the trailing ones falls back to the defaults instead
            # of raising IndexError.
            save_print = self.args[0] if self.args else 'print'
            save = save_print == 'save'

            kind = self.args[1] if len(self.args) > 1 else None
            if kind is None or kind not in DownloadAnalytics.KIND_STATS:
                print(
                    'A valid kind of statistics that you want to load must be '
                    'specified: %s' % DownloadAnalytics.KIND_STATS)
                sys.exit(1)

            downloader = DownloadAnalytics(
                svc,
                self.token,
                profile_id=get_profile_id(svc),
                delete_first=self.options.delete_first,
                stat=self.options.stat,
                print_progress=True,
                kind_stats=kind,
                save_stats=save)

            time_period = self.args[2] if len(self.args) > 2 else 'latest'
            if time_period == 'latest':
                downloader.latest()
            elif time_period == 'last_month':
                now = datetime.datetime.now()
                # First day of the previous month, wrapping to December of
                # the previous year when run in January.
                if now.month == 1:
                    last_month = datetime.datetime(now.year - 1, 12, 1, 0, 0,
                                                   0)
                else:
                    last_month = datetime.datetime(now.year, now.month - 1, 1,
                                                   0, 0, 0)
                downloader.specific_month(last_month)
            else:
                # The month to use
                for_date = datetime.datetime.strptime(time_period, '%Y-%m')
                downloader.specific_month(for_date)
        except Exception as e:
            # sys.exit() raises SystemExit, which is not an Exception
            # subclass, so the explicit exits above are not caught here.
            print('Exception %s' % e)
            sys.exit(1)
        finally:
            end = datetime.datetime.now()
            print('[%s] - End DgeGaReportLoadAnalytics command with args %s. Executed command in %s milliseconds.' % (
                end.strftime(DgeGaReportLoadAnalytics.datetime_format), s_args,
                (end - init).total_seconds() * 1000))
        sys.exit(0)
    def parse_and_save(self, args):
        """Grab raw data from Google Analytics and save to the database.

        Runs five analytics queries (package, resource, visitorlocation,
        package_downloads, search_terms).  For each one the results of all
        date windows are folded into one ``data`` value by the query's
        resolver, handed to its save function, and committed.

        Args:
            args: command arguments, forwarded to ``self.init_service``.
                The optional ``args[2]`` is a start date formatted as
                ``YYYY-MM-DD``.

        Raises:
            Exception: if more than three arguments are supplied.
        """
        from ga_auth import get_profile_id

        # presumably sets self.service, which is read below - confirm
        self.init_service(args)

        self.profile_id = get_profile_id(self.service)
        if len(args) > 3:
            raise Exception('Too many arguments')

        # None when no explicit start date was given.
        given_start_date = None
        if len(args) == 3:
            given_start_date = datetime.datetime.strptime(args[2], '%Y-%m-%d').date()

        # Filters shared by the visitorlocation and search_terms queries.
        # NOTE(review): judging by the name these exclude bot traffic - confirm.
        botFilters = [
            'ga:browser!@StatusCake',
            'ga:browser!@Python',
            'ga:sessionDurationBucket!=0',
            'ga:sessionDurationBucket!=1',
            'ga:sessionDurationBucket!=2',
            'ga:sessionDurationBucket!=3',
            'ga:networkDomain!=ua.es',
            'ga:networkDomain!=amazonaws.com',
            'ga:networkDomain!=kcura.com',
            'ga:networkDomain!=relativity.com',
        ]
        # list of queries to send to analytics
        # Each entry carries the query parameters plus a 'resolver' that
        # folds results into the accumulated data and a 'save' that stores
        # it.  The 'dates' values are computed here, while the list is
        # built, not lazily inside the loop below.
        queries = [{
            'type': 'package',
            'dates': self.get_dates_between_update(given_start_date, PackageStats.get_latest_update_date()),
            'filters': 'ga:pagePath=~%s,ga:pagePath=~%s' % (PACKAGE_URL, self.resource_url_tag),
            'metrics': 'ga:uniquePageviews, ga:entrances',
            'sort': 'ga:date',
            'dimensions': 'ga:pagePath, ga:date',
            'resolver': self.resolver_type_package,
            'save': self.save_type_package,
        }, {
            'type': 'resource',
            'dates': self.get_dates_between_update(given_start_date, ResourceStats.get_latest_update_date()),
            'filters': 'ga:pagePath=~%s' % self.resource_url_tag,
            'metrics': 'ga:uniquePageviews',
            'sort': 'ga:date',
            'dimensions': 'ga:pagePath, ga:date',
            'resolver': self.resolver_type_resource,
            'save': self.save_type_resource,
        }, {
            'type': 'visitorlocation',
            'dates': self.get_dates_between_update(given_start_date, AudienceLocationDate.get_latest_update_date()),
            # ';' presumably ANDs the filters (GA filter syntax) - confirm
            'filters': ";".join(botFilters),
            'metrics': 'ga:sessions',
            'sort': 'ga:date',
            'dimensions': 'ga:country, ga:date',
            'resolver': self.resolver_type_visitorlocation,
            'save': self.save_type_visitorlocation,
        }, {
            'type': 'package_downloads',
            # Uses the PackageStats update date, like the 'package' query.
            'dates': self.get_dates_between_update(given_start_date, PackageStats.get_latest_update_date()),
            'filters': "ga:eventCategory==Resource;ga:eventAction==Download",
            'metrics': "ga:uniqueEvents",
            'sort': "ga:date",
            'dimensions': "ga:pagePath, ga:date, ga:eventCategory",
            'resolver': self.resolver_type_package_downloads,
            'save': self.save_type_package_downloads,
        }, {
            'type': 'search_terms',
            'dates': self.get_dates_between_update(given_start_date, SearchStats.get_latest_update_date()),
            'filters': ";".join(botFilters),
            'metrics': "ga:searchUniques",
            'sort': "ga:date",
            'dimensions': "ga:searchKeyword, ga:date",
            'resolver': self.resolver_type_search_terms,
            'save': self.save_type_search_terms,
        }]

        # loop through queries, parse and save them to db
        for query in queries:
            # Accumulator threaded through the resolver for every date window.
            data = {}
            # End of the first window is "now"; afterwards each window ends
            # where the previous one started.
            # NOTE(review): assumes query['dates'] is ordered newest-first -
            # confirm against get_dates_between_update.
            current = datetime.datetime.now()
            self.log.info('performing analytics query of type: %s' % query['type'])
            print 'Querying type: %s' % query['type']
            for date in query['dates']:
                # run query with current query values
                results = self.ga_query(start_date=date,
                                        end_date=current,
                                        filters=query['filters'],
                                        metrics=query['metrics'],
                                        sort=query['sort'],
                                        dimensions=query['dimensions'])
                # parse query
                resolver = query['resolver']
                data = resolver(results, data)
                current = date

            save_function = query['save']
            print 'Saving type: %s' % query['type']
            save_function(data)
            # Commit once per query type, after its save function has run.
            model.Session.commit()
            print 'Saving done'
            self.log.info("Successfully saved analytics query of type: %s" % query['type'])