Exemplo n.º 1
0
    def test_build_monthly_stats(self):
        """build_monthly_stats writes one complete CSV row per entry."""
        output = StringIO()
        ApacheLocalStats().build_monthly_stats(2008, 11, log_sample, output)
        output.seek(0)

        rows = list(csv.reader(output))

        # every row must be fully populated: four non-empty columns
        for row in rows:
            self.assertEquals(len(row), 4)
            self.assert_('' not in row)

        # spot-check known entries from the sample log
        samples = [
            (0, ['appwsgi', '344.tar.bz2', 'Mozilla/5.0', '1']),
            (456, ['Mtrax', 'Mtrax-2.2.07-py2.5-win32.egg', 'Firefox/3', '1']),
            (486, ['OpenPGP', 'OpenPGP-0.2.3.tar.gz', 'Firefox/3', '1']),
        ]
        for index, expected in samples:
            self.assertEquals(rows[index], expected)

        self.assertEquals(len(rows), 10043)
Exemplo n.º 2
0
    def test_apache_count(self):
        """main() runs cleanly when distant stats are served over HTTP.

        Builds a bz2 stats file locally, then patches urllib2.urlopen so
        main() receives that file as if fetched from a mirror.
        """
        # creating stats so they can be used by
        # main() as distant stats
        stats = ApacheLocalStats()
        stats.build_monthly_stats(2008,
                                  11,
                                  log_sample,
                                  bz2_file,
                                  compression='bz2')
        # now patching url so it returns the built stats
        import urllib2
        old_open = urllib2.urlopen

        def _open(url):
            class FakeUrl(object):
                def read(self):
                    # close the handle instead of leaking it
                    f = open(bz2_file)
                    try:
                        return f.read()
                    finally:
                        f.close()

            return FakeUrl()

        urllib2.urlopen = _open

        # just to make sure it doesn't break; always restore the patch
        try:
            main(config_file, log_sample)
        finally:
            urllib2.urlopen = old_open
Exemplo n.º 3
0
    def test_build_daily_stats(self):
        """A daily stats build produces well-formed CSV entries."""
        collector = ApacheLocalStats()
        out = StringIO()
        collector.build_daily_stats(2008, 11, 18, log_sample, out)
        out.seek(0)

        entries = list(csv.reader(out))

        # each entry carries exactly four non-empty fields
        for entry in entries:
            self.assertEquals(len(entry), 4)
            self.assert_('' not in entry)

        # spot-check a few known positions
        self.assertEquals(
            entries[0],
            ['4Suite-XML', '4Suite-XML-1.0.1.tar.bz2', 'Mozilla/5.0', '1'])
        self.assertEquals(
            entries[456],
            ['PasteScript', 'PasteScript-0.3.1.tar.gz', 'Mozilla/5.0', '1'])
        self.assertEquals(
            entries[486],
            ['Phebe', 'Phebe-0.1.1-py2.5.egg.asc', 'Mozilla/5.0', '1'])

        self.assertEquals(len(entries), 8953)
Exemplo n.º 4
0
 def test_compression(self):
     """bz2-compressed monthly stats can be read back with read_stats."""
     local = ApacheLocalStats()
     local.build_monthly_stats(2008, 11, log_sample, bz2_file,
                               compression='bz2')

     # the first decompressed entry matches the known sample data
     first = local.read_stats(bz2_file).next()
     self.assertEquals(first['count'], '1')
     self.assertEquals(first['packagename'], 'appwsgi')
Exemplo n.º 5
0
    def test_build_local_stats(self):
        """build_local_stats creates a readable stats file in curdir."""
        # builds the standard stats local file
        builder = ApacheLocalStats()
        builder.build_local_stats(2008, 11, 18, log_sample, curdir)
        self.assert_(os.path.exists(stats_file))

        # the generated file must be readable back
        entry = builder.read_stats(stats_file).next()
        self.assertEquals(entry['count'], '1')
        self.assertEquals(entry['packagename'], '4Suite-XML')
Exemplo n.º 6
0
    def test_build_local_stats(self):
        """The standard local stats file is written and parseable."""
        local = ApacheLocalStats()
        local.build_local_stats(2008, 11, 18, log_sample, curdir)

        # the file must exist on disk after the build
        self.assert_(os.path.exists(stats_file))

        reader = local.read_stats(stats_file)
        head = reader.next()
        self.assertEquals(head['count'], '1')
        self.assertEquals(head['packagename'], '4Suite-XML')
Exemplo n.º 7
0
    def test_compression(self):
        """Monthly stats written with bz2 compression round-trip cleanly."""
        collector = ApacheLocalStats()
        collector.build_monthly_stats(2008, 11, log_sample, bz2_file,
                                      compression='bz2')

        # read the compressed file back and check the first entry
        entries = collector.read_stats(bz2_file)
        first = entries.next()
        self.assertEquals(first['count'], '1')
        self.assertEquals(first['packagename'], 'appwsgi')
Exemplo n.º 8
0
    def test_read_stats(self):
        """read_stats yields dict entries from a freshly built stream."""
        buf = StringIO()
        local = ApacheLocalStats()
        local.build_monthly_stats(2008, 11, log_sample, buf)
        buf.seek(0)

        first = local.read_stats(buf).next()

        self.assertEquals(first['count'], '1')
        self.assertEquals(first['packagename'], 'appwsgi')
Exemplo n.º 9
0
    def test_read_stats(self):
        """Entries produced by build_monthly_stats can be read back."""
        stream = StringIO()
        stats = ApacheLocalStats()
        stats.build_monthly_stats(2008, 11, log_sample, stream)
        # rewind before handing the stream to the reader
        stream.seek(0)

        entries = stats.read_stats(stream)
        head = entries.next()

        self.assertEquals(head['count'], '1')
        self.assertEquals(head['packagename'], 'appwsgi')
Exemplo n.º 10
0
    def test_distant_stats(self):
        """ApacheDistantLocalStats downloads, caches and re-reads stats."""
        os.mkdir(mirror)
        url = 'http://example.com/mirror/daily/2008-11-18.bz2'
        stats = ApacheDistantLocalStats(mirror)

        # nothing cached and no network patch yet: empty result
        self.assertEquals(list(stats.read_stats(url)), [])

        # let's build the stats
        local_stats = ApacheLocalStats()
        local_stats.build_monthly_stats(2008,
                                        11,
                                        log_sample,
                                        bz2_file,
                                        compression='bz2')

        # now patching url so it returns the built stats
        import urllib2
        old_open = urllib2.urlopen

        def _open(url):
            class FakeUrl(object):
                def read(self):
                    # close the handle instead of leaking it
                    f = open(bz2_file)
                    try:
                        return f.read()
                    finally:
                        f.close()

            return FakeUrl()

        urllib2.urlopen = _open

        # restore the patch even if an assertion fails, so other tests
        # are not left with a broken urlopen
        try:
            read = stats.read_stats(url)
            first_entry = read.next()

            self.assertEquals(first_entry['count'], '1')
            self.assertEquals(first_entry['packagename'], 'appwsgi')

            # checking that the cache is filled
            self.assert_('2008-11-18.bz2' in os.listdir(mirror))
        finally:
            # removing the urlopen patch
            urllib2.urlopen = old_open

        # the cache should be activated now
        read = stats.read_stats(url)
        first_entry = read.next()
        self.assertEquals(first_entry['count'], '1')
        self.assertEquals(first_entry['packagename'], 'appwsgi')
Exemplo n.º 11
0
    def test_distant_stats(self):
        """Distant stats are fetched once, then served from the cache."""
        os.mkdir(mirror)
        url = 'http://example.com/mirror/daily/2008-11-18.bz2'
        stats = ApacheDistantLocalStats(mirror)

        # empty cache and no patched network: nothing to read
        self.assertEquals(list(stats.read_stats(url)), [])

        # let's build the stats
        local_stats = ApacheLocalStats()
        local_stats.build_monthly_stats(2008, 11, log_sample,
                                        bz2_file, compression='bz2')

        # now patching url so it returns the built stats
        import urllib2
        old_open = urllib2.urlopen

        def _open(url):
            class FakeUrl(object):
                def read(self):
                    # close the handle instead of leaking it
                    f = open(bz2_file)
                    try:
                        return f.read()
                    finally:
                        f.close()
            return FakeUrl()

        urllib2.urlopen = _open

        # keep the patch scoped: restore urlopen even on failure
        try:
            read = stats.read_stats(url)
            first_entry = read.next()

            self.assertEquals(first_entry['count'], '1')
            self.assertEquals(first_entry['packagename'], 'appwsgi')

            # checking that the cache is filled
            self.assert_('2008-11-18.bz2' in os.listdir(mirror))
        finally:
            # removing the urlopen patch
            urllib2.urlopen = old_open

        # the cache should be activated now
        read = stats.read_stats(url)
        first_entry = read.next()
        self.assertEquals(first_entry['count'], '1')
        self.assertEquals(first_entry['packagename'], 'appwsgi')
Exemplo n.º 12
0
    def test_build_daily_stats(self):
        """Daily stats for 2008-11-18 contain the expected entries."""
        stats = ApacheLocalStats()
        stream = StringIO()
        stats.build_daily_stats(2008, 11, 18, log_sample, stream)
        stream.seek(0)

        rows = list(csv.reader(stream))

        # every row is complete: four fields, none empty
        for row in rows:
            self.assertEquals(len(row), 4)
            self.assert_('' not in row)

        # known positions in the generated output
        samples = [
            (0, ['4Suite-XML', '4Suite-XML-1.0.1.tar.bz2',
                 'Mozilla/5.0', '1']),
            (456, ['PasteScript', 'PasteScript-0.3.1.tar.gz',
                   'Mozilla/5.0', '1']),
            (486, ['Phebe', 'Phebe-0.1.1-py2.5.egg.asc',
                   'Mozilla/5.0', '1']),
        ]
        for index, expected in samples:
            self.assertEquals(rows[index], expected)

        self.assertEquals(len(rows), 8953)
Exemplo n.º 13
0
    def test_apache_count(self):
        """main() consumes locally-built stats served through a patched
        urllib2.urlopen without raising."""
        # creating stats so they can be used by
        # main() as distant stats
        stats = ApacheLocalStats()
        stats.build_monthly_stats(2008, 11, log_sample,
                                  bz2_file, compression='bz2')
        # now patching url so it returns the built stats
        import urllib2
        old_open = urllib2.urlopen

        def _open(url):
            class FakeUrl(object):
                def read(self):
                    # close the handle instead of leaking it
                    f = open(bz2_file)
                    try:
                        return f.read()
                    finally:
                        f.close()
            return FakeUrl()

        urllib2.urlopen = _open

        # just to make sure it doesn't break
        try:
            main(config_file, log_sample)
        finally:
            urllib2.urlopen = old_open
Exemplo n.º 14
0
    def test_build_monthly_stats(self):
        """Monthly stats for 2008-11 are complete and correctly placed."""
        stream = StringIO()
        ApacheLocalStats().build_monthly_stats(2008, 11, log_sample, stream)
        stream.seek(0)

        rows = list(csv.reader(stream))

        # no partially-filled rows allowed
        for row in rows:
            self.assertEquals(len(row), 4)
            self.assert_('' not in row)

        self.assertEquals(
            rows[0], ['appwsgi', '344.tar.bz2', 'Mozilla/5.0', '1'])
        self.assertEquals(
            rows[456],
            ['Mtrax', 'Mtrax-2.2.07-py2.5-win32.egg', 'Firefox/3', '1'])
        self.assertEquals(
            rows[486], ['OpenPGP', 'OpenPGP-0.2.3.tar.gz', 'Firefox/3', '1'])

        self.assertEquals(len(rows), 10043)
Exemplo n.º 15
0
def main(config_file, logfile):
    """Populate the download counts.

    Merges the local Apache log with the daily stats published by every
    registered mirror, updates the per-file download counters in the
    database, then rebuilds the local and global daily stats files.
    """
    # Read config file
    p = ConfigParser.ConfigParser()
    p.read(config_file)

    # Folder under which each mirror's cached stats live
    # (renamed from `mirrors`, which was later rebound to a list)
    mirrors_folder = p.get('mirrors', 'folder')

    # Read server-relative URI prefix
    files_url = urlparse.urlsplit(p.get('webui', 'files_url'))[2]
    # Setup database connection
    dbconn, cursor = get_cursor(p)

    # create a log reader, that filters on files_url
    # build an iterator here with chain and all distant files
    cursor.execute("select * from mirrors")

    def read_distant_stats(mirror, filename):
        # Return a stats iterator for one mirror's daily stats file.
        mirror_domain = urlparse.urlparse(mirror[0])[1]
        mirror_domain = os.path.join(mirrors_folder, mirror_domain)
        distant_reader = ApacheDistantLocalStats(mirror_domain)
        stat_file_url = '%s/%s/%s' % (mirror[0], mirror[3], filename)
        return distant_reader.read_stats(stat_file_url)

    # it supposes it runs the program at day + 1
    yesterday = datetime.datetime.now() - datetime.timedelta(1)
    # name of yesterday's stats file; kept in its own variable so the
    # per-line `filename` in the loop below cannot clobber it (it did
    # before, which broke the stats file names written at the end)
    stats_filename = yesterday.strftime('%Y-%m-%d.bz2')
    mirror_readers = [read_distant_stats(mirror, stats_filename)
                      for mirror in cursor.fetchall()]

    logs = chain(*[ApacheLogReader(logfile, files_url)] + mirror_readers)
    _log('Working with local stats and %d mirror(s)' % len(mirror_readers))

    # get last http access
    cursor.execute("select value from timestamps where name='http'")
    last_http = cursor.fetchone()[0]
    _log('Last time stamp was : %s' % last_http)

    downloads = {}

    # let's read the logs in the apache file
    for line in logs:
        day = int(line.get('day', yesterday.day))
        month = line.get('month', yesterday.month)
        year = int(line.get('year', yesterday.year))
        hour = int(line.get('hour', 0))
        minute = int(line.get('min', 0))
        sec = int(line.get('sec', 0))
        date = DateTime(year, month, day, hour, minute, sec)
        zone = utc_offset(line.get('zone', 0))
        date = date - zone
        count = int(line.get('count', 1))
        # skip entries already counted on a previous run
        if date < last_http:
            continue

        filename = line['filename']

        _dotlog('.')
        # see if we have already read the old download count
        # (`in` instead of the Python-2-only has_key)
        if filename not in downloads:
            cursor.execute("select downloads from release_files "
                           "where filename=%s", (filename,))
            record = cursor.fetchone()
            if not record:
                # No file entry. Could be a .sig file
                continue
            # make sure we're working with a number
            downloads[filename] = record[0] or 0
        # add a download
        downloads[filename] += count

    # only touch the database when something was counted; the original
    # compared the dict to [] (always true), so the guard never fired
    if downloads:

        for filename, count in downloads.items():
            # Update the download counts in the DB
            _log('Updating download count for %s: %s' % (filename, count))
            cursor.execute("update release_files set downloads=%s "
                        "where filename=%s", (count, filename))

        # Update the download timestamp
        date = psycopg.TimestampFromMx(datetime.datetime.now())
        cursor.execute("update timestamps set value=%s "
                    "where name='http'", (date,))

        dbconn.commit()

    # now creating the local stats file
    _log('Building local stats file')
    stats = ApacheLocalStats()
    stats_dir = p.get('mirrors', 'local-stats')
    if not os.path.exists(stats_dir):
        raise ValueError('"%s" folder not found (local-stats in config.ini)' \
                    % stats_dir)
    stats_file = os.path.join(stats_dir, stats_filename)
    stats.build_daily_stats(yesterday.year, yesterday.month, yesterday.day,
                            logfile, stats_file, files_url, 'bz2')

    # now creating the global stats file
    # which is built with the latest database counts
    _log('Building global stats file')
    globalstats_dir = p.get('mirrors', 'global-stats')
    if not os.path.exists(globalstats_dir):
        raise ValueError('"%s" folder not found (global-stats in config.ini)' \
                % globalstats_dir)
    cursor.execute("select name, filename, downloads from release_files")

    def get_line(files_url):
        # Adapt database rows to the dict shape the stats builder expects.
        for line in cursor:
            data = {}
            data['day'] = yesterday.day
            data['month'] = yesterday.month
            data['year'] = yesterday.year
            data['filename'] = line[1]
            # NOTE(review): 'Unkown' is a typo but is emitted into the
            # stats files; kept byte-identical for downstream consumers
            data['useragent'] = 'Unkown' # not stored yet
            data['packagename'] = line[0]
            data['count'] = line[2]
            yield data

    gstats = LocalStats()
    stats_file = os.path.join(globalstats_dir, stats_filename)
    gstats.build_daily_stats(yesterday.year, yesterday.month, yesterday.day,
                             get_line, stats_file, files_url, 'bz2')