def test_build_monthly_stats(self):
    """Monthly stats built from the sample log are complete and known."""
    output = StringIO()
    stats = ApacheLocalStats()
    stats.build_monthly_stats(2008, 11, log_sample, output)
    output.seek(0)
    rows = list(csv.reader(output))
    # every row must be fully populated: four columns, none empty
    for row in rows:
        self.assertEquals(len(row), 4)
        self.assert_('' not in row)
    # spot-check known entries and the total row count
    self.assertEquals(rows[0],
                      ['appwsgi', '344.tar.bz2', 'Mozilla/5.0', '1'])
    self.assertEquals(rows[456],
                      ['Mtrax', 'Mtrax-2.2.07-py2.5-win32.egg',
                       'Firefox/3', '1'])
    self.assertEquals(rows[486],
                      ['OpenPGP', 'OpenPGP-0.2.3.tar.gz',
                       'Firefox/3', '1'])
    self.assertEquals(len(rows), 10043)
def test_apache_count(self):
    """Smoke test: main() runs end-to-end with faked distant stats."""
    # build a bz2 stats file that the patched urlopen will serve
    stats = ApacheLocalStats()
    stats.build_monthly_stats(2008, 11, log_sample, bz2_file,
                              compression='bz2')
    # patch urllib2.urlopen so "distant" stats come from the local file
    import urllib2
    original_urlopen = urllib2.urlopen

    def _fake_urlopen(url):
        class _Response(object):
            def read(self):
                return open(bz2_file).read()
        return _Response()

    urllib2.urlopen = _fake_urlopen
    # just make sure it doesn't break; always restore the patch
    try:
        main(config_file, log_sample)
    finally:
        urllib2.urlopen = original_urlopen
def test_build_daily_stats(self):
    """Daily stats for 2008-11-18 are complete and match known rows."""
    stats = ApacheLocalStats()
    output = StringIO()
    stats.build_daily_stats(2008, 11, 18, log_sample, output)
    output.seek(0)
    rows = list(csv.reader(output))
    # every row must be fully populated: four columns, none empty
    for row in rows:
        self.assertEquals(len(row), 4)
        self.assert_('' not in row)
    # spot-check known entries and the total row count
    self.assertEquals(rows[0],
                      ['4Suite-XML', '4Suite-XML-1.0.1.tar.bz2',
                       'Mozilla/5.0', '1'])
    self.assertEquals(rows[456],
                      ['PasteScript', 'PasteScript-0.3.1.tar.gz',
                       'Mozilla/5.0', '1'])
    self.assertEquals(rows[486],
                      ['Phebe', 'Phebe-0.1.1-py2.5.egg.asc',
                       'Mozilla/5.0', '1'])
    self.assertEquals(len(rows), 8953)
def test_compression(self):
    """Stats written with bz2 compression can be read back."""
    stats = ApacheLocalStats()
    stats.build_monthly_stats(2008, 11, log_sample, bz2_file,
                              compression='bz2')
    entries = stats.read_stats(bz2_file)
    first = entries.next()
    self.assertEquals(first['count'], '1')
    self.assertEquals(first['packagename'], 'appwsgi')
def test_build_local_stats(self):
    """build_local_stats writes the standard stats file into curdir."""
    stats = ApacheLocalStats()
    stats.build_local_stats(2008, 11, 18, log_sample, curdir)
    self.assert_(os.path.exists(stats_file))
    entries = stats.read_stats(stats_file)
    first = entries.next()
    self.assertEquals(first['count'], '1')
    self.assertEquals(first['packagename'], '4Suite-XML')
def test_build_local_stats(self):
    """The standard local stats file is created and readable."""
    local = ApacheLocalStats()
    local.build_local_stats(2008, 11, 18, log_sample, curdir)
    # the well-known stats file must now exist on disk
    self.assert_(os.path.exists(stats_file))
    reader = local.read_stats(stats_file)
    entry = reader.next()
    self.assertEquals(entry['count'], '1')
    self.assertEquals(entry['packagename'], '4Suite-XML')
def test_compression(self):
    """A bz2-compressed monthly stats file round-trips through read_stats."""
    local = ApacheLocalStats()
    local.build_monthly_stats(2008, 11, log_sample, bz2_file,
                              compression='bz2')
    reader = local.read_stats(bz2_file)
    entry = reader.next()
    self.assertEquals(entry['count'], '1')
    self.assertEquals(entry['packagename'], 'appwsgi')
def test_read_stats(self):
    """read_stats yields dict entries from an uncompressed stream."""
    output = StringIO()
    stats = ApacheLocalStats()
    stats.build_monthly_stats(2008, 11, log_sample, output)
    output.seek(0)
    entries = stats.read_stats(output)
    first = entries.next()
    self.assertEquals(first['count'], '1')
    self.assertEquals(first['packagename'], 'appwsgi')
def test_read_stats(self):
    """Reading back freshly-built monthly stats yields the first entry."""
    buf = StringIO()
    local = ApacheLocalStats()
    local.build_monthly_stats(2008, 11, log_sample, buf)
    buf.seek(0)
    reader = local.read_stats(buf)
    entry = reader.next()
    self.assertEquals(entry['count'], '1')
    self.assertEquals(entry['packagename'], 'appwsgi')
def test_distant_stats(self):
    """ApacheDistantLocalStats downloads, caches, then re-reads stats.

    Fix: the urllib2.urlopen monkey-patch is now restored in a
    try/finally, so a failing assertion can no longer leak the fake
    urlopen into the rest of the test run.
    """
    os.mkdir(mirror)
    url = 'http://example.com/mirror/daily/2008-11-18.bz2'
    stats = ApacheDistantLocalStats(mirror)
    # nothing cached yet, so reading yields nothing
    self.assertEquals(list(stats.read_stats(url)), [])
    # let's build the stats
    local_stats = ApacheLocalStats()
    local_stats.build_monthly_stats(2008, 11, log_sample, bz2_file,
                                    compression='bz2')
    # now patching url so it return the built stats
    import urllib2
    old_open = urllib2.urlopen

    def _open(url):
        class FakeUrl(object):
            def read(self):
                return open(bz2_file).read()
        return FakeUrl()

    urllib2.urlopen = _open
    try:
        read = stats.read_stats(url)
        first_entry = read.next()
        self.assertEquals(first_entry['count'], '1')
        self.assertEquals(first_entry['packagename'], 'appwsgi')
        # checking that the cache is filled
        self.assert_('2008-11-18.bz2' in os.listdir(mirror))
    finally:
        # removing the urlopen patch even if an assertion failed
        urllib2.urlopen = old_open
    # the cache should be activated now: no network access needed
    read = stats.read_stats(url)
    first_entry = read.next()
    self.assertEquals(first_entry['count'], '1')
    self.assertEquals(first_entry['packagename'], 'appwsgi')
def test_distant_stats(self):
    """Distant stats are fetched via urlopen, cached, then served locally.

    Fix: wrap the patched section in try/finally so the real
    urllib2.urlopen is restored even when an assertion fails.
    """
    os.mkdir(mirror)
    url = 'http://example.com/mirror/daily/2008-11-18.bz2'
    stats = ApacheDistantLocalStats(mirror)
    # empty cache and no (real) network: nothing to read yet
    self.assertEquals(list(stats.read_stats(url)), [])
    # build the stats file the fake urlopen will serve
    local_stats = ApacheLocalStats()
    local_stats.build_monthly_stats(2008, 11, log_sample, bz2_file,
                                    compression='bz2')
    # patch urllib2.urlopen so it returns the built stats
    import urllib2
    old_open = urllib2.urlopen

    def _open(url):
        class FakeUrl(object):
            def read(self):
                return open(bz2_file).read()
        return FakeUrl()

    urllib2.urlopen = _open
    try:
        read = stats.read_stats(url)
        first_entry = read.next()
        self.assertEquals(first_entry['count'], '1')
        self.assertEquals(first_entry['packagename'], 'appwsgi')
        # the fetch must have filled the local cache
        self.assert_('2008-11-18.bz2' in os.listdir(mirror))
    finally:
        # always remove the urlopen patch
        urllib2.urlopen = old_open
    # the cache is active now; a second read needs no network
    read = stats.read_stats(url)
    first_entry = read.next()
    self.assertEquals(first_entry['count'], '1')
    self.assertEquals(first_entry['packagename'], 'appwsgi')
def test_build_daily_stats(self):
    """Daily stats output has no empty fields and matches fixtures."""
    local = ApacheLocalStats()
    buf = StringIO()
    local.build_daily_stats(2008, 11, 18, log_sample, buf)
    buf.seek(0)
    entries = list(csv.reader(buf))
    # no partially-filled rows allowed
    for entry in entries:
        self.assertEquals(len(entry), 4)
        self.assert_('' not in entry)
    expected = {
        0: ['4Suite-XML', '4Suite-XML-1.0.1.tar.bz2', 'Mozilla/5.0', '1'],
        456: ['PasteScript', 'PasteScript-0.3.1.tar.gz',
              'Mozilla/5.0', '1'],
        486: ['Phebe', 'Phebe-0.1.1-py2.5.egg.asc', 'Mozilla/5.0', '1'],
    }
    for index, row in expected.items():
        self.assertEquals(entries[index], row)
    self.assertEquals(len(entries), 8953)
def test_apache_count(self):
    """main() completes when the distant stats are served from disk."""
    # the stats built here act as the "distant" stats main() fetches
    local = ApacheLocalStats()
    local.build_monthly_stats(2008, 11, log_sample, bz2_file,
                              compression='bz2')
    # replace urllib2.urlopen with a stub reading the local bz2 file
    import urllib2
    saved_urlopen = urllib2.urlopen

    def _stub(url):
        class _Result(object):
            def read(self):
                return open(bz2_file).read()
        return _Result()

    urllib2.urlopen = _stub
    try:
        # just a smoke test: it must not raise
        main(config_file, log_sample)
    finally:
        urllib2.urlopen = saved_urlopen
def test_build_monthly_stats(self):
    """Monthly stats output is fully populated and matches fixtures."""
    buf = StringIO()
    local = ApacheLocalStats()
    local.build_monthly_stats(2008, 11, log_sample, buf)
    buf.seek(0)
    entries = list(csv.reader(buf))
    # no partially-filled rows allowed
    for entry in entries:
        self.assertEquals(len(entry), 4)
        self.assert_('' not in entry)
    expected = {
        0: ['appwsgi', '344.tar.bz2', 'Mozilla/5.0', '1'],
        456: ['Mtrax', 'Mtrax-2.2.07-py2.5-win32.egg', 'Firefox/3', '1'],
        486: ['OpenPGP', 'OpenPGP-0.2.3.tar.gz', 'Firefox/3', '1'],
    }
    for index, row in expected.items():
        self.assertEquals(entries[index], row)
    self.assertEquals(len(entries), 10043)
def main(config_file, logfile):
    """Populate the download counts.

    Reads the local Apache log plus each mirror's published daily
    stats, updates per-file download counts in the database, then
    rebuilds the local and global daily stats files.

    Fixes over the original:
    - the date-based stats ``filename`` was clobbered by the log loop
      (``filename = line['filename']``) and by the update loop, so the
      stats files written afterwards could get a downloaded file's name
      instead of ``YYYY-MM-DD.bz2``; the loops now use ``fname``.
    - ``if downloads != []`` compared a dict to a list (always true);
      the DB update now runs only when something was counted.
    - the mirrors cache folder no longer shares the ``mirrors`` name
      with the list of distant readers.
    """
    # Read config file
    p = ConfigParser.ConfigParser()
    p.read(config_file)

    # Folder where per-mirror cached stats are stored
    mirrors_folder = p.get('mirrors', 'folder')

    # Read server-relative URI prefix
    files_url = urlparse.urlsplit(p.get('webui', 'files_url'))[2]

    # Setup database connection
    dbconn, cursor = get_cursor(p)

    # build an iterator chaining the local log reader with the
    # distant stats of every registered mirror
    cursor.execute("select * from mirrors")

    def read_distant_stats(mirror, stats_name):
        # one reader over a single mirror's published stats file
        mirror_domain = urlparse.urlparse(mirror[0])[1]
        mirror_domain = os.path.join(mirrors_folder, mirror_domain)
        distant_reader = ApacheDistantLocalStats(mirror_domain)
        stat_file_url = '%s/%s/%s' % (mirror[0], mirror[3], stats_name)
        return distant_reader.read_stats(stat_file_url)

    # it supposes it runs the program at day + 1
    yesterday = datetime.datetime.now() - datetime.timedelta(1)
    filename = yesterday.strftime('%Y-%m-%d.bz2')
    mirrors = [read_distant_stats(mirror, filename)
               for mirror in cursor.fetchall()]
    logs = chain(*[ApacheLogReader(logfile, files_url)] + mirrors)
    _log('Working with local stats and %d mirror(s)' % len(mirrors))

    # get last http access
    cursor.execute("select value from timestamps where name='http'")
    last_http = cursor.fetchone()[0]
    _log('Last time stamp was : %s' % last_http)

    downloads = {}

    # let's read the logs in the apache file
    for line in logs:
        day = int(line.get('day', yesterday.day))
        month = line.get('month', yesterday.month)
        year = int(line.get('year', yesterday.year))
        hour = int(line.get('hour', 0))
        minute = int(line.get('min', 0))
        sec = int(line.get('sec', 0))
        date = DateTime(year, month, day, hour, minute, sec)
        zone = utc_offset(line.get('zone', 0))
        date = date - zone
        count = int(line.get('count', 1))
        if date < last_http:
            continue
        # NOTE: deliberately not named `filename` — that variable holds
        # the date-based stats file name used again after this loop
        fname = line['filename']
        _dotlog('.')
        # see if we have already read the old download count
        if fname not in downloads:
            cursor.execute("select downloads from release_files "
                           "where filename=%s", (fname,))
            record = cursor.fetchone()
            if not record:
                # No file entry. Could be a .sig file
                continue
            # make sure we're working with a number
            downloads[fname] = record[0] or 0
        # add a download
        downloads[fname] += count

    # only touch the DB when something was counted (the original
    # compared the dict to [], which is always true)
    if downloads:
        for fname, count in downloads.items():
            # Update the download counts in the DB
            _log('Updating download count for %s: %s' % (fname, count))
            cursor.execute("update release_files set downloads=%s "
                           "where filename=%s", (count, fname))
        # Update the download timestamp
        date = psycopg.TimestampFromMx(datetime.datetime.now())
        cursor.execute("update timestamps set value=%s "
                       "where name='http'", (date,))
        dbconn.commit()

    # now creating the local stats file
    _log('Building local stats file')
    stats = ApacheLocalStats()
    stats_dir = p.get('mirrors', 'local-stats')
    if not os.path.exists(stats_dir):
        raise ValueError('"%s" folder not found (local-stats in config.ini)'
                         % stats_dir)
    stats_file = os.path.join(stats_dir, filename)
    stats.build_daily_stats(yesterday.year, yesterday.month, yesterday.day,
                            logfile, stats_file, files_url, 'bz2')

    # now creating the global stats file,
    # which is built with the latest database counts
    _log('Building global stats file')
    globalstats_dir = p.get('mirrors', 'global-stats')
    if not os.path.exists(globalstats_dir):
        raise ValueError('"%s" folder not found (global-stats in config.ini)'
                         % globalstats_dir)
    cursor.execute("select name, filename, downloads from release_files")

    def get_line(files_url):
        # adapts DB rows to the dict format the stats builder expects
        for line in cursor:
            data = {}
            data['day'] = yesterday.day
            data['month'] = yesterday.month
            data['year'] = yesterday.year
            data['filename'] = line[1]
            # NOTE(review): 'Unkown' typo preserved — it is runtime
            # output consumers may rely on; confirm before fixing
            data['useragent'] = 'Unkown'  # not stored yet
            data['packagename'] = line[0]
            data['count'] = line[2]
            yield data

    gstats = LocalStats()
    stats_file = os.path.join(globalstats_dir, filename)
    gstats.build_daily_stats(yesterday.year, yesterday.month, yesterday.day,
                             get_line, stats_file, files_url, 'bz2')