class BiggerDBTest(unittest.TestCase):
    """Smoke test that feeds 'tests/data/testdata.gz' to ProcessWebscope."""

    def setUp(self):
        """Delete any stale 'testbigger.db' and build a fresh fixture."""
        db_path = 'testbigger.db'
        if os.path.isfile(db_path):
            os.remove(db_path)
        self.fixture = ProcessWebscope(db_path, log=False)

    def tearDown(self):
        """Drop the reference to the fixture created in setUp."""
        del self.fixture

    def testProcess(self):
        """The test passes as long as process_file() does not raise."""
        archive = 'tests/data/testdata.gz'
        self.fixture.process_file(archive)
class BiggerDBTest(unittest.TestCase):
    """Runs ProcessWebscope against the 'tests/data/testdata.gz' archive."""

    def setUp(self):
        """Make sure no old database file exists, then create the fixture."""
        target = 'testbigger.db'
        leftover = os.path.isfile(target)
        if leftover:
            os.remove(target)
        self.fixture = ProcessWebscope(target, log=False)

    def tearDown(self):
        """Release the fixture attribute set by setUp."""
        del self.fixture

    def testProcess(self):
        """No assertions: success means process_file() finished normally."""
        self.fixture.process_file('tests/data/testdata.gz')
class FreshDBTest(unittest.TestCase):
    """Checks the SQLite contents after processing 'tests/data/minimal.gz'.

    Each test starts from a database file that did not previously exist,
    runs ProcessWebscope.process_file() and then inspects the resulting
    tables through a separate sqlite3 connection.
    """

    def setUp(self):
        """Remove any leftover 'test.db' and create the fixture."""
        self.dbname = 'test.db'
        if os.path.isfile(self.dbname):
            os.remove(self.dbname)
        self.fixture = ProcessWebscope(self.dbname, log=False)
        self.testfile = 'tests/data/minimal.gz'

    def tearDown(self):
        """Drop the reference to the fixture created in setUp."""
        del self.fixture

    def testBasicProcess(self):
        """Process the whole file and verify row counts and sample rows."""
        self.fixture.process_file(self.testfile)

        conn = sqlite3.connect(self.dbname)
        # Gather every value first and close the connection in ``finally``
        # so that a failing query or assertion cannot leak the handle.
        try:
            c = conn.cursor()
            c.execute('''SELECT COUNT(*) FROM event''')
            count = c.fetchone()[0]
            c.execute('''SELECT COUNT(*) FROM user''')
            user_count = c.fetchone()[0]
            c.execute('''SELECT * from user WHERE feat2=0.121789''')
            user = c.fetchone()
            c.execute('''SELECT COUNT(*) FROM pool''')
            pools = c.fetchone()[0]
            c.execute('''SELECT COUNT(*) from poolarticle''')
            pool_articles = c.fetchone()[0]
            c.execute('''SELECT * from article WHERE articleID=109511''')
            article = c.fetchone()
        finally:
            conn.close()

        self.assertEqual(5, count)
        # The test expects exactly one user row per event row.
        self.assertEqual(count, user_count)
        # NOTE(review): floats are compared exactly; this presumes the
        # stored values round-trip unchanged through SQLite -- confirm.
        # The first column of the user row is ignored by the comparison.
        self.assertEqual(
            (4, 1, 0.121789, 0.003283, 0.628306, 0.246422, 0.000200), user[1:])
        self.assertEqual(3, pools)
        self.assertEqual(20 + 21 + 20, pool_articles)
        self.assertEqual(
            (109511, 0, 1, 0.381149, 0.000129, 0.060038, 0.269129, 0.289554),
            article)

    def testSkipProcess(self):
        """Process with skip_lines=2 and verify what is left in 'event'."""
        self.fixture.process_file(self.testfile, skip_lines=2)

        conn = sqlite3.connect(self.dbname)
        # Close the connection even if a query raises.
        try:
            c = conn.cursor()
            c.execute('''SELECT COUNT(*) FROM event''')
            count = c.fetchone()[0]
            c.execute('''SELECT displayed FROM event WHERE eventID=1''')
            displayed = c.fetchone()[0]
        finally:
            conn.close()

        self.assertEqual(3, count)
        self.assertEqual(109511, displayed)
class FreshDBTest(unittest.TestCase):
    """Verifies the database produced from 'tests/data/minimal.gz'.

    setUp deletes any existing 'test.db', so every test observes only the
    rows written by its own process_file() call.
    """

    def setUp(self):
        """Delete a stale database file if present and build the fixture."""
        self.dbname = 'test.db'
        if os.path.isfile(self.dbname):
            os.remove(self.dbname)
        self.fixture = ProcessWebscope(self.dbname, log=False)
        self.testfile = 'tests/data/minimal.gz'

    def tearDown(self):
        """Release the fixture attribute set by setUp."""
        del self.fixture

    def testBasicProcess(self):
        """Process the full file, then check counts and two sample rows."""
        self.fixture.process_file(self.testfile)

        conn = sqlite3.connect(self.dbname)
        # Registered immediately so the connection is closed even when an
        # assertion below fails (previously close() was skipped in that
        # case because it came after the assertions).
        self.addCleanup(conn.close)
        c = conn.cursor()
        c.execute('''SELECT COUNT(*) FROM event''')
        count = c.fetchone()[0]
        c.execute('''SELECT COUNT(*) FROM user''')
        user_count = c.fetchone()[0]
        c.execute('''SELECT * from user WHERE feat2=0.121789''')
        user = c.fetchone()
        c.execute('''SELECT COUNT(*) FROM pool''')
        pools = c.fetchone()[0]
        c.execute('''SELECT COUNT(*) from poolarticle''')
        pool_articles = c.fetchone()[0]
        c.execute('''SELECT * from article WHERE articleID=109511''')
        article = c.fetchone()

        self.assertEqual(5, count)
        # The test expects as many user rows as event rows.
        self.assertEqual(count, user_count)
        # The leading column of the user row is excluded via user[1:].
        self.assertEqual((4, 1, 0.121789, 0.003283,
                          0.628306, 0.246422, 0.000200), user[1:])
        self.assertEqual(3, pools)
        self.assertEqual(20 + 21 + 20, pool_articles)
        self.assertEqual((109511, 0, 1, 0.381149, 0.000129,
                          0.060038, 0.269129, 0.289554), article)

    def testSkipProcess(self):
        """Process with skip_lines=2, then check the 'event' table."""
        self.fixture.process_file(self.testfile, skip_lines=2)

        conn = sqlite3.connect(self.dbname)
        # Guarantees the connection is closed whether or not the test passes.
        self.addCleanup(conn.close)
        c = conn.cursor()
        c.execute('''SELECT COUNT(*) FROM event''')
        count = c.fetchone()[0]
        c.execute('''SELECT displayed FROM event WHERE eventID=1''')
        displayed = c.fetchone()[0]

        self.assertEqual(3, count)
        self.assertEqual(109511, displayed)
 def setUp(self):
     """Delete any existing 'testbigger.db' and create the fixture."""
     db_path = 'testbigger.db'
     if os.path.isfile(db_path):
         os.remove(db_path)
     self.fixture = ProcessWebscope(db_path, log=False)
 def setUp(self):
     """Start from a missing 'test.db' and record the input archive path."""
     db_path = 'test.db'
     self.dbname = db_path
     if os.path.isfile(db_path):
         os.remove(db_path)
     self.fixture = ProcessWebscope(db_path, log=False)
     self.testfile = 'tests/data/minimal.gz'
 def setUp(self):
     """Remove a stale database file, then build a new ProcessWebscope."""
     target = 'testbigger.db'
     already_there = os.path.isfile(target)
     if already_there:
         os.remove(target)
     self.fixture = ProcessWebscope(target, log=False)
 def setUp(self):
     """Create the fixture on 'test.db' after deleting any old copy."""
     self.dbname = name = 'test.db'
     stale = os.path.isfile(name)
     if stale:
         os.remove(name)
     self.fixture = ProcessWebscope(name, log=False)
     self.testfile = 'tests/data/minimal.gz'
示例#9
0
"""
run_process.py
James Wang
Nov 26, 2014
"""
import os
import time
import datetime
from process_to_sqlite import ProcessWebscope

# Process every .gz file in the data directory into 'full.db' and record
# the total wall-clock time taken.
proc = ProcessWebscope('full.db', log=False)

# Value passed as skip_lines for each archive, indexed by the order in which
# the .gz files are encountered (presumably to resume an interrupted import
# -- confirm before reusing these numbers).
skip = [4681992, 1936662, 0, 0, 0, 0, 0, 0, 0, 0]
counter = 0

t0 = time.time()
# NOTE(review): os.listdir() returns entries in arbitrary order, so which
# archive receives which skip count depends on the filesystem.
for filename in os.listdir('Webscope/R6/'):
    if filename.endswith('.gz'):
        # Single-argument print(...) behaves the same on Python 2 and 3;
        # the former ``print file`` statement is a SyntaxError on Python 3.
        print(filename)
        # Archives beyond the end of the skip table are processed in full
        # instead of raising IndexError.
        skip_lines = skip[counter] if counter < len(skip) else 0
        proc.process_file('Webscope/R6/' + filename, skip_lines=skip_lines)
        counter += 1
t1 = time.time()

# Report the elapsed time on stdout and persist it to a text file.
the_time = str(datetime.timedelta(seconds=t1 - t0))
print('Processing all files took a total of {}'.format(the_time))
with open('gz_to_database_time.txt', 'w') as f:
    f.write('Total time = {}'.format(the_time))
示例#10
0
"""
run_process.py
James Wang
Nov 26, 2014
"""
import os
import time
import datetime
from process_to_sqlite import ProcessWebscope

# Feed each .gz file found in 'Webscope/R6/' to a single ProcessWebscope
# instance writing to 'full.db', timing the whole run.
proc = ProcessWebscope('full.db', log=False)

# skip_lines value for the n-th archive encountered; zero entries mean the
# archive is processed from its first line. NOTE(review): the non-zero
# entries look like resume offsets from an earlier run -- confirm.
skip = [4681992, 1936662, 0, 0, 0, 0, 0, 0, 0, 0]
counter = 0

t0 = time.time()
# NOTE(review): the pairing of skip counts with archives relies on the
# order returned by os.listdir(), which is not guaranteed.
for filename in os.listdir('Webscope/R6/'):
    if filename.endswith('.gz'):
        # Function-call form of print works on both Python 2 and Python 3
        # (the old ``print file`` statement does not parse on Python 3).
        print(filename)
        # Fall back to 0 once the skip table is exhausted so that extra
        # archives do not abort the run with IndexError.
        skip_lines = skip[counter] if counter < len(skip) else 0
        proc.process_file('Webscope/R6/' + filename, skip_lines=skip_lines)
        counter += 1
t1 = time.time()

# Format the elapsed seconds as H:MM:SS, print it and save it to disk.
the_time = str(datetime.timedelta(seconds=t1 - t0))
print('Processing all files took a total of {}'.format(the_time))
with open('gz_to_database_time.txt', 'w') as f:
    f.write('Total time = {}'.format(the_time))