Exemplo n.º 1
0
 def from_json_to_db(self):
     """Rebuild the pydblite database from the JSON-like thread dump.

     Streams the dump line by line, accumulating lines into ``thread``
     until a record terminator (a line ending in " }\n") is seen, then
     parses the buffered record with a regex and inserts the captured
     fields into a freshly overridden database.
     """
     thread = ''
     # Persistent base; mode='override' below discards any existing data.
     db = Base("scnscraper/abap.pydb", save_to_file= True)
     # create new base with field names
     db.create('url', 'uid', 'type', 'author',
                    'title', 'date_time', 'tags', 'views',
                    'answers', 'resolve', 'upvotes', 'text', mode='override')
     # NOTE(review): `i` is never used after this assignment.
     i=0
     # NOTE(review): 'scnsraper' looks like a typo of 'scnscraper' — confirm
     # against the path the dump is actually written to.
     with open('scnsraper/threads.json', 'r') as file:
         for line in file:
             # A line ending in " }\n" closes the current record.
             if(line.endswith(" }\n")):
                 thread += line
                 # Capture the 12 fields; group 12 (text) may span newlines.
                 tokens = re.search(r"url:\s'(.*?)',\suid:\s'(.*?)',\stype:\s'(.*?)',\sauthor:\s'(.*?)',\stitle:\s'(.*?)',\sdate_time:\s'(.*?)',\stags:\s'(.*?)',\sviews:\s'(.*?)',\sanswers:\s'(.*?)',\sresolve:\s'(.*?)',\supvotes:\s'(.*?)', text:\s'((.|\n)*)'\s}", str(thread))
                 if tokens is not None:
                     db.insert(url = tokens.group(1), uid = tokens.group(2), type= tokens.group(3),
                             author=tokens.group(4), title = tokens.group(5), date_time = tokens.group(6),
                             tags = tokens.group(7), views = tokens.group(8), answers = tokens.group(9),
                             resolve = tokens.group(10), upvotes = tokens.group(11), text = tokens.group(12))
                     # Commit after every successful insert.
                     db.commit()
                 print ('\n--------------------------------------------\n')
                 thread = ''
             # A line starting with " ]" marks the end of a page.
             if(line.startswith(" ]")):
                 print("new page")
                 thread = ''
             # Otherwise keep accumulating lines into the record buffer.
             # NOTE(review): lines yielded by file iteration never contain
             # "\n\n", so the startswith(" ]\n\n") test cannot exclude them;
             # the startswith(" ]") branch above does the real page check.
             if(line.endswith('\n') and (not line.startswith(" ]\n\n")) and (not line.endswith(" }\n"))):
                 thread += line
Exemplo n.º 2
0
def pydblite():
    """Tour of the pydblite pure-Python in-memory database API.

    Demonstrates schema creation, inserts, selections, deletions,
    indexing, in-place updates, schema evolution and committing.
    """
    from pydblite.pydblite import Base

    # An in-memory base: nothing is written to disk.
    db = Base('dummy', save_to_file=False)
    # Declare the schema (field names only; pydblite is untyped).
    db.create('name', 'age', 'size')

    # Every inserted record is a dict carrying a unique integer '__id__'.
    db.insert(name='homer', age=23, size=1.84)

    # Exact-match selection by field value.
    matches = db(name="homer")
    # Arbitrary predicates via a list comprehension over the base.
    adults = [rec for rec in db if 30 > rec['age'] >= 18 and rec['size'] < 2]
    print("res:", adults)

    # Remove a single record.
    db.delete(matches[0])

    batch = []
    rid = db.insert(name='homer', age=23, size=1.84)
    batch.append(db[rid])
    rid = db.insert(name='marge', age=36, size=1.94)
    batch.append(db[rid])

    # Lazy iteration with a generator expression.
    for rec in (rec for rec in db if rec['name'] in ('homer', 'marge')):
        pass

    # delete() also accepts a list of records.
    db.delete(batch)

    rid = db.insert(name='Bart', age=15, size=1.34)
    row = db[rid]  # direct lookup: the record whose '__id__' equals rid

    # Records can also be deleted by their id.
    del db[rid]

    # Index a field to speed up selections on it.
    db.create_index('age')

    rid = db.insert(name='Lisa', age=13, size=1.24)
    row = db[rid]
    # In-place update of an existing record.
    db.update(row, age=24)

    # The schema can evolve after creation.
    db.add_field('new_field', default=0)
    db.drop_field('name')
    # Flush changes (a no-op here since save_to_file=False).
    db.commit()
Exemplo n.º 3
0
def pydblite():
    """Demonstrate the pydblite pure-Python in-memory database API.

    Covers schema creation, inserts, selections, deletions, indexing,
    in-place updates, schema evolution and committing.
    """
    from pydblite.pydblite import Base
    db = Base('dummy', save_to_file=False)
    # create new base with field names
    db.create('name', 'age', 'size')
    # insert new record
    db.insert(name='homer', age=23, size=1.84)
    # records are dictionaries with a unique integer key __id__
    # simple selection by field value
    records = db(name="homer")
    # complex selection by list comprehension
    res = [r for r in db if 30 > r['age'] >= 18 and r['size'] < 2]
    # BUG FIX: was the Python 2 statement `print "res:", res`, which is a
    # SyntaxError under Python 3 and inconsistent with the rest of the file.
    print("res:", res)
    # delete a record or a list of records
    r = records[0]
    db.delete(r)

    list_of_records = []
    r = db.insert(name='homer', age=23, size=1.84)
    list_of_records.append(db[r])
    r = db.insert(name='marge', age=36, size=1.94)
    list_of_records.append(db[r])

    # or generator expression
    for r in (r for r in db if r['name'] in ('homer', 'marge')):
        pass

    db.delete(list_of_records)

    rec_id = db.insert(name='Bart', age=15, size=1.34)
    record = db[rec_id]  # the record such that record['__id__'] == rec_id

    # delete a record by its id
    del db[rec_id]

    # create an index on a field
    db.create_index('age')
    # update
    rec_id = db.insert(name='Lisa', age=13, size=1.24)

    # direct access by id
    record = db[rec_id]

    db.update(record, age=24)
    # add and drop fields
    db.add_field('new_field', default=0)
    db.drop_field('name')
    # save changes on disk (no-op here since save_to_file=False)
    db.commit()
Exemplo n.º 4
0
 def from_json_to_db(self):
     """Rebuild the pydblite database from the JSON-like thread dump.

     Streams the dump line by line, accumulating lines into ``thread``
     until a record terminator (a line ending in " }\n") is seen, then
     parses the buffered record with a regex and inserts the captured
     fields into a freshly overridden database.
     """
     thread = ''
     # Persistent base; mode='override' below discards any existing data.
     db = Base("scnscraper/abap.pydb", save_to_file=True)
     # create new base with field names
     db.create('url',
               'uid',
               'type',
               'author',
               'title',
               'date_time',
               'tags',
               'views',
               'answers',
               'resolve',
               'upvotes',
               'text',
               mode='override')
     # NOTE(review): `i` is never used after this assignment.
     i = 0
     # NOTE(review): 'scnsraper' looks like a typo of 'scnscraper' — confirm
     # against the path the dump is actually written to.
     with open('scnsraper/threads.json', 'r') as file:
         for line in file:
             # A line ending in " }\n" closes the current record.
             if (line.endswith(" }\n")):
                 thread += line
                 # Capture the 12 fields; group 12 (text) may span newlines.
                 tokens = re.search(
                     r"url:\s'(.*?)',\suid:\s'(.*?)',\stype:\s'(.*?)',\sauthor:\s'(.*?)',\stitle:\s'(.*?)',\sdate_time:\s'(.*?)',\stags:\s'(.*?)',\sviews:\s'(.*?)',\sanswers:\s'(.*?)',\sresolve:\s'(.*?)',\supvotes:\s'(.*?)', text:\s'((.|\n)*)'\s}",
                     str(thread))
                 if tokens is not None:
                     db.insert(url=tokens.group(1),
                               uid=tokens.group(2),
                               type=tokens.group(3),
                               author=tokens.group(4),
                               title=tokens.group(5),
                               date_time=tokens.group(6),
                               tags=tokens.group(7),
                               views=tokens.group(8),
                               answers=tokens.group(9),
                               resolve=tokens.group(10),
                               upvotes=tokens.group(11),
                               text=tokens.group(12))
                     # Commit after every successful insert.
                     db.commit()
                 print('\n--------------------------------------------\n')
                 thread = ''
             # A line starting with " ]" marks the end of a page.
             if (line.startswith(" ]")):
                 print("new page")
                 thread = ''
             # Otherwise keep accumulating lines into the record buffer.
             # NOTE(review): lines yielded by file iteration never contain
             # "\n\n", so the startswith(" ]\n\n") test cannot exclude them;
             # the startswith(" ]") branch above does the real page check.
             if (line.endswith('\n') and (not line.startswith(" ]\n\n"))
                     and (not line.endswith(" }\n"))):
                 thread += line
Exemplo n.º 5
0
    def test_open_existing(self):
        """Opening, re-opening and overriding an existing on-disk base."""
        base = Base(test_db_name, save_to_file=True)
        base.create('unique_id', 'name', "active", mode="open")
        base.insert("123", "N", True)
        base.commit()

        # Re-opening with mode="open" ignores the new column names: the
        # fields read back from disk must still be the original ones.
        base = Base(test_db_name, save_to_file=True)
        base.create('unique_id2', 'name2', "active2", mode="open")
        rec = base.insert("123", "N", True)
        base.commit()
        self.assertEqual(base.fields, ['unique_id', 'name', "active"])

        # mode="override" discards the existing content entirely.
        base = Base(test_db_name, save_to_file=True)
        base.create('unique_id', 'name', "active", mode="override")
        base.commit()
        self.assertEqual(len(self.filter_db), 0)

        # create() with the default mode (None) refuses to clobber an
        # existing file, and an unknown mode is rejected outright.
        self.assertRaises(IOError, base.create, 'unique_id', 'name', "active")
        self.assertRaises(ValueError, base.create, 'unique_id', 'name',
                          "active", mode="invalidmode")
Exemplo n.º 6
0
class InMemoryDBLite(InMemoryDB):
    """Class that implements all steps from Dextra's programming challenge.

    Uses the pydblite in-memory engine, so connect/disconnect are pure
    bookkeeping on the ``connected`` flag.
    """

    def __init__(self, name: str):
        """Create the named in-memory base (nothing is written to disk)."""
        logger.debug('Initializing DB.')
        self.connected = False  # guard flag checked by every operation
        self.name = name
        self.db = Base(name, save_to_file=False)

    def _require_connection(self):
        """Shared guard clause: raise unless connect() has been called."""
        if not self.connected:
            raise Exception('Not connected to db.')

    def connect(self):
        """Mark the store as connected.

        pydblite's in-memory engine needs no real connection, so this
        only sets the flag.
        """
        logger.debug(f'Connecting to [{self.name}].')
        self.connected = True

    def disconnect(self):
        """Mark the store as disconnected; raises if not connected."""
        logger.debug(f'Disconnecting from [{self.name}].')
        self._require_connection()
        # Nothing to tear down for the in-memory engine; just clear the flag.
        self.connected = False

    def create_schema(self, *args):
        """Create the schema from field names (overriding any previous one),
        commit, and return pydblite's create() result."""
        # BUG FIX: log message previously read 'Crating schema'.
        logger.debug(f'Creating schema into [{self.name}].')
        self._require_connection()
        r = self.db.create(*args, mode='override')
        self.db.commit()
        return r

    def insert(self, item: dict):
        """Insert one record (field -> value dict) and return its id."""
        logger.debug(f'Inserting item into [{self.name}].')
        self._require_connection()
        r = self.db.insert(**item)
        self.db.commit()
        return r

    def insert_multiple(self, items: list):
        """Insert every dict in *items* and commit once at the end.

        Returns the id of the last insert, or None when *items* is empty.
        """
        logger.debug(f'Inserting multiple items into [{self.name}].')
        self._require_connection()
        # BUG FIX: `r` was unbound (UnboundLocalError on return) when
        # *items* was empty; now defaults to None.
        r = None
        for item in items:
            r = self.db.insert(**item)
        self.db.commit()
        return r
Exemplo n.º 7
0
    def test_open_existing(self):
        """Re-open, override and mode-validation behaviour of a saved base."""
        store = Base(test_db_name, save_to_file=True)
        store.create('unique_id', 'name', "active", mode="open")
        store.insert("123", "N", True)
        store.commit()

        # When an existing base is opened, the column names passed to
        # create() are ignored in favour of those stored on disk.
        store = Base(test_db_name, save_to_file=True)
        store.create('unique_id2', 'name2', "active2", mode="open")
        rec = store.insert("123", "N", True)
        store.commit()
        self.assertEqual(store.fields, ['unique_id', 'name', "active"])

        # Overriding wipes all previously stored records.
        store = Base(test_db_name, save_to_file=True)
        store.create('unique_id', 'name', "active", mode="override")
        store.commit()
        self.assertEqual(len(self.filter_db), 0)

        # Default mode (None) refuses to clobber an existing file ...
        self.assertRaises(IOError, store.create, 'unique_id', 'name', "active")
        # ... and an unrecognised mode raises ValueError.
        self.assertRaises(ValueError, store.create, 'unique_id', 'name', "active", mode="invalidmode")
Exemplo n.º 8
0
class PyDbLiteTestCase(Generic, unittest.TestCase):
    """Tests for the pydblite pure-Python engine: opening bases in the
    various modes, insert semantics (kwargs, positional tuple, sqlite
    compatibility), and in-memory behaviour."""

    def setUp(self):  # NOQA
        """Create a fresh in-memory base with a fixed 3-field schema."""
        self.first_record_id = 0
        filter_db = Base(test_db_name, save_to_file=False)
        filter_db.create('unique_id', 'name', "active", mode="override")
        self.filter_db = filter_db

    def tearDown(self):  # NOQA
        """Remove the on-disk artifact, whether a file or a directory."""
        if os.path.isfile(test_db_name):
            os.remove(test_db_name)
        elif os.path.isdir(test_db_name):
            os.rmdir(test_db_name)

    def setup_db_for_filter(self):
        """Populate filter_db with the status fixtures; the last insert
        must get id 6 (i.e. seven records, ids 0..6)."""
        self.reset_status_values_for_filter()
        for d in self.status:
            res = self.filter_db.insert(**d)
        self.assertEqual(res, 6)

    def test_open(self):
        """A brand-new in-memory base accepts create() and insert()."""
        db = Base('dummy', save_to_file=False)
        db.create('name', 'age', 'size')
        db.insert(name='homer', age=23, size=1.84)

    def test_open_file_with_existing_dir(self):
        """create() raises IOError when the target path is a directory."""
        os.mkdir(test_db_name)
        db = Base(test_db_name, save_to_file=True)
        # A dir with that name exists
        self.assertRaises(IOError, db.create, 'unique_id', 'name', "active", mode="open")

    def test_open_existing(self):
        """Field names survive re-opening; override wipes; bad modes raise."""
        db = Base(test_db_name, save_to_file=True)
        db.create('unique_id', 'name', "active", mode="open")
        db.insert("123", "N", True)
        db.commit()

        # Just verify that it works to open an existing db.
        # The column names are ignored, therefore they should
        # equal the old column names
        db = Base(test_db_name, save_to_file=True)
        db.create('unique_id2', 'name2', "active2", mode="open")
        rec = db.insert("123", "N", True)
        db.commit()
        self.assertEqual(db.fields, ['unique_id', 'name', "active"])

        # mode="override" will overwrite existing db
        db = Base(test_db_name, save_to_file=True)
        db.create('unique_id', 'name', "active", mode="override")
        db.commit()
        self.assertEqual(len(self.filter_db), 0)

        # Equals passing mode=None
        self.assertRaises(IOError, db.create, 'unique_id', 'name', "active")
        self.assertRaises(ValueError, db.create, 'unique_id', 'name', "active", mode="invalidmode")

    def test_open_memory(self):
        """The ':memory:' path implies save_to_file=False."""
        db = Base(":memory:")
        self.assertFalse(db.save_to_file)

    def test_open_memory_with_existing_filename(self):
        """An in-memory base can read fields from an existing file, while
        create(mode='override') replaces them."""
        self.filter_db = Base(test_db_name, save_to_file=True)
        self.filter_db.create('unique_id', 'name', "active", mode="override")
        self.filter_db.commit()

        db = Base(test_db_name, save_to_file=False)
        db.open()
        self.assertEqual(db.fields, ['unique_id', 'name', "active"])

        db = Base(test_db_name, save_to_file=False)
        db.create('unique_id2', 'name2', "active2", mode="override")
        self.assertEqual(db.fields, ['unique_id2', 'name2', "active2"])

    def test_insert_list(self):
        """Without sqlite_compat, a positional tuple is stored whole in the
        first field rather than being unpacked across fields."""
        status = (8, "testname", 0)

        # Insert one record; the first id handed out is 0.
        rec = self.filter_db.insert(status)
        self.assertEqual(rec, 0)
        self.assertEqual(self.filter_db[rec]["unique_id"], status)

    def test_sqlite_compat_insert_list(self):
        """With sqlite_compat=True, a list of tuples is unpacked across
        fields and insert() returns None."""
        self.filter_db = Base(test_db_name, save_to_file=False, sqlite_compat=True)
        self.filter_db.create('unique_id', 'name', "active", mode="override")
        status = [(8, "testname", 0)]

        # Insert one entry (a single 3-tuple inside a list).
        rec = self.filter_db.insert(status)
        self.assertEqual(rec, None)
        self.assertEqual(len(self.filter_db), 1)
        self.assertEqual(self.filter_db[0]["unique_id"], 8)
        self.assertEqual(self.filter_db[0]["name"], "testname")
        self.assertEqual(self.filter_db[0]["active"], 0)

    def test_sqlite_compat(self):
        """sqlite_compat bulk inserts return None and append all rows."""
        db = Base(test_db_name, save_to_file=False, sqlite_compat=True)
        db.create('unique_id', 'name', "active", mode="open")
        self.reset_status_values_for_filter()

        # Insert the 7 status fixtures in one call.
        res = db.insert(self.status)
        self.assertEqual(res, None)
        self.assertEqual(len(db), 7)

        status = [(8, "testname", 0)]
        res = db.insert(status)
        self.assertEqual(res, None)
        self.assertEqual(len(db), 8)
Exemplo n.º 9
0
                      mon_dd_yyyy=mon_dd_yyyy,
                      month_dd_yyyy=month_dd_yyyy,
                      dd_mm_yyyy=dd_mm_yyyy,
                      mm_dd_yyyy=mm_dd_yyyy,
                      mm_dd_yy=mm_dd_yy,
                      dd_mm_yy=dd_mm_yy,
                      m_d_yy=m_d_yy,
                      d_m_yy=d_m_yy,
                      weekday_flag=weekday_flag,
                      week_first_day_flag=week_first_day_flag,
                      week_last_day_flag=week_last_day_flag,
                      month_first_day_flag=month_first_day_flag,
                      month_last_day_flag=month_last_day_flag,
                      quarter_first_day_flag=quarter_first_day_flag,
                      quarter_last_day_flag=quarter_last_day_flag,
                      year_first_day_flag=year_first_day_flag,
                      year_last_day_flag=year_last_day_flag,
                      leap_year_flag=leap_year_flag,
                      is_holiday=is_holiday,
                      holiday_name=holiday_name,
                      nth_weekday=nth_weekday)
# Persist the populated date dimension (pydblite commit to disk).
date_table.commit()
# Export the whole table to CSV, quoting every field.
with open('temporal_data.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    # Header row: field names taken from the first record
    # (assumes date_table is non-empty — IndexError otherwise; TODO confirm).
    wr.writerow(list(date_table[0].keys()))
    # One CSV row per record, in the table's iteration order.
    for item in date_table:
        wr.writerow(list(item.values()))
Exemplo n.º 10
0
class DataStoring():
    """Persists scraped SCN threads to a JSON-like dump file and to a
    pydblite database, and tracks the scraper's page index on disk."""

    # Initialize an instantiated object by touching the json dump file and
    # opening (or creating) the database.
    def __init__(self):
        # Touch the dump file so later appends always succeed.
        self.out_file = open("scnscraper/abap.json", "a")
        self.out_file.close()
        self.db = Base("scnscraper/abap.pydb")
        if self.db.exists():
            self.db.open()
        else:
            self.db.create('url', 'uid', 'type', 'author', 'title', 'date_time', 'tags',
                           'views', 'answers', 'resolve', 'upvotes', 'text')

    # For each thread scraped, insert it into the db, committing once at end.
    def insert_items_into_db(self, threads):
            for thread in threads:
                # NOTE(review): this SapItem() is a dead store — it is
                # immediately overwritten by `item = thread` below.
                item = SapItem() # New Item instance
                item = thread
                try:
                    # Insert into db; every field is stringified first.
                    self.db.insert(url = str(item["url"]), uid = str(item["uid"]), type= str(item["type"] ),
                                   author=str(item["author"]), title = str(item["title"]),
                                   date_time = str(item["date_time"] ),tags = str(item["tags"] ),
                                   views = str(item["views"] ), answers = str(item["answers"] ),
                                   resolve = str(item["resolve"] ), upvotes = str(item["upvotes"] ),
                                   text = str(item["text"]))
                except UnicodeEncodeError:
                    # Best-effort: skip records that cannot be encoded.
                    print("Unicode Encode Exception!")
            #save changes on disk
            self.db.commit()

    # For each thread scraped, build the string to append to the json file.
    def threads_to_str(self, threads):
        """Serialize *threads* into the "[ { ... }\n ... ]\n\n" dump format;
        returns "" when *threads* is empty."""
        out_string = "[ "
        if threads.__len__() == 0:
            return ""
        for thread in threads:
            # NOTE(review): dead store, same pattern as insert_items_into_db.
            item = SapItem()
            item = thread
            try:
                out_string += "{ url: '" + str(item["url"] ) + "', " + "uid: '" + str(item["uid"] ) + "', "\
                                "type: '" + str(item["type"] )  + "', "\
                                "author: '"+ str(item["author"])  + "', "  \
                                "title: '"+ str(item["title"])  + "', "\
                                "date_time: '"+ str(item["date_time"] )  + "', " \
                                "tags: '"+ str(item["tags"] )  + "', " \
                                "views: '"+ str(item["views"] )  + "', "\
                                "answers: '"+ str(item["answers"] )  + "', " \
                                "resolve: '"+ str(item["resolve"] )  + "', " \
                                "upvotes: '"+ str(item["upvotes"] )  + "', "\
                                "text: '" + str(item["text"]) + "' }\n"
            except UnicodeEncodeError:
                print("Unicode Encode Exception!")

        out_string += " ]\n\n"
        return out_string


    # For each batch of threads scraped, append them to the json dump file.
    def insert_items_into_file(self, threads):
        try:
            self.out_file = open("scnscraper/abap.json", "a") # open in append mode
            #convert into string and insert into file
            self.out_file.write(self.threads_to_str(threads))
            self.out_file.close()
        # NOTE(review): bare except also swallows SystemExit and
        # KeyboardInterrupt — consider `except Exception:`.
        except:
            print('Exception in writing file')
            self.out_file.close()


    # Read the web page index, creating the file with default 2 if absent.
    def read_index_from_file(self):
        if os.path.exists('scnscraper/index.txt'):
            with open('scnscraper/index.txt') as f:
                index = int(f.readline())
                # NOTE(review): redundant — the `with` block closes f.
                f.close()
        else:
            f = open('scnscraper/index.txt', 'w')
            index = 2
            f.write(str(index))
            f.close()
        return index

    # Write the web page index
    def write_index_into_file(self, i):
        f = open('scnscraper/index.txt', 'w')
        f.write(str(i))
        f.close()


    # Convert the content of the json dump file into a new db.
    def from_json_to_db(self):
        """Rebuild the database from the dump: buffer lines until a record
        terminator " }\n", then regex-parse and insert the 12 fields."""
        thread = ''
        db = Base("scnscraper/abap.pydb", save_to_file= True)
        # create new base with field names (override discards existing data)
        db.create('url', 'uid', 'type', 'author',
                       'title', 'date_time', 'tags', 'views',
                       'answers', 'resolve', 'upvotes', 'text', mode='override')
        # NOTE(review): `i` is never used after this assignment.
        i=0
        # NOTE(review): 'scnsraper' looks like a typo of 'scnscraper'.
        with open('scnsraper/threads.json', 'r') as file:
            for line in file:
                if(line.endswith(" }\n")):
                    thread += line
                    # Capture the 12 fields; group 12 (text) may span newlines.
                    tokens = re.search(r"url:\s'(.*?)',\suid:\s'(.*?)',\stype:\s'(.*?)',\sauthor:\s'(.*?)',\stitle:\s'(.*?)',\sdate_time:\s'(.*?)',\stags:\s'(.*?)',\sviews:\s'(.*?)',\sanswers:\s'(.*?)',\sresolve:\s'(.*?)',\supvotes:\s'(.*?)', text:\s'((.|\n)*)'\s}", str(thread))
                    if tokens is not None:
                        db.insert(url = tokens.group(1), uid = tokens.group(2), type= tokens.group(3),
                                author=tokens.group(4), title = tokens.group(5), date_time = tokens.group(6),
                                tags = tokens.group(7), views = tokens.group(8), answers = tokens.group(9),
                                resolve = tokens.group(10), upvotes = tokens.group(11), text = tokens.group(12))
                        db.commit()
                    print ('\n--------------------------------------------\n')
                    thread = ''
                # A line starting with " ]" marks the end of a page.
                if(line.startswith(" ]")):
                    print("new page")
                    thread = ''
                # Otherwise keep accumulating lines into the record buffer.
                if(line.endswith('\n') and (not line.startswith(" ]\n\n")) and (not line.endswith(" }\n"))):
                    thread += line


    # Print summary statistics about the scraped discussions (static).
    def state_extraction():
        db = Base("scnscraper/abap.pydb")
        if db.exists():
            db.open()
            record = db(type = "Question")
            print("# discussion scraped: " + str(record.__len__()))
            print("Answered: " + str(db(resolve = "Answered.").__len__()))
            print("Answered with solution: "+ str(db(resolve = "solution").__len__()))
            print("Not Answered: " + str(db(resolve = "Not Answered.").__len__()))
            print("Assumed Answered: " + str(db(resolve = "Assumed Answered.").__len__()))

    # Pre-decorator idiom for declaring a static method.
    state_extraction = staticmethod(state_extraction)
Exemplo n.º 11
0
class DataStoring():
    """Persists scraped SCN threads to a JSON-like dump file and to a
    pydblite database, and tracks the scraper's page index on disk."""

    def __init__(self):
        """Touch the dump file and open (or create) the database."""
        # Touch the JSON dump file so later appends always succeed.
        self.out_file = open("scnscraper/abap.json", "a")
        self.out_file.close()
        self.db = Base("scnscraper/abap.pydb")
        if self.db.exists():
            self.db.open()
        else:
            self.db.create('url', 'uid', 'type', 'author', 'title',
                           'date_time', 'tags', 'views', 'answers', 'resolve',
                           'upvotes', 'text')

    def insert_items_into_db(self, threads):
        """Insert every scraped thread into the db, committing once at end.

        Threads that fail to encode are skipped with a warning.
        """
        # FIX: dropped the dead `item = SapItem()` store that was
        # immediately overwritten by the loop value.
        for item in threads:
            try:
                # Every field is stringified before insertion.
                self.db.insert(url=str(item["url"]),
                               uid=str(item["uid"]),
                               type=str(item["type"]),
                               author=str(item["author"]),
                               title=str(item["title"]),
                               date_time=str(item["date_time"]),
                               tags=str(item["tags"]),
                               views=str(item["views"]),
                               answers=str(item["answers"]),
                               resolve=str(item["resolve"]),
                               upvotes=str(item["upvotes"]),
                               text=str(item["text"]))
            except UnicodeEncodeError:
                # Best-effort: skip records that cannot be encoded.
                print("Unicode Encode Exception!")
        # Save changes on disk.
        self.db.commit()

    def threads_to_str(self, threads):
        """Serialize *threads* into the "[ { ... }\n ... ]\n\n" dump format.

        Returns "" when *threads* is empty.
        """
        if len(threads) == 0:
            return ""
        out_string = "[ "
        # FIX: dropped the dead `item = SapItem()` store (see
        # insert_items_into_db).
        for item in threads:
            try:
                out_string += ("{ url: '" + str(item["url"]) + "', "
                               "uid: '" + str(item["uid"]) + "', "
                               "type: '" + str(item["type"]) + "', "
                               "author: '" + str(item["author"]) + "', "
                               "title: '" + str(item["title"]) + "', "
                               "date_time: '" + str(item["date_time"]) + "', "
                               "tags: '" + str(item["tags"]) + "', "
                               "views: '" + str(item["views"]) + "', "
                               "answers: '" + str(item["answers"]) + "', "
                               "resolve: '" + str(item["resolve"]) + "', "
                               "upvotes: '" + str(item["upvotes"]) + "', "
                               "text: '" + str(item["text"]) + "' }\n")
            except UnicodeEncodeError:
                print("Unicode Encode Exception!")

        out_string += " ]\n\n"
        return out_string

    def insert_items_into_file(self, threads):
        """Append the serialized *threads* to the JSON dump file."""
        try:
            self.out_file = open("scnscraper/abap.json",
                                 "a")  # open in append mode
            # Convert into string and append to the dump file.
            self.out_file.write(self.threads_to_str(threads))
            self.out_file.close()
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; keep the best-effort behaviour otherwise.
        except Exception:
            print('Exception in writing file')
            self.out_file.close()

    def read_index_from_file(self):
        """Return the persisted page index, creating the file with the
        default value 2 when it does not exist yet."""
        if os.path.exists('scnscraper/index.txt'):
            # FIX: removed the redundant f.close() inside the with block.
            with open('scnscraper/index.txt') as f:
                index = int(f.readline())
        else:
            index = 2
            # FIX: context manager instead of manual open/close.
            with open('scnscraper/index.txt', 'w') as f:
                f.write(str(index))
        return index

    def write_index_into_file(self, i):
        """Persist the current page index *i*."""
        # FIX: context manager instead of manual open/close.
        with open('scnscraper/index.txt', 'w') as f:
            f.write(str(i))

    def from_json_to_db(self):
        """Rebuild the database from the JSON-like dump file.

        Buffers lines until a record terminator (" }\n") is seen, then
        regex-parses the record and inserts the 12 captured fields.
        """
        thread = ''
        db = Base("scnscraper/abap.pydb", save_to_file=True)
        # Create a new base with field names; override discards old data.
        db.create('url',
                  'uid',
                  'type',
                  'author',
                  'title',
                  'date_time',
                  'tags',
                  'views',
                  'answers',
                  'resolve',
                  'upvotes',
                  'text',
                  mode='override')
        # FIX: removed the unused `i = 0` local.
        # NOTE(review): 'scnsraper' looks like a typo of 'scnscraper' — kept
        # as-is because the dump may actually live at this path; confirm.
        with open('scnsraper/threads.json', 'r') as file:
            for line in file:
                # A line ending in " }\n" closes the current record.
                if (line.endswith(" }\n")):
                    thread += line
                    # Capture the 12 fields; group 12 (text) may span lines.
                    tokens = re.search(
                        r"url:\s'(.*?)',\suid:\s'(.*?)',\stype:\s'(.*?)',\sauthor:\s'(.*?)',\stitle:\s'(.*?)',\sdate_time:\s'(.*?)',\stags:\s'(.*?)',\sviews:\s'(.*?)',\sanswers:\s'(.*?)',\sresolve:\s'(.*?)',\supvotes:\s'(.*?)', text:\s'((.|\n)*)'\s}",
                        str(thread))
                    if tokens is not None:
                        db.insert(url=tokens.group(1),
                                  uid=tokens.group(2),
                                  type=tokens.group(3),
                                  author=tokens.group(4),
                                  title=tokens.group(5),
                                  date_time=tokens.group(6),
                                  tags=tokens.group(7),
                                  views=tokens.group(8),
                                  answers=tokens.group(9),
                                  resolve=tokens.group(10),
                                  upvotes=tokens.group(11),
                                  text=tokens.group(12))
                        db.commit()
                    print('\n--------------------------------------------\n')
                    thread = ''
                # A line starting with " ]" marks the end of a page.
                if (line.startswith(" ]")):
                    print("new page")
                    thread = ''
                # Otherwise keep accumulating lines into the record buffer.
                if (line.endswith('\n') and (not line.startswith(" ]\n\n"))
                        and (not line.endswith(" }\n"))):
                    thread += line

    # FIX: decorator instead of the post-definition
    # `state_extraction = staticmethod(state_extraction)` reassignment.
    @staticmethod
    def state_extraction():
        """Print summary statistics about the scraped discussions."""
        db = Base("scnscraper/abap.pydb")
        if db.exists():
            db.open()
            record = db(type="Question")
            print("# discussion scraped: " + str(len(record)))
            print("Answered: " + str(len(db(resolve="Answered."))))
            print("Answered with solution: " +
                  str(len(db(resolve="solution"))))
            print("Not Answered: " +
                  str(len(db(resolve="Not Answered."))))
            print("Assumed Answered: " +
                  str(len(db(resolve="Assumed Answered."))))