예제 #1
0
    def test_replication_close_open(self):
        """Check that a replicated read-only handle tracks writer flushes.

        A record added through the writer must not change ``len(reader)``
        until the writer has flushed; after the flush the reader reports
        the new count.
        """
        class FS(xodb.Schema):
            language = 'en'
            x = xodb.Integer

        class F(object):
            x = 9

        # Temporary writable database with F mapped onto its schema.
        writer = xodb.temp()
        writer.map(F, FS)

        first = F()
        writer.add(first)
        path = writer.db_path
        writer.flush()

        # Second, read-only handle on the same path, opened as replicated.
        reader = xodb.open(path, writable=False, replicated=True)
        assert len(reader) == 1

        second = F()
        writer.add(second)
        # Not flushed yet: the reader still reports the old count.
        assert len(reader) == 1
        writer.flush()
        assert len(reader) == 2
예제 #2
0
    def test_replication_close_open(self):
        """Verify reader/writer visibility across flushes on one database.

        The reader is opened read-only with ``replicated=True`` on the
        writer's path. Its length is asserted to stay at 1 after an
        unflushed add and to become 2 once the writer flushes.
        """
        class FS(xodb.Schema):
            language = 'en'
            x = xodb.Integer

        class F(object):
            x = 9

        def _make_writer():
            # Fresh temporary database with the F -> FS mapping registered.
            temp_db = xodb.temp()
            temp_db.map(F, FS)
            return temp_db

        writer = _make_writer()
        initial = F()
        writer.add(initial)
        path = writer.db_path
        writer.flush()

        reader = xodb.open(path, writable=False, replicated=True)
        assert len(reader) == 1

        extra = F()
        writer.add(extra)
        # The pending add is invisible to the reader before the flush.
        assert len(reader) == 1
        writer.flush()
        assert len(reader) == 2
예제 #3
0
파일: pump.py 프로젝트: michelp/wikiparser
def wikit():
    num = 0
    if len(sys.argv) > 1:
        db = xodb.open('test_et4', writable=False)
        last = db.backend.get_doccount()
    with bz2.BZ2File('enwiki-latest-pages-articles.xml.bz2') as f:
        current = None
        batch = []
        for event, element in etree.iterparse(f, events=('start', 'end')):
            if event == 'start':
                if element.tag.endswith('page'):
                    current = Page()
                    redirect = False
                if element.tag.endswith('title'):
                    current.title = element.text
                if element.tag.endswith('text'):
                    if element.text:
                        current.text = element.text
                if element.tag.endswith('redirect'):
                    redirect = True
            if event == 'end':
                if element.tag.endswith('page'):
                    # num += 1
                    # if num < last:
                    #     print 'Skipping  ', num, " ", last, " ", current.title
                    #     element.clear()
                    #     continue
                    if current.text and current.title and not redirect:
                        if not current.title.startswith(
                            ('Template:', 'Category:', 'File:')):
                            print "Pumping ", current.title
                            try:
                                batch.append(current)
                                if len(batch) > BATCH_SIZE:
                                    source.send(dumps(batch))
                                    batch = []
                            except Exception:
                                log.exception('wtf')
                    element.clear()
예제 #4
0
파일: pump.py 프로젝트: michelp/wikiparser
def wikit():
    num = 0
    if len(sys.argv) > 1:
        db = xodb.open('test_et4', writable=False)
        last = db.backend.get_doccount()
    with bz2.BZ2File('enwiki-latest-pages-articles.xml.bz2') as f:
        current = None
        batch = []
        for event, element in etree.iterparse(f, events=('start', 'end')):
            if event == 'start':
                if element.tag.endswith('page'):
                    current = Page()
                    redirect = False
                if element.tag.endswith('title'):
                    current.title = element.text
                if element.tag.endswith('text'):
                    if element.text:
                        current.text = element.text
                if element.tag.endswith('redirect'):
                    redirect = True
            if event == 'end':
                if element.tag.endswith('page'):
                    # num += 1
                    # if num < last:
                    #     print 'Skipping  ', num, " ", last, " ", current.title
                    #     element.clear()
                    #     continue
                    if current.text and current.title and not redirect:
                        if not current.title.startswith(('Template:', 'Category:', 'File:')):
                            print "Pumping ", current.title
                            try:
                                batch.append(current)
                                if len(batch) > BATCH_SIZE:
                                    source.send(dumps(batch))
                                    batch = []
                            except Exception:
                                log.exception('wtf')
                    element.clear()
예제 #5
0
def run(section, worker_cls):
    """Configure and run a worker described by a config file.

    The config file path is taken from ``sys.argv[1]``. The options
    ``worker_url``, ``log_file`` and ``db_path`` are read from *section*.
    ``log_file`` is %-formatted with the process name, which comes from the
    ``SUPERVISOR_PROCESS_NAME`` environment variable (default ``'tester'``).

    Args:
        section: Name of the config section to read.
        worker_cls: Callable invoked as ``worker_cls(name, worker_url, db)``;
            ``run()`` is then called on the object it returns.

    Raises:
        SystemExit: With code -1 when no config file argument was given.
    """
    # sys.argv always holds at least the program name, so a config file is
    # only present when there are two or more entries. Validate before any
    # set-up so a missing argument yields the usage message, not IndexError.
    if len(sys.argv) < 2:
        print("usage: %s config_file" % sys.argv[0])
        sys.exit(-1)

    import xodb
    from xodb.tools.signals import register_signals
    register_signals()
    name = os.environ.get('SUPERVISOR_PROCESS_NAME', 'tester')
    from ConfigParser import ConfigParser

    config = ConfigParser()
    config.read(sys.argv[1])

    worker_url = config.get(section, 'worker_url')
    log_file = config.get(section, 'log_file')
    db_path = config.get(section, 'db_path')

    logging.basicConfig(filename=log_file % name, level=logging.DEBUG)
    db = xodb.open(db_path, writable=False)
    w = worker_cls(name, worker_url, db)
    logging.debug('Running worker on %s', worker_url)
    w.run()
예제 #6
0
def run(section, worker_cls):
    """Start a worker using settings from the config file in ``sys.argv[1]``.

    Reads ``worker_url``, ``log_file`` and ``db_path`` from *section* of the
    config file, sets up file logging at DEBUG level (``log_file`` is
    %-formatted with the process name), opens the database read-only and
    runs the worker.

    Args:
        section: Config section holding the worker settings.
        worker_cls: Factory called as ``worker_cls(name, worker_url, db)``;
            the result must provide a ``run()`` method.

    Raises:
        SystemExit: Exit code -1 if the config file argument is missing.
    """
    # The original test ``len(sys.argv) < 1`` could never be true because
    # argv[0] is always present. Require the extra argument up front so a
    # missing path prints the usage line instead of raising IndexError.
    if len(sys.argv) < 2:
        print("usage: %s config_file" % sys.argv[0])
        sys.exit(-1)

    import xodb
    from xodb.tools.signals import register_signals
    register_signals()
    # Process name as assigned by supervisor, with a fallback for manual runs.
    name = os.environ.get('SUPERVISOR_PROCESS_NAME', 'tester')
    from ConfigParser import ConfigParser

    config = ConfigParser()
    config.read(sys.argv[1])

    worker_url = config.get(section, 'worker_url')
    log_file = config.get(section, 'log_file')
    db_path = config.get(section, 'db_path')

    logging.basicConfig(filename=log_file % name, level=logging.DEBUG)
    db = xodb.open(db_path, writable=False)
    w = worker_cls(name, worker_url, db)
    logging.debug('Running worker on %s', worker_url)
    w.run()
예제 #7
0
import atexit
import logging
import zmq
import xodb
import schemas
from cPickle import loads

# Sink side of the pipeline: receive batches of objects over a ZeroMQ PULL
# socket and add each one to the 'test_et4' database.
db = xodb.open('test_et4', writable=True)
db.map(schemas.Page, schemas.PageSchema)

# Call db.flush when the interpreter exits.
atexit.register(db.flush)

logging.basicConfig(level=logging.DEBUG)

# PULL socket bound to localhost port 9123, receive high-water mark 1000.
ctx = zmq.Context()
sink = ctx.socket(zmq.PULL)
sink.setsockopt(zmq.RCVHWM, 1000)
sink.bind('tcp://127.0.0.1:9123')

log = logging.getLogger(__name__)

# Receive loop; runs until the process is terminated.
# NOTE(security): ``loads`` is cPickle.loads, and unpickling can execute
# arbitrary code. The socket is bound to 127.0.0.1 only; do not expose it to
# untrusted senders without switching to a safe serialization format.
while True:
    batch = loads(sink.recv())
    for o in batch:
        print o.title
        try:
            db.add(o)
        except Exception:
            # A failing object is logged and skipped; the loop continues.
            log.exception('error')
            
예제 #8
0
import bz2
import xodb
import logging
from lxml import etree

from schemas import Page, PageSchema

# Module-level logger and database handle used by the parsing loop below.
log = logging.getLogger(__name__)
db = xodb.open('/home/michel/xap/ms_test2')
db.map(Page, PageSchema)

# NOTE(review): not referenced in the visible part of this script.
redirects = {}

with bz2.BZ2File('enwiki-latest-pages-articles.xml.bz2') as f:
    current = None
    for event, element in etree.iterparse(f, events=('start', 'end')):
        if event == 'start':
            if element.tag.endswith('page'):
                current = Page()
                redirect = False
            if element.tag.endswith('title'):
                current.title = element.text
            if element.tag.endswith('text'):
                if element.text:
                    current.text = element.text
            if element.tag.endswith('redirect'):
                redirect = True
        if event == 'end':
            if element.tag.endswith('page'):
                if current.text and current.title and not redirect:
                    try:
예제 #9
0
import bz2
import xodb
import logging
from lxml import etree

from schemas import Page, PageSchema


# Module-level logger and database handle used by the parsing loop below.
log = logging.getLogger(__name__)
db = xodb.open('/home/michel/xap/ms_test2')
db.map(Page, PageSchema)


# NOTE(review): not referenced in the visible part of this script.
redirects = {}


with bz2.BZ2File('enwiki-latest-pages-articles.xml.bz2') as f:
    current = None
    for event, element in etree.iterparse(f, events=('start', 'end')):
        if event == 'start':
            if element.tag.endswith('page'):
                current = Page()
                redirect = False
            if element.tag.endswith('title'):
                current.title = element.text
            if element.tag.endswith('text'):
                if element.text:
                    current.text = element.text
            if element.tag.endswith('redirect'):
                redirect = True
        if event == 'end':
예제 #10
0
from pprint import pprint
from collections import OrderedDict
import xodb
from schemas import Page, PageSchema
import json


# Read-only handle on the 'test_et4' database, shared by the helpers below.
db = xodb.open('test_et4', writable=False)
db.map(Page, PageSchema)


def e(q, l, count=10, limit=1025):
    """Print the result of ``db.expand`` for query *q* as indented JSON.

    Every item of *l* is turned into the tuple ``(item, count, limit, 1, 1)``
    and the resulting list is passed to ``db.expand`` together with
    ``language='en'``.
    """
    specs = [(item, count, limit, 1, 1) for item in l]
    expansion = db.expand(q, specs, language='en')
    print(json.dumps(expansion, indent=4))


def q(terms, limit=20):
    """Print titles of entity matches for *terms* when enough are estimated.

    *terms* is trimmed, lower-cased and has whitespace runs collapsed to
    single spaces before being wrapped in an ``entity:"..."`` query. Results
    (at most *limit*) are printed only when ``db.estimate`` exceeds 20.

    NOTE(review): this may be only the first part of the function; the
    definition could continue beyond the visible portion.
    """
    # is it an entity?
    terms = " ".join(terms.strip().lower().split())
    entity_term = 'entity:"%s"' % terms
    # The threshold 20 is hard-coded and independent of ``limit``.
    if db.estimate(entity_term) > 20:
        for r in db.query(entity_term, language='en', limit=limit):
            print r.title