示例#1
0
    def db_finish(self):
        """Mark this scan as complete and persist what it collected.

        Updates this scan's row in the ``scan`` table (triple count taken
        from ``self.c``, completion time, ``status=1``), inserts one
        ``term_use`` row per ``(term, use)`` key of ``self.term_uses``,
        inserts one ``trackers`` row per primary and per backup tracker,
        and finally calls ``obsolete_old_scans`` for this data source.
        """
        # Bind the id as a query variable, the same way the other queries
        # in this file do, instead of concatenating it into the SQL text.
        self.db.update('scan', where='id = $scan_id',
                       vars={'scan_id': self.id},
                       triples=self.c,
                       time_complete=time.time(),
                       status=1)

        # Keys are (term, use) pairs; the mapped value is stored as the
        # row's count.
        for (term, use), count in self.term_uses.items():
            (ns, local) = splitter.split(term)
            self.db.insert('term_use',
                           local=local,
                           namespace_id=irimap.to_id(self.db, ns),
                           scan_id=self.id,
                           type=use,
                           count=count)

        # Primary trackers are written first, then the backups; the two
        # groups differ only in the is_primary flag.
        for (trackers, is_primary) in ((self.primary_trackers, True),
                                       (self.backup_trackers, False)):
            for t in trackers:
                self.db.insert('trackers',
                               scan_id=self.id,
                               tracker_id=irimap.to_id(self.db, t),
                               is_primary=is_primary)

        obsolete_old_scans(self.db, self.data_source_iri, self.id)
示例#2
0
def list_(term, timecode=(-1), db=None):   # , limit, offset):
    """

    see http://dev.mysql.com/doc/refman/5.0/en/select.html

    for us, timecodes ARE scanids.   A new scanid == a new change.

    are we going to need to SORT when we do limit/offset?   pagerank?

    """
    if db is None:
        db = dbconn.Connection()

    (ns, local) = splitter.split(term)
    ns_id = irimap.to_id(db, ns)

    if timecode == -1:
        timecode = latest_timecode(db)
        print "list using latest timecode:", timecode
    else:
        if timecode < min_timecode(db):
            raise GarbageTimecode()

    for r in db.query('select text, type from term_use, scan, iri where scan_id <= $timecode and obsoleted_by > $timecode and namespace_id=$ns_id and scan.id=scan_id and status=1 and local=$local and iri.id=source_id', vars=locals()):
        yield unicode(r.type)+" "+unicode(r.text)
示例#3
0
def obsolete_old_scans(db, source, scan_id):
    """Mark earlier, still-current scans of *source* as obsoleted by *scan_id*.

    Every ``scan`` row for this source whose id is lower than *scan_id* and
    whose ``obsoleted_by`` still equals ``max_timecode`` gets its
    ``obsoleted_by`` set to *scan_id*.
    """
    # Pass only the values the where-clause needs, keyed by the placeholder
    # names it uses.  This avoids a local called ``max`` shadowing the
    # builtin and keeps the db object itself out of the query variables.
    params = {
        'scan_id': scan_id,
        'source_id': irimap.to_id(db, source),
        'max': max_timecode,
    }
    db.update('scan',
              where='id < $scan_id and source_id = $source_id and obsoleted_by = $max',
              obsoleted_by=scan_id,
              vars=params)
示例#4
0
def db_show(db, source, ns):
    good = get_latest_scan(db, source)
    scan_id = good.id
    ns_id = irimap.to_id(db, ns)
    print `good`
    for r in db.select('term_use', 
                       where='scan_id=$scan_id and namespace_id=$ns_id', 
                       vars=locals()):
        print r.count, r.local
示例#5
0
 def db_start(self):
     """Create the ``scan`` row for this scan and remember its id.

     Resolves ``self.data_source_iri`` to ``self.source_id``, inserts a new
     row into ``scan`` and stores the value returned by the insert in
     ``self.id``.
     """
     debug('scan', 'database connection started')
     self.source_id = irimap.to_id(self.db, self.data_source_iri)

     new_scan = dict(
         source_id=self.source_id,
         time_begun=self.start,
         triples=0,
         last_modified=self.last_modified,
         status=0,  # or just use time_completed?
         obsoleted_by=max_timecode,
     )
     self.id = self.db.insert('scan', **new_scan)

     debug('scan', 'database record created', self.id)
示例#6
0
def report(source, ns):
    """
    Return a report [in std format?] of the given source, restricted to
    the entries in the given namespace.

    Opens its own dbconn.Connection(), obtains a scan of *source* through
    ensure_scanned, and returns a unicode string with one
    "<count> <type> <local>" line per matching term_use row (an empty
    unicode string when there are none).
    """
    db = dbconn.Connection()
    scan = ensure_scanned(db, source)
    params = {'scan_id': scan.id, 'ns_id': irimap.to_id(db, ns)}
    rows = db.select('term_use',
                     where="scan_id=$scan_id and namespace_id=$ns_id",
                     vars=params)
    # Join once instead of growing the string with += on every row.
    out = u"".join("%d %s %s\n" % (r.count, r.type, r.local) for r in rows)
    # Drops the local reference only; presumably meant to let the
    # connection be released -- confirm against dbconn.
    del db
    return out
示例#7
0
def get_latest_scan(db, source, all_scan_ids=None):
    '''Return the completed scan record of this source with the highest id.

    "Completed" means status == 1.  Raises NoGoodScan when the source has
    no such scan.

    If a list all_scan_ids is supplied, the id of every scan of this
    source (completed or not) is appended to it.
    '''
    source_id = irimap.to_id(db, source)
    if all_scan_ids is None:
        all_scan_ids = []

    best_id = -1
    best = None
    rows = db.select('scan', where='source_id=$source_id',
                     vars={'source_id': source_id})
    for row in rows:
        if row.status == 1 and row.id > best_id:
            best_id = row.id
            # Keep an independent copy; presumably the row object should
            # not be relied on after iteration moves on -- confirm.
            best = copy.deepcopy(row)
        all_scan_ids.append(row.id)

    if best_id > -1:
        return best
    raise NoGoodScan()