Exemplo n.º 1
0
    def add(self, sa=None, href=None, name=None, ext=None, public=False,
            tags=None, opts=None):
        """Create a new Bookmark for *href*, or update the existing one.

        Generator: yields one dict containing either ``err`` (no locator
        could be resolved for *href*) or ``bm`` (the created/updated
        Bookmark instance). The bookmark is added to the session *sa*,
        and the session is committed when ``opts.rsr_auto_commit`` is set.
        """
        # Resolve the Locator record(s) for the given reference.
        lctrs = [r['lctr'] for r in self.assert_locator(sa=sa, href=href, opts=opts)]
        if not lctrs:
            yield dict(err="XXX Missed ref")
            return
        lctr = lctrs.pop()
        assert lctr
        bm = Bookmark.fetch((Bookmark.ref == lctr,), _sa=sa, exists=False)
        if bm:
            # Update mutable fields in place.
            # XXX: name must be unique; renaming/re-ref'ing is disabled
            # until clash handling is in place.
            if ext != bm.extended:
                bm.extended = ext
            if public != bm.public:
                bm.public = public
            if tags != bm.tags:
                bm.tags = tags
            # NOTE(review): naive local time — presumably intentional here,
            # matching the rest of the model; confirm.
            bm.last_update = datetime.now()
        else:
            # No bookmark for this locator yet; refuse creation when the
            # requested name already belongs to an unrelated bookmark.
            bm = Bookmark.fetch((Bookmark.name == name,), _sa=sa, exists=False)
            if bm:
                log.err("Duplicate name %s", bm.name)
                bm = None
            else:
                bm = Bookmark(
                        name=name,
                        ref=lctr,
                        extended=ext,
                        public=public,
                        tags=tags,
                    )
                bm.init_defaults()
        if bm:
            assert bm.ref
            yield dict(bm=bm)
            sa.add(bm)
        # BUG FIX: opts defaults to None; guard before attribute access so
        # the documented default does not raise AttributeError.
        if opts and opts.rsr_auto_commit:
            sa.commit()
Exemplo n.º 2
0
def cmd_dlcs_import(opts, settings):
    """Import a delicious-style XML bookmark export into the database.

    First pass walks every post in the export: validates the URL (requires
    a network domain), creates or updates the Locator and Bookmark records,
    and tallies per-domain and per-tag frequencies. After committing, Domain
    and Tag records are created for entries whose frequency reaches the
    configured ``domain_offset`` / ``tag_offset`` thresholds (``0`` means
    "max frequency only", ``-1`` derives a fraction of the max).

    TODO: build into generic import/export (ie. complete set) so heuristics
        can update all stats each import, or find some way to fragment the
        dataset.
    """
    importFile = opts.args.FILE
    # BUG FIX: close the input file deterministically instead of leaking it.
    with open(importFile) as f:
        data = dlcs_parse_xml(f.read())
    sa = Locator.get_session('default', opts.flags.dbref)
    tags_stat = {}
    domains_stat = {}
    # First pass: validate, track stats and create Locator records where
    # missing.
    for post in data['posts']:
        href = post['href']
        dt = datetime.strptime(post['time'], ISO_8601_DATETIME)
        # Validate URL: a post without a network location is skipped.
        url = urlparse(href)
        domain = url[1]
        if not domain:
            log.std("Ignored domainless (non-net?) URIRef: %s", href)
            continue
        # Raw string: `\.` in a non-raw literal is a py3 DeprecationWarning.
        assert re.match(r'[a-z0-9]+(\.[a-z0-9]+)*', domain), domain
        # Get or create the Locator for this URL.
        lctr = Locator.fetch((Locator.ref == href,), exists=False)
        if lctr:
            if lctr.date_added != dt:
                lctr.date_added = dt
                sa.add(lctr)
        else:
            lctr = Locator(
                    global_id=href,
                    ref=href,
                    date_added=dt  # reuse the timestamp parsed above
                )
            lctr.init_defaults()
            log.std("new: %s", lctr)
            sa.add(lctr)
        # Get or create the Bookmark attached to this Locator.
        bm = Bookmark.fetch((Bookmark.ref_id == lctr.lctr_id,), exists=False)
        if bm:
            if bm.date_added != dt:
                bm.date_added = dt
                sa.add(bm)
            if bm.ref_id != lctr.lctr_id:
                bm.ref = lctr
                sa.add(bm)
        else:
            # Bookmark names must be unique; skip posts whose description
            # is already taken by another bookmark.
            bm = Bookmark.fetch((Bookmark.name == post['description'],), exists=False)
            if bm:
                log.std("Name already exists: %r" % post['description'])
                continue
            bm = Bookmark(
                    ref=lctr,
                    name=post['description'],
                    extended=post['extended'],
                    tags=post['tag'].replace(' ', ', '),
                    date_added=dt  # reuse the timestamp parsed above
                )
            bm.init_defaults()
            log.std("new: %s", bm)
            sa.add(bm)
        # Track domain frequency.
        domains_stat[domain] = domains_stat.get(domain, 0) + 1
        # Track tag frequency.
        for tag in post['tag'].split(' '):
            tags_stat[tag] = tags_stat.get(tag, 0) + 1
    log.std("Checked %i locator references", len(data['posts']))
    sa.commit()
    # Prepare domain stats.
    avgDomainFreq = sum(domains_stat.values()) / (len(domains_stat) * 1.0)
    hiDomainFreq = max(domains_stat.values())
    log.std("Found domain usage (max/avg): %i/%i", hiDomainFreq, avgDomainFreq)
    domains = 0
    domainOffset = int(opts.flags.domain_offset)
    if domainOffset == 0:
        # BUG FIX: was `hiFreq`, a NameError (only defined further below in
        # the tag-stats section); the domain counterpart is `hiDomainFreq`.
        domainOffset = hiDomainFreq
    elif domainOffset == -1:
        domainOffset = round(hiDomainFreq * 0.2)
    log.std("Setting domain-offset: %i", domainOffset)
    # Get or create Domain records for frequently-seen domains.
    for domain in domains_stat:
        freq = domains_stat[domain]
        if freq >= domainOffset:
            domains += 1
            domain_record = Domain.fetch((Domain.name == domain,), exists=False)
            if not domain_record:
                domain_record = Domain(name=domain)
                domain_record.init_defaults()
                sa.add(domain_record)
    sa.commit()
    log.std("Checked %i domains", len(domains_stat))
    log.std("Tracking %i domains", domains)
    # Prepare tag stats.
    avgFreq = sum(tags_stat.values()) / (len(tags_stat) * 1.0)
    hiFreq = max(tags_stat.values())
    log.std("Found tag usage (max/avg): %i/%i", hiFreq, avgFreq)
    tagOffset = int(opts.flags.tag_offset)
    if tagOffset == 0:
        tagOffset = hiFreq
    elif tagOffset == -1:
        tagOffset = round(hiFreq * 0.1)
    log.std("Setting tag-offset: %i", tagOffset)
    # Get or create Tag records for frequently-seen tags.
    tags = 0
    for tag in tags_stat:
        freq = tags_stat[tag]
        if not re.match(r'[A-Za-z0-9-]+', tag):
            log.std("Non-std tag %s", tag)
        if freq >= tagOffset:
            tags += 1
            # NOTE(review): lookup uses Node while creation uses Tag —
            # presumably Tag is a Node subtype sharing the name column;
            # confirm against the model.
            t = Node.fetch((Node.name == tag,), exists=False)
            if not t:
                t = Tag(name=tag)
                t.init_defaults()
                log.std("new: %s", t)
                sa.add(t)
            # store frequencies
            # TODO tags_freq
    log.std("Checked %i tags", len(tags_stat))
    log.std("Tracking %i tags", tags)
    sa.commit()