示例#1
0
def optimize(encoder):
    """Write the compressed posting lists to 'back_index.bin' and build a
    bucketed term dictionary in 'dict.bin' with a bucket offset table in
    'dict_head.bin'.

    encoder -- object whose compress(values, fout) writes the encoded
               values to fout and returns the number of bytes written.
    """
    m, ids = index.load()

    # NOTE(review): max(len(m) / 100 + 1, len(m)) equals len(m) for any
    # len(m) >= 2, so effectively one bucket per key; the len(m)/100 + 1
    # term suggests ~100 keys per bucket was intended — confirm whether
    # this should be min(...).  (Python 2: '/' is integer division here.)
    n_of_buckets = max(len(m) / 100 + 1, len(m))
    res = [bytearray() for i in xrange(n_of_buckets)]

    with open('back_index.bin', 'wb') as fout:

        # ptr is the running byte offset of the next posting list in fout
        ptr = 0
        for key in sorted(m.keys()):
            # m[key] is a byte string of little-endian uint32s; decode it
            # 4 bytes at a time into a list of ints
            values = map(lambda x: struct.unpack('I', x)[0],
                         [x for x in chunks(m[key], 4)])
            # compress() writes to fout and returns the encoded length
            l = encoder.compress(values, fout)
            #res[key] = (ptr, l)
            # dictionary record: three packed ints (key, offset, length)
            # appended to the key's bucket.  NOTE(review): packed with
            # signed 'i' here but the head table below uses unsigned 'I'
            # — confirm the reader expects this mix.
            res[key % n_of_buckets].extend(struct.pack('i', key))
            res[key % n_of_buckets].extend(struct.pack('i', ptr))
            res[key % n_of_buckets].extend(struct.pack('i', l))
            ptr += l

    # check compression/decompression function
    # with open('back_index.bin', 'rb') as f:
    # 	for key in m:
    # 		rr = simple9.decompress(f, res[key][0], res[key][1])
    # 		if not (set(sorted(m[key])) == rr):
    # 			print('ERROR')
    # 			print(sorted(m[key]))
    # 			print(rr)
    # 			print(res[key])
    # 			raise Exception()

    dct = open('dict.bin', 'wb')
    dct_head = open('dict_head.bin', 'wb')

    # it is the running byte offset of each bucket within dict.bin
    it = 0

    # head layout: bucket count, then n_of_buckets+1 offsets (the final
    # offset doubles as the total size of dict.bin)
    dct_head.write(struct.pack('I', n_of_buckets))

    for i in xrange(n_of_buckets):
        dct_head.write(struct.pack('I', it))
        dct.write(res[i])
        it += len(res[i])

    dct_head.write(struct.pack('I', it))

    dct_head.close()
    dct.close()
示例#2
0
def sync_index(from_file=None):
    """Synchronise the local database against the remote source index.

    from_file -- path to an already-downloaded index file; when None the
                 index is downloaded first via download_index().
    """
    if from_file is None:
        from_file = download_index()

    # Latin-1 matches the encoding of the published index pages.
    with open(from_file, 'r', encoding="Latin-1") as index_file:
        index_data = index.load(index_file)

    changed_or_new_sources, deleted_sources = compare_index(index_data)

    for s in changed_or_new_sources:
        sync_source(s)

    # index is saved in sync_sources;
    # no need to explicitly save here

    # delete removed sources
    # TODO backup also?
    if deleted_sources:
        bulkOp = db.sources.initialize_unordered_bulk_op()
        for s in deleted_sources:
            # BUG FIX: original referenced the undefined name
            # 'deleted_source'; the loop variable is 's', so any
            # deletion raised NameError.
            bulkOp.find({"index.code": s["code"]}).remove()
            #bulkOp.find({"_id": s["_id"]}).remove()
        bulk_result = bulkOp.execute()
        print(bulk_result)
示例#3
0
import index
from random import randrange as rnd

index.load()

# For six rounds of increasing size: pick random checkpoint pictures from
# the top-rated pool, chain them into a path, and export the result.
for n in range(6):
	# top-rated pictures, pool grows by 2 each round (5, 7, 9, ...)
	stars = sorted(index.pictures(), key=lambda p:p.rating, reverse=True)[:5+n*2]
	query = []

	print 'Checkpoints:'
	# draw 3+n distinct random checkpoints (removal prevents repeats)
	for i in range(3+n):
		p = stars[rnd(len(stars))]
		query.append(p)
		stars.remove(p)
		print '{}. {}'.format(i+1, p.name)

	path = index.chain(query=query)

	print '\nPath:'
	print ' > '.join([p.name for p in path])

	# checkpoints that actually appear in the computed path, listed
	# before a None separator in the exported HTML
	reference = [p for p in query if p in path]
	index.export_html(reference+[None]+path, 'path{}.html'.format(n))
示例#4
0
	  # handlers implemented by browser
		f = handlers.get(event.keycode)
		if f:
			f(browser, event.keycode)
		if browser.redraw:
			browser.update(event.keycode)
	# print time(), 'leave keyhandler'

   #if len(browser.img.relates.keys()) > 0:
		#browser.img = browser.img.relates.keys()[0]
   #print browser.img.location



# Ok Go
index.load(recover=True)

# create tkinter window
root = tk.Tk()
root.title('tumblr img browser')
# make the root window the size of the image
root.geometry("%dx%d+%d+%d" % (1024, 740, 0, 0))
root.bind("<Key>", key)
# instantiate browser class
browser = Browser(root)

# screen size:
w = root.winfo_screenwidth()
h = root.winfo_screenheight()
print 'screen size {}x{}'.format(w,h)
# start the event loop
示例#5
0
import os
import urllib
import urllib.parse
import urllib.request

import index
import source

# Parse the saved sources index page and mirror every linked source file
# into samples/source/.
index_baseurl = "http://www.ibiblio.org/contradance/index/"

# Latin-1 matches the encoding of the saved index page.
with open("samples/index/Michael Dyck's Contradance Index_ Sources.html", encoding="Latin-1") as indexfile:
    indexdata = index.load(indexfile)

# BUG FIX: urljoin/urlparse live in urllib.parse, and a bare
# 'import urllib' does not load the urllib.request/urllib.parse
# submodules, so urllib.request.urljoin(...) raised AttributeError.
set_to_download = set()

for row in indexdata:
    code_url = urllib.parse.urljoin(index_baseurl, row['code_link'], allow_fragments=False)
    set_to_download.add(code_url)

for url in set_to_download:
    # keep only the final path component as the local filename
    filename = os.path.basename(urllib.parse.urlparse(url).path)
    urllib.request.urlretrieve(url, "samples/source/"+filename)
    
示例#6
0
# -*- coding: utf-8 -*- 
# Report the blogs behind the 40 best-rated pictures: image counts, link
# counts, average image rating and distributed score.
import index as ix

ix.load()

# pictures and blogs sorted ascending by rating
imgs=sorted(ix.picture.pictures(), key=lambda p:p.rating)
blogs=sorted(ix.tumblr.blogs(), key=lambda t:t.avg_img_rating())

print 'distributing blog scores...'
scores=ix.tumblr.dist_scores()
print 'sorting blogs by score'
hi=sorted(scores.items(), key=lambda t:t[1])

# sum the ratings of the 40 best pictures per originating blog
stars = {}
for p in imgs[-40:]:
	t = p.origin
	if t:
		stars[t] = stars.get(t,0)+p.rating

# header row (trailing comma keeps both prints on one line)
print ' '.join(['name','stars','imgs','local/blog',
	'links','in/out']),
print 'avg* - SCORE'
print '_'*75
# one row per blog, lowest star total first
for t,s in sorted(stars.items(),key=lambda x:x[1]):
	print u'{} - {}* {}/{}imgs ⇶{}/{}⇶'.format(
		t.name,s,len(t.proper_imgs),len(t.images),
		len(t.linked),len(t.links)),
	# NOTE(review): scores.get(t) is None for a blog missing from
	# scores, and '{:.2f}'.format(None) raises — confirm every origin
	# blog is guaranteed a score.
	print '{:.2f}* - score {:.2f}'.format(t.avg_img_rating(),
		scores.get(t))

print 'ok'
示例#7
0
 def __init__(self):
   (self.index,self.graph) = index.load( conf.index_filename , conf.graph_filename )
   self.revindex = {}
   for filename, i in self.index.items():
     self.revindex[i] = filename
   print "Done loading"
示例#8
0
#!/usr/bin/python
import index
import config as conf


# Load the index and graph from the configured paths and report their sizes.
# NOTE(review): this rebinds the name 'index', shadowing the module imported
# above — any later call to index.load() in this file would fail.
(index,g) = index.load( conf.index_filename , conf.graph_filename )
print "Done loading:" , len(index) , len(g.export())