# Example no. 1 (0)
def merge_scores(msg):
        msg = cStringIO.StringIO(msg)

        layers = [None] * 10
        cueset_size = 0

        while True:
                try:
                        iblock_layers = decode_netstring_fd(msg)
                except EOFError:
                        break

                cueset_size += int(iblock_layers['cueset_size'])
                del iblock_layers['cueset_size']
		
                for layer_data in iblock_layers.itervalues():
                        offs, layer_id, layer =\
                                ainodex.deserialize_layer(
                                        layer_data, layers)

	#XXX: Since ixemes are allocated on different layers on each layer,
	# we must make sure that the ixeme counts match on every layer. This
	# could be easily avoided if ixemes were on the same layers on all
	# iblocks.  This should be easy to fix.
	t = time.time()
	ainodex.sync_layers(layers)
	erlay.report("Syncing layers took %dms" %\
	                   ((time.time() - t) * 1000.0))

        print "CUE", type(cueset_size), cueset_size
        for layer in layers:
                if layer:
                        ainodex.normalize_layer(layer, normtable, cueset_size)

        layers = [(str(i), ainodex.serialize_layer(layer))
                        for i, layer in enumerate(layers) if layer]

        return encode_netstring_fd(dict(layers))
# Example no. 2 (0)
ainodex.open()

if len(sys.argv) < 2:
	print "Usage: simple [key] [cue]"
	sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
print "KEYS", keys, "CUES", cues

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print "%s occurs in %d segments" % (sys.argv[1], hits_len)
print "%s occurs in %d segments" % (sys.argv[2], cues_len)

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
	# Compute token scores
	ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]

print zip(doc_keys, doc_scores)
# Example no. 3 (0)
ainodex.open()

if len(sys.argv) < 2:
    print "Usage: simple [key] [cue]"
    sys.exit(1)

keys = ainodex.token2ixeme(sys.argv[1])
cues = ainodex.token2ixeme(sys.argv[2])
print "KEYS", keys, "CUES", cues

hits_len, hitset = ainodex.hits([keys], 0)
cues_len, cueset = ainodex.hits([cues], 0)

print "%s occurs in %d segments" % (sys.argv[1], hits_len)
print "%s occurs in %d segments" % (sys.argv[2], cues_len)

# Word frequencies
normtable = ainodex.normtable_to_judy(ainodex.normtable())

# Compute how many times tokens co-occur with the cueset
layers = [ainodex.new_layer(i, cueset) for i in range(10)]
for layer in layers:
    # Compute token scores
    ainodex.normalize_layer(layer, normtable, cues_len)

ranked = ainodex.rank(hitset, layers)
doc_keys = array.array("I", ranked)[:20:2]
doc_scores = array.array("f", ranked)[1:20:2]

print zip(doc_keys, doc_scores)