def ft_process(text): # splitting on lines lines = text.split('\n') # assigning max lowest to all possible hash values global max_0 for line in lines: # skipping blank entries if len(line) == 0: continue # skipping lines that don't begin with 'Q' if line[0] =="Q": quote = line[2:] for i in range(10): # calculating # of tailing 0's for murmurhash hash_value = pyhash.super_fast_hash(seed = i)(quote) tail_0 = len(bin(hash_value)[2:]) - (bin(hash_value)[2:].rfind('1') + 1) # assigning # of tailing 0's as 0 if no 1's found in bin string if tail_0 == len(bin(hash_value)[2:]): tail_0 = 0 if tail_0 > max_0[i]: max_0[i] = tail_0 for i in range(10,20): # calculating # of tailing 0's for murmurhash hash_value = pyhash.murmur3_32(seed = i)(quote) tail_0 = len(bin(hash_value)[2:]) - (bin(hash_value)[2:].rfind('1') + 1) # assigning # of tailing 0's as 0 if no 1's found in bin string if tail_0 == len(bin(hash_value)[2:]): tail_0 = 0 if tail_0 > max_0[i]: max_0[i] = tail_0 for i in range(20,30): # calculating # of tailing 0's for murmurhash hash_value = pyhash.xx_32(seed = i)(quote) tail_0 = len(bin(hash_value)[2:]) - (bin(hash_value)[2:].rfind('1') + 1) # assigning # of tailing 0's as 0 if no 1's found in bin string if tail_0 == len(bin(hash_value)[2:]): tail_0 = 0 if tail_0 > max_0[i]: max_0[i] = tail_0 return
class GenericObject(ABC):
    """Abstract base that derives a numeric id from the subclass name.

    The id is the SuperFastHash of the concrete class's name, computed
    with a single shared hasher instance.
    """

    # shared non-cryptographic hasher used for id generation
    _hasher = pyhash.super_fast_hash()

    def _get_name(self):
        """Return the name of the concrete (most-derived) class."""
        return type(self).__name__

    def _get_id(self):
        """Return the SuperFastHash of the class name as a numeric id."""
        return self._hasher(self._get_name())

    def __str__(self):
        return "GenericObject"
def __init__(self, size=65536, k=7, name='bf', load=False):
    """Initialize (or load) a bloom filter.

    Parameters
    ----------
    size : int
        Number of bits in the filter's bit array.
    k : int
        Number of hash functions to use; must be > 0 and <= 18
        (18 is the number of available hash functions below).
    name : str
        Identifier used when saving/loading the filter.
    load : bool
        When True, restore a previously saved filter via self.load(name)
        and ignore the other arguments.

    Raises
    ------
    ValueError
        If k is outside (0, 18].  (Previously this only printed a message
        and returned, leaving a half-initialized object behind.)
    """
    if load:
        self.load(name)
        return
    # validate before any allocation so a bad k cannot leave partial state
    if k > 18 or k <= 0:
        raise ValueError('k should be > 0 & <= 18')
    self.size = size
    self.k = k
    self.name = name
    self.bitarray = bitarray.bitarray('0' * self.size)
    # one table of per-slot sets for each of the k hash functions
    self.tables = [[set() for _ in range(self.size)]
                   for _ in range(self.k)]
    # 18 hash functions: 12 fast non-cryptographic (pyhash) followed by
    # 6 cryptographic digests reduced to integers
    self.hashes = [
        pyhash.fnv1_64(),
        pyhash.murmur2_x64_64a(),
        pyhash.murmur3_x64_128(),
        pyhash.lookup3(),
        pyhash.super_fast_hash(),
        pyhash.city_128(),
        pyhash.spooky_128(),
        pyhash.farm_128(),
        pyhash.metro_128(),
        pyhash.mum_64(),
        pyhash.t1_64(),
        pyhash.xx_64(),
        # renamed lambda parameter: the original shadowed builtin `str`
        lambda s: int(hashlib.md5(s.encode('utf-8')).hexdigest(), 16),
        lambda s: int(hashlib.sha1(s.encode('utf-8')).hexdigest(), 16),
        lambda s: int(hashlib.sha224(s.encode('utf-8')).hexdigest(), 16),
        lambda s: int(hashlib.sha256(s.encode('utf-8')).hexdigest(), 16),
        lambda s: int(hashlib.sha384(s.encode('utf-8')).hexdigest(), 16),
        lambda s: int(hashlib.sha512(s.encode('utf-8')).hexdigest(), 16),
    ]
def __init__(self, host, port, servers):
    """Front-end bridge: accept one connection per BE server.

    Listens on (host, port), accepts `servers` connections, then wires
    them up in a deterministic order (sorted by peer address).  Each BE
    connection gets a pair of locks: the first coordinates reading, the
    second writing.
    """
    self.host = host
    self.port = port
    self.servers = servers
    self.logger = logging.getLogger("FE-Bridge")
    self.socket = ServerSocket(host, port)
    self.hasher = pyhash.super_fast_hash()
    self.be_conn = []
    self.be_conn_locks = []
    self.logger.info("Connecting with BE servers")
    pending = []
    for _ in range(self.servers):
        conn, addr = self.socket.accept_client()
        self.logger.info("Connection accepted %r" % (addr, ))
        pending.append((addr, conn))
    # sort by (addr, conn) so every FE instance orders BE links the same way
    for addr, conn in sorted(pending):
        self.be_conn.append(ServersClientBridgePDUSocket(conn))
        # first lock coordinates reading, second writing
        self.be_conn_locks.append((Lock(), Lock()))
def super_fast_hash(self):
    """Return the SuperFastHash digest of this object's data payload."""
    hasher = pyhash.super_fast_hash()
    return hasher(self.data)
import pyhash

# module-wide fast hasher instance
hasher = pyhash.super_fast_hash()

# constants controlling amount of subindices
NUM_OF_INVERTED_INDEX_SHARDS = 10000

# paths to data
DATA_BASE_PATH = './data/'
DATA_WIKI_PATH = f'{DATA_BASE_PATH}wiki-pages/'
DATA_TRAINING_PATH = f'{DATA_BASE_PATH}train.jsonl'
DATA_DEV_LABELED_PATH = f'{DATA_BASE_PATH}shared_task_dev.jsonl'
DATA_TEST_UNLABELED_PATH = f'{DATA_BASE_PATH}shared_task_test.jsonl'
#DATA_PRETRAINED_EMBEDDINGS_PATH = DATA_BASE_PATH + 'GoogleNews-vectors-negative300.bin'
DATA_PRETRAINED_EMBEDDINGS_PATH = f'{DATA_BASE_PATH}glove.840B.300d.txt'

# paths to generated auxiliary data and output
GENERATED_BASE_PATH = './generated/'
GENERATED_FIGURES_BASE_PATH = f'{GENERATED_BASE_PATH}figures/'
GENERATED_COUNTS_PATH = f'{GENERATED_BASE_PATH}accumulated_word_count.jsonl'
GENERATED_IDF_PATH = f'{GENERATED_BASE_PATH}words_with_idf.jsonl'
GENERATED_WIKI_PAGE_MAPPINGS_PATH = f'{GENERATED_BASE_PATH}wiki_page_batch_mappings.p'
GENERATED_DOCUMENT_NORMS_MAPPING = f'{GENERATED_BASE_PATH}docs_to_norms_mapping.jsonl'
GENERATED_DOCUMENT_LENGTH_MAPPING = f'{GENERATED_BASE_PATH}docs_to_lengths_mapping.jsonl'
GENERATED_INVERTED_INDEX_DIRECTORY = f'{GENERATED_BASE_PATH}inverted_index/'
GENERATED_LR_PREPROCESSED_TRAINING_DATA = f'{GENERATED_BASE_PATH}LR_preprocessed_training_data.p'
GENERATED_LR_PREPROCESSED_DEV_DATA = f'{GENERATED_BASE_PATH}LR_preprocessed_dev_data.p'
GENERATED_LOGISTIC_REGRESSION_MODEL = f'{GENERATED_BASE_PATH}LR_model.p'
GENERATED_LOGISTIC_REGRESSION_LOSS_HISTORY = f'{GENERATED_BASE_PATH}LR_loss_history.p'
# '{}' is a .format() placeholder filled in by callers — braces escaped in f-string
GENERATED_NEURAL_NETWORK_MODEL = f'{GENERATED_BASE_PATH}NN_model_{{}}.p'
GENERATED_NEURAL_NETWORK_LOSS_HISTORY = f'{GENERATED_BASE_PATH}NN_loss_history_{{}}.p'
import pyhash #https://code.google.com/p/pyfasthash/ h_fnv1_32 = pyhash.fnv1_32() def fnv1_32(req): return h_fnv1_32(str(req)) h_lookup3 = pyhash.lookup3_big() def lookup3(req): return h_lookup3(str(req)) h_super_fast_hash = pyhash.super_fast_hash() def super_fast_hash(req): return h_super_fast_hash(str(req)) h_murmur2_x64_64a = pyhash.murmur2_x64_64a() def murmur2_x64_64a(req): return h_murmur2_x64_64a(str(req)) h_murmur3_32 = pyhash.murmur3_32() def murmur3_32(req): return h_murmur3_32(str(req)) h_fnv1a_64 = pyhash.fnv1a_64() def fnv1a_64(req):
import pyhash
import sys
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

# bloom filter state: bit-vector length and the shared bit vector itself
bloomFilterSize = 10
bit_vector = []

#hashFunctions
# ten independent non-cryptographic hashers from pyhash
fnv = pyhash.fnv1a_32()
mur = pyhash.murmur3_32()
lookup = pyhash.lookup3()
super1 = pyhash.super_fast_hash()
city = pyhash.city_64()
spooky = pyhash.spooky_32()
farm = pyhash.farm_32()
metro = pyhash.metro_64()
mum = pyhash.mum_64()
xx = pyhash.xx_32()
#10 hash functions
hashfuncs = [fnv, mur, lookup, super1, city, spooky, farm, metro, mum, xx]

#hash
def insertBloom(kmer, hashFuncCount):
    # Insert *kmer* into the shared bloom filter using the first
    # hashFuncCount+1 hash functions (index runs 0..hashFuncCount).
    global bloomFilterSize
    global bit_vector
    index = 0
    for hf in hashfuncs:
        if (index <= hashFuncCount):
            # NOTE(review): definition is truncated in this chunk;
            # the body continues beyond the visible source
            if (bit_vector[hf(kmer) % bloomFilterSize] == 0):
# TODO: add javascript escape code here so it's available in the template engine from datetime import datetime from routes import url_for from mako import filters from six.moves.urllib.parse import urlparse, ParseResult import re from routes import request_config from pybald import context import logging log = logging.getLogger(__name__) try: import pyhash hashfunc = pyhash.super_fast_hash() except ImportError: log.warning("-" * 80 + ''' Warning ------- Using python built-in hash() for asset URL generation. This is system implementation specific and may result in different hosts mapping static assets to different static hosts. That may cause inefficient use of browser caches. Optionally you can install pyhash to install additional fast, non-cryptographic, hashes that are not system dependent. pip install pyhash ''' + "-" * 80) hashfunc = hash try:
def __init__(self, locks_pool_size):
    """Create a pool of *locks_pool_size* read/write file locks.

    Also builds a SuperFastHash hasher, presumably used elsewhere to map
    file names onto lock slots.
    """
    self.hasher = pyhash.super_fast_hash()
    self.file_locks_len = locks_pool_size
    self.file_locks = [ReadWriteLock() for _ in range(locks_pool_size)]
# encoding: utf-8
'''HTML page helper functions as well as simple asset tag handling.'''
import os
import project
# from urlparse import urlparse, ParseResult
# global request_config... how can we eliminate?
# from routes import request_config
from pybald.core.helpers import HTMLLiteral, AssetUrl
import logging

console = logging.getLogger(__name__)

# Prefer pyhash's fast, host-independent hashes for asset-URL generation;
# fall back to the builtin hash() (implementation specific) when the
# optional dependency is not installed.
try:
    import pyhash
    hashfunc = pyhash.super_fast_hash()
except ImportError:
    # fixed: Logger.warn is a deprecated alias of Logger.warning
    console.warning("!"*10 + '''
    Using python built-in hash() for asset URL generation.
    This is system implementation specific and may result in
    different hosts mapping static assets to different static
    hosts. That may cause inefficient use of browser caches.
    Optionally you can install pyhash to install additional fast,
    non-cryptographic, hashes that are not system dependent.
    pip install pyhash
    ''')
    hashfunc = hash

# cache for generated asset tags (populated elsewhere in this module)
asset_tag_cache = {}
def __hash__(self):
    """Hash the object with SuperFastHash.

    NOTE(review): pyhash hashers expect str/bytes-like input; passing
    ``self`` assumes this object supports that — confirm against callers.
    A fresh hasher is built per call, as in the original.
    """
    return pyhash.super_fast_hash()(self)
def ft_superfasthash(text):
    """Map *text* into [0, m) via SuperFastHash (m is a module global)."""
    # `global` is unneeded for a read-only access; m is looked up normally
    return pyhash.super_fast_hash()(text) % m