Пример #1
0
class TestHdHash(TestCase):
    """Exercise ``HdHash.hashing`` with uniform all-zero and all-one inputs."""

    # One shared hasher for every test: 32 * 5 input bits, 8 output bits.
    nr_input_bits = 32 * 5
    nr_output_bits = 8
    h = hdhash.HdHash(nr_input_bits, nr_output_bits)

    def test_hashing_with_zeros(self):
        """Five zero values are expected to hash to 0."""
        # given
        zero_words = [0, 0, 0, 0, 0]

        # when
        hashed = self.h.hashing(zero_words)

        # then
        self.assertEqual(0, hashed)

    def test_hashing_with_ones(self):
        """Five 0xffffffff values are expected to hash to 0xff."""
        # given
        full_word = 0xffffffff
        one_words = [full_word, full_word, full_word, full_word, full_word]

        # when
        hashed = self.h.hashing(one_words)

        # then
        self.assertEqual(0xff, hashed)

    def test_hashing_with_big_input(self):
        """Ten zero values (twice as many as the other tests) still hash to 0."""
        # given
        oversized_zero_words = [0x0 for _ in range(10)]

        # when
        hashed = self.h.hashing(oversized_zero_words)

        # then
        self.assertEqual(0, hashed)
Пример #2
0
class TestLshLookupTable(TestCase):
    """Check ``Lsh.lookup_table`` for single, multiple, banded, ``None``
    and empty signature inputs.
    """

    # Floor division keeps the bit count an integer on Python 3 as well;
    # plain ``/`` would hand HdHash a float there.
    hash = hdhash.HdHash(WINDOW_SIZE // BAND * NR_BIT_PER_WORD, 8)
    l = lsh.Lsh(WINDOW_SIZE, BAND, ROW, hash)

    def test_one_signature(self):
        """A single signature produces exactly one lookup entry."""
        #given
        signature = [0x0]

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual(1, len(lookup))
        self.assertEqual(['s0b0', [-1]], lookup[0])

    def test_n_signature(self):
        """Two signatures produce two lookup entries."""
        #given
        signature = [0xff, 0]

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual(2, len(lookup))

    def test_n_band_signature(self):
        """``range(9)`` repeated BAND times yields one entry per element."""
        #given
        nr_unique_signature = 9
        signature = list(range(nr_unique_signature)) * BAND

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual(nr_unique_signature * BAND, len(lookup))

    def test_lookup_table_with_None(self):
        """``None`` as the signature yields an empty lookup table."""
        #given
        signature = None

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual([], lookup)

    def test_lookup_tabke_with_empty(self):
        """An empty signature list yields an empty lookup table."""
        #given
        signature = []

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual([], lookup)
Пример #3
0
class TestLshHashing(TestCase):
    """Check ``Lsh.hashing`` on an all-zero input spanning two windows."""

    # Floor division keeps the bit count an integer on Python 3 as well;
    # plain ``/`` would hand HdHash a float there.
    hash = hdhash.HdHash(WINDOW_SIZE // BAND * NR_BIT_PER_WORD, 8)
    l = lsh.Lsh(WINDOW_SIZE, BAND, ROW, hash)

    def test_hashing(self):
        """All-zero data of length ``2 * WINDOW_SIZE`` gives ``2 * BAND`` zero hashes."""
        #given
        data = [0] * WINDOW_SIZE * 2

        #when
        value = self.l.hashing(data)

        #then
        self.assertEqual(BAND * 2, len(value))
        # Every element must render as '0' when the values are concatenated.
        self.assertEqual('0' * BAND * 2, "".join(str(x) for x in value))
Пример #4
0
class TestParallelSearch(TestCase):
    """Smoke test for ``ParallelSearch.by_qid_and_mid``.

    All fixtures are built while the class body executes, so the Spark
    context and session are created once when this class is defined.
    """

    # Parameters shared by the hasher and the searcher.
    window_size = 155
    band = 31
    row = 8
    band_size = window_size // band

    # Spark context first, then a session running on the local master.
    sc = SparkContext(appName='SearchMusic')
    ss = (
        SparkSession.builder
        .appName('SearchMusic')
        .master('local[*]')
        .getOrCreate()
    )

    # Query and music DataFrames parsed from the configured input files.
    query_df, music_df = music_query_parser.do(sc, ss, query_file, music_file)

    h = hdhash.HdHash(band_size * NR_BIT_PER_WORD, row)
    search = parallel_search.ParallelSearch(
        ss,
        query_df,
        music_df,
        window_size,
        band,
        row,
        h,
        hdhash.hamming_distance,
    )

    def test_by_qid_mid(self):
        """Call the search for query 21 / music 42 at 0.35; nothing is asserted."""
        query_id, music_id, threshold = 21, 42, 0.35
        self.search.by_qid_and_mid(query_id, music_id, threshold)
Пример #5
0
class TestParallelLsh(TestCase):
    """Build a lookup table from the parsed music DataFrame and display it.

    All fixtures below are class attributes, so they are created once when
    the class body executes.
    """

    # Parameters passed to the hasher and the searcher below.
    window_size = 155
    band = 31
    row = 8
    band_size = int(window_size/band)

    # Spark context plus a session configured with the local[*] master.
    sc = SparkContext(appName='SearchMusic')
    ss = SparkSession.builder.appName('SearchMusic').master('local[*]').getOrCreate()

    # query_file / music_file are not defined in this class; they come from
    # the enclosing module.
    query_df, music_df = music_query_parser.do(sc, ss, query_file, music_file)
    h = hdhash.HdHash(band_size * NR_BIT_PER_WORD, row)
    search = parallel_search.ParallelSearch(ss, query_df, music_df, window_size, band, row, h, hdhash.hamming_distance)

    def test_lookup_table(self):
        """Call ``lookup_table`` on the music DataFrame, persist and show it.

        No assertions are made; the result is only displayed.
        """
        mdf = self.music_df
        # NOTE(review): ``self.p`` is not assigned anywhere in this class as
        # shown (only ``search`` and ``h`` are). Unless the TestCase base
        # supplies it, this line raises AttributeError -- confirm which
        # object was intended here.
        mdf = self.p.lookup_table(mdf, 'mcode', 'mid')
        mdf.persist()
        mdf.show()
Пример #6
0
    sc, ss = spark_init()

    window_size = 155
    band = 31
    row = 8
    band_size = int(window_size/band)
    t1 = time.time()


    query_df, music_df = music_query_parser.do(sc, ss, qfile, mfile)


    #debug
    print ('befor search')

    lsh_hash = hdhash.HdHash(band_size * NR_BIT_PER_WORD, row)
    search = parallel_search.ParallelSearch(ss, query_df, music_df,
                                            window_size, band, row,
                                            lsh_hash, hdhash.hamming_distance)

    t2 = time.time()

    # qid = 7
    # searched = search.by_qid(qid, THRESHOLD)
    # if not searched:
    #     print ('Search failed: Query[%d]' % qid)
    # else:
    #     for s in searched:
    #         print ('Search success: Query[%d] hit on %s, songid(%d), code_idx(%d)~code_idx(%d)' %(s['qid'], s['file'], s['mid'], s['col']*5, s['col']*5 + window_size))

    nr_query = 30