Example 1
    def find_all_dups():
        # Mark duplicate folders: for each folder name still flagged as an
        # original (original=0), keep the first row and record the rest.
        Trace()
        dups = []
        table = FolderComp.FolderTable
        # Expand() substitutes [table] with the local variable of the same name.
        query = Expand(r'select distinct foldername from [table] Where original=0')
        folderNames = sql.execute(query, Flatten=True)
        for folderName in folderNames:
            dbg_print(len(dups), folderName)
            query = Expand(r"select * from [table] Where foldername=?")
            rows = sql.execute(query, Data=[folderName])
            if len(rows) > 1:
                # Keep the first row as the original; any later row whose
                # contents Compare() equal becomes its duplicate.
                original_row = rows[0]
                oid = original_row[0]
                for dup_row in rows[1:]:
                    result = Compare(original_row[2], dup_row[2])
                    if result:
                        dups.append([oid, dup_row[0]])
        print('')

        if dups:
            # Point each duplicate row's 'original' column at the original's idx.
            count = sql.update(table, dups, ['original=?'], "WHERE idx=?", Verbose=True)
            Log('Updated [count] rows')

        return len(dups)
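
The snippet leans on the framework's Expand() interpolation and sql wrapper. For orientation, here is a minimal sketch of the same two-pass flow using only the standard sqlite3 module; the table and column names (folders, idx, foldername, path) are guesses mirroring the snippet, and same_contents is a hypothetical callback standing in for the framework's Compare() helper:

import sqlite3

def find_all_dups_sqlite(db_path, same_contents, table='folders'):
    # Same flow: list candidate folder names, then check each name's later
    # rows against its first row and mark matches as duplicates.
    con = sqlite3.connect(db_path)
    dups = []
    names = [r[0] for r in con.execute(
        'SELECT DISTINCT foldername FROM %s WHERE original=0' % table)]
    for name in names:
        rows = con.execute(
            'SELECT idx, foldername, path FROM %s WHERE foldername=?' % table,
            (name,)).fetchall()
        if len(rows) > 1:
            oid, _, original_path = rows[0]
            for row_id, _, path in rows[1:]:
                # same_contents stands in for the snippet's Compare() helper.
                if same_contents(original_path, path):
                    dups.append((oid, row_id))
    if dups:
        con.executemany('UPDATE %s SET original=? WHERE idx=?' % table, dups)
        con.commit()
    return len(dups)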
Example 2
def SQL_UnitTest(GenerateTestData=False):
    # Round-trip tests for sort_data, write_to_table, select and update.
    Trace()

    data = [
        [ 55, 54, 53, 52, 51],
        [ 55, 44, 43, 42, 41],
        [ 35, 34, 33, 32, 31],
        [ 35, 34, 33, 22, 21],
        [ 15, 14, 13, 12, 11],
        [  5, 4, 3, 2, 1],
    ]

    columns = [
        [ 'col0', 'int' ],
        [ 'col1', 'int' ],
        [ 'col2', 'int' ],
        [ 'col3', 'int' ],
        [ 'col4', 'int' ],
    ]

    primaryKeys = ['col0', 'col1']

    sortedData = sql.sort_data(data, [0, 1, 2, 3], columns)
    PrettyPrintList(sortedData)

    for idx in range(len(data[0])):
        UnitTest.Step('Check column [idx]')
        col = [row[idx] for row in data]

        # Each column of the test data should match its own descending sort.
        expected = [row[idx] for row in data]
        expected.sort()
        expected.reverse()

        UnitTest.Verify(col == expected, 'Column [idx] is sorted correctly')
        Log('col:      %s' % col)
        Log('expected: %s' % expected)

    table = 'SQL_UnitTest'

    if GenerateTestData:
        jsonData = dictn()
        jsonData.expectedResultsJson = data
        UnitTest.SaveExpectedResults(jsonData)

    UnitTest.Step('Write & Read from table')
    sql.write_to_table(table, data, columns, Verbose=True)
    read = sql.select(table)
    UnitTest.Verify(data == read)

    UnitTest.Step('Update Tests')
    row = [
        [ 55, 54, 1, 1, 1]
    ]
    count = sql.update(table, row, WhereClause=r'Where col0=55 and col1=54', Verbose=True)
    Log('count=[count]')
    UnitTest.Verify(count == 1)

    updated = sql.select(table, WhereClause=r'Where col0=55 and col1=54', Verbose=True)
    UnitTest.VerifyMatch(updated, row)

    UnitTest.Step('Write & Read unique data')

    unique_data = []
    unique_data.append(list(range(100, 200, 20)))
    unique_data.append(list(range(200, 300, 20)))
    unique_data.append(list(range(300, 400, 20)))
    unique_data.append(list(range(400, 500, 20)))
    unique_data.append(list(range(500, 600, 20)))
    PrettyPrint(unique_data)

    sql.write_to_table(table, unique_data, columns, PrimaryKey=primaryKeys, UseExistingTable=False, IdentityIndex=False, Verbose=True)
    read = sql.select(table)
    PrettyPrint(read)
    UnitTest.Verify(unique_data == read)

    UnitTest.Step('Write & Read indexed unique data')
    sql.write_to_table(table, unique_data, columns, PrimaryKey=None, UseExistingTable=False, IdentityIndex=True, Verbose=True)
    read = sql.select(table)
    PrettyPrint(read)

    # IdentityIndex prepends a 1-based row id; mirror it in the expected data.
    for idx, row in enumerate(unique_data, 1):
        row.insert(0, idx)
    UnitTest.Verify(unique_data == read)
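
sql.sort_data appears to sort rows on several columns at once, descending, which is what the column checks above expect. With plain Python that multi-column sort is a single sorted() call with a tuple key; a sketch assuming the same rows-as-lists shape:

def sort_data(rows, column_indexes, descending=True):
    # Build a tuple of the selected columns per row and sort on it.
    return sorted(rows,
                  key=lambda row: tuple(row[i] for i in column_indexes),
                  reverse=descending)

data = [[35, 34, 33, 22, 21], [55, 54, 53, 52, 51], [35, 34, 33, 32, 31]]
print(sort_data(data, [0, 1, 2, 3]))
# [[55, 54, 53, 52, 51], [35, 34, 33, 32, 31], [35, 34, 33, 22, 21]]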
Example 3
    def FindDups(self, Types='[AllMediaTypes]'):
        # Find files with identical contents: bucket rows by file size, then
        # byte-compare files within each same-size bucket.
        Trace(self.Folder, Types)
        if not sql.tables(self.Table):
            return []
        self.Verbose = True

        def FindDupsInSet(rowSet):
            # Pairwise-compare every row in a same-size set; a row already
            # matched as a duplicate is skipped on later passes.
            dups = []
            foundIdx = []
            foundPathNames = []

            for idx, left in enumerate(rowSet):
                idxLeft = left[0]
                pathLeft = left[2]
                for right in rowSet[idx + 1 : ]:
                    idxRight = right[0]
                    if idxRight in foundIdx:
                        continue
                    pathRight = right[2]
                    # shallow=False forces a byte-for-byte content comparison.
                    if filecmp.cmp(pathLeft, pathRight, False):
                        dups.append([idxLeft, idxRight])
                        foundIdx.append(idxRight)
                        foundPathNames.append(pathRight)
            return dups, foundPathNames

        rows = self.select_rows('', Types, SortColumns=['size', 'modified_date ASC'])

        results = []
        dups = []
        found = 0
        Log('Total rows: %d' % (len(rows)))
        print('  Idx  Dups         Size')

        rowSet = []
        allSets = [rowSet]
        prev_size = 0
        for idx, row in enumerate(rows):
            print('\r%5d %5d' % (idx, found), end=' ')
            filepath = row[2]
            if not os.path.exists(filepath):
                Log('Error missing file: [filepath]')
                continue

            # Rows arrive sorted by size, so equal-sized files are adjacent;
            # when the size changes, start a new set and include the current
            # row in it.
            size = row[4]
            if size != prev_size:
                if len(rowSet):
                    rowSet = []
                    allSets.append(rowSet)
                prev_size = size
            rowSet.append(row)
        print('')

        for rowSet in allSets:
            if not len(rowSet):
                continue
            dupsRowSet, pathsRowSet = FindDupsInSet(rowSet)
            dups.extend(dupsRowSet)
            results.extend(pathsRowSet)
            found += len(dupsRowSet)

        Log(r'Found %d duplicates' % (len(dups)))
        if dups:
            updated = sql.update(self.Table, dups, ['original=?'], "WHERE idx=?", Verbose=self.Verbose)
            Log(r'Updated %d duplicate rows' % (updated))

        return results
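
The size-bucket-then-compare pattern is what keeps this affordable: only files of equal size are ever byte-compared. A standalone sketch of the same technique over a directory tree, using only the standard library (the function name and shape are illustrative, not the class's API):

import filecmp
import os
from collections import defaultdict

def find_duplicate_files(root):
    # Bucket files by size first; only equal-sized files can be identical.
    by_size = defaultdict(list)
    for dirpath, _dirs, files in os.walk(root):
        for name in files:
            path = os.path.join(dirpath, name)
            if os.path.isfile(path):
                by_size[os.path.getsize(path)].append(path)

    dups = []
    for paths in by_size.values():
        if len(paths) < 2:
            continue
        matched = set()
        for i, left in enumerate(paths):
            for right in paths[i + 1:]:
                # Skip files already matched to an earlier original.
                if right in matched:
                    continue
                if filecmp.cmp(left, right, shallow=False):
                    dups.append((left, right))
                    matched.add(right)
    return dups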