示例#1
0
    def __init__(self, **kw):
        """Assemble the unit listing query and its 'Model' column styling.

        Selects from ``self.select_table``, left-joined to the per-unit
        maximum SMR reading (``UnitSMR``) and to ``EquipType``, and adds two
        computed columns counting the days left in a 365-day window
        (``Remaining``) and a 730-day window (``GE_Remaining``) measured from
        ``DeliveryDate``.
        """
        super().__init__(**kw)
        a = self.select_table
        # NOTE(review): cfn presumably wraps a custom SQL function by name and
        # parameter list - confirm against its definition.
        isNumeric = cfn('ISNUMERIC', ['val'])
        left = cfn('LEFT', ['val', 'num'])

        c, d = pk.Tables('UnitSMR', 'EquipType')

        # Day difference between the delivery date and the current timestamp.
        days = fn.DateDiff(PseudoColumn('day'), a.DeliveryDate, fn.CurTimestamp())
        # Days left out of 365, floored at 0 once the window has passed.
        remaining = Case().when(days <= 365, 365 - days).else_(0).as_('Remaining')
        # Same countdown over a two-year (730-day) window.
        remaining2 = Case().when(days <= 365 * 2, 365 * 2 - days).else_(0)

        # The two-year countdown is only reported when the first character of
        # Model is numeric; every other model gets NULL.
        ge_remaining = Case().when(isNumeric(left(a.Model, 1)) == 1, remaining2).else_(None).as_('GE_Remaining')

        # Subquery 'b': one row per Unit holding its maximum SMR and maximum DateSMR.
        b = c.select(c.Unit, fn.Max(c.SMR).as_('CurrentSMR'), fn.Max(c.DateSMR).as_('DateSMR')).groupby(c.Unit).as_('b')

        cols = [a.MineSite, a.Customer, d.EquipClass, a.Model, a.Serial, a.Unit,
                b.CurrentSMR, b.DateSMR, a.DeliveryDate, remaining, ge_remaining]

        # Left joins keep rows of 'a' that have no SMR reading or no EquipType match.
        q = Query.from_(a) \
            .left_join(b).on_field('Unit') \
            .left_join(d).on_field('Model') \
            .orderby(a.MineSite, a.Model, a.Unit)

        # NOTE(review): vars() hands every local defined above to f.set_self -
        # presumably it stores them as attributes on self, so the local names
        # (a, b, c, d, q, cols, ...) may be relied on elsewhere. Confirm before
        # renaming or removing any of them.
        f.set_self(vars())

        # NOTE lots of duplication with this pattern btwn avail/ac inspect/units/comp co
        # can't remember how everything works and don't want to dig into it
        # Registers a style entry for the 'Model' column on top of whatever
        # stylemap_cols already holds.
        self.stylemap_cols |= {'Model': dict(
            cols=['Model'],
            func=st.pipe_highlight_alternating,
            da=dict(
                subset=['Model'],
                color='maroon',
                theme=self.theme))}
示例#2
0
def loadAndCheckFile():
    """Handle an uploaded file or archive and run the similarity check on it.

    A single source file (accepted by ``allowed_file``) is saved, indexed with
    ``addOneFile`` and checked with ``trueAlgo``, whose response is returned
    directly. An archive (accepted by ``allowed_archive``) is saved, extracted
    into a temporary directory, indexed with ``addManyFiles``, and every
    indexed file is checked; the combined results are stored in the
    ``SearchResult`` table and returned as JSON.

    Returns:
        The response of ``trueAlgo`` for a single file, a JSON list of
        per-file results for an archive, or ``{"error": "failed"}`` when the
        request is not a POST or the upload is missing or of an unsupported
        type.
    """
    if request.method != 'POST':
        # The original fell through here and returned None, which is not a
        # valid view response; answer with the same error payload as a
        # rejected upload instead.
        return jsonify({"error": "failed"})

    file = request.files['file']
    if not file:
        return jsonify({"error": "failed"})

    # allowed_archive is only consulted when the file is not a plain source.
    isSourceFile = allowed_file(file.filename)
    if not isSourceFile and not allowed_archive(file.filename):
        return jsonify({"error": "failed"})

    uploadDir = app.config['UPLOAD_FOLDER']
    filename = secure_filename(file.filename)
    savedPath = os.path.join(uploadDir, filename)
    file.save(savedPath)

    if isSourceFile:
        fileId = addOneFile(uploadDir, filename)[1]
        # TODO: delete the file after all operations?
        return trueAlgo(fileId)

    # Original author's note on temporary_directory: errors are ignored.
    with temporary_directory() as tmp:
        path = os.path.join(os.getcwd(), tmp)
        with zipfile.ZipFile(savedPath) as zf:
            zf.extractall(path)

        info = addManyFiles(path, filename)
        # Load the metaphone tables once and share them across every check.
        allMetaphones = getAllMetaphones()
        results = []
        for val in info:
            print(val)
            results.append(trueAlgo(val[1], True, allMetaphones))

        q = Query.into(db.tables["SearchResult"]).columns(
            "result", "createdAt").insert(json.dumps(results),
                                          functions.CurTimestamp())
        executeQ(q)
        return jsonify(results)
示例#3
0
    def test_current_timestamp_with_alias(self):
        """CurTimestamp given an alias renders the alias after the keyword."""
        expected = 'SELECT CURRENT_TIMESTAMP "ts"'
        rendered = str(Query.select(fn.CurTimestamp('ts')))

        self.assertEqual(expected, rendered)
示例#4
0
    def test_current_timestamp(self):
        """A bare CurTimestamp renders as the CURRENT_TIMESTAMP keyword."""
        rendered = str(Query.select(fn.CurTimestamp()))

        self.assertEqual("SELECT CURRENT_TIMESTAMP", rendered)
示例#5
0
    def test_current_timestamp_with_alias(self):
        """An aliased CurTimestamp is rendered with its quoted alias."""
        timestamp = fn.CurTimestamp("ts")
        rendered = str(Query.select(timestamp))

        self.assertEqual('SELECT CURRENT_TIMESTAMP "ts"', rendered)
def _matchesIgnoringTempDir(storedPath, newPath):
    """Tell whether two temp-folder paths agree below the per-run directory.

    Both paths must contain ``Local<sep>Temp`` and have the same number of
    components. The component right after the first ``Temp`` component of
    ``storedPath`` is skipped; every component after that must be equal in
    both paths.
    """
    marker = os.path.join("Local", "Temp")
    if storedPath.find(marker) == -1 or newPath.find(marker) == -1:
        return False

    storedParts = storedPath.split(os.sep)
    newParts = newPath.split(os.sep)
    if len(storedParts) != len(newParts):
        return False

    if "Temp" not in storedParts:
        # NOTE(review): kept from the original loop - when the marker matched
        # only as a substring (no exact "Temp" component) nothing is compared
        # and the paths count as matching. Confirm this is intended.
        return True

    # Skip "Temp" itself and the directory directly beneath it.
    firstCompared = storedParts.index("Temp") + 2
    return storedParts[firstCompared:] == newParts[firstCompared:]


def addOneFile(dir, fileName, entryName="", id=0):
    """Index one source file: store its File row and its code fragments.

    The file is read, passed through ``prettierCode``, stripped of tabs and
    hashed with SHA-256. If a ``File`` row with the same hash already exists
    at the same path (or at an equivalent path under another temp directory),
    nothing is inserted and that row is reported as a duplicate. Otherwise a
    new ``Entry`` is created when ``id`` is 0, a ``File`` row is inserted, and
    the code is split into fragments stored in ``CodeFragment`` together with
    their metaphone.

    The working directory is switched to ``dir`` for the duration of the call
    and is always restored afterwards, including on the duplicate returns and
    when an exception is raised.

    Args:
        dir: Directory containing the file.
        fileName: Name of the file inside ``dir``.
        entryName: Name for a newly created entry; defaults to ``fileName``.
        id: Existing entry id to attach the file to, or 0 to create an entry.

    Returns:
        ``(entryId, fileId, isDuplicate)``.
    """
    cwd = os.getcwd()
    os.chdir(dir)
    try:
        if not entryName:
            entryName = fileName

        fullPath = os.path.join(dir, fileName)
        with open(fullPath, encoding='utf-8', errors="replace") as f:
            code = f.read()

        code = prettierCode(code, fileName)
        code = code.replace("\t", "")

        fileHash = hashlib.sha256(
            code.encode('utf-8', errors="replace")).hexdigest()

        q = Query.from_(db.tables["File"]).select(
            "id", "path",
            "entryId").where(db.tables["File"].hash == fileHash)
        checkDuplicate = executeQ(q, True)
        for row in checkDuplicate:
            # NOTE(review): as in the original, the ids reported are those of
            # the first row with this hash, not necessarily the row whose
            # path matched.
            if row[1] == fullPath:
                print("Дубликат!!!", checkDuplicate[0][0], row[1], fullPath)
                return (checkDuplicate[0][2], checkDuplicate[0][0], True)
            if _matchesIgnoringTempDir(row[1], fullPath):
                print("Дубликат!", checkDuplicate[0][0], row[1], fullPath)
                return (checkDuplicate[0][2], checkDuplicate[0][0], True)

        if id == 0:
            q = Query.into(db.tables["Entry"]).columns(
                'name', 'createdAt').insert(entryName,
                                            functions.CurTimestamp())
            executeQ(q)
            # NOTE(review): the new id is read back as "highest id in the
            # table", which is only correct without concurrent inserts.
            q = Query.from_(db.tables["Entry"]).select('id').orderby(
                'id', order=Order.desc).limit(1)
            id = getId(executeQ(q, True))

        q = Query.into(db.tables["File"]).columns(
            "entryId", "path", "hash").insert(id, fullPath, fileHash)
        executeQ(q)
        q = Query.from_(db.tables["File"]).select('id').orderby(
            'id', order=Order.desc).limit(1)
        fileId = getId(executeQ(q, True))

        # Each line is split by shorterString on its UTF-8 bytes, then the
        # pieces are decoded back to text.
        fragments = []
        for line in code.split("\n"):
            lineBytes = line.encode('utf-8', errors="replace")
            for piece in shorterString(lineBytes):
                fragments.append(piece.decode('utf-8', errors="replace"))

        for order, fragment in enumerate(fragments):
            q = Query.into(db.tables["CodeFragment"]).columns(
                "fileId", "order", "text",
                "metaphone").insert(fileId, order, fragment,
                                    db.func["metaphone"](fragment, 255))
            executeQ(q)

        return (id, fileId, False)
    finally:
        os.chdir(cwd)
示例#7
0
def _findClosestLine(val, allKeys, metaphones, texts):
    """Find the closest metaphone to ``val`` among the candidate files.

    Candidate lines are pre-filtered cheaply (empty, length differing by more
    than 3, different first two characters) before the database is asked for a
    bounded Levenshtein distance. The search stops at the first line whose
    distance is 0 or 1.

    Returns:
        ``(distance, fileKey, matchedText, verdict)`` where ``verdict`` is
        ``"plagiarism"`` (distance 0 or 1), ``"similar"`` (other distance
        within the bound) or ``"unique"`` (nothing within the bound, in which
        case the other values are ``255``, ``-1`` and ``"_empty_"``).
    """
    minD = 255
    curFile = -1
    relevant = "_empty_"
    verdict = "unique"

    for k in allKeys:
        for counter, val2 in enumerate(metaphones[k]):
            if val2 == "":
                continue
            if abs(len(val2) - len(val)) > 3:
                continue
            if len(val) > 1 and len(val2) > 1 and val2[:2] != val[:2]:
                continue

            threshold = min(len(val), len(val2), 7) // 2 + 1
            q = Query.select(db.func["levenshtein_less_equal"](
                str(val), val2, threshold))

            for row in executeQ(q, True):
                distance = row[0]
                # levenshtein_less_equal only promises "some value greater
                # than max_d" when the bound is exceeded, so test against the
                # bound itself rather than against exactly threshold + 1.
                if distance > threshold or distance >= minD:
                    continue
                minD = distance
                curFile = k
                relevant = texts[k][counter]
                verdict = "plagiarism" if distance in (0, 1) else "similar"

            if minD in (0, 1):
                # A near-exact match cannot be improved on; stop searching.
                return minD, curFile, relevant, verdict

    return minD, curFile, relevant, verdict


def _propagateComboLengths(combo):
    """Rewrite running combo counters in place to the full run length.

    ``combo`` holds, per line, the running count of consecutive lines matched
    to the same file (0 for skipped lines). Walking backwards, every counter
    belonging to a run is raised to the value reached at the end of that run;
    skipped lines inside a run are left at 0.
    """
    currentCombo = 0
    comboToAdd = 0
    for i in reversed(range(len(combo))):
        if currentCombo > 0:
            if combo[i] == 0:
                continue
            combo[i] += comboToAdd
            currentCombo -= 1
            comboToAdd += 1
        elif combo[i] > 1:
            currentCombo = combo[i] - 1
            comboToAdd = 1


def trueAlgo(fileId, needList=False, allMetaphones=None):
    """Compare every line of a file against all other files of its extension.

    Args:
        fileId: Id of the file to check (anything ``int()`` accepts).
        needList: When true, return the raw result list instead of storing it
            and building a JSON response.
        allMetaphones: Optional pre-loaded ``(metaphones, texts, files,
            extensions)`` tables, each keyed by file id; loaded with
            ``getAllMetaphones()`` when omitted.

    Returns:
        With ``needList`` the list ``[lines, matchedLines, matchedFiles,
        combo, distances, verdicts, percent, fileName]``; otherwise that list
        is saved to ``SearchResult`` and returned through ``jsonify``.
        ``percent`` counts each "plagiarism" line as 1 and each "similar" line
        as 0.1, relative to the number of non-skipped lines.
    """
    fileId = int(fileId)
    if not allMetaphones:
        allMetaphones = getAllMetaphones()
    metaphones = allMetaphones[0]
    texts = allMetaphones[1]
    files = allMetaphones[2]
    extensions = allMetaphones[3]

    currentExtension = extensions[fileId]
    fileMetaphones = metaphones[fileId]
    fileTexts = texts[fileId]

    start_time = time.time()

    # Only other files with the same extension are comparison candidates.
    allKeys = [
        k for k in metaphones
        if k != fileId and extensions[k] == currentExtension
    ]

    distances = []
    stringsFile = []
    stringsRelevant = []
    stringsFrom = []
    result = []
    combo = []
    lastFile = -1
    comboCounter = 1

    for counterF, val in enumerate(fileMetaphones):
        stringsFile.append(fileTexts[counterF])

        if val == "":
            # Lines without a metaphone are skipped and do not break a combo.
            stringsRelevant.append("_empty_")
            result.append("skipped")
            distances.append(255)
            stringsFrom.append("")
            combo.append(0)
            continue

        minD, curFile, relevant, verdict = _findClosestLine(
            val, allKeys, metaphones, texts)

        stringsRelevant.append(relevant)
        result.append(verdict)
        distances.append(minD)
        stringsFrom.append(files[curFile] if curFile != -1 else "")

        if minD in (0, 1):
            # Try the file that just matched first for the following lines.
            allKeys.remove(curFile)
            allKeys.insert(0, curFile)

        if lastFile == -1:
            combo.append(1)
        else:
            comboCounter = comboCounter + 1 if lastFile == curFile else 1
            combo.append(comboCounter)
        lastFile = curFile

    coincidences = 0
    empty = 0
    # Accumulated from the last line to the first, as the original did.
    for verdict in reversed(result):
        if verdict == "plagiarism":
            coincidences += 1
        elif verdict == "similar":
            coincidences += 0.1
        elif verdict == "skipped":
            empty += 1

    _propagateComboLengths(combo)

    # Guard against a file made up solely of skipped lines.
    divisor = (len(stringsFile) - empty) or 1
    percent = round(coincidences / divisor * 100, 1)
    print("RESULT: ", percent)
    fullResult = [
        stringsFile, stringsRelevant, stringsFrom, combo, distances, result,
        percent, files[fileId]
    ]
    print("--- %s seconds ---" % (time.time() - start_time))

    if needList:
        return fullResult

    q = Query.into(db.tables["SearchResult"]).columns(
        "result", "createdAt").insert(json.dumps(fullResult),
                                      functions.CurTimestamp())
    executeQ(q)
    return jsonify(fullResult)