def check_a_repo_by_random_algorithm(repo, shuffle: bool = False):
    """Check one repo using tokens picked from its own code snippet.

    The comment-stripped snippet of ``repo.code`` is tokenized, and
    ``extract_tokens`` selects as many tokens from it as there are keywords
    in the code's description. The selected tokens are then passed to
    ``check_github_repo_with_keywords`` with the ``RANDOM_ALGORITHM``
    checking type, which performs the check and saves the outcome on
    ``repo``.

    Args:
        repo: Result record exposing a ``code`` attribute that carries
            ``snipped_code`` and ``description``.
        shuffle: Forwarded to ``extract_tokens`` as ``shuffling``.

    Returns:
        ``False`` when the description yields no keywords (nothing is
        checked); ``True`` otherwise, regardless of what the checker
        itself returned.
    """
    snippet_owner = repo.code

    # Steps 0-1: strip comments, then tokenize what is left (set A).
    candidate_tokens = word_tokenize(
        comment_remover(snippet_owner.snipped_code))

    # Step 2: the description keywords (set B) are only used for their count.
    keyword_count = len(
        extract_from_description(snippet_owner.description, with_space=True))

    # TODO step 3: subtract the second set from the first (C = A - B).
    if keyword_count == 0:
        return False

    # Step 4: pick ``keyword_count`` tokens of at least three characters.
    picked_tokens = extract_tokens(
        candidate_tokens, keyword_count, min_len=3, shuffling=shuffle)

    print(picked_tokens)

    # Steps 5-6: run the check with the picked tokens; the callee persists
    # the result.
    check_github_repo_with_keywords(
        repo,
        checking_type=CHECKING_TYPE.RANDOM_ALGORITHM,
        keywords=picked_tokens,
    )
    return True
# Example #2
# 0
def _check_repos():
    """Run the snippet-token check over every not-yet-checked result.

    For each ``GHResult_KeywordMeter`` row with ``is_checked=False``, the
    comment-stripped code snippet is tokenized and ``extract_tokens`` picks
    as many tokens as the code's description has keywords. The picked
    tokens are handed to ``_checkGHUrl`` together with the row. Rows whose
    description yields no keywords are skipped.

    Returns:
        The string ``"Done"`` once all rows have been visited.
    """
    pending_results = GHResult_KeywordMeter.objects.filter(is_checked=False)

    for pending in pending_results:
        snippet_owner = pending.code

        # Steps 0-1: strip comments, then tokenize what is left (set A).
        candidate_tokens = word_tokenize(
            comment_remover(snippet_owner.snipped_code))

        # Step 2: the description keywords (set B) only determine how many
        # tokens get picked.
        keyword_count = len(
            extract_from_description(snippet_owner.description,
                                     with_space=True))

        # TODO step 3: subtract the second set from the first (C = A - B).
        if keyword_count == 0:
            continue

        # Step 4: pick ``keyword_count`` tokens from the snippet.
        picked_tokens = extract_tokens(candidate_tokens, keyword_count)

        print(picked_tokens)

        # Steps 5-6: check the repo with the picked tokens; the callee
        # stores the result.
        _checkGHUrl(pending, picked_tokens)

    return "Done"
# Example #3
# 0
def _checkGHUrl(gResult, keywords: list = None, timeout: float = 30.0):
    """Download the file behind ``gResult.ghUrl`` and record a keyword match.

    The downloaded text has its spaces removed and its comments stripped,
    and is then tested with ``contain_keywords``. The outcome is written to
    ``gResult.is_vulnerable``, ``gResult.is_checked`` is set, and the record
    is saved.

    Args:
        gResult: Result record providing ``ghUrl``, ``code_id``,
            ``answer_id`` and ``id``; it is modified and saved in place.
        keywords: Keywords to look for. When ``None``, they are extracted
            from the description of the ``Code`` row with
            ``id == gResult.code_id``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without it a stalled server would block the caller
            indefinitely.

    Returns:
        ``True`` when the check completed (match or no match). ``False``
        when any exception occurred; in that case ``gResult.is_error`` is
        set and the exception text is stored in ``gResult.report``.
    """
    try:
        # Bound the request: a timeout raises and is recorded as an error
        # below instead of hanging the whole run.
        response = requests.get(gResult.ghUrl, timeout=timeout)

        # Spaces are dropped before comment removal, as the original
        # pipeline did.
        content = comment_remover(response.text.replace(" ", ""))

        # Fall back to the keywords stored in the code's description.
        if keywords is None:
            code = Code.objects.filter(id=gResult.code_id).first()
            keywords = extract_from_description(code.description)

        found = bool(contain_keywords(content, keywords))
        if found:
            print("YESS", gResult.id, gResult.code_id, gResult.answer_id)
        else:
            print("NOOO", gResult.code_id, gResult.answer_id)

        # Persist the verdict; both outcomes mark the record as checked.
        gResult.is_vulnerable = found
        gResult.is_checked = True
        gResult.save()
        return True
    except Exception as e:
        # Best-effort boundary: any failure (network, missing Code row,
        # parsing) is stored on the record rather than propagated.
        gResult.is_error = True
        gResult.report = "{}".format(e)
        gResult.save()
        return False
# Example #4
# 0
def _extract_from_code(request, id, extract_type):
    """Return one extracted part of a vulnerable code's description.

    Args:
        request: Incoming request object; not used by this function.
        id: ``id`` of the ``Code`` row to read. Only rows with
            ``is_vulnerable=True`` are considered.
        extract_type: Passed to ``extract_from_description`` as its second
            positional argument to select what is extracted.

    Returns:
        An ``HttpResponse``:
        * status 404 with an empty body when no matching row exists;
        * status 406 with an explanatory message when
          ``extract_from_description`` returns ``None``;
        * otherwise a response wrapping the extracted result.
    """
    vulnerable_codes = Code.objects.filter(is_vulnerable=True)
    target = vulnerable_codes.filter(id=id).first()
    if target is None:
        return HttpResponse("", status=404)

    extracted = extract_from_description(target.description, extract_type)
    if extracted is not None:
        return HttpResponse(extracted)

    # ``None`` from the extractor is treated as an unknown extract type.
    message = (
        "'{}' type not exists. (valid types: explain, keywords, mitigation, references)"
        .format(extract_type))
    return HttpResponse(message, status=406)
def check_github_repo_with_keywords(gResult,
                                    checking_type: int,
                                    keywords: list = None,
                                    timeout: float = 30.0):
    """Download ``gResult.ghUrl`` and record a keyword match per algorithm.

    The downloaded text has its spaces removed and its comments stripped,
    and is then tested with ``contain_keywords``. Depending on
    ``checking_type`` the verdict is written to
    ``is_vulnerable_our_algorithm`` or ``is_vulnerable_random_algorithm``
    and ``status`` is set to the matching ``KeywordMeterStatus`` value. For
    any other ``checking_type`` the record is saved without those fields
    being touched.

    Args:
        gResult: Result record providing ``ghUrl``, ``code_id``,
            ``answer_id`` and ``id``; it is modified and saved in place.
        checking_type: One of the ``CHECKING_TYPE`` values selecting which
            verdict field and status are written.
        keywords: Keywords to look for. When ``None``, they are extracted
            from the description of the ``Code`` row with
            ``id == gResult.code_id``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without it a stalled server would block the caller
            indefinitely.

    Returns:
        ``True`` when the check completed (match or no match). ``False``
        when any exception occurred; in that case ``gResult.is_error`` is
        set and the exception text is stored in ``gResult.report``.
    """
    try:
        # Bound the request: a timeout raises and is recorded as an error
        # below instead of hanging the whole run.
        response = requests.get(gResult.ghUrl, timeout=timeout)

        # Spaces are dropped before comment removal, as the original
        # pipeline did.
        content = comment_remover(response.text.replace(" ", ""))

        # Fall back to the keywords stored in the code's description.
        if keywords is None:
            code = Code.objects.filter(id=gResult.code_id).first()
            keywords = extract_from_description(code.description)

        found = bool(contain_keywords(content, keywords))
        if found:
            print("YESS", gResult.id, gResult.code_id, gResult.answer_id)
        else:
            print("NOOO", gResult.code_id, gResult.answer_id)

        # Write the verdict to the fields owned by the requested algorithm.
        if checking_type == CHECKING_TYPE.OUR_ALGORITHM:
            gResult.is_vulnerable_our_algorithm = found
            gResult.status = KeywordMeterStatus.checked_by_our_algorithm
        elif checking_type == CHECKING_TYPE.RANDOM_ALGORITHM:
            gResult.is_vulnerable_random_algorithm = found
            gResult.status = KeywordMeterStatus.checked_by_random_algorithm

        gResult.save()
        return True
    except Exception as e:
        # Best-effort boundary: any failure (network, missing Code row,
        # parsing) is stored on the record rather than propagated.
        gResult.is_error = True
        gResult.report = "{}".format(e)
        gResult.save()
        return False
# Example #6
# 0
 def extract_section(self, code: Code, section):
     """Return the requested section of ``code.description`` as one string.

     The pieces produced by ``extract_from_description`` (called with
     ``section=section`` and ``with_space=True``) are joined with newline
     characters.
     """
     section_parts = extract_from_description(
         code.description, section=section, with_space=True)
     return '\n'.join(section_parts)