    # Clean the corpus text.
    dataClass.dataset['corpus'] = dataClass.dataset.corpus.apply(
        dataClass.cleanSentences)
    # Create potential phrases from stems.
    # print(dataClass.dataset['corpus'][0])

    precision = 0
    recall = 0
    fscore = 0
    allIndex = []

    #for index in range(0, 3):
    for index in range(len(dataClass.dataset['processDocs'])):
        text = dataClass.dataset.corpus[index]

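        # Build a PageRank-based extractor for this document and extract its POS tags.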
        PR = pageRankClass(text)

        PR.posCorp = PR.extractPosTags(text)

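        # Optionally stem the POS-tagged corpus before building candidates.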
        if applyStemming:
            PR.posCorp = dataClass.stem_Doc(PR.posCorp)
        # Rename keyTerms to match the old code implementation.
        # dataClass.dataset.rename(columns={'keyTerms': 'targetTerms'}, inplace=True)
        # not just yet ^^^^^

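        # Restrict the tf-idf dataframe to the current document and build a
        # term -> idf lookup for it.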
        df1 = df[df.doc_id_list == index]
        termsDict = dict(zip(list(df1.term_list), list(df1.term_idf_list)))
        # df contains 4-grams --> remove only single instances.

        # singletons = {}
        # ngrams = {}
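
        # One possible reading of the note above (assumption): treat single-word
        # terms as the singletons and keep only multi-word n-grams as phrase
        # candidates; the space-split test assumes termsDict keys are plain strings.
        singletons = {t: v for t, v in termsDict.items() if len(t.split()) == 1}
        ngrams = {t: v for t, v in termsDict.items() if len(t.split()) > 1}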

    # Stem the gold-standard key terms so they are comparable with the stemmed
    # candidate phrases.
    # dataClass.dataset.processDocs = dataClass.dataset.processDocs.apply(dataClass.stem_Doc)
    if applyStemming:
        dataClass.dataset['keyTerms'] = dataClass.dataset['keyTerms'].apply(
            dataClass.stem_array)

    allIndex = []
    precision = 0
    recall = 0
    fscore = 0
    for index in range(len(dataClass.dataset['processDocs'])):
        # for index in range(0, 1):

        print("at stage {}".format(index))

        testerDoc = dataClass.dataset['processDocs'][index]

        PR = pageRankClass(testerDoc)

        # as far as here it is good
        # Build the word graph for this document.
        PR.constructGraph(testerDoc, stem=applyStemming)
        print("number of nodes: " + str(len(PR.graph.nodes())))

        # Generate candidate phrases.
        PR.createPhrasese()

        print(len(PR.textRankDict))
        # Sort the candidate phrases by score, highest first.
        PR.PhraseCandidates = dict(
            sorted(PR.PhraseCandidates.items(),
                   key=lambda x: x[1],
                   reverse=True))

        print(len(PR.PhraseCandidates))
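
        # Sketch of one way the precision/recall/fscore counters could be
        # filled in (assumption, not confirmed by the surrounding code):
        # compare the top-k candidates against the gold keyTerms for this
        # document. topK, predicted, goldTerms and matches are illustrative
        # names; keyTerms is assumed to hold a list of phrase strings.
        topK = 10
        predicted = set(list(PR.PhraseCandidates.keys())[:topK])
        goldTerms = set(dataClass.dataset['keyTerms'][index])
        matches = len(predicted & goldTerms)
        p = matches / len(predicted) if predicted else 0.0
        r = matches / len(goldTerms) if goldTerms else 0.0
        fscoreDoc = 2 * p * r / (p + r) if (p + r) else 0.0
        precision += p
        recall += r
        fscore += fscoreDoc
        allIndex.append(index)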