# Example no. 1
# 0
class CreateProposerDocument():
    """Create the spreadsheet named by ``Options.proposerDocumentName``.

    Spreadsheet access goes through ``GspreadWrapper``; column names are read
    from ``Options`` and cell formats from ``Utils``.
    """

    def __init__(self):
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    def createDoc(self):
        """Create the document and fill its "Assessments" sheet.

        Loads the assessments through the wrapper, writes 'x' in the blank
        column for every assessment whose text is empty or whitespace only,
        and hands the dataframe to the wrapper together with the column
        widths and cell formats defined below. Prints the spreadsheet URL.
        """
        opt = self.opt
        styles = self.utils
        wrapper = self.gspreadWrapper

        print('Loading original...')
        wrapper.loadAssessmentsFile()
        proposerDf = wrapper.prepareDataFromExport()
        spreadsheet = wrapper.createDoc(opt.proposerDocumentName)

        # Columns of the sheet, in output order (19 columns, i.e. A..S)
        headings = [
            opt.proposalKeyCol,
            opt.ideaURLCol,
            opt.questionCol,
            opt.assessorCol,
            opt.assessmentCol,
            opt.ratingCol,
            opt.assessmentsIdCol,
            opt.tripletIdCol,
            opt.proposalIdCol,
            opt.blankCol,
            opt.topQualityCol,
            opt.profanityCol,
            opt.nonConstructiveCol,
            opt.scoreCol,
            opt.copyCol,
            opt.incompleteReadingCol,
            opt.notRelatedCol,
            opt.otherCol,
            opt.otherRationaleCol,
        ]

        print('Assign blanks...')

        def blankFlag(text):
            # 'x' flags an assessment text that is empty once stripped
            if text.strip() == "":
                return 'x'
            return ''

        proposerDf[opt.blankCol] = proposerDf[opt.assessmentCol].apply(
            blankFlag)

        print('Format columns...')
        widths = [
            ('A:D', 150),
            ('E', 400),
            ('F', 60),
            ('G:R', 30),
            ('S', 400),
        ]
        formats = [
            ('F:R', styles.counterFormat),
            ('A1:S1', styles.headingFormat),
            ('F1:R1', styles.verticalHeadingFormat),
            ('K2:K', styles.greenFormat),
            ('L2:L', styles.redFormat),
            ('M2:R', styles.yellowFormat),
            ('A2:E', styles.textFormat),
        ]

        wrapper.createSheetFromDf(spreadsheet, "Assessments", proposerDf,
                                  headings, widths, formats)
        print('Document for proposers created')
        print('Link: {}'.format(spreadsheet.url))
class CreateSimilarity():
    def __init__(self):
        """Create the collaborators, load the VCA data and reset the state.

        Constructing an instance calls ``GspreadWrapper.getVCAMasterData()``
        and installs the similarity callables via ``initSimilarity()``.
        """
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        # A pair is reported only when its score is strictly above this value
        self.similarityMinScore = 0.5
        # Recap per assessor, keyed by assessor; filled by findSimilarity()
        self.assessors = dict()
        # One record per reported pair; filled by findSimilarity()
        self.similarities = list()

        print('Load vca data...')
        self.gspreadWrapper.getVCAMasterData()

        self.initSimilarity()

    def createDoc(self):

        self.findSimilarity()

        for k in self.assessors:
            self.assessors[k]['similarity_other_assessors'] = ','.join(
                list(
                    dict.fromkeys(
                        self.assessors[k]['similarity_other_assessors'])))

        self.assessors = list(self.assessors.values())

        dfSimilarities = pd.DataFrame(self.similarities)
        dfAssessors = pd.DataFrame(self.assessors)

        dfSimilarities.to_csv('cache/sim5.csv')
        dfAssessors.to_csv('cache/sim_ass5.csv')

        spreadsheet = self.gspreadWrapper.createDoc('Similarity Analysis')

        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'Assessments',
            dfSimilarities, [
                'id A', 'id B', 'Assessor A', 'Assessor B', 'Note A', 'Note B',
                'Similarity Score'
            ],
            columnWidths=[('A:B', 50), ('C:D', 150), ('E:F', 300), ('G', 60)],
            formats=[('G', self.utils.counterFormat),
                     ('A1:G1', self.utils.headingFormat)])

        self.gspreadWrapper.createSheetFromDf(
            spreadsheet,
            'CAs',
            dfAssessors, [
                'Assessor', 'similarity_other_assessors',
                'similarity_count_others', 'similarity_count_self'
            ],
            columnWidths=[('A:B', 150), ('C:D', 60)],
            formats=[
                ('C:D', self.utils.counterFormat),
                ('A1:D1', self.utils.headingFormat),
                ('C1:D1', self.utils.verticalHeadingFormat),
            ])
        worksheet = spreadsheet.get_worksheet(0)
        spreadsheet.del_worksheet(worksheet)

        print('Link: {}'.format(spreadsheet.url))

    def initSimilarity(self):
        self.vectorize = lambda Text: TfidfVectorizer().fit_transform(
            Text).toarray()
        self.similarity = lambda doc1, doc2: cosine_similarity([doc1, doc2])

    def findSimilarity(self):
        data = self.gspreadWrapper.dfVca
        notes = list(data[self.opt.assessmentCol])
        ids = list(data[self.opt.assessmentsIdCol])
        vectors = self.vectorize(notes)
        s_vectors = list(zip(ids, vectors))
        plagiarism_results = set()
        progress = 0
        for assessor_a, text_vector_a in s_vectors:
            print("{} of {}".format(progress, len(s_vectors)))
            new_vectors = s_vectors.copy()
            current_index = new_vectors.index((assessor_a, text_vector_a))
            del new_vectors[current_index]
            for assessor_b, text_vector_b in new_vectors:
                sim_score = self.similarity(text_vector_a, text_vector_b)[0][1]
                assessor_pair = sorted((assessor_a, assessor_b))
                score = (assessor_pair[0], assessor_pair[1], sim_score)
                plagiarism_results.add(score)
            progress = progress + 1
        for res in plagiarism_results:
            if (res[2] > self.similarityMinScore):
                ass_0 = data.loc[data[self.opt.assessmentsIdCol] == res[0]][
                    self.opt.assessorCol].item()
                ass_1 = data.loc[data[self.opt.assessmentsIdCol] == res[1]][
                    self.opt.assessorCol].item()
                assessment_0 = data.loc[data[self.opt.assessmentsIdCol] ==
                                        res[0]][self.opt.assessmentCol].item()
                assessment_1 = data.loc[data[self.opt.assessmentsIdCol] ==
                                        res[1]][self.opt.assessmentCol].item()
                if (ass_0 not in self.assessors):
                    self.assessors[ass_0] = {
                        'Assessor': ass_0,
                        'similarity_other_assessors': [],
                        'similarity_count_others': 0,
                        'similarity_count_self': 0
                    }
                if (ass_0 != ass_1):
                    self.assessors[ass_0]['similarity_other_assessors'].append(
                        ass_1)
                    self.assessors[ass_0][
                        'similarity_count_others'] = self.assessors[ass_0][
                            'similarity_count_others'] + 1
                else:
                    self.assessors[ass_0][
                        'similarity_count_self'] = self.assessors[ass_0][
                            'similarity_count_self'] + 1
                self.similarities.append({
                    'id A': res[0],
                    'id B': res[1],
                    'Assessor A': ass_0,
                    'Assessor B': ass_1,
                    'Note A': assessment_0,
                    'Note B': assessment_1,
                    'Similarity Score': res[2]
                })
# Example no. 3
# 0
class createVCAAggregate():
    def __init__(self):
        """Create the collaborators and define the groups of mark columns."""
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

        opt = self.opt

        # Infringement marks, split by the card they can lead to
        self.redColumns = [opt.profanityCol]
        self.yellowColumns = [
            opt.nonConstructiveCol,
            opt.scoreCol,
            opt.copyCol,
            opt.incompleteReadingCol,
            opt.notRelatedCol,
            opt.otherCol,
        ]
        # Profanity first, followed by the yellow-card infringements
        self.infringementsColumns = self.redColumns + self.yellowColumns

        self.positiveColumns = [opt.fairCol, opt.topQualityCol]
        # Only "abstain" is neutral; lenient/strict are kept apart as
        # indicator columns
        self.neutralColumns = [opt.abstainCol]
        self.indicatorColumns = [opt.lenientCol, opt.strictCol]

        self.feedbackColumns = self.infringementsColumns + self.positiveColumns
        self.allColumns = (self.feedbackColumns + self.neutralColumns +
                           self.indicatorColumns)

    def prepareBaseData(self):
        self.gspreadWrapper.getVCAMasterData()
        self.dfVca = self.gspreadWrapper.dfVca.set_index('id')
        # Set all counters to 0
        self.dfVca[self.opt.noVCAReviewsCol] = 0
        self.dfVca[self.opt.yellowCardCol] = 0
        self.dfVca[self.opt.redCardCol] = 0
        for col in self.allColumns:
            self.dfVca[col] = 0

        self.gspreadWrapper.getVCAMasterAssessors()
        self.gspreadWrapper.dfVcaAssessors[self.opt.yellowCardCol] = 0
        self.gspreadWrapper.dfVcaAssessors[self.opt.redCardCol] = 0

        self.gspreadWrapper.getProposersData()

    def loadVCAsFiles(self):
        self.prepareBaseData()
        self.vcaData = []
        self.vcaDocs = []
        for vcaFile in self.opt.VCAsFiles:
            print(vcaFile)
            vcaDocument = self.gspreadWrapper.gc.open_by_key(vcaFile)
            try:
                vcaSheet = vcaDocument.worksheet("Assessments")
            except:
                vcaSheet = vcaDocument.get_worksheet(0)
            data = pd.DataFrame(vcaSheet.get_all_records())
            data.set_index('id', inplace=True)
            self.vcaData.append(data)
            self.vcaDocs.append(vcaDocument)
            #sleep(35)

    def createDoc(self):
        """Aggregate all VCA reviews and publish the aggregate document.

        Steps:
          1. Load the master data and every VCA file (``loadVCAsFiles``).
          2. For each master assessment, add up the marks found in the VCA
             files whose feedback is valid, then compute its yellow/red
             cards with ``calculateCards``.
          3. Keep as valid the assessments without yellow cards whose
             assessor has no red card on any assessment.
          4. Average the rating of the valid assessments per proposal.
          5. Dump the intermediate tables as CSV under ``cache/`` and write
             the sheets 'Valid Assessments', 'Community Advisors',
             'Excluded Assessments', 'Veteran Community Advisors' and
             'Proposals' to a new document.

        Side effects: sets ``self.redCardsAssessors`` and updates
        ``self.dfVca`` in place.
        """
        import os

        opt = self.opt
        utils = self.utils
        wrapper = self.gspreadWrapper

        self.loadVCAsFiles()
        # Loop over master ids as reference
        for assessmentId, row in self.dfVca.iterrows():
            # Loop over all vca files
            for vcaDf in self.vcaData:
                if assessmentId not in vcaDf.index:
                    continue
                locAss = vcaDf.loc[assessmentId]
                if self.checkIntegrity(assessmentId, row, locAss) is False:
                    # The VCA copy disagrees with the master: stop reading
                    # the remaining VCA files for this assessment.
                    print('Error')
                    break

                good = self.goodFeedback(locAss)
                bad = self.badFeedback(locAss)
                neutral = self.neutralFeedback(locAss)
                if not self.isVCAfeedbackValid(locAss, good, bad, neutral):
                    continue
                # Only good/bad feedback counts as a review
                if good or bad:
                    self.dfVca.loc[assessmentId, opt.noVCAReviewsCol] += 1
                for col in self.allColumns:
                    colVal = self.checkIfMarked(locAss, col)
                    if colVal > 0:
                        self.dfVca.loc[assessmentId, col] += colVal

            (yellow, red) = self.calculateCards(self.dfVca.loc[assessmentId])
            self.dfVca.loc[assessmentId, opt.yellowCardCol] = yellow
            self.dfVca.loc[assessmentId, opt.redCardCol] = red

        # Extract red card assessors and update List
        redCards = self.dfVca[self.dfVca[opt.redCardCol] > 0]
        self.redCardsAssessors = list(redCards[opt.assessorCol].unique())

        # Select valid assessments (no red card assessors, no yellow card
        # assessments). The explicit copy makes the column assignments below
        # act on an independent frame instead of a filtered selection.
        validAssessments = self.dfVca[(
            (self.dfVca[opt.yellowCardCol] == 0)
            & ~self.dfVca[opt.assessorCol].isin(self.redCardsAssessors))].copy()
        validAssessments[opt.assessmentsIdCol] = validAssessments.index

        # Add Proposal title (getting it from Proposer doc). The id -> title
        # table is built once; the first proposer row wins for each id.
        proposalKeys = {}
        for proposerId, proposalKey in zip(
                wrapper.dfProposers[opt.assessmentsIdCol],
                wrapper.dfProposers[opt.proposalKeyCol]):
            proposalKeys.setdefault(proposerId, str(proposalKey))
        validAssessments[opt.proposalKeyCol] = [
            proposalKeys[assessmentId]
            for assessmentId in validAssessments[opt.assessmentsIdCol]
        ]

        # create group for final scores
        finalProposals = validAssessments.groupby(
            opt.proposalKeyCol, as_index=False)[opt.ratingCol].mean()

        # generate Assessor Recap
        assessors = self.assessorRecap()
        # generate nonValidAssessment recap
        nonValidAssessments = self.nonValidAssessments(validAssessments)

        # Generate Doc (to_csv does not create missing directories)
        os.makedirs('cache', exist_ok=True)
        validAssessments.to_csv('cache/valid.csv')
        nonValidAssessments.to_csv('cache/non-valid.csv')
        assessors.to_csv('cache/assessors.csv')
        spreadsheet = wrapper.createDoc(opt.VCAAggregateFileName)

        # Print valid assessments (26 columns, i.e. A..Z)
        assessmentsHeadings = [
            opt.assessmentsIdCol, opt.tripletIdCol,
            opt.proposalKeyCol, opt.ideaURLCol,
            opt.proposalIdCol, opt.questionCol,
            opt.questionIdCol, opt.ratingCol, opt.assessorCol,
            opt.assessmentCol, opt.proposerMarkCol, opt.fairCol,
            opt.topQualityCol, opt.abstainCol, opt.strictCol,
            opt.lenientCol, opt.profanityCol,
            opt.nonConstructiveCol, opt.scoreCol, opt.copyCol,
            opt.incompleteReadingCol, opt.notRelatedCol,
            opt.otherCol, opt.noVCAReviewsCol,
            opt.yellowCardCol, opt.redCardCol
        ]
        assessmentsWidths = [('A', 40), ('B', 60), ('C', 120), ('D', 120),
                             ('E', 40), ('F', 200), ('G', 40), ('H', 60),
                             ('I', 120), ('J', 400), ('K:Z', 30)]
        # NOTE(review): the heading format stops at X1 although the headings
        # span A..Z, and only L (not M) is green — confirm these ranges.
        assessmentsFormats = [
            ('H:H', utils.counterFormat),
            ('J:J', utils.noteFormat),
            ('K:Z', utils.counterFormat),
            ('A1:X1', utils.headingFormat),
            ('B1', utils.verticalHeadingFormat),
            ('E1', utils.verticalHeadingFormat),
            ('G1:H1', utils.verticalHeadingFormat),
            ('K1:Z1', utils.verticalHeadingFormat),
            ('L2:L', utils.greenFormat),
            ('Q2:Q', utils.redFormat),
            ('R2:W', utils.yellowFormat),
            ('Y2:Y', utils.yellowFormat),
            ('Z2:Z', utils.redFormat),
        ]

        wrapper.createSheetFromDf(spreadsheet,
                                  'Valid Assessments',
                                  validAssessments,
                                  assessmentsHeadings,
                                  columnWidths=assessmentsWidths,
                                  formats=assessmentsFormats)

        # Write sheet with CAs summary
        wrapper.createSheetFromDf(
            spreadsheet,
            'Community Advisors',
            assessors, [
                'assessor', 'total', 'blanks', 'blankPercentage', 'excluded',
                'Yellow Card', 'Red Card', 'Constructive Feedback', 'note'
            ],
            columnWidths=[('A', 140), ('B:D', 60), ('E', 100), ('F:H', 40),
                          ('I', 200)],
            formats=[
                ('B:C', utils.counterFormat),
                ('F:H', utils.counterFormat),
                ('D2:D', utils.percentageFormat),
                ('A1:E1', utils.headingFormat),
                ('B1:D1', utils.verticalHeadingFormat),
                ('F1:H1', utils.verticalHeadingFormat),
                ('F2:F', utils.yellowFormat),
                ('G2:G', utils.redFormat),
                ('H2:H', utils.greenFormat),
            ])

        # Print non valid assessments -> reason, extract from proposer Doc
        # (26 columns, i.e. A..Z)
        nonAssessmentsHeadings = [
            opt.assessmentsIdCol, opt.tripletIdCol,
            opt.ideaURLCol, opt.proposalIdCol, opt.questionCol,
            opt.ratingCol, opt.assessorCol, opt.assessmentCol,
            opt.blankCol, opt.proposerMarkCol, opt.fairCol,
            opt.topQualityCol, opt.abstainCol, opt.strictCol,
            opt.lenientCol, opt.profanityCol,
            opt.nonConstructiveCol, opt.scoreCol, opt.copyCol,
            opt.incompleteReadingCol, opt.notRelatedCol,
            opt.otherCol, opt.noVCAReviewsCol,
            opt.yellowCardCol, opt.redCardCol, 'reason'
        ]
        nonAssessmentsWidths = [('A', 40), ('B', 60), ('C', 120), ('D', 40),
                                ('E', 200), ('F', 40), ('G', 120), ('H', 400),
                                ('I:Y', 30), ('Z', 200)]
        # NOTE(review): the heading format stops at W1 although the headings
        # span A..Z, and the counter format on G lands on the assessor
        # column (the rating is in F here) — confirm these ranges.
        nonAssessmentsFormats = [
            ('G:G', utils.counterFormat),
            ('H:H', utils.noteFormat),
            ('I:Y', utils.counterFormat),
            ('A1:W1', utils.headingFormat),
            ('B1', utils.verticalHeadingFormat),
            ('D1', utils.verticalHeadingFormat),
            ('F1:G1', utils.verticalHeadingFormat),
            ('I1:Y1', utils.verticalHeadingFormat),
            ('K2:L', utils.greenFormat),
            ('P2:P', utils.redFormat),
            ('Q2:V', utils.yellowFormat),
            ('X2:X', utils.yellowFormat),
            ('Y2:Y', utils.redFormat),
        ]

        wrapper.createSheetFromDf(
            spreadsheet,
            'Excluded Assessments',
            nonValidAssessments,
            nonAssessmentsHeadings,
            columnWidths=nonAssessmentsWidths,
            formats=nonAssessmentsFormats)

        # Print vca recap: title and link of every VCA document read
        allVcas = [{'title': vcaDoc.title, 'link': vcaDoc.url}
                   for vcaDoc in self.vcaDocs]
        allVcasDf = pd.DataFrame(allVcas)

        wrapper.createSheetFromDf(spreadsheet,
                                  'Veteran Community Advisors',
                                  allVcasDf)

        proposalsWidths = [('A', 300), ('B', 60)]
        proposalsFormats = [('B:B', utils.counterFormat),
                            ('A:A', utils.noteFormat),
                            ('A1:B1', utils.headingFormat),
                            ('B1', utils.verticalHeadingFormat)]

        wrapper.createSheetFromDf(
            spreadsheet,
            'Proposals',
            finalProposals, [opt.proposalKeyCol, opt.ratingCol],
            columnWidths=proposalsWidths,
            formats=proposalsFormats)

        print('Aggregated Document created')
        print('Link: {}'.format(spreadsheet.url))

    def nonValidAssessments(self, validAssessments):
        self.gspreadWrapper.getProposersData()
        dfProposers = self.gspreadWrapper.dfProposers.set_index('id')
        dfProposers[self.opt.assessmentsIdCol] = dfProposers.index
        for col in self.allColumns:
            dfProposers[col] = ''
        dfProposers[self.opt.proposerMarkCol] = ''
        dfProposers[self.opt.otherRationaleCol] = ''
        nonValidAssessments = dfProposers[~dfProposers[
            self.opt.assessmentsIdCol].isin(validAssessments.index)].copy()
        for id, row in nonValidAssessments.iterrows():
            if (id in self.dfVca.index):
                for col in self.allColumns:
                    nonValidAssessments.loc[id, col] = int(self.dfVca.loc[id,
                                                                          col])
                nonValidAssessments.loc[id, self.opt.yellowCardCol] = int(
                    self.dfVca.loc[id, self.opt.yellowCardCol])
                nonValidAssessments.loc[id, self.opt.redCardCol] = int(
                    self.dfVca.loc[id, self.opt.redCardCol])
                nonValidAssessments.loc[id, self.opt.noVCAReviewsCol] = int(
                    self.dfVca.loc[id, self.opt.noVCAReviewsCol])
        nonValidAssessments['reason'] = nonValidAssessments.apply(
            self.describeReason, axis=1)
        nonValidAssessments.fillna('', inplace=True)
        return nonValidAssessments

    def describeReason(self, row):
        reason = []
        if (row[self.opt.blankCol] == 'x'):
            reason.append(self.opt.blankCol)
        if (row['id'] in self.dfVca.index):
            tot = row[self.opt.noVCAReviewsCol]
            for col in self.infringementsColumns:
                if (tot > 0):
                    if ((row[col] / tot) >= self.opt.cardLimit):
                        reason.append(col)
        excludedAssessors = list(self.gspreadWrapper.dfVcaAssessors[
            self.gspreadWrapper.dfVcaAssessors['excluded'] == 'TRUE']
                                 ['assessor'])
        if (row[self.opt.assessorCol] in excludedAssessors):
            reason.append('Assessor excluded')
        return ', '.join(reason)

    def assessorRecap(self):
        self.gspreadWrapper.dfVcaAssessors.loc[
            self.gspreadWrapper.dfVcaAssessors['assessor'].
            isin(self.redCardsAssessors), 'excluded'] = 'TRUE'
        self.gspreadWrapper.dfVcaAssessors.loc[
            self.gspreadWrapper.dfVcaAssessors['assessor'].
            isin(self.redCardsAssessors), 'note'] = "red card"

        # Extract assessors
        locAssessors = self.dfVca.groupby(self.opt.assessorCol).agg(
            constructiveFeedback=(self.opt.topQualityCol, 'sum'),
            red=(self.opt.redCardCol, 'sum'),
            yellow=(self.opt.yellowCardCol, 'sum'),
        )
        for id, row in locAssessors.iterrows():
            self.gspreadWrapper.dfVcaAssessors.loc[
                self.gspreadWrapper.dfVcaAssessors['assessor'] == id,
                self.opt.redCardCol] = row['red']
            self.gspreadWrapper.dfVcaAssessors.loc[
                self.gspreadWrapper.dfVcaAssessors['assessor'] == id,
                self.opt.yellowCardCol] = row['yellow']
            self.gspreadWrapper.dfVcaAssessors.loc[
                self.gspreadWrapper.dfVcaAssessors['assessor'] == id,
                self.opt.topQualityCol] = row['constructiveFeedback']

        self.gspreadWrapper.dfVcaAssessors.fillna('', inplace=True)
        return self.gspreadWrapper.dfVcaAssessors

    def checkIntegrity(self, id, ass1, ass2):
        if ((ass1[self.opt.proposalIdCol] != ass2[self.opt.proposalIdCol]) or
            (ass1[self.opt.questionIdCol] != ass2[self.opt.questionIdCol])
                or (ass1[self.opt.ratingCol] != ass2[self.opt.ratingCol])
                or (ass1[self.opt.assessorCol] != ass2[self.opt.assessorCol])):
            print("Something wrong with assessment {}".format(id))
            return False
        return True

    def checkIfMarked(self, row, column):
        """Return 1 when the cell ``row[column]`` holds a mark, else 0.

        A cell counts as marked when it contains anything other than
        whitespace. Missing values (``None`` or NaN) count as unmarked.
        Non-string values such as numbers are converted to text first, so a
        numeric cell no longer raises ``AttributeError`` on ``.strip()``.
        """
        value = row[column]
        # NaN is the only value that differs from itself
        if value is None or value != value:
            return 0
        return 1 if str(value).strip() != '' else 0

    def calculateCards(self, row):
        yellow = 0
        red = 0
        tot = row[self.opt.noVCAReviewsCol]
        if (tot >= self.opt.minimumVCA):
            if ((row[self.opt.profanityCol] / tot) >= self.opt.cardLimit):
                red = red + 1
            for col in self.yellowColumns:
                if ((row[col] / tot) >= self.opt.cardLimit):
                    yellow = yellow + 1
        return (yellow, red)

    def goodFeedback(self, row):
        for col in self.positiveColumns:
            if (self.checkIfMarked(row, col) > 0):
                return True
        return False

    def badFeedback(self, row):
        for col in self.infringementsColumns:
            if (self.checkIfMarked(row, col) > 0):
                return True
        return False

    def badValid(self, row):
        if ((self.checkIfMarked(row, self.opt.otherCol) == 1) and
            (self.checkIfMarked(row, self.opt.otherRationaleCol) == 0)):
            return False
        return True

    def neutralFeedback(self, row):
        for col in self.neutralColumns:
            if (self.checkIfMarked(row, col) > 0):
                return True
        return False

    def isVCAfeedbackValid(self, row, good, bad, neutral):
        if (bad):
            if (self.badValid(row) is False):
                return False
        if (sum([good, bad, neutral]) <= 1):
            return True
        return False
# Example no. 4
# 0
class CreateVCAMaster():
    """Create the document named by ``Options.VCAMasterFileName``.

    Spreadsheet access goes through ``GspreadWrapper``; column names and
    thresholds are read from ``Options`` and cell formats from ``Utils``.
    """

    def __init__(self):
        self.opt = Options()
        self.utils = Utils()
        self.gspreadWrapper = GspreadWrapper()

    def createDoc(self):
        """Create the master document with its two sheets.

        'Assessments' holds the proposers' assessments after removing those
        written by excluded assessors and those marked blank; all infringement
        marks, the top-quality mark and the "other" rationale are cleared and
        the proposer mark is normalised to 'x' or ''.

        'Community Advisors' holds, per assessor, the number of assessments,
        the number of blank ones, the blank ratio and whether that ratio
        reaches ``opt.allowedBlankPerAssessor``.

        Prints the URL of the created spreadsheet.
        """
        opt = self.opt
        utils = self.utils
        wrapper = self.gspreadWrapper

        spreadsheet = wrapper.createDoc(opt.VCAMasterFileName)

        # Define headings for VCAMasterFile (23 columns, i.e. A..W)
        print('Define headings...')
        headings = [
            opt.assessmentsIdCol, opt.tripletIdCol,
            opt.ideaURLCol, opt.proposalIdCol, opt.questionCol,
            opt.questionIdCol, opt.ratingCol, opt.assessorCol,
            opt.assessmentCol, opt.proposerMarkCol, opt.fairCol,
            opt.topQualityCol, opt.abstainCol, opt.strictCol,
            opt.lenientCol, opt.profanityCol,
            opt.nonConstructiveCol, opt.scoreCol, opt.copyCol,
            opt.incompleteReadingCol, opt.notRelatedCol,
            opt.otherCol, opt.otherRationaleCol
        ]

        print('Load proposers flagged reviews...')
        wrapper.getProposersData()
        proposers = wrapper.dfProposers

        # Extract assessors: number of assessments and of blank ones
        assessors = proposers.groupby(opt.assessorCol).agg(
            total=(opt.assessmentCol, 'count'),
            blanks=(opt.blankCol, (lambda x: (x == 'x').sum())))

        # Calculate and extract assessors by blanks
        assessors['blankPercentage'] = assessors['blanks'] / assessors['total']
        assessors['excluded'] = (assessors['blankPercentage'] >=
                                 opt.allowedBlankPerAssessor)

        # Exclude assessors that are also proposers (get from options).
        # NOTE(review): these assessors are dropped from the assessments
        # below, but their 'excluded' flag in the 'Community Advisors' sheet
        # is left unchanged — confirm whether it should be set as well.
        includedAssessors = [
            assessor
            for assessor in assessors[~assessors['excluded']].index.tolist()
            if assessor not in opt.excludedCAProposers
        ]

        assessors['assessor'] = assessors.index

        # Filter out assessments made by excluded assessors and blank
        # assessments. The explicit copy makes the column assignments below
        # act on an independent frame instead of a filtered selection.
        keep = (proposers[opt.assessorCol].isin(includedAssessors)
                & (proposers[opt.blankCol] != 'x'))
        validAssessments = proposers[keep].copy()

        # Remove proposers marks
        criteria = wrapper.infringementsColumns + [
            opt.topQualityCol, opt.otherRationaleCol
        ]
        for col in criteria:
            validAssessments[col] = ''

        # Assign 'x' for marks: any truthy proposer mark becomes 'x'
        validAssessments[opt.proposerMarkCol] = validAssessments[
            opt.proposerMarkCol].apply(lambda r: 'x' if r else '')

        # Write sheet with assessments
        assessmentsWidths = [('A', 40), ('B', 60), ('D', 40), ('E', 200),
                             ('F', 40), ('G', 60), ('H', 120), ('I', 400),
                             ('J:V', 30), ('W', 300)]
        assessmentsFormats = [
            ('G:G', utils.counterFormat),
            ('I:I', utils.noteFormat),
            ('J:V', utils.counterFormat),
            ('A1:W1', utils.headingFormat),
            ('B1', utils.verticalHeadingFormat),
            ('D1', utils.verticalHeadingFormat),
            ('F1:G1', utils.verticalHeadingFormat),
            ('J1:V1', utils.verticalHeadingFormat),
            ('K2:L', utils.greenFormat),
            ('P2:P', utils.redFormat),
            ('Q2:V', utils.yellowFormat),
        ]

        wrapper.createSheetFromDf(spreadsheet,
                                  'Assessments',
                                  validAssessments,
                                  headings,
                                  columnWidths=assessmentsWidths,
                                  formats=assessmentsFormats)

        # Write sheet with CAs summary
        wrapper.createSheetFromDf(
            spreadsheet,
            'Community Advisors',
            assessors,
            ['assessor', 'total', 'blanks', 'blankPercentage', 'excluded'],
            columnWidths=[('A', 140), ('B:D', 60), ('E', 100)],
            formats=[
                ('B:C', utils.counterFormat),
                ('D2:D', utils.percentageFormat),
                ('A1:E1', utils.headingFormat),
                ('B1:D1', utils.verticalHeadingFormat),
            ])
        print('Master Document for vCAs created')
        print('Link: {}'.format(spreadsheet.url))