示例#1
0
    def __str__(self):
        """Return the coin's display name, e.g. "Two Pound Coin" or "Five Pence Coin".

        Values of one pound or more are named after their whole-pound amount;
        smaller values are named after their amount in pence.
        """
        if self.original_value >= 1:
            amount, unit = int(self.original_value), "Pound"
        else:
            # Round before truncating: a binary float such as 0.29 * 100 is
            # 28.999999999999996, which a bare int() would turn into 28.
            amount, unit = int(round(self.original_value * 100)), "Pence"
        # str() keeps .capitalize() safe whatever type apnumber hands back.
        label = str(humanize.apnumber(amount)).capitalize()
        return "{} {} Coin".format(label, unit)
    def get(self):
        """Return aggregate statistics about the media file queue.

        Walks ``mp.mfq`` once and derives: how many files were processed
        today, the average number processed per day of processing time, the
        average processing time, the average input and transcoded file sizes,
        and the input-to-processed size ratio.

        Returns:
            dict: the statistics. When the parsed request arguments have
            ``humanize`` set, every value is passed through the matching
            humanize formatter; otherwise the per-day average and the
            processing time are stringified and the rest are raw numbers.
        """
        args = parser.parse_args()

        start_of_the_day = datetime.combine(date.today(), time())
        end_of_the_day = start_of_the_day + timedelta(days=1) - timedelta(
            microseconds=1)

        processing_time = timedelta(microseconds=0)
        count = 0
        processed_count = 0
        processed_today = 0
        total_input_file_size = 0
        total_processed_file_size = 0
        for media_file in mp.mfq:
            # A file counts as processed once it has both timestamps.
            if media_file.date_started and media_file.date_finished:
                processed_count += 1
                processing_time += (media_file.date_finished -
                                    media_file.date_started)

                if (media_file.date_started >= start_of_the_day
                        and media_file.date_finished <= end_of_the_day):
                    processed_today += 1
                if media_file.transcoded_file_size:
                    total_processed_file_size += media_file.transcoded_file_size
            total_input_file_size += media_file.file_size
            count += 1

        average_processing_time = QueueStats.mean(processing_time,
                                                  processed_count)
        average_processed_per_day = QueueStats.mean(
            processed_count,
            (processing_time.total_seconds() / (60 * 60 * 24)))
        # Guard the plain divisions: an empty queue, or one where nothing has
        # been processed yet, used to raise ZeroDivisionError here.
        if processed_count:
            average_processed_file_size = (total_processed_file_size /
                                           processed_count)
        else:
            average_processed_file_size = 0
        average_input_file_size = total_input_file_size / count if count else 0
        input_to_processed_file_size_ratio = float(
            average_input_file_size) / max(average_processed_file_size, 1)

        if args.humanize:
            return {
                'processed_today': apnumber(processed_today),
                'average_processed_per_day':
                apnumber(average_processed_per_day),
                'average_processing_time':
                naturaldelta(average_processing_time),
                'average_processed_file_size':
                naturalsize(average_processed_file_size),
                'average_input_file_size':
                naturalsize(average_input_file_size),
                'input_to_processed_file_size_ratio':
                fractional(input_to_processed_file_size_ratio),
            }
        return {
            'processed_today': processed_today,
            'average_processed_per_day': str(average_processed_per_day),
            'average_processing_time': str(average_processing_time),
            'average_processed_file_size': average_processed_file_size,
            'average_input_file_size': average_input_file_size,
            'input_to_processed_file_size_ratio':
            input_to_processed_file_size_ratio,
        }
示例#3
0
def about():
    """Render ``about.html`` with the whole years elapsed since
    ``DATE_STARTED_PROGRAMMING``, formatted by ``apnumber``."""
    elapsed = relativedelta(datetime.utcnow(), DATE_STARTED_PROGRAMMING)
    return render_template('about.html',
                           years_programming=apnumber(elapsed.years))
示例#4
0
def location():
    """Build a response that gathers ``ID_NUM_DIGITS`` digits while speaking
    a prompt for the payphone identification number."""
    prompt = 'Please enter the {} digit payphone identification number'.format(
        humanize.apnumber(ID_NUM_DIGITS))
    res = Response()
    # The gathered digits are submitted to the 'id_recieved' endpoint.
    with res.gather(numDigits=ID_NUM_DIGITS,
                    action=url_for('id_recieved')) as g:
        g.say(prompt)
    return res
示例#5
0
 def numericalize(self, amt):
     """Return a human-friendly string for the number ``amt``.

     * ``amt >= 1e6``: ``humanize.intword`` of the integer part
     * ``100 < amt < 1e6``: ``humanize.intcomma`` of the integer part
     * other whole numbers: ``humanize.apnumber``
     * anything else: ``humanize.fractional``
     """
     if amt >= 1e6:
         return humanize.intword(int(amt))
     if amt > 100.0:
         # Already known to be below one million at this point.
         return humanize.intcomma(int(amt))
     whole = isinstance(amt, int) or amt.is_integer()
     if whole:
         return humanize.apnumber(int(amt))
     return humanize.fractional(amt)
示例#6
0
def location():
    """Build a response that gathers ``ID_NUM_DIGITS`` digits while speaking
    (in ``en-AU``) a prompt for the payphone identification number."""
    res = Response()
    gather = res.gather(numDigits=str(ID_NUM_DIGITS),
                        action=url_for('id_recieved'))
    with gather as g:
        digits_in_words = humanize.apnumber(ID_NUM_DIGITS)
        prompt = ('Please enter the {} digit payphone identification number'
                  .format(digits_in_words))
        g.say(prompt, language='en-AU')
    return res
 def to_json(value):
     """Convert ``value`` into a JSON-friendly representation.

     Datetimes and ints are humanized when the surrounding ``humanize`` flag
     is truthy; otherwise datetimes become ISO strings and ints pass through.
     UUIDs are stringified, ``NodeState`` members yield their ``.value`` and
     every other type falls back to ``str()``.
     """
     if isinstance(value, datetime):
         if humanize:
             return naturaltime(value)
         return value.isoformat()
     if isinstance(value, uuid.UUID):
         return str(value)
     if isinstance(value, NodeState):
         return value.value
     # NOTE(review): bool is a subclass of int, so True/False take this branch.
     if isinstance(value, int):
         if humanize:
             return apnumber(value)
         return value
     return str(value)
示例#8
0
        notifier.notify_email(new_email)
    notifier.loop.run()


def get_date(d):
    """Parse the date string ``d`` according to the module-level ``date_format``."""
    parsed = datetime.strptime(d, date_format)
    return parsed


def prepare_emails(emails):
    """Return ``[subject, sender, age]`` rows for display.

    ``emails`` is an iterable of 3-item records whose third item is a date
    string. Rows are ordered by that raw third item, descending, and the date
    is replaced by its ``naturaltime`` distance from the current local time.
    """
    now = datetime.now(tzlocal())
    rows = []
    for subject, sender, sent in sorted(emails, key=itemgetter(2),
                                        reverse=True):
        age = naturaltime(now - get_date(sent))
        rows.append([subject, sender, age])
    return rows


if __name__ == '__main__':
    # Load the cached and the current mails, then work out which are new.
    cache = get_cache()
    emails = get_emails()
    prepared_emails = prepare_emails(emails)
    new_emails = [mail for mail in emails if mail not in cache]

    # Headline, followed by a table when at least one record is truthy.
    headline = '{} new(s) message(s) in INBOX'.format(apnumber(len(emails)))
    print(headline)
    if any(emails):
        table = tabulate(prepared_emails, headers=['SUBJECT', 'FROM', 'DATE'])
        print(table)

    # Store the current mails in the cache before notifying.
    put_cache(emails)
    if any(new_emails):
        # Notifications run inside a daemon context.
        with DaemonContext():
            notify(new_emails)
示例#9
0
def run(args, browser, db, urls):
    """Query the class list for one course and print each section's enrollment.

    Fetches the class-list page, fills in the ``CRNQueryResults`` form with
    ``args.term``, ``args.subject`` and ``args.course_number``, submits it and
    parses the resulting table into one dict per section.

    Args:
        args: parsed arguments providing ``term``, ``subject`` and
            ``course_number``.
        browser: browser object used for ``get``/``submit`` and logging.
        db: unused here; kept for interface compatibility.
        urls: object whose ``map`` turns a page identifier into a URL.

    Raises:
        ValueError: a request came back with a non-ok result.
        banner.LoginError: the page served a login form instead.
        banner.ParseError: the query form was missing or ambiguous.

    Exits the process with status 1 when no result table is found or the
    connection fails.
    """
    courses = []

    try:
        browser.log.debug('Checking for existing login: %s' %
                          urls.map(banner.classlist.class_list))
        result = browser.get(urls.map(banner.classlist.class_list))
        if not result.ok:
            raise ValueError(result)
        soup = result.soup

        if soup.find('form', {'name': 'loginform'}):
            raise banner.LoginError()

        # .get() rather than ['action']: a form without an action attribute
        # must simply not match instead of raising KeyError.
        forms = soup.findAll(lambda i: i.name == 'form' and i.get(
            'action', '').endswith('CRNQueryResults'))

        if len(forms) != 1:
            message = 'unable to parse CRNQueryResults from page: '

            if len(forms) == 0:
                message += 'no such form'
            else:
                message += 'too many forms'

            raise banner.ParseError(message, soup)

        (form, ) = forms

        form.find('input', {'name': 'p_term'})['value'] = args.term

        subject = form.find('select', {'name': 'p_subj'})
        subject.find('option', {'value': args.subject})['selected'] = True

        form.find('input', {'name': 'p_crse'})['value'] = args.course_number

        browser.log.info('Querying for %s %s @ %s' %
                         (args.term, args.course_number, course_query))
        result = browser.submit(form, urls.map(course_query))
        if not result.ok:
            raise ValueError(result)

        tables = result.soup.findAll(
            lambda i: i.name == 'table' and 'nowrap' in i.attrs)

        if len(tables) == 0:
            sys.stderr.write('No instances of %s %s found in term %s\n' %
                             (args.subject, args.course_number, args.term))
            sys.stderr.write('Did you mean to use `--term 20xx0x`?\n')
            sys.exit(1)

        (table, ) = tables
        rows = table.select('tr')
        # The second row holds the column headers; data rows follow it.
        headers = [col.text for col in rows[1].select('th')]

        # Columns 5-7 and 8-10 repeat the same captions for enrollment and
        # waitlist, so suffix them to keep the dict keys unique.
        for i in range(5, 8):
            headers[i] = headers[i] + ' Enrollment'

        for i in range(8, 11):
            headers[i] = headers[i] + ' Waitlist'

        for row in rows[2:]:
            row = dict(zip(headers, (col.text for col in row.select('td'))))
            courses.append(row)

    except requests.exceptions.ConnectionError as e:
        # Exceptions have no .message attribute on Python 3; format the
        # exception itself instead.
        sys.stderr.write('Error: %s\n' % e)
        sys.exit(1)

    print('Found %s course %s:' % (
        humanize.apnumber(len(courses)),
        inflect.engine().plural('section', len(courses)),
    ))

    for course in courses:
        print('%6s %5s: %-20s   %3d/%3d registered (%3d remaining)' % (
            course['Term'],
            course['CRN'],
            course['Course'],
            int(course['Actual Enrollment']),
            int(course['Max Enrollment']),
            int(course['Remaining Enrollment']),
        ))
示例#10
0
 async def normal_state(self):
     """Handle one incoming message while in the normal state.

     Returns immediately when ``check_reset()`` is truthy. Otherwise the
     message content is matched against each command pattern in turn
     (hello, help, disclaimer, class info, start watching, stop watching,
     watchlist); the first match sends its reply and returns. Input that
     matches nothing gets the invalid-command reply.
     """

     def _class_reply_params(class_info):
         # Format parameters shared by every class-specific reply.
         fmt_params = class_info._asdict()
         fmt_params['human_term'] = get_human_readable_term(
             class_info.term)
         fmt_params['human_timedelta'] = humanize.naturaltime(
             datetime.timedelta(
                 seconds=class_info.seats_updated_seconds_ago))
         return fmt_params

     if await self.check_reset():
         return
     match = constants.CMD_HELLO.match(self.msg_content)
     if match:
         await self.reply(constants.USER_MSG_HELLO, self.author.mention)
         return
     match = constants.CMD_HELP.match(self.msg_content)
     if match:
         await self.reply(constants.USER_MSG_HELP)
         return
     match = constants.CMD_DISCLAIMER.match(self.msg_content)
     if match:
         await self.reply(constants.USER_MSG_DISCLAIMER)
         return
     match = constants.CMD_CLASS_INFO.match(self.msg_content)
     if match:
         term, crn = get_term_and_crn_from_match(match)
         class_info = await get_class_info(self.school_id, crn, term=term)
         if class_info is not None:
             message = constants.USER_MSG_CLASS_ON_WATCHLIST
             try:
                 # StopIteration means there is no watchlist record.
                 next(
                     db.execute(constants.SQL_GET_WATCHLIST_RECORD,
                                (self.user_id, class_info.db_id)))
             except StopIteration:
                 message = constants.USER_MSG_CLASS_NOT_ON_WATCHLIST
             await self.reply(message, **_class_reply_params(class_info))
         else:
             await self.reply(constants.USER_MSG_CLASS_NOT_FOUND)
         return
     match = constants.CMD_CLASS_START_WATCHING.match(self.msg_content)
     if match:
         term, crn = get_term_and_crn_from_match(match)
         class_info = await get_class_info(self.school_id, crn, term=term)
         if class_info is not None:
             message = constants.USER_MSG_CLASS_ADDED_TO_WATCHLIST
             try:
                 next(
                     db.execute(constants.SQL_GET_WATCHLIST_RECORD,
                                (self.user_id, class_info.db_id)))
             except StopIteration:
                 # Not watched yet: insert the record.
                 with db:
                     db.execute(constants.SQL_ADD_TO_WATCHLIST,
                                (self.user_id, class_info.db_id))
             else:
                 message = constants.USER_MSG_CLASS_ALREADY_ON_WATCHLIST
             await self.reply(message, **_class_reply_params(class_info))
         else:
             await self.reply(constants.USER_MSG_CLASS_NOT_FOUND)
         return
     match = constants.CMD_CLASS_STOP_WATCHING.match(self.msg_content)
     if match:
         term, crn = get_term_and_crn_from_match(match)
         class_info = await get_class_info(self.school_id, crn, term=term)
         if class_info is not None:
             message = constants.USER_MSG_CLASS_REMOVED_FROM_WATCHLIST
             try:
                 watchlist_record, = next(
                     db.execute(constants.SQL_GET_WATCHLIST_RECORD,
                                (self.user_id, class_info.db_id)))
             except StopIteration:
                 message = constants.USER_MSG_CLASS_NOT_ON_WATCHLIST
             else:
                 with db:
                     db.execute(constants.SQL_REMOVE_FROM_WATCHLIST,
                                (watchlist_record, ))
             await self.reply(message, **_class_reply_params(class_info))
         else:
             await self.reply(constants.USER_MSG_CLASS_NOT_FOUND)
         return
     match = constants.CMD_WATCHLIST.match(self.msg_content)
     if match:
         watchlist = []
         for term, crn, name, course_id, section, seat_cap, seat_rem, \
                 wait_cap, wait_rem in db.execute(
                     constants.SQL_GET_USER_WATCHLIST, (self.user_id,)):
             seat_or_waitlist = constants.MSG_PARAM_SEAT
             # With no seats left but a waitlist available, report the
             # waitlist figures in place of the seat figures.
             if seat_rem <= 0 and wait_cap > 0:
                 seat_cap = wait_cap
                 seat_rem = wait_rem
                 seat_or_waitlist = constants.MSG_PARAM_WAITLIST_SPOT
             watchlist.append(
                 constants.USER_MSG_WATCHLIST_ENTRY.format(
                     id=course_id,
                     section=section,
                     name=name,
                     crn=crn,
                     term=term,
                     human_term=get_human_readable_term(term),
                     seat_cap=seat_cap,
                     seat_rem=seat_rem,
                     seat_or_waitlist_cap=pluralize(seat_or_waitlist,
                                                    seat_cap),
                 ))
         if watchlist:
             lines = deque(watchlist)
             # Header line first, then one line per watched class.
             lines.appendleft(
                 constants.USER_MSG_WATCHLIST.format(
                     humanize.apnumber(len(watchlist)),
                     '' if len(watchlist) == 1 else 's'))
             await self.reply('\n'.join(lines))
         else:
             await self.reply(constants.USER_MSG_WATCHLIST_EMPTY)
         return
     await self.reply(constants.USER_MSG_INVALID_COMMAND)
示例#11
0
文件: filters.py 项目: pdxbmw/soflyco
def apnumber(arg):
    """Thin wrapper that delegates to ``humanize.apnumber``."""
    return humanize.apnumber(arg)
示例#12
0
    def _generate_summary(self):
        """Build the main decision-tree card and attach it to the tree node.

        For every target in ``self._table`` this collects the prediction
        rules with their probability, prediction, frequency and strength
        group into one table, counts how many rules are "strong" (probability
        at or above the cutoff) versus "mixed", and sums the frequencies for
        a donut chart. The rendered narrative, chart, target dropdown and
        rule table are then combined into a card that is added to
        ``self._decisionTreeNode``. A progress message is saved first.
        """
        data_dict = {}
        rules_dict = self._table
        data_dict["blockSplitter"] = self._blockSplitter
        data_dict["targetcol"] = self._colname
        probabilityCutoff = 75
        # Index 0 is the "strong" group, index 1 the "mixed" group.
        probabilityGroups = [{
            "probability": probabilityCutoff,
            "count": 0,
            "range": [probabilityCutoff, 100]
        }, {
            "probability": probabilityCutoff - 1,
            "count": 0,
            "range": [0, probabilityCutoff - 1]
        }]
        tableArray = [[
            "Prediction Rule", "Probability", "Prediction", "Freq", "group",
            "richRules"
        ]]
        dropdownData = []
        chartDict = {}
        self._completionStatus = self._dataframe_context.get_completion_status(
        )
        progressMessage = CommonUtils.create_progress_message_object(
            self._analysisName,
            "custom",
            "info",
            "Generating Prediction rules",
            self._completionStatus,
            self._completionStatus,
            display=True)
        CommonUtils.save_progress_message(self._messageURL,
                                          progressMessage,
                                          ignore=False)

        # These do not depend on the target, so look them up once.
        analysisType = self._dataframe_context.get_analysis_type()
        targetCol = self._dataframe_context.get_result_column()
        customDetails = self._dataframe_context.get_custom_analysis_details()
        binFlag = False
        if customDetails is not None:
            binFlag = targetCol in [x["colName"] for x in customDetails]

        for idx, target in enumerate(rules_dict.keys()):
            targetToDisplayInTable = target.split(":")[0].strip()
            # Only the first target starts out selected in the dropdown.
            dropdownData.append({
                "displayName": target,
                "name": targetToDisplayInTable,
                "searchTerm": targetToDisplayInTable,
                "selected": idx == 0,
                "id": idx + 1
            })
            rulesArray = rules_dict[target]
            probabilityArray = [
                round(x, 2) for x in self.success_percent[target]
            ]
            groupArray = [
                "strong" if x >= probabilityCutoff else "mixed"
                for x in probabilityArray
            ]
            # Count from the group labels so both tallies agree with the
            # "group" column. Matching on the integer "range" bounds dropped
            # every probability strictly between cutoff - 1 and cutoff.
            strongCount = groupArray.count("strong")
            probabilityGroups[0]["count"] += strongCount
            probabilityGroups[1]["count"] += len(groupArray) - strongCount
            predictionArray = [targetToDisplayInTable] * len(rulesArray)
            freqArray = self.total_predictions[target]
            chartDict[target] = sum(freqArray)
            success = self.successful_predictions[target]
            success_percent = self.success_percent[target]
            richRulesArray = []
            crudeRuleArray = []
            for idx2, crudeRule in enumerate(rulesArray):
                richRule, crudeRule = NarrativesUtils.generate_rules(
                    self._colname,
                    target,
                    crudeRule,
                    freqArray[idx2],
                    success[idx2],
                    success_percent[idx2],
                    analysisType,
                    binFlag=binFlag)
                richRulesArray.append(richRule)
                crudeRuleArray.append(crudeRule)
            probabilityArray = [
                humanize.apnumber(x) + "%" if x >= 10 else str(int(x)) + "%"
                for x in probabilityArray
            ]
            targetArray = zip(crudeRuleArray, probabilityArray,
                              predictionArray, freqArray, groupArray,
                              richRulesArray)
            tableArray += [list(x) for x in targetArray]

        # Restructure the chart data when there are more levels than the
        # donut chart is allowed to show.
        donutChartMaxLevel = 10
        if len(chartDict) > donutChartMaxLevel:
            chartDict = NarrativesUtils.restructure_donut_chart_data(
                chartDict, nLevels=donutChartMaxLevel)
        chartData = NormalChartData([chartDict]).get_data()
        chartJson = ChartJson(data=chartData)
        chartJson.set_title(self._colname)
        chartJson.set_chart_type("donut")
        mainCardChart = C3ChartData(data=chartJson)
        mainCardChart.set_width_percent(45)

        dropdownDict = {
            "dataType": "dropdown",
            "label": "Showing prediction rules for",
            "data": dropdownData
        }

        data_dict["probabilityGroups"] = probabilityGroups

        maincardSummary = NarrativesUtils.get_template_output(
            self._base_dir, 'decisiontreesummary.html', data_dict)
        main_card = NormalCard()
        main_card_data = []
        main_card_narrative = NarrativesUtils.block_splitter(
            maincardSummary, self._blockSplitter)
        main_card_data += main_card_narrative

        main_card_data.append(mainCardChart)
        main_card_data.append(dropdownDict)

        main_card_table = TableData()
        main_card_table.set_table_data(tableArray)
        main_card_table.set_table_type("popupDecisionTreeTable")
        main_card_data.append(main_card_table)
        main_card.set_card_data(main_card_data)
        main_card.set_card_name("Predicting Key Drivers of {}".format(
            self._colname))
        self._decisionTreeNode.add_a_card(main_card)
示例#13
0
        notifier.notify_email(new_email)
    notifier.loop.run()


def get_date(d):
    """Parse the date string ``d`` according to the module-level ``date_format``."""
    parsed = datetime.strptime(d, date_format)
    return parsed


def prepare_emails(emails):
    """Return ``[subject, sender, age]`` rows for display.

    ``emails`` is an iterable of 3-item records whose third item is a date
    string. Rows are ordered by that raw third item, descending, and the date
    is replaced by its ``naturaltime`` distance from the current local time.
    """
    now = datetime.now(tzlocal())
    rows = []
    for subject, sender, sent in sorted(emails, key=itemgetter(2),
                                        reverse=True):
        age = naturaltime(now - get_date(sent))
        rows.append([subject, sender, age])
    return rows


if __name__ == '__main__':
    # Load the cached and the current mails, then work out which are new.
    cache = get_cache()
    emails = get_emails()
    prepared_emails = prepare_emails(emails)
    new_emails = [mail for mail in emails if mail not in cache]

    # Headline, followed by a table when at least one record is truthy.
    headline = '{} new(s) message(s) in INBOX'.format(apnumber(len(emails)))
    print(headline)
    if any(emails):
        table = tabulate(prepared_emails, headers=['SUBJECT', 'FROM', 'DATE'])
        print(table)

    # Store the current mails in the cache before notifying.
    put_cache(emails)
    if any(new_emails):
        # Notifications run inside a daemon context.
        with DaemonContext():
            notify(new_emails)
示例#14
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

# SOURCE: https://github.com/jmoiron/humanize

# pip install humanize
import humanize

# Integer humanization:
# Thousands separators: '12,345', '123,456,789'
for value in (12345, 123456789):
    print(humanize.intcomma(value))
print()

# Large-number words: '123.5 million', '12.3 billion', '1.3 trillion'
for value in (123455913, 12345591313, 1339014900000):
    print(humanize.intword(value))
print()

# AP-style numbers: 'four', 'seven', '41'
for value in (4, 7, 41):
    print(humanize.apnumber(value))
    def Predict(self):
        """Score the data frame with the trained pipeline and build the result cards.

        Steps, in order:

        1. Report the "initialization" progress stage.
        2. Compare categorical level counts between training and scoring data.
        3. Load the trained pipeline, transform the data frame and map the
           predictions back to their labels.
        4. Write the scored rows to ``<score path>/data.csv`` and, when a
           unique-identifier column is available, publish a table of the top
           five rows per predicted class.
        5. Report the "prediction" progress stage.
        6. With two or more predicted classes, run the decision-tree analysis
           and narrative; otherwise build a summary card with a donut chart of
           the predicted level counts.
        """
        self._scriptWeightDict = self._dataframe_context.get_ml_model_prediction_weight(
        )
        self._scriptStages = {
            "initialization": {
                "summary": "Initialized the Naive Bayes Scripts",
                "weight": 2
            },
            "prediction": {
                "summary": "Spark ML Naive Bayes Model Prediction Finished",
                "weight": 2
            },
            "frequency": {
                "summary": "descriptive analysis finished",
                "weight": 2
            },
            "chisquare": {
                "summary": "chi Square analysis finished",
                "weight": 4
            },
            "completion": {
                "summary": "all analysis finished",
                "weight": 4
            },
        }

        self._completionStatus += self._scriptWeightDict[self._analysisName][
            "total"] * self._scriptStages["initialization"]["weight"] / 10
        progressMessage = CommonUtils.create_progress_message_object(
            self._analysisName,
            "initialization",
            "info",
            self._scriptStages["initialization"]["summary"],
            self._completionStatus,
            self._completionStatus)
        CommonUtils.save_progress_message(self._messageURL, progressMessage)
        self._dataframe_context.update_completion_status(
            self._completionStatus)

        SQLctx = SQLContext(sparkContext=self._spark.sparkContext,
                            sparkSession=self._spark)
        dataSanity = True
        level_counts_train = self._dataframe_context.get_level_count_dict()
        categorical_columns = self._dataframe_helper.get_string_columns()
        numerical_columns = self._dataframe_helper.get_numeric_columns()
        time_dimension_columns = self._dataframe_helper.get_timestamp_columns()
        result_column = self._dataframe_context.get_result_column()
        categorical_columns = [
            x for x in categorical_columns if x != result_column
        ]

        level_counts_score = CommonUtils.get_level_count_dict(
            self._data_frame,
            categorical_columns,
            self._dataframe_context.get_column_separator(),
            output_type="dict",
            dataType="spark")
        # dataSanity turns False when any training column is missing from
        # the scoring data or has different level counts. Nothing in this
        # method reads it afterwards.
        for key in level_counts_train:
            if key in level_counts_score:
                if level_counts_train[key] != level_counts_score[key]:
                    dataSanity = False
            else:
                dataSanity = False

        test_data_path = self._dataframe_context.get_input_file()
        score_data_path = self._dataframe_context.get_score_path(
        ) + "/data.csv"
        trained_model_path = self._dataframe_context.get_model_path()
        # Swap the last path component for "<slug>/<model for scoring>".
        trained_model_path = "/".join(
            trained_model_path.split("/")[:-1]
        ) + "/" + self._slug + "/" + self._dataframe_context.get_model_for_scoring(
        )

        pipelineModel = MLUtils.load_pipeline(trained_model_path)

        df = self._data_frame
        transformed = pipelineModel.transform(df)
        label_indexer_dict = MLUtils.read_string_indexer_mapping(
            trained_model_path, SQLctx)
        prediction_to_levels = udf(lambda x: label_indexer_dict[x],
                                   StringType())
        transformed = transformed.withColumn(
            result_column, prediction_to_levels(transformed.prediction))

        if "probability" in transformed.columns:
            probability_dataframe = transformed.select(
                [result_column, "probability"]).toPandas()
            probability_dataframe = probability_dataframe.rename(
                index=str, columns={result_column: "predicted_class"})
            # The predicted probability is the largest entry of each row's
            # probability value.
            probability_dataframe[
                "predicted_probability"] = probability_dataframe[
                    "probability"].apply(lambda x: max(x))
            self._score_summary[
                "prediction_split"] = MLUtils.calculate_scored_probability_stats(
                    probability_dataframe)
            self._score_summary["result_column"] = result_column
            scored_dataframe = transformed.select(
                categorical_columns + time_dimension_columns +
                numerical_columns + [result_column, "probability"]).toPandas()
            scored_dataframe['predicted_probability'] = probability_dataframe[
                "predicted_probability"].values
        else:
            self._score_summary["prediction_split"] = []
            self._score_summary["result_column"] = result_column
            scored_dataframe = transformed.select(categorical_columns +
                                                  time_dimension_columns +
                                                  numerical_columns +
                                                  [result_column]).toPandas()

        labelMappingDict = self._dataframe_context.get_label_map()
        # NOTE(review): drops the first 7 characters, presumably a
        # "file://" prefix -- confirm against the callers.
        if score_data_path.startswith("file"):
            score_data_path = score_data_path[7:]
        scored_dataframe.to_csv(score_data_path, header=True, index=False)

        uidCol = self._dataframe_context.get_uid_column()
        if uidCol is None:
            uidCols = self._metaParser.get_suggested_uid_columns()
            if len(uidCols) > 0:
                uidCol = uidCols[0]
        uidTableData = []
        predictedClasses = list(scored_dataframe[result_column].unique())
        if uidCol:
            if uidCol in df.columns:
                for level in predictedClasses:
                    levelDf = scored_dataframe[scored_dataframe[result_column]
                                               == level]
                    levelDf = levelDf[[
                        uidCol, "predicted_probability", result_column
                    ]]
                    # Sort into a new frame: sorting a slice in place
                    # triggers pandas' SettingWithCopyWarning.
                    levelDf = levelDf.sort_values(by="predicted_probability",
                                                  ascending=False)
                    levelDf["predicted_probability"] = levelDf[
                        "predicted_probability"].apply(
                            lambda x: humanize.apnumber(x * 100) + "%"
                            if x * 100 >= 10 else str(int(x * 100)) + "%")
                    # Keep the five most probable rows of each class.
                    uidTableData.append(levelDf[:5])
                uidTableData = pd.concat(uidTableData)
                uidTableData = [list(arr) for arr in list(uidTableData.values)]
                uidTableData = [[uidCol, "Probability", result_column]
                                ] + uidTableData
                uidTable = TableData()
                uidTable.set_table_width(25)
                uidTable.set_table_data(uidTableData)
                uidTable.set_table_type("normalHideColumn")
                self._result_setter.set_unique_identifier_table(
                    json.loads(
                        CommonUtils.convert_python_object_to_json(uidTable)))

        self._completionStatus += self._scriptWeightDict[self._analysisName][
            "total"] * self._scriptStages["prediction"]["weight"] / 10
        progressMessage = CommonUtils.create_progress_message_object(
            self._analysisName,
            "prediction",
            "info",
            self._scriptStages["prediction"]["summary"],
            self._completionStatus,
            self._completionStatus)
        CommonUtils.save_progress_message(self._messageURL, progressMessage)
        self._dataframe_context.update_completion_status(
            self._completionStatus)

        print("STARTING DIMENSION ANALYSIS ...")
        columns_to_drop = []

        columns_to_keep = self._dataframe_context.get_score_consider_columns()

        if len(columns_to_keep) > 0:
            columns_to_drop = list(set(df.columns) - set(columns_to_keep))
        else:
            columns_to_drop += ["predicted_probability"]

        scored_df = transformed.select(categorical_columns +
                                       time_dimension_columns +
                                       numerical_columns + [result_column])
        columns_to_drop = [
            x for x in columns_to_drop if x in scored_df.columns
        ]
        modified_df = scored_df.select(
            [x for x in scored_df.columns if x not in columns_to_drop])
        resultColLevelCount = dict(
            modified_df.groupby(result_column).count().collect())
        self._metaParser.update_column_dict(
            result_column, {
                "LevelCount": resultColLevelCount,
                "numberOfUniqueValues": len(resultColLevelCount.keys())
            })
        self._dataframe_context.set_story_on_scored_data(True)

        self._dataframe_context.update_consider_columns(columns_to_keep)
        df_helper = DataFrameHelper(modified_df, self._dataframe_context,
                                    self._metaParser)
        df_helper.set_params()
        spark_scored_df = df_helper.get_data_frame()

        if len(predictedClasses) >= 2:
            try:
                fs = time.time()
                df_decision_tree_obj = DecisionTrees(
                    spark_scored_df,
                    df_helper,
                    self._dataframe_context,
                    self._spark,
                    self._metaParser,
                    scriptWeight=self._scriptWeightDict,
                    analysisName=self._analysisName).test_all(
                        dimension_columns=[result_column])
                narratives_obj = CommonUtils.as_dict(
                    DecisionTreeNarrative(result_column,
                                          df_decision_tree_obj,
                                          self._dataframe_helper,
                                          self._dataframe_context,
                                          self._metaParser,
                                          self._result_setter,
                                          story_narrative=None,
                                          analysisName=self._analysisName,
                                          scriptWeight=self._scriptWeightDict))
                print(narratives_obj)
            except Exception as e:
                # Best effort: a failed decision-tree analysis is reported
                # but does not abort scoring.
                print("DecisionTree Analysis Failed ", str(e))
        else:
            data_dict = {
                "npred": len(predictedClasses),
                "nactual": len(labelMappingDict.values())
            }

            # Start from an empty dict: the multi-class branch below used to
            # assign into levelCountDict before it existed, which raised
            # UnboundLocalError.
            levelCountDict = {}
            if data_dict["nactual"] > 2:
                levelCountDict[predictedClasses[0]] = resultColLevelCount[
                    predictedClasses[0]]
                levelCountDict["Others"] = sum([
                    v for k, v in resultColLevelCount.items()
                    if k != predictedClasses[0]
                ])
            else:
                # Aliases (and below mutates) resultColLevelCount.
                levelCountDict = resultColLevelCount
                otherClass = list(
                    set(labelMappingDict.values()) - set(predictedClasses))[0]
                levelCountDict[otherClass] = 0

                print(levelCountDict)

            total = float(
                sum([x for x in levelCountDict.values() if x is not None]))
            levelCountTuple = [({
                "name":
                k,
                "count":
                v,
                "percentage":
                humanize.apnumber(v * 100 / total) + "%"
            }) for k, v in levelCountDict.items() if v is not None]
            levelCountTuple = sorted(levelCountTuple,
                                     key=lambda x: x["count"],
                                     reverse=True)
            data_dict["blockSplitter"] = "|~NEWBLOCK~|"
            data_dict["targetcol"] = result_column
            data_dict["nlevel"] = len(levelCountDict.keys())
            data_dict["topLevel"] = levelCountTuple[0]
            data_dict["secondLevel"] = levelCountTuple[1]
            maincardSummary = NarrativesUtils.get_template_output(
                "/apps/", 'scorewithoutdtree.html', data_dict)

            main_card = NormalCard()
            main_card_data = []
            main_card_narrative = NarrativesUtils.block_splitter(
                maincardSummary, "|~NEWBLOCK~|")
            main_card_data += main_card_narrative

            chartData = NormalChartData([levelCountDict]).get_data()
            chartJson = ChartJson(data=chartData)
            chartJson.set_title(result_column)
            chartJson.set_chart_type("donut")
            mainCardChart = C3ChartData(data=chartJson)
            mainCardChart.set_width_percent(33)
            main_card_data.append(mainCardChart)

            uidTable = self._result_setter.get_unique_identifier_table()
            if uidTable is not None:
                main_card_data.append(uidTable)
            main_card.set_card_data(main_card_data)
            main_card.set_card_name(
                "Predicting Key Drivers of {}".format(result_column))
            self._result_setter.set_score_dtree_cards([main_card], {})
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

# SOURCE: https://github.com/jmoiron/humanize

# pip install humanize
import humanize
import datetime as DT

# Localization: switching the humanize locale at runtime.
def _print_samples():
    """Print a fixed set of humanize conversions using the active locale."""
    print(humanize.naturaltime(DT.timedelta(seconds=3)))
    for big_number in (123455913, 12345591313, 1339014900000):
        print(humanize.intword(big_number))
    for small_number in (4, 7):
        print(humanize.apnumber(small_number))
    print()


# Default locale. Expected output:
#   '3 seconds ago'
#   '123.5 million'
#   '12.3 billion'
#   '1.3 trillion'
#   'four'
#   'seven'
_print_samples()

_t = humanize.i18n.activate('ru_RU')

# Russian locale. Expected output:
#   '3 секунды назад'
#   '123.5 миллиона'
#   '12.3 миллиарда'
#   '1.3 триллиона'
#   'четыре'
#   'семь'
_print_samples()

humanize.i18n.deactivate()
示例#17
0
#!/usr/bin/env python

import humanize
# Print the humanize.apnumber() rendering of every integer from 0 to 99.
for i in range(100):
    # Call-style print with a single argument behaves the same on Python 2
    # and Python 3; the original print statement is a SyntaxError on Python 3.
    print(humanize.apnumber(i))
示例#18
0
    def Predict(self):
        """Score the input data with a trained model and publish the results.

        Steps, in order:

        1. Register the stage weights and report the "initialization" stage.
        2. In the "sklearn" environment: load the pickled model, convert the
           Spark data frame to pandas, build dummy columns, predict the class
           and its probability for every row.
        3. Map predicted labels through the label map, write the scored data
           to CSV and, when a unique-identifier column is available, store a
           table with the five most probable rows of every predicted class.
        4. Report the "prediction" stage.
        5. Drop the columns that are not part of the analysis, update the
           column metadata and rebuild a Spark data frame from the scores.
        6. With two or more predicted classes run the decision-tree analysis
           and narrative; otherwise publish a single summary card with a
           donut chart of the predicted level counts.

        Returns nothing; results are pushed through ``self._result_setter``,
        ``self._score_summary`` and the data frame context.
        """

        def _report_stage(stage):
            # Add the stage's share of the analysis weight to the completion
            # status and publish a progress message for it.
            self._completionStatus += old_div(
                self._scriptWeightDict[self._analysisName]["total"] *
                self._scriptStages[stage]["weight"], 10)
            progressMessage = CommonUtils.create_progress_message_object(
                self._analysisName,
                stage,
                "info",
                self._scriptStages[stage]["summary"],
                self._completionStatus,
                self._completionStatus)
            CommonUtils.save_progress_message(self._messageURL,
                                              progressMessage,
                                              ignore=self._ignoreMsg)
            self._dataframe_context.update_completion_status(
                self._completionStatus)

        def _local_path(path):
            # Drop the first seven characters when the path starts with
            # "file" (presumably a "file://" scheme prefix -- TODO confirm).
            return path[7:] if path.startswith("file") else path

        self._scriptWeightDict = self._dataframe_context.get_ml_model_prediction_weight(
        )
        self._scriptStages = {
            "initialization": {
                "summary": "Initialized the Random Forest Scripts",
                "weight": 2
            },
            "prediction": {
                "summary": "Random Forest Model Prediction Finished",
                "weight": 2
            },
            "frequency": {
                "summary": "descriptive analysis finished",
                "weight": 2
            },
            "chisquare": {
                "summary": "chi Square analysis finished",
                "weight": 4
            },
            "completion": {
                "summary": "all analysis finished",
                "weight": 4
            },
        }
        _report_stage("initialization")

        categorical_columns = self._dataframe_helper.get_string_columns()
        uid_col = self._dataframe_context.get_uid_column()
        if self._metaParser.check_column_isin_ignored_suggestion(uid_col):
            categorical_columns = list(set(categorical_columns) - {uid_col})
        allDateCols = self._dataframe_context.get_date_columns()
        categorical_columns = list(set(categorical_columns) - set(allDateCols))
        result_column = self._dataframe_context.get_result_column()

        if self._mlEnv == "spark":
            # NOTE(review): nothing is scored in this branch, so `df`, `score`
            # and `score_data_path` stay unbound and the code below fails.
            pass
        elif self._mlEnv == "sklearn":
            score_data_path = _local_path(
                self._dataframe_context.get_score_path() + "/data.csv")
            trained_model_path = self._dataframe_context.get_model_path()
            trained_model_path += "/" + self._dataframe_context.get_model_for_scoring(
            ) + ".pkl"
            trained_model_path = _local_path(trained_model_path)
            trained_model = joblib.load(trained_model_path)
            df = self._data_frame.toPandas()
            model_columns = self._dataframe_context.get_model_features()
            pandas_df = MLUtils.create_dummy_columns(
                df, [x for x in categorical_columns if x != result_column])
            pandas_df = MLUtils.fill_missing_columns(pandas_df, model_columns,
                                                     result_column)
            if uid_col:
                pandas_df = pandas_df[[
                    x for x in pandas_df.columns if x != uid_col
                ]]
            y_score = trained_model.predict(pandas_df)
            y_prob = trained_model.predict_proba(pandas_df)
            y_prob = MLUtils.calculate_predicted_probability(y_prob)
            y_prob = [round(x, 2) for x in y_prob]
            score = {
                "predicted_class": y_score,
                "predicted_probability": y_prob
            }

        df["predicted_class"] = score["predicted_class"]
        labelMappingDict = self._dataframe_context.get_label_map()
        df["predicted_class"] = df["predicted_class"].apply(
            lambda x: labelMappingDict[x] if x is not None else "NA")
        df["predicted_probability"] = score["predicted_probability"]
        self._score_summary[
            "prediction_split"] = MLUtils.calculate_scored_probability_stats(
                df)
        self._score_summary["result_column"] = result_column
        # The predicted class replaces any pre-existing result column.
        if result_column in df.columns:
            df.drop(result_column, axis=1, inplace=True)
        df = df.rename(index=str, columns={"predicted_class": result_column})
        df.to_csv(score_data_path, header=True, index=False)

        # Fall back to the first suggested uid column when none is configured.
        uidCol = uid_col
        if uidCol is None:
            uidCols = self._metaParser.get_suggested_uid_columns()
            if len(uidCols) > 0:
                uidCol = uidCols[0]
        predictedClasses = list(df[result_column].unique())
        if uidCol and uidCol in df.columns:

            def _probability_label(prob):
                percent = prob * 100
                if percent >= 10:
                    return humanize.apnumber(percent) + "%"
                return str(int(percent)) + "%"

            uidTableData = []
            for level in predictedClasses:
                levelDf = df[df[result_column] == level]
                levelDf = levelDf[[
                    uidCol, "predicted_probability", result_column
                ]]
                # Non-inplace sort returns a new frame, so the column
                # assignment below does not write into a slice of `df`.
                levelDf = levelDf.sort_values(by="predicted_probability",
                                              ascending=False)
                levelDf["predicted_probability"] = levelDf[
                    "predicted_probability"].apply(_probability_label)
                # Keep the five most probable rows of this class.
                uidTableData.append(levelDf[:5])
            uidTableData = pd.concat(uidTableData)
            uidTableData = [list(arr) for arr in list(uidTableData.values)]
            uidTableData = [[uidCol, "Probability", result_column]
                            ] + uidTableData
            uidTable = TableData()
            uidTable.set_table_width(25)
            uidTable.set_table_data(uidTableData)
            uidTable.set_table_type("normalHideColumn")
            self._result_setter.set_unique_identifier_table(
                json.loads(
                    CommonUtils.convert_python_object_to_json(uidTable)))

        _report_stage("prediction")

        print("STARTING DIMENSION ANALYSIS ...")
        columns_to_keep = self._dataframe_context.get_score_consider_columns()
        if len(columns_to_keep) > 0:
            columns_to_drop = list(set(df.columns) - set(columns_to_keep))
        else:
            columns_to_drop = ["predicted_probability"]
        # Never drop the result column, and ignore columns that are absent.
        columns_to_drop = [
            x for x in columns_to_drop
            if x in df.columns and x != result_column
        ]
        print("columns_to_drop", columns_to_drop)
        df.drop(columns_to_drop, axis=1, inplace=True)

        resultColLevelCount = dict(df[result_column].value_counts())
        self._metaParser.update_column_dict(
            result_column, {
                "LevelCount": resultColLevelCount,
                "numberOfUniqueValues": len(resultColLevelCount)
            })
        self._dataframe_context.set_story_on_scored_data(True)
        SQLctx = SQLContext(sparkContext=self._spark.sparkContext,
                            sparkSession=self._spark)
        spark_scored_df = SQLctx.createDataFrame(df)
        # TODO update metadata for the newly created dataframe
        self._dataframe_context.update_consider_columns(columns_to_keep)
        df_helper = DataFrameHelper(spark_scored_df, self._dataframe_context,
                                    self._metaParser)
        df_helper.set_params()
        spark_scored_df = df_helper.get_data_frame()

        # The "frequency" and "chisquare" stages declared in _scriptStages are
        # not executed by this method.
        if len(predictedClasses) >= 2:
            try:
                df_decision_tree_obj = DecisionTrees(
                    spark_scored_df,
                    df_helper,
                    self._dataframe_context,
                    self._spark,
                    self._metaParser,
                    scriptWeight=self._scriptWeightDict,
                    analysisName=self._analysisName).test_all(
                        dimension_columns=[result_column])
                narratives_obj = CommonUtils.as_dict(
                    DecisionTreeNarrative(result_column,
                                          df_decision_tree_obj,
                                          self._dataframe_helper,
                                          self._dataframe_context,
                                          self._metaParser,
                                          self._result_setter,
                                          story_narrative=None,
                                          analysisName=self._analysisName,
                                          scriptWeight=self._scriptWeightDict))
                print(narratives_obj)
            except Exception as err:
                # Best effort: a failed decision-tree analysis must not abort
                # scoring, but the reason is reported instead of being hidden.
                print("DecisionTree Analysis Failed ")
                print(repr(err))
        else:
            data_dict = {
                "npred": len(predictedClasses),
                "nactual": len(list(labelMappingDict.values()))
            }
            if data_dict["nactual"] > 2:
                # Build a fresh dict here: the original subscript-assigned to
                # `levelCountDict` before it was ever bound in this branch.
                levelCountDict = {
                    predictedClasses[0]:
                    resultColLevelCount[predictedClasses[0]],
                    "Others":
                    sum(v for k, v in resultColLevelCount.items()
                        if k != predictedClasses[0]),
                }
            else:
                # NOTE(review): this aliases (and then mutates) the dict that
                # was handed to update_column_dict above -- confirm intended.
                levelCountDict = resultColLevelCount
                otherClass = list(
                    set(labelMappingDict.values()) - set(predictedClasses))[0]
                levelCountDict[otherClass] = 0

                print(levelCountDict)

            total = float(
                sum(x for x in levelCountDict.values() if x is not None))

            def _percentage_label(count):
                percent = old_div(count * 100, total)
                if percent >= 10:
                    return humanize.apnumber(percent) + "%"
                return str(int(percent)) + "%"

            levelCountTuple = [{
                "name": k,
                "count": v,
                "percentage": _percentage_label(v)
            } for k, v in levelCountDict.items() if v is not None]
            levelCountTuple = sorted(levelCountTuple,
                                     key=lambda x: x["count"],
                                     reverse=True)
            data_dict["blockSplitter"] = "|~NEWBLOCK~|"
            data_dict["targetcol"] = result_column
            data_dict["nlevel"] = len(levelCountDict)
            data_dict["topLevel"] = levelCountTuple[0]
            data_dict["secondLevel"] = levelCountTuple[1]
            maincardSummary = NarrativesUtils.get_template_output(
                "/apps/", 'scorewithoutdtree.html', data_dict)

            main_card = NormalCard()
            main_card_data = []
            main_card_narrative = NarrativesUtils.block_splitter(
                maincardSummary, "|~NEWBLOCK~|")
            main_card_data += main_card_narrative

            chartData = NormalChartData([levelCountDict]).get_data()
            chartJson = ChartJson(data=chartData)
            chartJson.set_title(result_column)
            chartJson.set_chart_type("donut")
            mainCardChart = C3ChartData(data=chartJson)
            mainCardChart.set_width_percent(33)
            main_card_data.append(mainCardChart)

            uidTable = self._result_setter.get_unique_identifier_table()
            if uidTable is not None:
                main_card_data.append(uidTable)
            main_card.set_card_data(main_card_data)
            main_card.set_card_name(
                "Predicting Key Drivers of {}".format(result_column))
            self._result_setter.set_score_dtree_cards([main_card], {})
示例#19
0
def test_apnumber(test_input, expected):
    """Check that humanize.apnumber(test_input) equals the expected value."""
    actual = humanize.apnumber(test_input)
    assert actual == expected
示例#20
0
class UserFactory(factory.DjangoModelFactory):
    """Factory for the model returned by get_user_model().

    Usernames are generated from the factory sequence number, rendered with
    humanize.apnumber and prefixed with "Peter The ".
    """
    FACTORY_FOR = get_user_model()

    username = factory.Sequence(
        lambda seq: "Peter The {}".format(humanize.apnumber(seq)))
示例#21
0
    def _generate_summary(self):
        """Build the main decision-tree card and attach it to the tree node.

        The card holds, in order: the narrative rendered from a template, a
        donut chart, a dropdown listing every target value (the configured
        target level first), the prediction-rules table and, when one is
        available from the result setter, the unique-identifier table.

        Reads the rules from ``self._table`` and the per-target statistics
        from ``self.total_predictions``, ``self.successful_predictions`` and
        ``self.success_percent``. Returns nothing; the finished card is added
        to ``self._decisionTreeNode``.
        """
        data_dict = {}
        rules_dict = self._table
        data_dict["blockSplitter"] = self._blockSplitter
        data_dict["targetcol"] = self._colname
        probabilityCutoff = 75
        # Index 0 collects the "strong" rules, index 1 the "mixed" ones.
        probabilityGroups = [{
            "probability": probabilityCutoff,
            "count": 0,
            "range": [probabilityCutoff, 100]
        }, {
            "probability": probabilityCutoff - 1,
            "count": 0,
            "range": [0, probabilityCutoff - 1]
        }]
        tableArray = [[
            "Prediction Rule", "Probability", "Prediction", "Freq", "group",
            "richRules"
        ]]
        dropdownData = []
        targetLevel = self._dataframe_context.get_target_level_for_model()
        probabilityArrayAll = []

        self._completionStatus = self._dataframe_context.get_completion_status(
        )
        progressMessage = CommonUtils.create_progress_message_object(
            self._analysisName,
            "custom",
            "info",
            "Generating Prediction rules",
            self._completionStatus,
            self._completionStatus,
            display=True)
        CommonUtils.save_progress_message(self._messageURL,
                                          progressMessage,
                                          ignore=False)
        self._dataframe_context.update_completion_status(
            self._completionStatus)

        # Values that do not change per target are read once, up front.
        # `== True` is kept on purpose: only a value equal to True counts.
        storyOnScoredData = (
            self._dataframe_context.get_story_on_scored_data() == True)
        analysisType = self._dataframe_context.get_analysis_type()
        targetCol = self._dataframe_context.get_result_column()
        binFlag = False
        customDetails = self._dataframe_context.get_custom_analysis_details()
        if customDetails is not None:
            binFlag = targetCol in [x["colName"] for x in customDetails]

        # The configured target level goes first so it is the selected entry.
        targetValues = [x for x in rules_dict.keys() if x == targetLevel
                        ] + [x for x in rules_dict.keys() if x != targetLevel]
        for idx, target in enumerate(targetValues):
            if storyOnScoredData:
                displayName = "{} : {}".format(self._colname, target)
            else:
                displayName = target
            dropdownData.append({
                "displayName": displayName,
                "name": target,
                "selected": idx == 0,
                "id": idx + 1
            })

            rulesArray = rules_dict[target]
            success_percent = self.success_percent[target]
            probabilityArray = [round(x, 2) for x in success_percent]
            probabilityArrayAll += probabilityArray
            groupArray = [
                "strong" if x >= probabilityCutoff else "mixed"
                for x in probabilityArray
            ]
            # Count from the same labels shown in the table. The original
            # closed ranges [75, 100] and [0, 74] dropped rounded values
            # strictly between 74 and 75 from both groups.
            strongCount = groupArray.count("strong")
            probabilityGroups[0]["count"] += strongCount
            probabilityGroups[1]["count"] += len(groupArray) - strongCount

            predictionArray = [target] * len(rulesArray)
            freqArray = self.total_predictions[target]
            success = self.successful_predictions[target]
            richRulesArray = []
            crudeRuleArray = []
            for idx2, crudeRule in enumerate(rulesArray):
                richRule, crudeRule = NarrativesUtils.generate_rules(
                    self._colname,
                    target,
                    crudeRule,
                    freqArray[idx2],
                    success[idx2],
                    success_percent[idx2],
                    analysisType,
                    binFlag=binFlag)
                richRulesArray.append(richRule)
                crudeRuleArray.append(crudeRule)
            probabilityLabels = [
                humanize.apnumber(x) + "%" if x >= 10 else str(int(x)) + "%"
                for x in probabilityArray
            ]
            tableArray += [
                list(row) for row in zip(crudeRuleArray, probabilityLabels,
                                         predictionArray, freqArray,
                                         groupArray, richRulesArray)
            ]

        donutChartMaxLevel = 10
        if storyOnScoredData:
            # Bucket every rule probability into the configured ranges
            # (lower bound exclusive, upper bound inclusive).
            probabilityRangeForChart = GLOBALSETTINGS.PROBABILITY_RANGE_FOR_DONUT_CHART
            chartDict = dict.fromkeys(probabilityRangeForChart, 0)
            for val in probabilityArrayAll:
                for grps, grpRange in probabilityRangeForChart.items():
                    if grpRange[0] < val <= grpRange[1]:
                        chartDict[grps] += 1
        else:
            chartDict = {
                k: sum(v) for k, v in self.total_predictions.items()
            }
        chartDict = {k: v for k, v in chartDict.items() if v != 0}
        if len(chartDict) > donutChartMaxLevel:
            chartDict = NarrativesUtils.restructure_donut_chart_data(
                chartDict, nLevels=donutChartMaxLevel)
        chartData = NormalChartData([chartDict]).get_data()
        chartJson = ChartJson(data=chartData)
        chartJson.set_title(self._colname)
        chartJson.set_chart_type("donut")
        mainCardChart = C3ChartData(data=chartJson)
        mainCardChart.set_width_percent(45)

        dropdownDict = {
            "dataType": "dropdown",
            "label": "Showing prediction rules for",
            "data": dropdownData
        }

        data_dict["probabilityGroups"] = probabilityGroups
        if not storyOnScoredData:
            maincardSummary = NarrativesUtils.get_template_output(
                self._base_dir, 'decisiontreesummary.html', data_dict)
        else:
            # Total frequency per predicted level, taken from the table rows
            # (column 2 is the prediction, column 3 the frequency).
            levelCountDict = {}
            for row in tableArray[1:]:
                levelCountDict[row[2]] = levelCountDict.get(row[2], 0) + row[3]

            total = float(sum(levelCountDict.values()))
            percentageArray = NarrativesUtils.ret_smart_round(
                [round(v * 100 / total, 2) for v in levelCountDict.values()])
            levelCountTuple = [{
                "name": k,
                "count": v,
                "percentage": str(percentageArray[idx]) + "%"
            } for idx, (k, v) in enumerate(levelCountDict.items())]
            data_dict["nlevel"] = len(levelCountDict)
            # Single-argument print calls give the same output on Python 2
            # and Python 3.
            print("levelCountTuple {}".format(levelCountTuple))
            print("levelCountDict {}".format(levelCountDict))
            if targetLevel in levelCountDict:
                data_dict["topLevel"] = [
                    x for x in levelCountTuple if x["name"] == targetLevel
                ][0]
                if len(levelCountTuple) > 1:
                    data_dict["secondLevel"] = max(
                        [x for x in levelCountTuple
                         if x["name"] != targetLevel],
                        key=lambda x: x["count"])
                else:
                    data_dict["secondLevel"] = None
            else:
                data_dict["topLevel"] = levelCountTuple[0]
                if len(levelCountTuple) > 1:
                    data_dict["secondLevel"] = levelCountTuple[1]
                else:
                    data_dict["secondLevel"] = None
            print(data_dict)
            maincardSummary = NarrativesUtils.get_template_output(
                self._base_dir, 'decisiontreescore.html', data_dict)
        main_card = NormalCard()
        main_card_data = []
        main_card_narrative = NarrativesUtils.block_splitter(
            maincardSummary, self._blockSplitter)
        main_card_data += main_card_narrative

        main_card_data.append(mainCardChart)
        main_card_data.append(dropdownDict)

        main_card_table = TableData()
        if storyOnScoredData:
            main_card_table.set_table_width(75)
        main_card_table.set_table_data(tableArray)
        main_card_table.set_table_type("popupDecisionTreeTable")
        main_card_data.append(main_card_table)
        uidTable = self._result_setter.get_unique_identifier_table()
        if uidTable is not None:
            main_card_data.append(uidTable)
        else:
            # Without a uid table the rules table takes the full width.
            main_card_table.set_table_width(100)
        main_card.set_card_data(main_card_data)
        main_card.set_card_name("Predicting Key Drivers of {}".format(
            self._colname))
        self._decisionTreeNode.add_a_card(main_card)