Example #1
def log_change():
    """Gets the latest revisions and cleans them.

    Relies on module-level imports/globals from the enclosing file:
    datetime, json, time, requests, args, wiki_write, and the project
    modules perspective, clean, and google_api_errors.
    """
    apikey_data, toxicity, dlp = perspective.get_client()
    start = datetime.datetime.utcnow() - datetime.timedelta(minutes=2)
    while True:
        end = datetime.datetime.utcnow()
        page = (
            args.mediawiki +
            "api.php?action=query&list=recentchanges&rclimit=500&rcprop=title%7Cids%7Csizes%7Cflags%7Cuser&rcdir=newer&rcstart="
            + start.isoformat() + "&rcend=" + end.isoformat() + "&format=json")
        get_page = requests.get(page)
        response = json.loads(get_page.content)
        start = end
        for change in response['query']['recentchanges']:
            print('new change:')
            revid = str(change['revid'])
            old_revid = str(change['old_revid'])
            compare = (args.mediawiki + "api.php?action=compare&fromrev=" +
                       old_revid + "&torev=" + revid + "&format=json")
            get_compare = requests.get(compare)
            response = json.loads(get_compare.content.decode('utf-8'))

            if 'compare' not in response:
                continue
            revision = response['compare']['*']
            text = clean.content_clean(revision)
            dlp_response = perspective.dlp_request(dlp, apikey_data, text)
            print(text)
            try:
                perspective_response = perspective.perspective_request(
                    toxicity, text)
            # Perspective can't handle language errors at this time
            except google_api_errors.HttpError as err:
                print('Error:', err)
                return
            has_pii_bool, pii_type = perspective.contains_pii(dlp_response)
            if has_pii_bool:
                header = '==Possible Doxxing Detected: Waiting for review=='
                result = (json.dumps({
                    u"comment_text": text,
                    "contains_pii": True,
                    "pii_type": pii_type
                }) + "\n")
                wiki_write(result, header)

            if perspective.contains_toxicity(perspective_response):
                header = '==Possibly Toxic Detected: Waiting for review=='
                result = (json.dumps({
                    u"comment_text": text,
                    "contains_toxicity": True,
                    "summaryScore":
                        perspective_response['attributeScores']['TOXICITY']
                        ['summaryScore']['value']
                }) + "\n")
                wiki_write(result, header)
        # Poll the recent-changes feed again after two minutes.
        time.sleep(120)
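
As an aside, the same recentchanges request can be built without manual URL concatenation by handing requests a params dict. A minimal sketch, not the project's code; the fetch_recent_changes name and the endpoint URL in the usage comment are illustrative only:

import datetime

import requests


def fetch_recent_changes(api_url, start, end):
    """Sketch: query MediaWiki recentchanges between two UTC timestamps."""
    params = {
        'action': 'query',
        'list': 'recentchanges',
        'rclimit': 500,
        'rcprop': 'title|ids|sizes|flags|user',
        'rcdir': 'newer',
        'rcstart': start.isoformat(),
        'rcend': end.isoformat(),
        'format': 'json',
    }
    # requests URL-encodes the pipe-separated rcprop value for us.
    return requests.get(api_url, params=params).json()

# Illustrative usage:
# end = datetime.datetime.utcnow()
# start = end - datetime.timedelta(minutes=2)
# data = fetch_recent_changes('https://en.wikipedia.org/w/api.php', start, end)
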
Example #2
def log_event(apikey_data, toxicity, dlp, change):
    """Logs an event by printing it and writing flagged revisions for review.

    Args:
      apikey_data: credentials passed to the DLP request.
      toxicity: Perspective API client used for toxicity scoring.
      dlp: DLP client used for PII detection.
      change: a json object with the wikimedia change record.
    """
    # print(
    #     u'user:{user} namespace:{namespace} bot:{bot} comment:{comment} title:{title}'
    #     .format(**change))
    # print('\n########## change:')
    from_id = (str(change['revision']['old']))
    to_id = (str(change['revision']['new']))
    page = ('https://en.wikipedia.org/w/api.php?action=compare&fromrev=' +
            from_id + '&torev=' + to_id + '&format=json')
    get_page = requests.get(page)
    response = json.loads(get_page.content.decode('utf-8'))
    revision = response['compare']['*']

    text = clean.content_clean(revision)

    # for line in text:
    print(text)
    if not text:
        return
    dlp_response = perspective.dlp_request(dlp, apikey_data, text)
    try:
        perspective_response = perspective.perspective_request(toxicity, text)
    # Perspective can't handle language errors at this time
    except google_api_errors.HttpError as err:
        print('Error:', err)
        return
    has_pii_bool, pii_type = perspective.contains_pii(dlp_response)
    if has_pii_bool:
        header = '==Possible Doxxing Detected: Waiting for review=='
        result = (
            u'{{user:{user}, namespace:{namespace}, bot:{bot}, '
            'comment:{comment}, title:{title}, '.format(**change) +
            'comment_text:' + str(text) + ', ' +
            'contains_pii:True' + ', ' +
            'pii_type:' + str(pii_type) + '}\n')
        wiki_write(result, header)

    if perspective.contains_toxicity(perspective_response):
        header = '==Possibly Toxic Detected: Waiting for review=='
        result = (
            u'{{user:{user}, namespace:{namespace}, bot:{bot}, '
            'comment:{comment}, title:{title}, '.format(**change) +
            'comment_text:' + str(text) + ', ' +
            'contains_toxicity:True' + ', ' +
            'toxic_score:' +
            str(perspective_response['attributeScores']['TOXICITY']
                ['summaryScore']['value']) + '}\n')
        wiki_write(result, header)
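
For context, log_event expects a recent-change record with user, namespace, bot, comment, title, and revision.old/new fields, plus the clients returned by perspective.get_client() as in Example #1. A minimal usage sketch; every field value below is made up:

# Hypothetical driver for log_event(); all values are illustrative only.
apikey_data, toxicity, dlp = perspective.get_client()
change = {
    'user': 'ExampleUser',
    'namespace': 0,
    'bot': False,
    'comment': 'copyedit',
    'title': 'Example page',
    'revision': {'old': 1000000001, 'new': 1000000002},
}
log_event(apikey_data, toxicity, dlp, change)
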
Example #3
 def test_contains_threat(self):
     perspective_response = \
     {'attributeScores': {'INSULT': {'spanScores': [{'begin': 0, 'end': 21, 'score': {'value': 0.55873775, 'type': 'PROBABILITY'}}], 'summaryScore': {'value': 0.55873775, 'type': 'PROBABILITY'}}, 'TOXICITY': {'spanScores': [{'begin': 0, 'end': 21, 'score': {'value': 0.9759337, 'type': 'PROBABILITY'}}], 'summaryScore': {'value': 0.9759337, 'type': 'PROBABILITY'}}, 'THREAT': {'spanScores': [{'begin': 0, 'end': 21, 'score': {'value': 0.9980843, 'type': 'PROBABILITY'}}], 'summaryScore': {'value': 0.9980843, 'type': 'PROBABILITY'}}}, 'languages': ['en'], 'detectedLanguages': ['en']}
     is_threat = perspective.contains_toxicity(perspective_response)
     self.assertTrue(is_threat)
Example #4
 def test_contains_toxicity_true(self):
     perspective_response = \
     {
     "attributeScores": {
       "INSULT": {
         "spanScores": [
           {
             "begin": 0,
             "end": 14,
             "score": {
               "value": 0.8521307,
               "type": "PROBABILITY"
             }
           }
         ],
         "summaryScore": {
           "value": 0.8521307,
           "type": "PROBABILITY"
         }
       },
       "TOXICITY": {
         "spanScores": [
           {
             "begin": 0,
             "end": 14,
             "score": {
               "value": 0.96624386,
               "type": "PROBABILITY"
             }
           }
         ],
         "summaryScore": {
           "value": 0.96624386,
           "type": "PROBABILITY"
         }
       },
       "THREAT": {
         "spanScores": [
           {
             "begin": 0,
             "end": 14,
             "score": {
               "value": 0.39998722,
               "type": "PROBABILITY"
             }
           }
         ],
         "summaryScore": {
           "value": 0.39998722,
           "type": "PROBABILITY"
         }
       }
     },
     "languages": [
       "en"
     ],
     "detectedLanguages": [
       "en"
     ]
     }
     is_toxic = perspective.contains_toxicity(perspective_response)
     self.assertTrue(is_toxic)
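
Examples #3 and #4 (and #5 below) call self.assertTrue, so they are unittest methods. A minimal sketch of the scaffolding they presumably sit in; the class name and the perspective import path are assumptions:

import unittest

import perspective  # project module assumed to expose contains_toxicity


class PerspectiveApiTest(unittest.TestCase):
    """Sketch: test case that would hold the methods shown in these examples."""

    def test_contains_toxicity_true(self):
        ...  # body as shown in Example #4


if __name__ == '__main__':
    unittest.main()
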
Example #5
 def test_contains_toxicity_false(self):
     perspective_response = \
     {'attributeScores': {'TOXICITY': {'spanScores': [{'begin': 0, 'end': 25, 'score': {'value': 0.9312127, 'type': 'PROBABILITY'}}], 'summaryScore': {'value': 0.9312127, 'type': 'PROBABILITY'}}, 'THREAT': {'spanScores': [{'begin': 0, 'end': 25, 'score': {'value': 0.15875438, 'type': 'PROBABILITY'}}], 'summaryScore': {'value': 0.15875438, 'type': 'PROBABILITY'}}, 'INSULT': {'spanScores': [{'begin': 0, 'end': 25, 'score': {'value': 0.93682694, 'type': 'PROBABILITY'}}], 'summaryScore': {'value': 0.93682694, 'type': 'PROBABILITY'}}}, 'languages': ['en'], 'detectedLanguages': ['en']}
     is_toxic = perspective.contains_toxicity(perspective_response)
     self.assertTrue(is_toxic)
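
All of these tests exercise perspective.contains_toxicity, whose implementation is not shown in the examples. A minimal sketch of the kind of threshold check it presumably performs on the response shape seen above; the 0.5 cutoff and the standalone-function form are assumptions, not the project's actual code:

TOXICITY_THRESHOLD = 0.5  # assumed cutoff; the real module may use another value


def contains_toxicity(perspective_response):
    """Sketch: report whether the TOXICITY summary score clears the threshold."""
    score = (perspective_response['attributeScores']['TOXICITY']
             ['summaryScore']['value'])
    return score >= TOXICITY_THRESHOLD
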