Пример #1
0
def clean_pdf(file):
    """Strip a ResearchGate cover page from an uploaded PDF, if present.

    The first page is treated as a ResearchGate cover sheet when at least
    three of the known marker strings are found on it. In that case the
    page is deleted and ``file.file`` is replaced with an in-memory copy of
    the remaining document. Documents with one page or fewer are left
    untouched.

    Failures are reported through ``sentry.log_error`` instead of being
    raised, and ``file`` is always rewound to position 0 before returning.

    Args:
        file: Uploaded file object exposing ``read()``, ``seek()`` and a
            writable ``file`` attribute.
    """
    researchgate_strings = (
        "ResearchGate",
        "Some of the authors of this publication are also working on these related projects",
        "CITATIONS",
        "READS",
    )
    try:
        doc = fitz.open(stream=file.read(), filetype="pdf")
        try:
            if doc.pageCount <= 1:
                return

            first_page = doc[0]
            found_items = sum(
                1
                for marker in researchgate_strings
                if first_page.searchFor(marker)
            )

            if found_items >= 3:
                doc.deletePage(0)
                # doc.write() yields the bytes of the modified document, so
                # the copy stays valid after the document is closed below.
                file.file = io.BytesIO(doc.write())
        finally:
            # Always release the document; previously it was never closed.
            doc.close()
    except Exception as e:
        sentry.log_error(e)
    finally:
        file.seek(0)
 def get_promoted(self, obj):
     """Return ``obj.get_promoted_score()``, or ``None`` when skipped or failing.

     The score is skipped when the serializer context sets a truthy
     ``exclude_promoted_score``. Any exception raised while computing the
     score is passed to ``log_error`` and ``None`` is returned.
     """
     skip_score = self.context.get('exclude_promoted_score', False)
     if not skip_score:
         try:
             score = obj.get_promoted_score()
         except Exception as e:
             log_error(e)
         else:
             return score
     return None
Пример #3
0
def get_user_score(user_id):
    """Request the score for ``user_id`` from ``client`` and print it as JSON.

    A ``sift.client.ApiException`` is logged via ``sentry.log_error`` and its
    ``api_error_message`` is printed. Nothing is returned.
    """
    try:
        print(json.dumps(client.score(user_id).body))
    except sift.client.ApiException as api_error:
        sentry.log_error(api_error)
        print(api_error.api_error_message)
Пример #4
0
def unlabel_user(user_id):
    """Remove the ``content_abuse`` label from ``user_id`` and print the reply.

    A ``sift.client.ApiException`` is logged via ``sentry.log_error`` and its
    ``api_error_message`` is printed. Nothing is returned.
    """
    # TODO: Finish this by determining how we plan to use it
    try:
        print(client.unlabel(user_id, abuse_type='content_abuse').body)
    except sift.client.ApiException as api_error:
        sentry.log_error(api_error)
        print(api_error.api_error_message)
Пример #5
0
def attach_file_to_document(sender, instance, created, update_fields,
                            **kwargs):
    """Signal-style handler that attaches an updated file to its document.

    Does nothing for newly created instances or when
    ``check_file_updated(update_fields)`` is falsy. Otherwise, if
    ``instance.file`` is set, returns the ``ok`` flag of
    ``pdf_pipeline.attach_paper_pdf(instance)``. Errors are logged via
    ``sentry.log_error`` and ``None`` is returned.
    """
    if created or not check_file_updated(update_fields):
        return None

    try:
        if instance.file is None:
            return None
        return pdf_pipeline.attach_paper_pdf(instance).ok
    except Exception as e:
        sentry.log_error(e, 'Failed to attach file to document')
    return None
Пример #6
0
 def pdf_url_to_journal_url(cls, pdf_url):
     """Derive the journal page URL from a PDF URL.

     The text after ``cls.pdf_url_split_on_partial`` is passed through
     ``cls.remove_query`` and wrapped with ``cls.journal_url_base`` and
     ``cls.journal_url_suffix``. Returns ``None`` (after logging to Sentry
     with the offending URL) when the URL cannot be converted.
     """
     segments = pdf_url.split(cls.pdf_url_split_on_partial)
     try:
         identifier = cls.remove_query(segments[1])
         return f'{cls.journal_url_base}{identifier}{cls.journal_url_suffix}'
     except Exception as e:
         sentry.log_error(e, message=pdf_url)
         return None
Пример #7
0
 def _build_pipeline_if_not_exists(self):
     """Build the pipeline when ``_check_pipeline_exists()`` returns ``False``.

     Only an exact ``False`` triggers a build; any other result is treated
     as "nothing to do". Errors from either step are logged to Sentry and
     swallowed.
     """
     try:
         pipeline_exists = self._check_pipeline_exists()
         if pipeline_exists is False:
             self._build_pipeline()
     except Exception as e:
         sentry.log_error(e, 'Failed to build pipeline')
Пример #8
0
 def pdf_url_to_journal_url(cls, pdf_url):
     """Derive the journal page URL from a PDF URL.

     Takes the text after ``cls.pdf_url_split_on``, strips the query via
     ``cls.remove_query``, cuts it at ``cls.pdf_url_suffix`` and prefixes the
     result with ``cls.journal_url_base``. Returns ``None`` (after logging to
     Sentry with the offending URL) when the URL cannot be converted.
     """
     segments = pdf_url.split(cls.pdf_url_split_on)
     try:
         without_query = cls.remove_query(segments[1])
         identifier = without_query.split(cls.pdf_url_suffix)[0]
         return f'{cls.journal_url_base}{identifier}'
     except Exception as e:
         sentry.log_error(e, message=pdf_url)
         return None
Пример #9
0
 def registry_update_related_task(pk, app_label, model_name):
     """Call ``registry.update_related`` for one model instance.

     The model class is resolved from ``app_label`` / ``model_name``. If that
     lookup fails with ``LookupError`` the error is logged to Sentry and the
     task ends without touching the registry.
     """
     try:
         model = apps.get_model(app_label, model_name)
     except LookupError as e:
         sentry.log_error(e)
         return

     instance = model.objects.get(pk=pk)
     registry.update_related(instance)
Пример #10
0
 def journal_url_to_pdf_url(cls, journal_url):
     """Derive the PDF URL from a journal page URL.

     The text after ``cls.journal_url_split_on`` is passed through
     ``cls.remove_query`` and wrapped with ``cls.pdf_url_base`` and
     ``cls.pdf_url_suffix``. Returns ``None`` (after logging to Sentry with
     the offending URL) when the URL cannot be converted.
     """
     segments = journal_url.split(cls.journal_url_split_on)
     try:
         identifier = cls.remove_query(segments[1])
         return f'{cls.pdf_url_base}{identifier}{cls.pdf_url_suffix}'
     except Exception as e:
         sentry.log_error(e, message=journal_url)
         return None
Пример #11
0
 def _add_citation(self, user, hypothesis_id, unified_document,
                   citation_type):
     """Create a ``Citation`` and link it to the given hypothesis.

     The citation is created with ``user`` as ``created_by`` and
     ``unified_document`` as ``source``; its ``hypothesis`` relation is then
     set to the single hypothesis found by ``hypothesis_id``. Any failure is
     logged to Sentry and swallowed.
     """
     try:
         target_hypothesis = Hypothesis.objects.get(id=hypothesis_id)
         new_citation = Citation.objects.create(
             created_by=user,
             source=unified_document,
             citation_type=citation_type,
         )
         new_citation.hypothesis.set([target_hypothesis])
     except Exception as e:
         sentry.log_error(e)
Пример #12
0
    def get_uploaded_by(self, hit):
        """Return serialized data for the uploader of the paper in ``hit``.

        Looks the paper up by ``hit["id"]``. Returns ``None`` when the paper
        has no uploader, and also (implicitly) when any step fails, in which
        case the error is passed to ``log_error``.
        """
        try:
            uploader = Paper.objects.get(id=hit["id"]).uploaded_by
            if not uploader:
                return None

            user = User.objects.get(id=uploader.id)
            return UserSerializer(user, read_only=True).data
        except Exception as e:
            log_error(e, "Paper is missing uploaded_by")
Пример #13
0
 def get_author_profile(self, user):
     """Serialize ``user.author_profile`` with ``DynamicAuthorSerializer``.

     Extra serializer keyword arguments are read from the context key
     ``usr_dus_get_author_profile``. On any error the exception is logged to
     Sentry and an empty dict is returned.
     """
     ctx = self.context
     serializer_kwargs = ctx.get("usr_dus_get_author_profile", {})
     try:
         return DynamicAuthorSerializer(
             user.author_profile,
             context=ctx,
             **serializer_kwargs
         ).data
     except Exception as e:
         sentry.log_error(e)
         return {}
Пример #14
0
 def get_latest_submission_date(self, hub):
     """Return the ``created_date`` of the newest submitter contribution in ``hub``.

     Contributions are filtered by ``Contribution.SUBMITTER`` and by the
     hub's id. Any failure (including no matching contribution) is passed to
     ``log_error`` and ``None`` is returned.
     """
     try:
         submissions = Contribution.objects.filter(
             contribution_type=Contribution.SUBMITTER,
             unified_document__hubs=hub.id
         )
         newest = submissions.latest('created_date')
         return newest.created_date
     except Exception as error:
         log_error(error)
         return None
Пример #15
0
    def __call__(self, execute, sql, params, many, context):
        """Run ``execute`` for one query, recording its start time.

        ``self.start`` is set from the monotonic clock before the query runs.
        If ``execute`` raises, the error is logged to Sentry and re-raised;
        otherwise ``self.capture_traceback()`` is called and the query result
        is returned.
        """
        self.start = time.monotonic()

        try:
            outcome = execute(sql, params, many, context)
        except Exception as e:
            sentry.log_error(e)
            raise
        else:
            self.capture_traceback()
            return outcome
Пример #16
0
def mailchimp_add_user(request, user, **kwargs):
    """Adds user email to MailChimp.

    Subscribes ``user.email`` to the list identified by ``MAILCHIMP_LIST_ID``.
    Failures are logged to Sentry and never raised to the caller.

    Args:
        request: Unused; accepted for the caller's signature.
        user: Object exposing an ``email`` attribute.
        **kwargs: Ignored.
    """
    mailchimp = Client()
    mailchimp.set_config({
        'api_key': keys.MAILCHIMP_KEY,
        'server': MAILCHIMP_SERVER
    })

    try:
        member_info = {'email_address': user.email, 'status': 'subscribed'}
        mailchimp.lists.add_list_member(MAILCHIMP_LIST_ID, member_info)
    except Exception as error:
        # Not every exception carries ``.text``; reading it unconditionally
        # raised AttributeError inside this handler and hid the real error.
        message = getattr(error, 'text', str(error))
        sentry.log_error(error, message=message)
Пример #17
0
    def generate_distribution(self, item, amount=1, distribute=True):
        """Build a ``REWARD`` distribution for ``item`` and optionally run it.

        A ``Contribution`` is first resolved to the object it points at; if
        that lookup fails the error is printed and ``None`` is returned. The
        recipient is then chosen from the resolved item's type. An unsupported
        type is logged to Sentry and raised as ``Exception``.

        Returns the result of ``Distributor.distribute()`` when ``distribute``
        is truthy, otherwise the unexecuted ``Distributor``.
        """
        from paper.models import Paper, Vote
        from user.models import User, Author
        from bullet_point.models import BulletPoint, Vote as BulletPointVote
        from summary.models import Summary, Vote as SummaryVote
        from discussion.models import Thread, Comment, Reply

        if type(item) is Contribution:
            content_type = item.content_type
            try:
                item = content_type.get_object_for_this_type(id=item.object_id)
            except Exception as e:
                print(e)
                return None
        item_type = type(item)

        # Attribute holding the recipient for each supported type;
        # ``None`` means the item itself is the recipient.
        recipient_attr_by_type = {
            Paper: 'uploaded_by',
            BulletPoint: 'created_by',
            BulletPointVote: 'created_by',
            Summary: 'proposed_by',
            SummaryVote: 'created_by',
            Vote: 'created_by',
            User: None,
            Author: 'user',
            Thread: 'created_by',
            Comment: 'created_by',
            Reply: 'created_by',
        }
        if item_type not in recipient_attr_by_type:
            error = Exception(f'Missing instance type: {str(item_type)}')
            sentry.log_error(error)
            raise error

        recipient_attr = recipient_attr_by_type[item_type]
        if recipient_attr is None:
            recipient = item
        else:
            recipient = getattr(item, recipient_attr)

        reward = dist('REWARD', amount, False)
        distributor = Distributor(reward, recipient, item, time.time())

        return distributor.distribute() if distribute else distributor
Пример #18
0
    def apply_bad_content_decision(self, content_creator, content_id, source='AUTOMATED_RULE', reporter=None):
        """Apply the ``content_looks_bad_content_abuse`` decision to a piece of content.

        The analyst is ``reporter.email`` when a reporter is given, otherwise
        a fixed fallback address. A ``sift.client.ApiException`` is logged to
        Sentry and printed; nothing is returned.
        """
        analyst = reporter.email if reporter else '*****@*****.**'
        explanation = 'Auto flag of moderator-removed content'
        decision_request = {
            'decision_id': 'content_looks_bad_content_abuse',
            'source': source,
            'analyst': analyst,
            'description': explanation,
            'reason': explanation,
        }

        try:
            client.apply_content_decision(
                str(content_creator.id),
                content_id,
                decision_request,
            )
        except sift.client.ApiException as api_error:
            sentry.log_error(api_error)
            print(api_error)
Пример #19
0
    def apply_bad_user_decision(self, content_creator, source='AUTOMATED_RULE', reporter=None):
        """Apply the ``looks_bad_content_abuse`` decision to a user.

        The analyst is ``reporter.email`` when a reporter is given, otherwise
        a fixed fallback address. A ``sift.client.ApiException`` is logged to
        Sentry and printed; nothing is returned.
        """
        analyst = reporter.email if reporter else '*****@*****.**'
        explanation = 'User looks risky for content abuse'
        decision_request = {
            'decision_id': 'looks_bad_content_abuse',
            'source': source,
            'analyst': analyst,
            'description': explanation,
            'reason': explanation,
        }

        try:
            client.apply_user_decision(
                str(content_creator.id),
                decision_request,
            )
        except sift.client.ApiException as api_error:
            sentry.log_error(api_error)
            print(api_error)
Пример #20
0
def get_discussion_hubs(instance):
    """Return the hubs of the paper that a discussion item belongs to.

    Supports ``BulletPoint``, ``Comment``, ``Reply`` and ``Thread`` instances,
    walking up ``parent`` links as needed to reach the paper. For a ``Reply``
    a failed walk is logged to Sentry and yields ``None``. Any other type
    also yields ``None``.
    """
    if isinstance(instance, BulletPoint):
        return instance.paper.hubs
    if isinstance(instance, Comment):
        return instance.parent.paper.hubs
    if isinstance(instance, Reply):
        try:
            return instance.parent.parent.paper.hubs
        except Exception as e:
            sentry.log_error(e)
            return None
    if isinstance(instance, Thread):
        return instance.paper.hubs
    return None
Пример #21
0
    def distribute(self):
        """Record the distribution, apply it, and return the record.

        The record is moved to pending, reputation and balance are updated,
        then the record is marked distributed. If any step fails the record
        is marked failed, a ``ReputationDistributorError`` is logged to
        Sentry and the failure is printed. The record is returned either way.
        """
        distribution_record = self._record_distribution()
        try:
            distribution_record.set_distributed_pending()
            self._update_reputation_and_balance(distribution_record)
            distribution_record.set_distributed()
        except Exception as cause:
            distribution_record.set_distributed_failed()

            failure_message = f'Distribution {distribution_record.id} failed'
            sentry.log_error(
                ReputationDistributorError(cause, failure_message)
            )
            print(failure_message, cause)
        return distribution_record
Пример #22
0
    def celery_track_content_comment(
        user_id,
        comment_id,
        comment_type,
        meta,
        is_thread,
        update,
    ):
        """Send a create/update content event for a discussion comment.

        Loads the user and the ``discussion.<comment_type>`` instance, builds
        the ``$comment`` payload (with the paper as root content and, for
        non-threads, the parent as ``$parent_comment_id``) and tracks it as
        ``$update_content`` or ``$create_content``. ``meta`` is not used.

        Returns the response body on success. On
        ``sift.client.ApiException`` the error is logged to Sentry, its
        message is printed, and ``None`` is returned implicitly.
        """
        user = apps.get_model('user.User').objects.get(id=user_id)
        comment_model = apps.get_model(f'discussion.{comment_type}')
        comment = comment_model.objects.get(id=comment_id)

        if comment.paper is None:
            root_content_id = ''
        else:
            root_content_id = (
                f'{type(comment.paper).__name__}_{comment.paper.id}'
            )

        # Top-level fields first, then the nested $comment object.
        comment_properties = {
            '$user_id': str(user.id),
            # must be unique across all content types
            '$content_id': f'{type(comment).__name__}_{comment.id}',
            '$status': '$active',
        }
        comment_payload = {
            '$body': comment.plain_text,
            '$contact_email': user.email,
            '$root_content_id': root_content_id,
        }
        if not is_thread:
            comment_payload['$parent_comment_id'] = (
                f'{type(comment.parent).__name__}_{comment.parent.id}'
            )
        comment_properties['$comment'] = comment_payload

        if update:
            track_type = '$update_content'
        else:
            track_type = '$create_content'

        try:
            response = client.track(
                track_type,
                comment_properties,
                return_score=False,
            )
            print(response.body)
            return response.body
        except sift.client.ApiException as api_error:
            sentry.log_error(api_error)
            print(api_error.api_error_message)
Пример #23
0
def label_bad_user(user_id, abuse_type, description=''):
    """Label ``user_id`` as bad for ``abuse_type`` and print the response body.

    A ``sift.client.ApiException`` is logged via ``sentry.log_error`` and its
    ``api_error_message`` is printed. Nothing is returned.
    """
    # TODO: Finish this by determining how we plan to use it
    label_properties = {
        '$is_bad': True,
        # optional fields
        '$abuse_type': abuse_type,
        '$description': description,
        '$source': 'django',
        '$analyst': '*****@*****.**'
    }
    try:
        print(client.label(user_id, label_properties).body)
    except sift.client.ApiException as api_error:
        sentry.log_error(api_error)
        print(api_error.api_error_message)
Пример #24
0
def after_approval_flow(case_id):
    """Run the follow-up steps for an approved author claim case.

    For a case whose status is ``APPROVED``, rewards the requestor's author
    profile for the claimed paper and sends the approval email. Cases in any
    other status are ignored. Failures are logged to Sentry, not raised.

    Args:
        case_id: Primary key of the ``AuthorClaimCase`` to process.
    """
    instance = AuthorClaimCase.objects.get(id=case_id)
    if instance.status != APPROVED:
        return

    try:
        target_paper = instance.target_paper
        # Validate before rewarding: previously the reward was issued first,
        # so a case without a paper was rewarded and only then rejected.
        if target_paper is None:
            raise Exception(
                'Cannot approve claim because paper was not found')

        requestor_author = instance.requestor.author_profile
        reward_author_claim_case(requestor_author, target_paper, instance)
        send_approval_email(instance)
    except Exception as exception:
        sentry.log_error(exception)
Пример #25
0
def trigger_email_validation_flow(case_id):
    """Issue a new validation token for a claim case and email it.

    Only acts on cases whose status is ``INITIATED``. The new token and its
    generation time are stored on the case, the validation email is sent,
    the attempt counter is incremented and the case is saved. Failures are
    printed and logged to Sentry.
    """
    instance = AuthorClaimCase.objects.get(id=case_id)
    if instance.status != INITIATED:
        return

    try:
        generated_time, token = get_new_validation_token()
        instance.token_generated_time = generated_time
        instance.validation_token = token
        # Note: intentionally sending email before incrementing attempt
        send_validation_email(instance)
        instance.validation_attempt_count += 1
        instance.save()
    except Exception as exception:
        print('exception', exception)
        sentry.log_error(exception)
Пример #26
0
    def track_flag_content(self, user, content_id, referer_id):
        """Send a ``$flag_content`` event for ``content_id`` and print the reply.

        Returns ``None`` immediately when ``user`` is falsy. A
        ``sift.client.ApiException`` is logged to Sentry and its message is
        printed.
        """
        # https://sift.com/developers/docs/curl/events-api/reserved-events/flag-content
        if not user:
            return None

        flag_event = {
            '$user_id': str(user.id),
            '$content_id': content_id,
            '$flagged_by': str(referer_id),
        }

        try:
            print(client.track('$flag_content', flag_event).body)
        except sift.client.ApiException as api_error:
            sentry.log_error(api_error)
            print(api_error.api_error_message)
Пример #27
0
def email_notifications(request):
    """Handles AWS SNS email notifications.

    The payload is taken from ``request.data`` (JSON-decoded first when it is
    not a plain dict) and dispatched on its ``Type`` key:

    * ``SubscriptionConfirmation``: GETs ``SubscribeURL`` and logs a request
      error on a non-200 reply.
    * ``Notification``: decodes ``Message``. For ``Bounce`` every bounced
      recipient is recorded via ``EmailRecipient``; for ``Complaint`` an
      info entry is logged.
    * anything else is printed as unsupported.

    Raises:
        ParseError: when the payload has no ``Type`` key.

    Returns:
        An empty ``Response``.
    """
    data = request.data
    if type(data) is not dict:
        data = json.loads(data)

    try:
        data_type = data['Type']
    except KeyError:
        raise ParseError(f'Did not find key `Type` in {data}')

    if data_type == 'SubscriptionConfirmation':
        resp = http_request('GET', data['SubscribeURL'])
        if resp.status_code != 200:
            log_request_error(resp, 'Failed to subscribe to SNS')

    elif data_type == 'Notification':
        data_message = json.loads(data['Message'])
        notification_type = data_message['notificationType']
        if notification_type == 'Bounce':
            bounced_recipients = data_message['bounce']['bouncedRecipients']

            for b_r in bounced_recipients:
                email_address = b_r['emailAddress']
                # TODO: Sanitize email address before putting it in the db
                try:
                    recipient, _ = EmailRecipient.objects.get_or_create(
                        email=email_address)
                    recipient.bounced()
                except Exception as e:
                    message = (
                        f'Failed handling bounced recipient: {email_address}')
                    error = EmailNotificationError(e, message)
                    print(error)
                    log_error(error, base_error=e)
        elif notification_type == 'Complaint':
            # Complaints arrive as a notificationType inside a Notification,
            # so the top-level branch below never saw them.
            log_info(f'`email_notifications` received {notification_type}')
    elif data_type == 'Complaint':
        # Kept for backward compatibility with callers that send a top-level
        # `Complaint` type.
        log_info(f'`email_notifications` received {data_type}')
    else:
        message = (
            f'`email_notifications` received unsupported type {data_type}')
        print(message)

    return Response({})
Пример #28
0
def _save_trace_file(trace_text, **traceback_fields):
    """Store ``trace_text`` as the ``trace`` file of a new ``Traceback`` row.

    The text is written to a scratch file under ``/tmp/trace_logs`` that is
    handed to ``Traceback.objects.create`` together with
    ``traceback_fields``. The scratch file is removed afterwards, even when
    saving fails.
    """
    trace_dir = '/tmp/trace_logs'
    # The scratch directory may not exist yet on a fresh host.
    os.makedirs(trace_dir, exist_ok=True)
    filename = f'{trace_dir}/{str(datetime.now())}.log'
    try:
        with open(filename, 'wb+') as f:
            f.write(trace_text.encode())
            Traceback.objects.create(trace=File(f), **traceback_fields)
    finally:
        try:
            os.remove(filename)
        except FileNotFoundError:
            # open() itself failed, so there is nothing to clean up.
            pass


def log_traceback(data, total_view_time, traceback):
    """Persist a profiling record for one request.

    Creates a ``Profile`` row from ``data``, then one ``Traceback`` row for
    the view trace and one per entry in ``data['queries']``. Errors while
    saving are logged to Sentry and swallowed; missing keys in ``data``
    still raise ``KeyError``.

    Args:
        data: Dict with ``queries``, ``view_name``, ``path``,
            ``http_method``, ``total_queries`` and ``total_time`` keys.
        total_view_time: Total time spent in the view.
        traceback: View traceback as text.
    """
    queries = data['queries']
    view_name = data['view_name']
    path = data['path']
    http_method = data['http_method']
    total_queries = str(data['total_queries'])
    total_sql_time = data['total_time']

    try:
        profile = Profile.objects.create(
            view_name=view_name,
            path=path,
            http_method=http_method,
            total_queries=total_queries,
            total_sql_time=total_sql_time,
            total_view_time=total_view_time
        )

        _save_trace_file(
            traceback,
            profile=profile,
            choice_type=Traceback.VIEW_TRACE,
            time=total_view_time,
        )
        for query in queries:
            _save_trace_file(
                query.get('traceback', ''),
                profile=profile,
                choice_type=Traceback.SQL_TRACE,
                time=query.get('time'),
                sql=query.get('sql'),
            )
    except Exception as e:
        sentry.log_error(e)
Пример #29
0
    def journal_url_to_pdf_url(cls, journal_url):
        """Derive the PDF URL from a journal page URL on a subdomain.

        The subdomain's entry in ``SUB_KEYS`` (or the subdomain itself when
        there is none) is used to build the PDF base URL. The text after
        ``cls.journal_url_split_on``, stripped by ``cls.remove_query``, is
        appended together with ``cls.pdf_url_suffix``. Returns ``None``
        (after logging to Sentry with the offending URL) on failure.
        """
        subdomain = cls.get_subdomain(journal_url)
        # Default to the subdomain; override when SUB_KEYS has an entry.
        sub_key = subdomain
        try:
            sub_key = SUB_KEYS[subdomain]
        except KeyError:
            pass

        segments = journal_url.split(cls.journal_url_split_on)
        try:
            identifier = cls.remove_query(segments[1])
            base = cls.build_pdf_url_base(subdomain, sub_key)
            return f'{base}{identifier}{cls.pdf_url_suffix}'
        except Exception as e:
            sentry.log_error(e, message=journal_url)
            return None
Пример #30
0
    def pdf_url_to_journal_url(cls, pdf_url):
        """Derive the journal page URL from a PDF URL on a subdomain.

        The subdomain's entry in ``SUB_KEYS`` (or the subdomain itself when
        there is none) determines the separator used to split the PDF URL.
        The part after it is stripped by ``cls.remove_query``, cut at
        ``cls.pdf_url_suffix`` and appended to the subdomain's journal base
        URL. Returns ``None`` (after logging to Sentry with the offending
        URL) on failure.
        """
        subdomain = cls.get_subdomain(pdf_url)
        # Default to the subdomain; override when SUB_KEYS has an entry.
        sub_key = subdomain
        try:
            sub_key = SUB_KEYS[subdomain]
        except KeyError:
            pass

        segments = pdf_url.split(cls.build_pdf_url_split_on(sub_key))
        try:
            without_query = cls.remove_query(segments[1])
            identifier = without_query.split(cls.pdf_url_suffix)[0]
            base = cls.build_journal_url_base(subdomain)
            return f'{base}{identifier}'
        except Exception as e:
            sentry.log_error(e, message=pdf_url)
            return None