Example #1
    def pedalboard(self):
        bundlepath = self.get_argument('bundlepath')

        with open(DEFAULT_ICON_TEMPLATE, 'r') as fh:
            default_icon_template = squeeze(fh.read().replace("'", "\\'"))

        with open(DEFAULT_SETTINGS_TEMPLATE, 'r') as fh:
            default_settings_template = squeeze(fh.read().replace("'", "\\'"))

        try:
            pedalboard = get_pedalboard_info(bundlepath)
        except Exception:
            print("ERROR in webserver.py: get_pedalboard_info failed")
            pedalboard = {
                'height': 0,
                'width': 0,
                'title': "",
                'connections': [],
                'plugins': [],
                'hardware': {},
            }

        context = {
            'default_icon_template': default_icon_template,
            'default_settings_template': default_settings_template,
            'pedalboard': b64encode(json.dumps(pedalboard).encode("utf-8"))
        }

        return context
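
Every example on this page leans on tornado.escape.squeeze, which replaces each run of whitespace (spaces, tabs, newlines) with a single space. A minimal illustration:

from tornado.escape import squeeze

# Collapses any whitespace run to one space; this is what lets the
# handlers here flatten multi-line HTML templates into one JS string.
print(squeeze("a  b\n\tc"))  # -> "a b c"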
Example #2
    def post(self):
        user = self.get_current_user()

        if user.username:
            self.redirect('/')
            return

        username = squeeze(self.get_argument('username', ''))
        fullname = squeeze(self.get_argument('fullname', ''))

        errors = []

        if not username or not fullname:
            errors.append('Username and full name are mandatory')
        else:
            try:
                user.username = username
                user.fullname = fullname
                user.save()
                self.redirect('/')
                return
            except InvalidUsernameError:
                errors.append('Username must contain only letters, numbers and underscores (a-z, 0-9, _)')
            except InvalidFullnameError:
                errors.append('Full name must contain only letters, numbers, underscores and spaces (a-z, 0-9, _)')
            except UsernameTakenError:
                errors.append('This username is already taken')
            except Exception as e:
                errors.append('Unexpected error')

        self.render("signup.html",
                    user=user,
                    errors=errors)
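
The validation exceptions caught above are application-defined and not shown; a plausible sketch, with only the names taken from the source:

# Sketch of the application-defined errors the handler above expects;
# the class bodies are assumptions, only the names appear in the source.
class InvalidUsernameError(Exception):
    """Username contains characters outside a-z, 0-9, _."""

class InvalidFullnameError(Exception):
    """Full name contains characters outside a-z, 0-9, _, space."""

class UsernameTakenError(Exception):
    """The requested username already exists."""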
Example #3
    def get(self, path):
        if not path:
            path = 'index.html'
        section = path.split('.', 1)[0]

        if section == 'index':
            # Caching strategy.
            # 1. If we don't have a version parameter, redirect
            curVersion = self.get_version()
            try:
                version = url_escape(self.get_argument('v'))
            except web.MissingArgumentError:
                uri = self.request.uri
                uri += '&' if self.request.query else '?'
                uri += 'v=%s' % curVersion
                self.redirect(uri)
                return
            # 2. Make sure version is correct
            if IMAGE_VERSION is not None and version != curVersion:
                uri = self.request.uri.replace('v=%s' % version,
                                               'v=%s' % curVersion)
                self.redirect(uri)
                return

            lv2_cleanup()
            lv2_init()

        else:
            version = self.get_version()

        loader = template.Loader(HTML_DIR)

        with open(DEFAULT_ICON_TEMPLATE, 'r') as fd:
            default_icon_template = squeeze(fd.read().replace("'", "\\'"))

        with open(DEFAULT_SETTINGS_TEMPLATE, 'r') as fd:
            default_settings_template = squeeze(fd.read().replace("'", "\\'"))

        with open(WIZARD_DB, 'r') as fh:
            wizard_db = json.load(fh)

        context = {
            'default_device': DEFAULT_DEVICE,
            'default_icon_template': default_icon_template,
            'default_settings_template': default_settings_template,
            'version': version,
            'wizard_db': json.dumps(wizard_db),
            'device_mode': 'true' if DEVICE_MODE else 'false',
            'write_access': 'true' if LV2_DIR else 'false',
        }

        self.write(loader.load(path).generate(**context))
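
The get_version helper is not shown here. A hypothetical implementation, assuming IMAGE_VERSION is the release identifier checked above, might derive a short cache-busting token from it:

import hashlib

def get_version(self):
    # Hypothetical: any string that is stable per release works as the
    # 'v' query parameter; here we hash the assumed IMAGE_VERSION.
    return hashlib.sha1(str(IMAGE_VERSION).encode("utf-8")).hexdigest()[:8]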
Example #4
    def get(self, path):
        if not path:
            path = 'index.html'
        section = path.split('.', 1)[0]

        if section == 'index':
            # Caching strategy.
            # 1. If we don't have a version parameter, redirect
            curVersion = self.get_version()
            try:
                version = url_escape(self.get_argument('v'))
            except web.MissingArgumentError:
                uri  = self.request.uri
                uri += '&' if self.request.query else '?'
                uri += 'v=%s' % curVersion
                self.redirect(uri)
                return
            # 2. Make sure version is correct
            if IMAGE_VERSION is not None and version != curVersion:
                uri = self.request.uri.replace('v=%s' % version, 'v=%s' % curVersion)
                self.redirect(uri)
                return

            lv2_cleanup()
            lv2_init()

        else:
            version = self.get_version()

        loader = template.Loader(HTML_DIR)

        with open(DEFAULT_ICON_TEMPLATE, 'r') as fd:
            default_icon_template = squeeze(fd.read().replace("'", "\\'"))

        with open(DEFAULT_SETTINGS_TEMPLATE, 'r') as fd:
            default_settings_template = squeeze(fd.read().replace("'", "\\'"))

        with open(WIZARD_DB, 'r') as fh:
            wizard_db = json.load(fh)

        context = {
            'default_device': DEFAULT_DEVICE,
            'default_icon_template': default_icon_template,
            'default_settings_template': default_settings_template,
            'version': version,
            'wizard_db': json.dumps(wizard_db),
            'device_mode': 'true' if DEVICE_MODE else 'false',
            'write_access': 'true' if LV2_DIR else 'false',
        }

        self.write(loader.load(path).generate(**context))
Example #5
def tlinject(handler, s, key_is_pt=False, *, mock=False):
    try:
        base, alt_set = handler._tlinject_base
    except AttributeError:
        base = TLINJECT_EMPTY
        alt_set = TLINJECT_ALT_EMPTY
        logging.warning("TLInject users should have a _tlinject_base dict")

    secret = handler.settings["db_coordinator"].tlinject_database.secret
    if secret:
        my = hmac.new(secret, s.encode("utf8"), hashlib.sha224).digest()[:12]
        my = base64.urlsafe_b64encode(my).decode("ascii").rstrip("=")
    else:
        my = ""

    data = []

    if mock:
        data.append('data-mock="1"')

    if s in alt_set:
        data.append('data-overlay="1"')

    pretranslated = base.get(s, None)
    if pretranslated:
        if not key_is_pt:
            data.append(f'data-tlik="{xhtml_escape(s)}"')
        s = pretranslated

    all_data = " ".join(data)
    return squeeze(
        f"""<span class="tlinject" data-assr="{my}" {all_data}>{xhtml_escape(s)}</span>"""
    )
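
The data-assr attribute is an HMAC signature over the displayed string. The same construction, extracted from the function above into a standalone helper:

import base64
import hashlib
import hmac

def tlinject_signature(secret: bytes, s: str) -> str:
    # First 12 bytes of an HMAC-SHA224 digest, urlsafe-base64 encoded
    # with the '=' padding stripped, exactly as in tlinject() above.
    mac = hmac.new(secret, s.encode("utf8"), hashlib.sha224).digest()[:12]
    return base64.urlsafe_b64encode(mac).decode("ascii").rstrip("=")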
Example #6
def SendEmailToUser(template, user):
    assert user.email is not None, user

    unsubscribe_cookie = User.CreateUnsubscribeCookie(user.user_id, AccountSettings.MARKETING)
    unsubscribe_url = "https://%s/unsubscribe?%s" % (options.domain, urlencode(dict(cookie=unsubscribe_cookie)))

    # Create arguments for the email template.
    fmt_args = {"first_name": user.given_name, "unsubscribe_url": unsubscribe_url}

    # Create arguments for the email.
    args = {
        "from": EmailManager.Instance().GetInfoAddress(),
        "fromname": "Viewfinder",
        "to": user.email,
        "subject": options.email_subject,
    }
    util.SetIfNotNone(args, "toname", user.name)

    args["html"] = template.generate(is_html=True, **fmt_args)
    args["text"] = template.generate(is_html=False, **fmt_args)

    print "Sending marketing email to %s (%s) (#%d)" % (user.email, user.name, user.user_id)

    if options.test_mode:
        global _is_first_email
        if _is_first_email:
            print args["html"]
            _is_first_email = False
    else:
        # Remove extra whitespace in the HTML (seems to help it avoid Gmail spam filter).
        args["html"] = escape.squeeze(args["html"])

        yield gen.Task(EmailManager.Instance().SendEmail, description="marketing email", **args)
Example #7
    def get(self):
        self.set_header("Content-Type", "text/plain; charset=UTF-8")
        basedir = os.path.join(HTML_DIR, 'include')
        for template in os.listdir(basedir):
            if not re.match(r'^[a-z_]+\.html$', template):
                continue
            with open(os.path.join(basedir, template), 'r') as fh:
                contents = fh.read()
            self.write("TEMPLATES['%s'] = '%s';\n\n" %
                       (template[:-5], squeeze(contents.replace("'", "\\'"))))
        self.finish()
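
The replace-then-squeeze pattern recurs throughout these handlers whenever an HTML template must be embedded in a single-quoted JavaScript string. As a standalone helper it would look like:

from tornado.escape import squeeze

def js_single_quoted(text: str) -> str:
    # Escape single quotes, then collapse all whitespace (including
    # newlines) so the result fits on one JavaScript source line.
    return squeeze(text.replace("'", "\\'"))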
Example #8
    def index(self):

        user_id = safe_json_load(USER_ID_JSON_FILE, dict)
        prefs   = safe_json_load(PREFERENCES_JSON_FILE, dict)

        with open(DEFAULT_ICON_TEMPLATE, 'r') as fh:
            default_icon_template = squeeze(fh.read().replace("'", "\\'"))

        with open(DEFAULT_SETTINGS_TEMPLATE, 'r') as fh:
            default_settings_template = squeeze(fh.read().replace("'", "\\'"))

        pbname = xhtml_escape(SESSION.host.pedalboard_name)
        prname = SESSION.host.pedalpreset_name()

        fullpbname = pbname or "Untitled"
        if prname:
            fullpbname += " - " + prname

        context = {
            'default_icon_template': default_icon_template,
            'default_settings_template': default_settings_template,
            'default_pedalboard': DEFAULT_PEDALBOARD,
            'cloud_url': CLOUD_HTTP_ADDRESS,
            'pedalboards_url': PEDALBOARDS_HTTP_ADDRESS,
            'hardware_profile': b64encode(json.dumps(SESSION.get_hardware_actuators()).encode("utf-8")),
            'version': self.get_argument('v'),
            'lv2_plugin_dir': LV2_PLUGIN_DIR,
            'bundlepath': SESSION.host.pedalboard_path,
            'title':  pbname,
            'size': json.dumps(SESSION.host.pedalboard_size),
            'fulltitle':  fullpbname,
            'titleblend': '' if SESSION.host.pedalboard_name else 'blend',
            'using_app': 'true' if APP else 'false',
            'using_mod': 'true' if DEVICE_KEY else 'false',
            'user_name': xhtml_escape(user_id.get("name", "")),
            'user_email': xhtml_escape(user_id.get("email", "")),
            'favorites': json.dumps(gState.favorites),
            'preferences': json.dumps(prefs),
        }
        return context
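
safe_json_load is not defined on this page; a hypothetical sketch consistent with its call sites above (a path plus the expected type, falling back to an empty instance on any failure):

import json

def safe_json_load(path, objtype):
    # Hypothetical helper: parse the JSON file and return the result only
    # if it has the expected type; otherwise return an empty objtype().
    try:
        with open(path, 'r') as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        return objtype()
    return data if isinstance(data, objtype) else objtype()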
Example #9
    def _GetAuthEmail(cls, client, action, use_short_token, user_name,
                      identity, short_url):
        """Returns a dict of parameters that will be passed to EmailManager.SendEmail in order to
    email an access token to a user who is verifying his/her account.
    """
        action_info = VerifyIdBaseHandler.ACTION_MAP[action]
        identity_type, identity_value = Identity.SplitKey(identity.key)

        # Create arguments for the email.
        args = {
            'from': EmailManager.Instance().GetInfoAddress(),
            'fromname': 'Viewfinder',
            'to': identity_value
        }
        util.SetIfNotNone(args, 'toname', user_name)

        # Create arguments for the email template.
        fmt_args = {
            'user_name': user_name or identity_value,
            'user_email': identity_value,
            'url': 'https://%s/%s%s' % (ServerEnvironment.GetHost(),
                                        short_url.group_id,
                                        short_url.random_key),
            'title': action_info.title,
            'use_short_token': use_short_token,
            'access_token': identity.access_token
        }

        # The email html format is designed to meet these requirements:
        #   1. It must be viewable on even the most primitive email html viewer. Avoid fancy CSS.
        #   2. It cannot contain any images. Some email systems (like Gmail) do not show images by default.
        #   3. It must be short and look good on an IPhone 4S screen. The action button should be visible
        #      without any scrolling necessary.
        resources_mgr = ResourcesManager.Instance()
        if use_short_token:
            args['subject'] = 'Viewfinder Code: %s' % identity.access_token
        else:
            args['subject'] = action_info.title
        args['html'] = resources_mgr.GenerateTemplate(
            action_info.email_template, is_html=True, **fmt_args)
        args['text'] = resources_mgr.GenerateTemplate(
            action_info.email_template, is_html=False, **fmt_args)

        # Remove extra whitespace in the HTML (seems to help it avoid Gmail spam filter).
        args['html'] = escape.squeeze(args['html'])

        return args
Example #10
    def get(self):
        artists = [
            squeeze(artist).replace(' ', '+')
            for artist in self.get_arguments('artist') if artist
        ]
        get_profile_responses = yield [
            get_artist_profile(artist) for artist in artists
        ]
        decoded_response_bodies = [
            json_decode(response.body) for response in get_profile_responses
        ]
        artist_profiles = format_artist_profiles(decoded_response_bodies)
        self.render('mainstream.html', snark=how_mainstream(artist_profiles))
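
Collapsing whitespace and hand-replacing spaces with '+' approximates form encoding; if the value feeds into a URL, tornado.escape.url_escape (which encodes spaces as '+' by default) would be the more robust variant:

from tornado.escape import squeeze, url_escape

def normalize_artist(name: str) -> str:
    # Variant of the comprehension above: collapse whitespace, then
    # percent-encode the whole value instead of only replacing spaces.
    return url_escape(squeeze(name))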
Example #11
    def get(self, path):
        if not path:
            path = 'index.html'
        loader = template.Loader(HTML_DIR)

        with open(DEFAULT_ICON_TEMPLATE, 'r') as fd:
            default_icon_template = squeeze(fd.read().replace("'", "\\'"))

        with open(DEFAULT_SETTINGS_TEMPLATE, 'r') as fd:
            default_settings_template = squeeze(fd.read().replace("'", "\\'"))

        with open(WIZARD_DB, 'r') as fh:
            wizard_db = json.load(fh)

        context = {
            'default_device': DEFAULT_DEVICE,
            'default_icon_template': default_icon_template,
            'default_settings_template': default_settings_template,
            'wizard_db': json.dumps(wizard_db),
            'write_access': 1 if LV2_DIR else 0,
        }

        self.write(loader.load(path).generate(**context))
Example #12
def tlinject(handler, s, key_is_pt=False, *, mock=False):
    try:
        base = handler._tlinject_base
    except AttributeError:
        base = {}
        logging.warning("TLInject users should have a _tlinject_base dict")

    secret = handler.settings.get("tlinject_context").secret
    if secret:
        my = hmac.new(secret, s.encode("utf8"), hashlib.sha224).digest()[:12]
        my = base64.urlsafe_b64encode(my).decode("ascii").rstrip("=")
    else:
        my = ""

    if mock:
        extra = 'data-mock="1"'
    else:
        extra = ""

    pretranslated = base.get(s, None)
    if pretranslated:
        if key_is_pt:
            return squeeze(
                f"""<span class="tlinject" data-assr="{my}" {extra}>
                {xhtml_escape(pretranslated)}
            </span>"""
            )

        return squeeze(
            f"""<span class="tlinject" data-assr="{my}" data-tlik="{xhtml_escape(s)}" {extra}>
            {xhtml_escape(pretranslated)}
        </span>"""
        )
    else:
        return squeeze(
            f"""<span class="tlinject" data-assr="{my}" {extra}>{xhtml_escape(s)}</span>"""
        )
Example #13
    def post(self):
        email = squeeze(self.get_argument('email', ''))
        password = squeeze(self.get_argument('password', ''))
        action = self.get_argument('action')

        response = {'status': 'failed'}

        if not email or not password:
            response['error'] = 'Email and password are mandatory'
            self.write(json_encode(response))
            return

        if action == 'Login':
            identity = get_identity(email, password)

            if not identity:
                response['error'] = 'Wrong Username/Email and password combination'
                self.write(json_encode(response))
                return

            user = User.objects.find(id=identity)
            if user:
                token = generate_token()
                user.authenticate(token)
                self.set_secure_cookie("auth", token)
                self.redirect('/')
        elif action == 'Signup':
            already_exists = email_exists(email)

            if not already_exists:
                fullname = email.split('@')[0].replace('.', ' ').replace('_', ' ')
                user = User.objects.create('', fullname)
                store_credentials(user.id, email, password)
                token = generate_token()
                user.authenticate(token)
                self.set_secure_cookie("auth", token)
                self.redirect('/signup')
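
generate_token is assumed by this handler; a minimal stand-in using the standard library:

import secrets

def generate_token():
    # Hypothetical sketch of the helper used above: a random, URL-safe
    # string suitable for a secure-cookie session token.
    return secrets.token_urlsafe(32)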
Example #14
def get_local_authors_from_db(database=None):
    '''
    This just pulls out the authors from the local_authors table.

    Normalizes the form so they can be matched against the paper authors.

    '''

    # open the database if needed and get a cursor
    if not database:
        database, cursor = opendb()
        closedb = True
    else:
        cursor = database.cursor()
        closedb = False

    # get all local authors first
    query = 'select author, email from local_authors'
    cursor.execute(query)
    rows = cursor.fetchall()
    if rows:
        local_authors, author_emails = list(zip(*rows))
        local_authors = [x.lower() for x in local_authors]
        local_authors = [x.replace('.', ' ') for x in local_authors]
        local_authors = [squeeze(x) for x in local_authors]

        # this contains firstinitial-lastname pairs
        local_author_fnames = [x.split() for x in local_authors]
        local_author_fnames = [
            ''.join([x[0][0], x[-1]]) for x in local_author_fnames
        ]
        local_authors = [x.replace(' ', '') for x in local_authors]

    else:
        local_authors, local_author_fnames, author_emails = [], [], []

    # at the end, close the cursor and DB connection
    if closedb:
        cursor.close()
        database.close()

    return local_authors, local_author_fnames, author_emails
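
A worked example of the normalization pipeline above, showing the three forms a local author name ends up in:

from tornado.escape import squeeze

name = 'J.  R.  Doe'
norm = squeeze(name.lower().replace('.', ' '))  # 'j r doe'
parts = norm.split()
fname_key = parts[0][0] + parts[-1]             # 'jdoe' (flastname form)
full_key = norm.replace(' ', '')                # 'jrdoe' (full-name form)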
Example #15
def SendEmailToUser(template, user):
    assert user.email is not None, user

    unsubscribe_cookie = User.CreateUnsubscribeCookie(
        user.user_id, AccountSettings.MARKETING)
    unsubscribe_url = 'https://%s/unsubscribe?%s' % (
        options.domain, urlencode(dict(cookie=unsubscribe_cookie)))

    # Create arguments for the email template.
    fmt_args = {
        'first_name': user.given_name,
        'unsubscribe_url': unsubscribe_url
    }

    # Create arguments for the email.
    args = {
        'from': EmailManager.Instance().GetInfoAddress(),
        'fromname': 'Viewfinder',
        'to': user.email,
        'subject': options.email_subject
    }
    util.SetIfNotNone(args, 'toname', user.name)

    args['html'] = template.generate(is_html=True, **fmt_args)
    args['text'] = template.generate(is_html=False, **fmt_args)

    print('Sending marketing email to %s (%s) (#%d)' % (user.email, user.name,
                                                        user.user_id))

    if options.test_mode:
        global _is_first_email
        if _is_first_email:
            print(args['html'])
            _is_first_email = False
    else:
        # Remove extra whitespace in the HTML (seems to help it avoid Gmail spam filter).
        args['html'] = escape.squeeze(args['html'])

        yield gen.Task(EmailManager.Instance().SendEmail,
                       description='marketing email',
                       **args)
Example #16
  def _GetAuthEmail(cls, client, action, use_short_token, user_name, identity, short_url):
    """Returns a dict of parameters that will be passed to EmailManager.SendEmail in order to
    email an access token to a user who is verifying his/her account.
    """
    action_info = VerifyIdBaseHandler.ACTION_MAP[action]
    identity_type, identity_value = Identity.SplitKey(identity.key)

    # Create arguments for the email.
    args = {'from': EmailManager.Instance().GetInfoAddress(),
            'fromname': 'Viewfinder',
            'to': identity_value}
    util.SetIfNotNone(args, 'toname', user_name)

    # Create arguments for the email template.
    fmt_args = {'user_name': user_name or identity_value,
                'user_email': identity_value,
                'url': 'https://%s/%s%s' % (ServerEnvironment.GetHost(), short_url.group_id, short_url.random_key),
                'title': action_info.title,
                'use_short_token': use_short_token,
                'access_token': identity.access_token}

    # The email html format is designed to meet these requirements:
    #   1. It must be viewable on even the most primitive email html viewer. Avoid fancy CSS.
    #   2. It cannot contain any images. Some email systems (like Gmail) do not show images by default.
    #   3. It must be short and look good on an IPhone 4S screen. The action button should be visible
    #      without any scrolling necessary.
    resources_mgr = ResourcesManager.Instance()
    if use_short_token:
      args['subject'] = 'Viewfinder Code: %s' % identity.access_token
    else:
      args['subject'] = action_info.title
    args['html'] = resources_mgr.GenerateTemplate(action_info.email_template, is_html=True, **fmt_args)
    args['text'] = resources_mgr.GenerateTemplate(action_info.email_template, is_html=False, **fmt_args)

    # Remove extra whitespace in the HTML (seems to help it avoid Gmail spam filter).
    args['html'] = escape.squeeze(args['html'])

    return args
Example #17
def tag_local_authors(arxiv_date,
                      database=None,
                      firstname_match_threshold=99,
                      fullname_match_threshold=99,
                      update_db=False,
                      verbose=False):
    '''
    This finds all local authors for all papers on the date arxiv_date and tags
    the rows for them in the DB.

    '''

    # open the database if needed and get a cursor
    if not database:
        database, cursor = opendb()
        closedb = True
    else:
        cursor = database.cursor()
        closedb = False

    # get all local authors first and normalize their form
    local_authors, local_author_fnames, local_emails = (
        get_local_authors_from_db(database=database))

    if len(local_authors) > 0:

        # get all the authors for this date
        query = 'select arxiv_id, authors from arxiv where utcdate = date(?)'
        query_params = (arxiv_date, )
        cursor.execute(query, query_params)
        rows = cursor.fetchall()

        if rows:

            local_author_articles = []

            for row in rows:

                paper_authors = row[1]

                # get rid of the affiliations for matching to local authors
                paper_authors = strip_affils(paper_authors)

                # we'll save this initial cleaned version back to the database
                # for local matched papers so all the author indices line up
                # correctly
                cleaned_paper_authors = paper_authors[::]

                if verbose:
                    print('%s authors: %s' %
                          (row[0], repr(cleaned_paper_authors)))

                # normalize these names so we can compare them more robustly to
                # the local authors
                paper_authors = [x.lower().strip() for x in paper_authors]
                paper_authors = [
                    x.strip() for x in paper_authors if len(x) > 1
                ]
                paper_authors = [x.replace('.', ' ') for x in paper_authors]
                paper_authors = [squeeze(x) for x in paper_authors]

                paper_author_fnames = [x.split() for x in paper_authors]
                paper_author_fnames = [
                    ''.join([x[0][0], x[-1]]) for x in paper_author_fnames
                ]
                paper_authors = [x.replace(' ', '') for x in paper_authors]

                if verbose:
                    print("%s normalized authors: %s" %
                          (row[0], repr(paper_authors)))

                local_matched_author_inds = []
                local_matched_author_affils = []

                # match to the flastname first, then if that works, try another
                # match with fullname. if both work, then we accept this as a
                # local author match
                for paper_author, paper_fname, paper_author_ind in zip(
                        paper_authors, paper_author_fnames,
                        range(len(paper_authors))):

                    matched_author_fname = process.extractOne(
                        paper_fname,
                        local_author_fnames,
                        score_cutoff=firstname_match_threshold)

                    matched_author_full = process.extractOne(
                        paper_author,
                        local_authors,
                        score_cutoff=fullname_match_threshold)

                    if matched_author_fname and matched_author_full:

                        print('%s: %s, matched paper author: %s '
                              'to local author: %s. '
                              'first name score: %s, full name score: %s' % (
                                  row[0],
                                  paper_authors,
                                  paper_author,
                                  matched_author_full[0],
                                  matched_author_fname[1],
                                  matched_author_full[1],
                              ))

                        # update the paper author index column so we can
                        # highlight them in the frontend
                        local_matched_author_inds.append(paper_author_ind)

                        # also update the affilation tag for this author
                        local_authind = local_authors.index(
                            matched_author_full[0])

                        # get the corresponding email
                        local_matched_email = local_emails[local_authind]

                        # split to get the affil tag
                        local_matched_affil = (
                            local_matched_email.split('@')[-1])

                        if local_matched_affil in AFFIL_DICT:

                            local_matched_author_affils.append(
                                AFFIL_DICT[local_matched_affil])

                        # now that we have all the special affils, compress
                        # them into only the unique ones
                        local_matched_author_affils = list(
                            set(local_matched_author_affils))
                #
                # done with all authors for this paper
                #

                # now update the info for this paper
                if len(local_matched_author_inds) > 0 and update_db:

                    # arxivid of this article that has local authors
                    local_author_articles.append((row[0]))

                    # these encode the positions of the local authors in the
                    # author list
                    local_author_indices = (','.join(
                        ['%s' % x for x in local_matched_author_inds]))
                    local_author_special_affils = ','.join(
                        local_matched_author_affils)

                    cursor.execute(
                        'update arxiv set '
                        'authors = ?, '
                        'local_authors = ?, '
                        'local_author_indices = ?, '
                        'local_author_specaffils = ? '
                        'where '
                        'arxiv_id = ?', (','.join(cleaned_paper_authors), True,
                                         local_author_indices,
                                         local_author_special_affils, row[0]))

            #
            # done with all papers for the day
            #

            # commit the transaction at the end
            if update_db:
                database.commit()

            retval = local_author_articles

        else:
            print('no articles for this date')
            retval = False

    else:
        print('no local authors defined')
        retval = False

    # at the end, close the cursor and DB connection
    if closedb:
        cursor.close()
        database.close()

    return retval
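
The matching above relies on process.extractOne, presumably from the fuzzywuzzy package: it returns a (choice, score) tuple, or None when no candidate clears score_cutoff, which is why a plain truthiness check suffices:

from fuzzywuzzy import process

# Returns ('jdoe', 100) here; returns None if nothing scores >= 99.
match = process.extractOne('jdoe', ['jdoe', 'jsmith'], score_cutoff=99)
if match:
    best_name, best_score = match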
Example #18
def get_periodicfeatures(
        pfpickle,
        lcbasedir,
        outdir,
        fourierorder=5,
        # these are depth, duration, ingress duration
        transitparams=(-0.01, 0.1, 0.1),
        # these are depth, duration, depth ratio, secphase
        ebparams=(-0.2, 0.3, 0.7, 0.5),
        pdiff_threshold=1.0e-4,
        sidereal_threshold=1.0e-4,
        sampling_peak_multiplier=5.0,
        sampling_startp=None,
        sampling_endp=None,
        starfeatures=None,
        timecols=None,
        magcols=None,
        errcols=None,
        lcformat='hat-sql',
        lcformatdir=None,
        sigclip=10.0,
        verbose=True,
        raiseonfail=False):
    '''This gets all periodic features for the object.

    Parameters
    ----------

    pfpickle : str
        The period-finding result pickle containing period-finder results to use
        for the calculation of LC fit, periodogram, and phased LC features.

    lcbasedir : str
        The base directory where the light curve for the current object is
        located.

    outdir : str
        The output directory where the results will be written.

    fourierorder : int
        The Fourier order to use to generate sinusoidal function and fit that to
        the phased light curve.

    transitparams : list of floats
        The transit depth, duration, and ingress duration to use to generate a
        trapezoid planet transit model fit to the phased light curve. The period
        used is the one provided in `period`, while the epoch is automatically
        obtained from a spline fit to the phased light curve.

    ebparams : list of floats
        The primary eclipse depth, eclipse duration, the primary-secondary depth
        ratio, and the phase of the secondary eclipse to use to generate an
        eclipsing binary model fit to the phased light curve. The period used is
        the one provided in `period`, while the epoch is automatically obtained
        from a spline fit to the phased light curve.

    pdiff_threshold : float
        This is the max difference between periods to consider them the same.

    sidereal_threshold : float
        This is the max difference between any of the 'best' periods and the
        sidereal day periods to consider them the same.

    sampling_peak_multiplier : float
        This is the minimum multiplicative factor of a 'best' period's
        normalized periodogram peak over the sampling periodogram peak at the
        same period required to accept the 'best' period as possibly real.

    sampling_startp, sampling_endp : float
        If the `pgramlist` doesn't have a time-sampling Lomb-Scargle
        periodogram, it will be obtained automatically. Use these kwargs to
        control the minimum and maximum period interval to be searched when
        generating this periodogram.

    starfeatures : str or None
        If not None, this should be the filename of the
        `starfeatures-<objectid>.pkl` created by
        :py:func:`astrobase.lcproc.lcsfeatures.get_starfeatures` for this
        object. This is used to get the neighbor's light curve and phase it with
        this object's period to see if this object is blended.

    timecols : list of str or None
        The timecol keys to use from the lcdict in calculating the features.

    magcols : list of str or None
        The magcol keys to use from the lcdict in calculating the features.

    errcols : list of str or None
        The errcol keys to use from the lcdict in calculating the features.

    lcformat : str
        This is the `formatkey` associated with your light curve format, which
        you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curves specified in `basedir` or `use_list_of_filenames`.

    lcformatdir : str or None
        If this is provided, gives the path to a directory when you've stored
        your lcformat description JSONs, other than the usual directories lcproc
        knows to search for them in. Use this along with `lcformat` to specify
        an LC format JSON file that's not currently registered with lcproc.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    verbose : bool
        If True, will indicate progress while working.

    raiseonfail : bool
        If True, will raise an Exception if something goes wrong.

    Returns
    -------

    str
        Returns a filename for the output pickle containing all of the periodic
        features for the input object's LC.

    '''

    try:
        formatinfo = get_lcformat(lcformat, use_lcformat_dir=lcformatdir)
        if formatinfo:
            (fileglob, readerfunc, dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    # open the pfpickle
    if pfpickle.endswith('.gz'):
        infd = gzip.open(pfpickle, 'rb')
    else:
        infd = open(pfpickle, 'rb')
    pf = pickle.load(infd)
    infd.close()

    lcfile = os.path.join(lcbasedir, pf['lcfbasename'])
    objectid = pf['objectid']

    if 'kwargs' in pf:
        kwargs = pf['kwargs']
    else:
        kwargs = None

    # override the default timecols, magcols, and errcols
    # using the ones provided to the periodfinder
    # if those don't exist, use the defaults from the lcformat def
    if kwargs and 'timecols' in kwargs and timecols is None:
        timecols = kwargs['timecols']
    elif not kwargs and not timecols:
        timecols = dtimecols

    if kwargs and 'magcols' in kwargs and magcols is None:
        magcols = kwargs['magcols']
    elif not kwargs and not magcols:
        magcols = dmagcols

    if kwargs and 'errcols' in kwargs and errcols is None:
        errcols = kwargs['errcols']
    elif not kwargs and not errcols:
        errcols = derrcols

    # check if the light curve file exists
    if not os.path.exists(lcfile):
        LOGERROR("can't find LC %s for object %s" % (lcfile, objectid))
        return None

    # check if we have neighbors we can get the LCs for
    if starfeatures is not None and os.path.exists(starfeatures):

        with open(starfeatures, 'rb') as infd:
            starfeat = pickle.load(infd)

        if starfeat['closestnbrlcfname'].size > 0:

            nbr_full_lcf = starfeat['closestnbrlcfname'][0]

            # check for this LC in the lcbasedir
            if os.path.exists(
                    os.path.join(lcbasedir, os.path.basename(nbr_full_lcf))):
                nbrlcf = os.path.join(lcbasedir,
                                      os.path.basename(nbr_full_lcf))
            # if it's not there, check for this file at the full LC location
            elif os.path.exists(nbr_full_lcf):
                nbrlcf = nbr_full_lcf
            # otherwise, we can't find it, so complain
            else:
                LOGWARNING("can't find neighbor light curve file: %s in "
                           "its original directory: %s, or in this object's "
                           "lcbasedir: %s, skipping neighbor processing..." %
                           (os.path.basename(nbr_full_lcf),
                            os.path.dirname(nbr_full_lcf), lcbasedir))
                nbrlcf = None

        else:
            nbrlcf = None

    else:
        nbrlcf = None

    # now, start processing for periodic feature extraction
    try:

        # get the object LC into a dict
        lcdict = readerfunc(lcfile)

        # this should handle lists/tuples being returned by readerfunc
        # we assume that the first element is the actual lcdict
        # FIXME: figure out how to not need this assumption
        if ((isinstance(lcdict, (list, tuple)))
                and (isinstance(lcdict[0], dict))):
            lcdict = lcdict[0]

        # get the nbr object LC into a dict if there is one
        if nbrlcf is not None:

            nbrlcdict = readerfunc(nbrlcf)

            # this should handle lists/tuples being returned by readerfunc
            # we assume that the first element is the actual lcdict
            # FIXME: figure out how to not need this assumption
            if ((isinstance(nbrlcdict, (list, tuple)))
                    and (isinstance(nbrlcdict[0], dict))):
                nbrlcdict = nbrlcdict[0]

        # this will be the output file
        outfile = os.path.join(
            outdir,
            'periodicfeatures-%s.pkl' % squeeze(objectid).replace(' ', '-'))

        # normalize using the special function if specified
        if normfunc is not None:
            lcdict = normfunc(lcdict)

            if nbrlcf:
                nbrlcdict = normfunc(nbrlcdict)

        resultdict = {}

        for tcol, mcol, ecol in zip(timecols, magcols, errcols):

            # dereference the columns and get them from the lcdict
            if '.' in tcol:
                tcolget = tcol.split('.')
            else:
                tcolget = [tcol]
            times = _dict_get(lcdict, tcolget)

            if nbrlcf:
                nbrtimes = _dict_get(nbrlcdict, tcolget)
            else:
                nbrtimes = None

            if '.' in mcol:
                mcolget = mcol.split('.')
            else:
                mcolget = [mcol]

            mags = _dict_get(lcdict, mcolget)

            if nbrlcf:
                nbrmags = _dict_get(nbrlcdict, mcolget)
            else:
                nbrmags = None

            if '.' in ecol:
                ecolget = ecol.split('.')
            else:
                ecolget = [ecol]

            errs = _dict_get(lcdict, ecolget)

            if nbrlcf:
                nbrerrs = _dict_get(nbrlcdict, ecolget)
            else:
                nbrerrs = None

            #
            # filter out nans, etc. from the object and any neighbor LC
            #

            # get the finite values
            finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs)
            ftimes, fmags, ferrs = times[finind], mags[finind], errs[finind]

            if nbrlcf:

                nfinind = (np.isfinite(nbrtimes) & np.isfinite(nbrmags)
                           & np.isfinite(nbrerrs))
                nbrftimes, nbrfmags, nbrferrs = (nbrtimes[nfinind],
                                                 nbrmags[nfinind],
                                                 nbrerrs[nfinind])

            # get nonzero errors
            nzind = np.nonzero(ferrs)
            ftimes, fmags, ferrs = ftimes[nzind], fmags[nzind], ferrs[nzind]

            if nbrlcf:

                nnzind = np.nonzero(nbrferrs)
                nbrftimes, nbrfmags, nbrferrs = (nbrftimes[nnzind],
                                                 nbrfmags[nnzind],
                                                 nbrferrs[nnzind])

            # normalize here if not using special normalization
            if normfunc is None:

                ntimes, nmags = normalize_magseries(
                    ftimes, fmags, magsarefluxes=magsarefluxes)

                times, mags, errs = ntimes, nmags, ferrs

                if nbrlcf:
                    nbrntimes, nbrnmags = normalize_magseries(
                        nbrftimes, nbrfmags, magsarefluxes=magsarefluxes)
                    nbrtimes, nbrmags, nbrerrs = nbrntimes, nbrnmags, nbrferrs
                else:
                    nbrtimes, nbrmags, nbrerrs = None, None, None

            else:
                times, mags, errs = ftimes, fmags, ferrs

            if times.size > 999:

                #
                # now we have times, mags, errs (and nbrtimes, nbrmags, nbrerrs)
                #
                available_pfmethods = []
                available_pgrams = []
                available_bestperiods = []

                for k in pf[mcol].keys():

                    if k in PFMETHODS:

                        available_pgrams.append(pf[mcol][k])

                        if k != 'win':
                            available_pfmethods.append(pf[mcol][k]['method'])
                            available_bestperiods.append(
                                pf[mcol][k]['bestperiod'])

                #
                # process periodic features for this magcol
                #
                featkey = 'periodicfeatures-%s' % mcol
                resultdict[featkey] = {}

                # first, handle the periodogram features
                pgramfeat = periodicfeatures.periodogram_features(
                    available_pgrams,
                    times,
                    mags,
                    errs,
                    sigclip=sigclip,
                    pdiff_threshold=pdiff_threshold,
                    sidereal_threshold=sidereal_threshold,
                    sampling_peak_multiplier=sampling_peak_multiplier,
                    sampling_startp=sampling_startp,
                    sampling_endp=sampling_endp,
                    verbose=verbose)
                resultdict[featkey].update(pgramfeat)

                resultdict[featkey]['pfmethods'] = available_pfmethods

                # then for each bestperiod, get phasedlc and lcfit features
                for _ind, pfm, bp in zip(range(len(available_bestperiods)),
                                         available_pfmethods,
                                         available_bestperiods):

                    resultdict[featkey][pfm] = periodicfeatures.lcfit_features(
                        times,
                        mags,
                        errs,
                        bp,
                        fourierorder=fourierorder,
                        transitparams=transitparams,
                        ebparams=ebparams,
                        sigclip=sigclip,
                        magsarefluxes=magsarefluxes,
                        verbose=verbose)

                    phasedlcfeat = periodicfeatures.phasedlc_features(
                        times,
                        mags,
                        errs,
                        bp,
                        nbrtimes=nbrtimes,
                        nbrmags=nbrmags,
                        nbrerrs=nbrerrs)

                    resultdict[featkey][pfm].update(phasedlcfeat)

            else:

                LOGERROR('not enough finite measurements in magcol: %s, for '
                         'pfpickle: %s, skipping this magcol' %
                         (mcol, pfpickle))
                featkey = 'periodicfeatures-%s' % mcol
                resultdict[featkey] = None

        #
        # end of per magcol processing
        #
        # write resultdict to pickle (outfile was set above)
        with open(outfile, 'wb') as outfd:
            pickle.dump(resultdict, outfd, pickle.HIGHEST_PROTOCOL)

        return outfile

    except Exception:

        LOGEXCEPTION('failed to run for pf: %s, lcfile: %s' %
                     (pfpickle, lcfile))
        if raiseonfail:
            raise
        else:
            return None
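
_dict_get is a private astrobase helper used above to dereference compound column keys such as 'magaperture1.mags'; its assumed behavior is a simple nested lookup:

def _dict_get(datadict, keylist):
    # Assumed behavior of the helper used above: walk nested dicts with a
    # list of keys, e.g. _dict_get(lcdict, ['magaperture1', 'mags']).
    value = datadict
    for key in keylist:
        value = value[key]
    return value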
Example #19
    def get(self):
        artists = [squeeze(artist).replace(' ', '+') for artist in self.get_arguments('artist') if artist]
        get_profile_responses = yield [get_artist_profile(artist) for artist in artists]
        decoded_response_bodies = [json_decode(response.body) for response in get_profile_responses]
        artist_profiles = format_artist_profiles(decoded_response_bodies)
        self.render('mainstream.html', snark=how_mainstream(artist_profiles))
Example #20
    def post(self):
        '''This handles the POST to /admin/email and
        updates the site-settings.json file on disk.

        '''
        if not self.current_user:
            self.redirect('/')
            return

        if (self.keycheck['status'] != 'ok'
                or self.xsrf_type != 'session'):

            self.set_status(403)
            retdict = {
                'status': 'failed',
                'result': None,
                'message': ("Sorry, you don't have access. "
                            "API keys are not allowed for this endpoint.")
            }
            self.write(retdict)
            raise tornado.web.Finish()

        # get the current user
        current_user = self.current_user

        # only allow in superuser roles
        if current_user and current_user['user_role'] == 'superuser':

            try:

                # get the form inputs
                loginval = xhtml_escape(
                    self.get_argument('loginradio')).strip().lower()
                signupval = xhtml_escape(
                    self.get_argument('signupradio')).strip().lower()
                emailsender = xhtml_escape(
                    self.get_argument('emailsender')).strip()
                emailserver = xhtml_escape(
                    self.get_argument('emailserver')).strip().lower()
                emailport = abs(
                    int(xhtml_escape(self.get_argument('emailport')).strip()))
                emailuser = xhtml_escape(
                    self.get_argument('emailuser')).strip()
                emailpass = self.get_argument('emailpass').strip()

                if loginval == 'login-allowed':
                    loginval = True
                elif loginval == 'login-disallowed':
                    loginval = False
                else:
                    loginval = False

                if signupval == 'signup-allowed':
                    signupval = True
                elif signupval == 'signup-disallowed':
                    signupval = False
                else:
                    signupval = False

                # make sure to check if the email settings are valid if signups
                # are enabled
                if signupval and (len(emailsender) == 0
                                  or len(emailserver) == 0
                                  or emailserver == 'smtp.emailserver.org'
                                  or emailport == 0
                                  or len(emailuser) == 0
                                  or len(emailpass) == 0):

                    LOGGER.error('invalid items in the '
                                 'admin-email-update-form')

                    self.set_status(400)
                    retdict = {
                        'status': 'failed',
                        'result': None,
                        'message': ("Invalid input in the "
                                    "email settings form. "
                                    "All fields are required "
                                    "if new user sign-ups are "
                                    "to be enabled.")
                    }
                    self.write(retdict)
                    raise tornado.web.Finish()

                # email addresses
                allowed_email_addrs = self.get_argument(
                    'allowedemailaddr', None)
                if allowed_email_addrs is not None:
                    allowed_email_addrs = squeeze(
                        xhtml_escape(allowed_email_addrs)).split(',')
                    allowed_email_addrs = [
                        x.strip() for x in allowed_email_addrs
                    ]
                else:
                    allowed_email_addrs = []

                # to update site-info.json
                updatedict = {
                    "logins_allowed": loginval,
                    "signups_allowed": signupval,
                    "allowed_user_emailaddr": allowed_email_addrs,
                }

                # to update email-settings file
                emailupdatedict = {
                    "email_sender": emailsender,
                    "email_server": emailserver,
                    "email_port": emailport,
                    "email_user": emailuser,
                    "email_pass": emailpass,
                }

                # update the siteinfo dict
                self.siteinfo.update(updatedict)

                # update the site-info.json file on disk
                siteinfojson = os.path.join(self.basedir, 'site-info.json')
                with open(siteinfojson, 'r') as infd:
                    siteinfo_disk = json.load(infd)

                # update site-info.json only with values of logins-allowed,
                # signups-allowed
                siteinfo_disk.update(updatedict)

                LOGGER.warning(
                    'updating site-info.json from admin-email-update-form')

                with open(siteinfojson, 'w') as outfd:
                    json.dump(siteinfo_disk, outfd, indent=4)

                # update the email-settings in the siteinfo dict and the
                # email-settings file next
                self.siteinfo.update(emailupdatedict)

                email_settings_file = os.path.join(
                    self.basedir, self.siteinfo['email_settings_file'])

                if not os.path.exists(email_settings_file):

                    LOGGER.error('no email settings file found '
                                 'at expected path indicated '
                                 'in site-info.json. Making a new one...')

                    with open(email_settings_file, 'w') as outfd:
                        json.dump(emailupdatedict, outfd, indent=4)

                else:

                    # make sure we can write to the email settings file
                    os.chmod(email_settings_file, 0o100600)

                    with open(email_settings_file, 'r') as infd:
                        emailsettings_disk = json.load(infd)

                    emailsettings_disk.update(emailupdatedict)

                    LOGGER.warning('updating email settings file '
                                   'from admin-email-update-form')

                    with open(email_settings_file, 'w') as outfd:
                        json.dump(emailsettings_disk, outfd, indent=4)

                # set email settings file permissions back to readonly
                os.chmod(email_settings_file, 0o100400)

                updatedict.update(emailupdatedict)

                returndict = {
                    'status': 'ok',
                    'message': ('Email and user sign-up/sign-in '
                                'settings successfully updated.'),
                    'result': updatedict
                }

                self.write(returndict)
                self.finish()

            except Exception as e:

                LOGGER.exception('failed to update site-info.json')

                self.set_status(400)
                retdict = {
                    'status': 'failed',
                    'result': None,
                    'message': ("Invalid input provided for "
                                "email-settings form.")
                }
                self.write(retdict)
                raise tornado.web.Finish()

        # anything else is probably the locked user, turn them away
        else:
            self.set_status(403)
            retdict = {
                'status': 'failed',
                'result': None,
                'message': ("Sorry, you don't have access. "
                            "API keys are not allowed for this endpoint.")
            }
            self.write(retdict)
            raise tornado.web.Finish()
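
The chmod constants 0o100600 and 0o100400 used above are full st_mode values (S_IFREG plus permission bits); on typical platforms only the permission bits take effect, so an equivalent, more readable form would be:

import os
import stat

# Equivalent in effect to the chmod calls above: owner read/write while
# updating the email settings file, then back to owner read-only.
os.chmod(email_settings_file, stat.S_IRUSR | stat.S_IWUSR)  # 0o600
os.chmod(email_settings_file, stat.S_IRUSR)                 # 0o400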
Example #21
def apply_epd_magseries(lcfile,
                        timecol,
                        magcol,
                        errcol,
                        externalparams,
                        lcformat='hat-sql',
                        lcformatdir=None,
                        epdsmooth_sigclip=3.0,
                        epdsmooth_windowsize=21,
                        epdsmooth_func=smooth_magseries_savgol,
                        epdsmooth_extraparams=None):
    '''This applies external parameter decorrelation (EPD) to a light curve.

    Parameters
    ----------

    lcfile : str
        The filename of the light curve file to process.

    timecol,magcol,errcol : str
        The keys in the lcdict produced by your light curve reader function that
        correspond to the times, mags/fluxes, and associated measurement errors
        that will be used as input to the EPD process.

    externalparams : dict or None
        This is a dict that indicates which keys in the lcdict obtained from the
        lcfile correspond to the required external parameters. As with timecol,
        magcol, and errcol, these can be simple keys (e.g. 'rjd') or compound
        keys ('magaperture1.mags'). The dict should look something like::

          {'fsv':'<lcdict key>' array: S values for each observation,
           'fdv':'<lcdict key>' array: D values for each observation,
           'fkv':'<lcdict key>' array: K values for each observation,
           'xcc':'<lcdict key>' array: x coords for each observation,
           'ycc':'<lcdict key>' array: y coords for each observation,
           'bgv':'<lcdict key>' array: sky background for each observation,
           'bge':'<lcdict key>' array: sky background err for each observation,
           'iha':'<lcdict key>' array: hour angle for each observation,
           'izd':'<lcdict key>' array: zenith distance for each observation}

        Alternatively, if these exact keys are already present in the lcdict,
        indicate this by setting externalparams to None.

    lcformat : str
        This is the `formatkey` associated with your light curve format, which
        you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curves specified in `basedir` or `use_list_of_filenames`.

    lcformatdir : str or None
        If this is provided, gives the path to a directory when you've stored
        your lcformat description JSONs, other than the usual directories lcproc
        knows to search for them in. Use this along with `lcformat` to specify
        an LC format JSON file that's not currently registered with lcproc.

    epdsmooth_sigclip : float or int or sequence of two floats/ints or None
        This specifies how to sigma-clip the input LC before fitting the EPD
        function to it.

        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]`, will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    epdsmooth_windowsize : int
        This is the number of LC points to smooth over to generate a smoothed
        light curve that will be used to fit the EPD function.

    epdsmooth_func : Python function
        This sets the smoothing filter function to use. A Savitsky-Golay filter
        is used to smooth the light curve by default. The functions that can be
        used with this kwarg are listed in `varbase.trends`. If you want to use
        your own function, it MUST have the following signature::

                def smoothfunc(mags_array, window_size, **extraparams)

        and return a numpy array of the same size as `mags_array` with the
        smoothed time-series. Any extra params can be provided using the
        `extraparams` dict.

    epdsmooth_extraparams : dict
        This is a dict of any extra filter params to supply to the smoothing
        function.

    Returns
    -------

    str
        Writes the output EPD light curve to a pickle that contains the lcdict
        with an added `lcdict['epd']` key, which contains the EPD times,
        mags/fluxes, and errs as `lcdict['epd']['times']`,
        `lcdict['epd']['mags']`, and `lcdict['epd']['errs']`. Returns the
        filename of this generated EPD LC pickle file.

    Notes
    -----

    - S -> measure of PSF sharpness (~1/sigma^2, so smaller S = wider PSF)
    - D -> measure of PSF ellipticity in xy direction
    - K -> measure of PSF ellipticity in cross direction

    S, D, K are related to the PSF's variance and covariance, see eqn 30-33 in
    A. Pal's thesis: https://arxiv.org/abs/0906.3486

    '''
    try:
        formatinfo = get_lcformat(lcformat, use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc, dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception as e:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    lcdict = readerfunc(lcfile)
    if ((isinstance(lcdict, (tuple, list))) and isinstance(lcdict[0], dict)):
        lcdict = lcdict[0]

    objectid = lcdict['objectid']
    times, mags, errs = lcdict[timecol], lcdict[magcol], lcdict[errcol]

    if externalparams is not None:

        fsv = lcdict[externalparams['fsv']]
        fdv = lcdict[externalparams['fdv']]
        fkv = lcdict[externalparams['fkv']]

        xcc = lcdict[externalparams['xcc']]
        ycc = lcdict[externalparams['ycc']]

        bgv = lcdict[externalparams['bgv']]
        bge = lcdict[externalparams['bge']]

        iha = lcdict[externalparams['iha']]
        izd = lcdict[externalparams['izd']]

    else:

        fsv = lcdict['fsv']
        fdv = lcdict['fdv']
        fkv = lcdict['fkv']

        xcc = lcdict['xcc']
        ycc = lcdict['ycc']

        bgv = lcdict['bgv']
        bge = lcdict['bge']

        iha = lcdict['iha']
        izd = lcdict['izd']

    # apply the corrections for EPD
    epd = epd_magseries(times,
                        mags,
                        errs,
                        fsv,
                        fdv,
                        fkv,
                        xcc,
                        ycc,
                        bgv,
                        bge,
                        iha,
                        izd,
                        magsarefluxes=magsarefluxes,
                        epdsmooth_sigclip=epdsmooth_sigclip,
                        epdsmooth_windowsize=epdsmooth_windowsize,
                        epdsmooth_func=epdsmooth_func,
                        epdsmooth_extraparams=epdsmooth_extraparams)

    # save the EPD magseries to a pickle LC
    lcdict['epd'] = epd
    outfile = os.path.join(
        os.path.dirname(lcfile),
        '%s-epd-%s-pklc.pkl' % (squeeze(objectid).replace(' ', '-'), magcol))
    with open(outfile, 'wb') as outfd:
        pickle.dump(lcdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

    return outfile
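
A minimal sketch (not from the source) of a custom smoothing function matching the `epdsmooth_func` signature documented above; `boxcar_smooth` and the commented call are illustrative names only.

import numpy as np

def boxcar_smooth(mags_array, window_size, **extraparams):
    # moving-average smoother; returns an array the same size as mags_array,
    # as required by the epdsmooth_func contract in the docstring above
    kernel = np.ones(window_size) / window_size
    return np.convolve(mags_array, kernel, mode='same')

# it would then be passed to the EPD wrapper above via
# epdsmooth_func=boxcar_smooth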
Exemplo n.º 22
0
def runpf(lcfile,
          outdir,
          timecols=None,
          magcols=None,
          errcols=None,
          lcformat='hat-sql',
          lcformatdir=None,
          pfmethods=('gls','pdm','mav','win'),
          pfkwargs=({},{},{},{}),
          sigclip=10.0,
          getblssnr=False,
          nworkers=NCPUS,
          minobservations=500,
          excludeprocessed=False,
          raiseonfail=False):
    '''This runs the period-finding for a single LC.

    Parameters
    ----------

    lcfile : str
        The light curve file to run period-finding on.

    outdir : str
        The output directory where the result pickle will go.

    timecols : list of str or None
        The timecol keys to use from the lcdict in calculating the features.

    magcols : list of str or None
        The magcol keys to use from the lcdict in calculating the features.

    errcols : list of str or None
        The errcol keys to use from the lcdict in calculating the features.

    lcformat : str
        This is the `formatkey` associated with your light curve format, which
        you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
          curve file.

    lcformatdir : str or None
        If this is provided, gives the path to a directory where you've stored
        your lcformat description JSONs, other than the usual directories lcproc
        knows to search for them in. Use this along with `lcformat` to specify
        an LC format JSON file that's not currently registered with lcproc.

    pfmethods : list of str
        This is a list of period finding methods to run. Each element is a
        string matching the keys of the `PFMETHODS` dict above. By default, this
        runs GLS, PDM, AoVMH, and the spectral window Lomb-Scargle periodogram.

    pfkwargs : list of dicts
        This is used to provide any special kwargs as dicts to each
        period-finding method function specified in `pfmethods`.

    sigclip : float or int or sequence of two floats/ints or None
        If a single float or int, a symmetric sigma-clip will be performed using
        the number provided as the sigma-multiplier to cut out from the input
        time-series.

        If a list of two ints/floats is provided, the function will perform an
        'asymmetric' sigma-clip. The first element in this list is the sigma
        value to use for fainter flux/mag values; the second element in this
        list is the sigma value to use for brighter flux/mag values. For
        example, `sigclip=[10., 3.]` will sigclip out greater than 10-sigma
        dimmings and greater than 3-sigma brightenings. Here the meaning of
        "dimming" and "brightening" is set by *physics* (not the magnitude
        system), which is why the `magsarefluxes` kwarg must be correctly set.

        If `sigclip` is None, no sigma-clipping will be performed, and the
        time-series (with non-finite elems removed) will be passed through to
        the output.

    getblssnr : bool
        If this is True and BLS is one of the methods specified in `pfmethods`,
        will also calculate the stats for each best period in the BLS results:
        transit depth, duration, ingress duration, refit period and epoch, and
        the SNR of the transit.

    nworkers : int
        The number of parallel period-finding workers to launch.

    minobservations : int
        The minimum number of finite LC points required to process a light
        curve.

    excludeprocessed : bool
        If this is True, light curves that have existing period-finding result
        pickles in `outdir` will not be processed.

        FIXME: currently, this uses a dumb method of excluding already-processed
        files. A smarter way to do this is to (i) generate a SHA512 cachekey
        based on a repr of `{'lcfile', 'timecols', 'magcols', 'errcols',
        'lcformat', 'pfmethods', 'sigclip', 'getblssnr', 'pfkwargs'}`, (ii) make
        sure all list kwargs in the dict are sorted, (iii) check if the output
        file has the same cachekey in its filename (last 8 chars of cachekey
        should work), so the result was processed in exactly the same way as
        specifed in the input to this function, and can therefore be
        ignored. Will implement this later.

    raiseonfail : bool
        If something fails and this is True, will raise an Exception instead of
        returning None at the end.

    Returns
    -------

    str
        The path to the output period-finding result pickle.

    '''

    try:
        formatinfo = get_lcformat(lcformat,
                                  use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc,
             dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception as e:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    # override the default timecols, magcols, and errcols
    # using the ones provided to the function
    if timecols is None:
        timecols = dtimecols
    if magcols is None:
        magcols = dmagcols
    if errcols is None:
        errcols = derrcols

    try:

        # get the LC into a dict
        lcdict = readerfunc(lcfile)

        # this should handle lists/tuples being returned by readerfunc
        # we assume that the first element is the actual lcdict
        # FIXME: figure out how to not need this assumption
        if ( (isinstance(lcdict, (list, tuple))) and
             (isinstance(lcdict[0], dict)) ):
            lcdict = lcdict[0]

        outfile = os.path.join(outdir, 'periodfinding-%s.pkl' %
                               squeeze(lcdict['objectid']).replace(' ', '-'))

        # if excludeprocessed is True, return the output file if it exists and
        # has a size that is at least 100 kilobytes (this should be enough to
        # contain the minimal results of this function).
        if excludeprocessed:

            test_outfile = os.path.exists(outfile)
            test_outfile_gz = os.path.exists(outfile+'.gz')

            if (test_outfile and os.stat(outfile).st_size > 102400):

                LOGWARNING('periodfinding result for %s already exists at %s, '
                           'skipping because excludeprocessed=True'
                           % (lcfile, outfile))
                return outfile

            elif (test_outfile_gz and os.stat(outfile+'.gz').st_size > 102400):

                LOGWARNING(
                    'gzipped periodfinding result for %s already '
                    'exists at %s, skipping because excludeprocessed=True'
                    % (lcfile, outfile+'.gz')
                )
                return outfile+'.gz'


        # this is the final returndict
        resultdict = {
            'objectid':lcdict['objectid'],
            'lcfbasename':os.path.basename(lcfile),
            'kwargs':{'timecols':timecols,
                      'magcols':magcols,
                      'errcols':errcols,
                      'lcformat':lcformat,
                      'lcformatdir':lcformatdir,
                      'pfmethods':pfmethods,
                      'pfkwargs':pfkwargs,
                      'sigclip':sigclip,
                      'getblssnr':getblssnr}
        }

        # normalize using the special function if specified
        if normfunc is not None:
            lcdict = normfunc(lcdict)

        for tcol, mcol, ecol in zip(timecols, magcols, errcols):

            # dereference the columns and get them from the lcdict
            if '.' in tcol:
                tcolget = tcol.split('.')
            else:
                tcolget = [tcol]
            times = _dict_get(lcdict, tcolget)

            if '.' in mcol:
                mcolget = mcol.split('.')
            else:
                mcolget = [mcol]
            mags = _dict_get(lcdict, mcolget)

            if '.' in ecol:
                ecolget = ecol.split('.')
            else:
                ecolget = [ecol]
            errs = _dict_get(lcdict, ecolget)


            # normalize here if not using special normalization
            if normfunc is None:
                ntimes, nmags = normalize_magseries(
                    times, mags,
                    magsarefluxes=magsarefluxes
                )

                times, mags, errs = ntimes, nmags, errs


            # run each of the requested period-finder functions
            resultdict[mcol] = {}

            # check if we have enough non-nan observations to proceed
            finmags = mags[np.isfinite(mags)]

            if finmags.size < minobservations:

                LOGERROR('not enough non-nan observations for '
                         'this LC. have: %s, required: %s, '
                         'magcol: %s, skipping...' %
                         (finmags.size, minobservations, mcol))
                continue

            pfmkeys = []

            for pfmind, pfm, pfkw in zip(range(len(pfmethods)),
                                         pfmethods,
                                         pfkwargs):

                pf_func = PFMETHODS[pfm]

                # get any optional kwargs for this function; copy so we don't
                # mutate the caller's (possibly shared default) pfkwargs dicts
                pf_kwargs = dict(pfkw)
                pf_kwargs.update({'verbose':False,
                                  'nworkers':nworkers,
                                  'magsarefluxes':magsarefluxes,
                                  'sigclip':sigclip})

                # we'll always prefix things with their index to allow multiple
                # invocations and results from the same period-finder (for
                # different period ranges, for example).
                pfmkey = '%s-%s' % (pfmind, pfm)
                pfmkeys.append(pfmkey)

                # run this period-finder and save its results to the output dict
                resultdict[mcol][pfmkey] = pf_func(
                    times, mags, errs,
                    **pf_kwargs
                )


            #
            # done with running the period finders
            #
            # append the pfmkeys list to the magcol dict
            resultdict[mcol]['pfmethods'] = pfmkeys

            # check if we need to get the SNR from any BLS pfresults
            if 'bls' in pfmethods and getblssnr:

                # we need to scan thru the pfmethods to get to any BLS pfresults
                for pfmk in resultdict[mcol]['pfmethods']:

                    if 'bls' in pfmk:

                        try:

                            bls = resultdict[mcol][pfmk]

                            # calculate the SNR for the BLS as well
                            blssnr = bls_snr(bls, times, mags, errs,
                                             magsarefluxes=magsarefluxes,
                                             verbose=False)

                            # add the SNR results to the BLS result dict
                            resultdict[mcol][pfmk].update({
                                'snr':blssnr['snr'],
                                'transitdepth':blssnr['transitdepth'],
                                'transitduration':blssnr['transitduration'],
                            })

                            # update the BLS result dict with the refit periods
                            # and epochs using the results from bls_snr
                            resultdict[mcol][pfmk].update({
                                'nbestperiods':blssnr['period'],
                                'epochs':blssnr['epoch']
                            })

                        except Exception as e:

                            LOGEXCEPTION('could not calculate BLS SNR for %s' %
                                         lcfile)
                            # add the SNR null results to the BLS result dict
                            resultdict[mcol][pfmk].update({
                                'snr':[np.nan,np.nan,np.nan,np.nan,np.nan],
                                'transitdepth':[np.nan,np.nan,np.nan,
                                                np.nan,np.nan],
                                'transitduration':[np.nan,np.nan,np.nan,
                                                   np.nan,np.nan],
                            })

            elif 'bls' in pfmethods:

                # we need to scan thru the pfmethods to get to any BLS pfresults
                for pfmk in resultdict[mcol]['pfmethods']:

                    if 'bls' in pfmk:

                        # add the SNR null results to the BLS result dict
                        resultdict[mcol][pfmk].update({
                            'snr':[np.nan,np.nan,np.nan,np.nan,np.nan],
                            'transitdepth':[np.nan,np.nan,np.nan,
                                            np.nan,np.nan],
                            'transitduration':[np.nan,np.nan,np.nan,
                                               np.nan,np.nan],
                        })


        # once all mag cols have been processed, write out the pickle
        with open(outfile, 'wb') as outfd:
            pickle.dump(resultdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

        return outfile

    except Exception as e:

        LOGEXCEPTION('failed to run for %s, because: %s' % (lcfile, e))

        if raiseonfail:
            raise

        return None
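
The FIXME in the `excludeprocessed` docstring above sketches a SHA512 cachekey scheme for detecting already-processed results. The following is a minimal sketch of that idea under the docstring's stated assumptions; all names here are illustrative and not part of the source.

import hashlib

def make_pf_cachekey(lcfile, timecols, magcols, errcols,
                     lcformat, pfmethods, sigclip, getblssnr, pfkwargs):
    # sort all list kwargs so identical inputs always hash identically
    keydict = {
        'lcfile': lcfile,
        'timecols': sorted(timecols),
        'magcols': sorted(magcols),
        'errcols': sorted(errcols),
        'lcformat': lcformat,
        'pfmethods': sorted(pfmethods),
        'sigclip': sigclip,
        'getblssnr': getblssnr,
        'pfkwargs': repr(pfkwargs),
    }
    cachekey = hashlib.sha512(
        repr(sorted(keydict.items())).encode('utf-8')
    ).hexdigest()
    # the docstring suggests the last 8 chars suffice for the filename
    return cachekey[-8:]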
Exemplo n.º 23
0
 def test_squeeze(self):
     self.assertEqual(squeeze(u('sequences     of    whitespace   chars')),
                      u('sequences of whitespace chars'))
Exemplo n.º 24
0
    def post(self):
        '''This handles the POST request to /users/new.

        '''
        if not self.current_user:
            self.redirect('/')
            raise tornado.web.Finish()

        if (self.keycheck['status'] != 'ok'
                or self.xsrf_type != 'session'):

            self.set_status(403)
            retdict = {
                'status': 'failed',
                'result': None,
                'message': ("Sorry, you don't have access. "
                            "API keys are not allowed for this endpoint.")
            }
            self.write(retdict)
            raise tornado.web.Finish()

        current_user = self.current_user

        # get the provided email and password
        try:

            username = xhtml_escape(self.get_argument('username'))
            email = xhtml_escape(self.get_argument('email'))
            password = self.get_argument('password')

        except Exception as e:

            LOGGER.error('username, email, and password are all required.')
            self.save_flash_messages(
                "A user name, email address, and "
                "strong password are all required.", "warning")
            self.redirect('/users/new')
            raise tornado.web.Finish()

        # check if this email address is allowed to sign up for an account
        if ('allowed_user_emailaddr' in self.siteinfo
                and len(self.siteinfo['allowed_user_emailaddr']) > 0):

            if (squeeze(email.lower().strip())
                    not in self.siteinfo['allowed_user_emailaddr']):

                LOGGER.error("Email: %s is not allowed to sign up." % email)

                self.save_flash_messages(
                    "Sorry, the email address you entered wasn't found in "
                    "the list of people allowed to "
                    "sign up for an account here.", "danger")
                self.redirect('/users/new')
                raise tornado.web.Finish()

        # talk to the authnzerver to sign this user up
        ok, resp, msgs = yield self.authnzerver_request(
            'user-new', {
                'session_token': current_user['session_token'],
                'username': username,
                'email': squeeze(email.lower().strip()),
                'password': password
            })

        # FIXME: don't generate a new session token here yet
        # # generate a new anon session token in any case
        # new_session = yield self.new_session_token(
        #     user_id=2,
        #     expires_days=self.session_expiry,
        # )

        # if the sign up request is successful, send the email
        if ok:

            #
            # send the background request to authnzerver to send an email
            #

            # get the email info from site-info.json
            smtp_sender = self.siteinfo['email_sender']
            smtp_user = self.siteinfo['email_user']
            smtp_pass = self.siteinfo['email_pass']
            smtp_server = self.siteinfo['email_server']
            smtp_port = self.siteinfo['email_port']

            # generate a fernet verification token that is timestamped. we'll
            # give it 15 minutes to expire and decrypt it using:
            # self.ferneter.decrypt(token, ttl=15*60)
            fernet_verification_token = self.ferneter.encrypt(
                secrets.token_urlsafe(32).encode())

            # get this server's base URL
            if self.request.headers.get('X-Real-Host'):
                server_baseurl = '%s://%s' % (
                    self.request.headers.get('X-Forwarded-Proto'),
                    self.request.headers.get('X-Real-Host'))
            else:
                server_baseurl = '%s://%s' % (self.request.protocol,
                                              self.request.host)

            ok, resp, msgs = yield self.authnzerver_request(
                'user-signup-email', {
                    'email_address': email,
                    'server_baseurl': server_baseurl,
                    'server_id': 'HSC viz-inspect',
                    'session_token': current_user['session_token'],
                    'smtp_server': smtp_server,
                    'smtp_sender': smtp_sender,
                    'smtp_user': smtp_user,
                    'smtp_pass': smtp_pass,
                    'smtp_port': smtp_port,
                    'fernet_verification_token': fernet_verification_token,
                    'created_info': resp
                })

            if ok:

                self.save_flash_messages(
                    "Thanks for signing up! We've sent a verification "
                    "request to your email address. "
                    "Please complete user registration by "
                    "entering the code you received.", "primary")
                self.redirect('/users/verify')

            # FIXME: if the backend breaks here, the user is left in limbo
            # what to do?
            else:

                LOGGER.error('failed to send an email. %r' % msgs)
                self.save_flash_messages(msgs, 'warning')
                self.redirect('/users/new')

        # if the sign up request fails, tell the user what happened
        else:
            LOGGER.error("Could not complete sign up request: %r" % msgs)
            self.save_flash_messages(" ".join(msgs), "danger")
            self.redirect('/users/new')
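
The comments above describe a timestamped Fernet verification token decrypted with a 15-minute TTL. Here is a standalone sketch of that pattern, assuming `self.ferneter` wraps the `cryptography` package's Fernet (an assumption; the source doesn't show how it is constructed).

import secrets
from cryptography.fernet import Fernet, InvalidToken

ferneter = Fernet(Fernet.generate_key())
token = ferneter.encrypt(secrets.token_urlsafe(32).encode())

try:
    # ttl=15*60 makes decryption fail for tokens older than 15 minutes
    payload = ferneter.decrypt(token, ttl=15 * 60)
except InvalidToken:
    payload = None  # expired or tampered token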
Exemplo n.º 25
0
def get_arxiv_articles(paperlinks, paperdata, crosslinks, crossdata):

    paperdict = {}
    crossdict = {}

    for ind, link, data in zip(range(len(paperlinks)), paperlinks, paperdata):

        paper_abstract = squeeze(data.p.text.replace('\n', ' ').strip())
        paper_title = squeeze(
            data.find_all('div',
                          class_='list-title')[0].text.strip('\n').replace(
                              'Title:', '', 1))

        paper_authors = (data.find_all(
            'div', class_='list-authors')[0].text.strip('\n').replace(
                'Authors:', '', 1))
        paper_authors = [
            squeeze(x.lstrip('\n').rstrip('\n'))
            for x in paper_authors.split(', ')
        ]

        paper_links = link.find_all('a')[1:3]
        paper_link, arxiv_id = paper_links[0]['href'], paper_links[0].text
        paper_pdf = paper_links[1]['href']

        try:
            comment_contents = data.find('div',
                                         class_='list-comments').contents[2:]
            paper_comments = squeeze(' '.join([
                str(x).lstrip('\n').rstrip('\n') for x in comment_contents
            ]).strip())

            # handle internal arxiv links correctly
            if '<a href="/abs' in paper_comments:
                paper_comments = paper_comments.replace(
                    '/abs', 'https://arxiv.org/abs')

        except AttributeError:
            paper_comments = ''

        paperdict[ind + 1] = {
            'authors': paper_authors,
            'title': paper_title,
            'abstract': paper_abstract,
            'comments': paper_comments,
            'arxiv': arxiv_id,
            'link': paper_link,
            'pdf': paper_pdf
        }

    for ind, link, data in zip(range(len(crosslinks)), crosslinks, crossdata):

        cross_abstract = squeeze(data.p.text.replace('\n', ' ').strip())
        cross_title = squeeze(
            data.find_all('div',
                          class_='list-title')[0].text.strip('\n').replace(
                              'Title:', '', 1))

        cross_authors = (data.find_all(
            'div', class_='list-authors')[0].text.strip('\n').replace(
                'Authors:', '', 1))
        cross_authors = [
            squeeze(x.lstrip('\n').rstrip('\n'))
            for x in cross_authors.split(', ')
        ]

        cross_links = link.find_all('a')[1:3]
        cross_link, arxiv_id = cross_links[0]['href'], cross_links[0].text
        cross_pdf = cross_links[1]['href']

        # figure out which original arxiv this came from
        try:
            cltext = link.text
            cltext_xlind_start = cltext.index('cross-list')
            cltext_xlind_end = cltext.index('[pdf') - 2

            # annotate the title with the original arxiv category
            cltext = cltext[cltext_xlind_start:cltext_xlind_end]
            cross_title = u'[%s] %s' % (cltext, cross_title)

        # if the cross-list doesn't say where it came from, just add a
        # [cross-list] annotation
        except:
            cross_title = u'[cross-list] %s' % cross_title

        try:
            comment_contents = data.find('div',
                                         class_='list-comments').contents[2:]
            cross_comments = squeeze(' '.join([
                str(x).lstrip('\n').rstrip('\n') for x in comment_contents
            ]).strip())

            # handle internal arxiv links correctly
            if '<a href="/abs' in cross_comments:
                cross_comments = cross_comments.replace(
                    '/abs', 'https://arxiv.org/abs')

        except AttributeError:
            cross_comments = ''

        crossdict[ind + 1] = {
            'authors': cross_authors,
            'title': cross_title,
            'abstract': cross_abstract,
            'comments': cross_comments,
            'arxiv': arxiv_id,
            'link': cross_link,
            'pdf': cross_pdf
        }

    return paperdict, crossdict
Exemplo n.º 26
0
def get_varfeatures(lcfile,
                    outdir,
                    timecols=None,
                    magcols=None,
                    errcols=None,
                    mindet=1000,
                    lcformat='hat-sql',
                    lcformatdir=None):
    '''This runs :py:func:`astrobase.varclass.varfeatures.all_nonperiodic_features`
    on a single LC file.

    Parameters
    ----------

    lcfile : str
        The input light curve to process.

    outdir : str
        The directory where the output variable features pickle will be
        written.

    timecols : list of str or None
        The timecol keys to use from the lcdict in calculating the features.

    magcols : list of str or None
        The magcol keys to use from the lcdict in calculating the features.

    errcols : list of str or None
        The errcol keys to use from the lcdict in calculating the features.

    mindet : int
        The minimum number of LC points required to generate variability
        features.

    lcformat : str
        This is the `formatkey` associated with your light curve format, which
        you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curve file.

    lcformatdir : str or None
        If this is provided, gives the path to a directory where you've stored
        your lcformat description JSONs, other than the usual directories lcproc
        knows to search for them in. Use this along with `lcformat` to specify
        an LC format JSON file that's not currently registered with lcproc.

    Returns
    -------

    str
        The generated variability features pickle for the input LC, with results
        for each magcol in the input `magcols` or the light curve format's
        default `magcols` list.

    '''

    try:
        formatinfo = get_lcformat(lcformat, use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc, dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    # override the default timecols, magcols, and errcols
    # using the ones provided to the function
    if timecols is None:
        timecols = dtimecols
    if magcols is None:
        magcols = dmagcols
    if errcols is None:
        errcols = derrcols

    try:

        # get the LC into a dict
        lcdict = readerfunc(lcfile)

        # this should handle lists/tuples being returned by readerfunc
        # we assume that the first element is the actual lcdict
        # FIXME: figure out how to not need this assumption
        if ((isinstance(lcdict, (list, tuple)))
                and (isinstance(lcdict[0], dict))):
            lcdict = lcdict[0]

        resultdict = {
            'objectid': lcdict['objectid'],
            'info': lcdict['objectinfo'],
            'lcfbasename': os.path.basename(lcfile)
        }

        # normalize using the special function if specified
        if normfunc is not None:
            lcdict = normfunc(lcdict)

        for tcol, mcol, ecol in zip(timecols, magcols, errcols):

            # dereference the columns and get them from the lcdict
            if '.' in tcol:
                tcolget = tcol.split('.')
            else:
                tcolget = [tcol]
            times = _dict_get(lcdict, tcolget)

            if '.' in mcol:
                mcolget = mcol.split('.')
            else:
                mcolget = [mcol]
            mags = _dict_get(lcdict, mcolget)

            if '.' in ecol:
                ecolget = ecol.split('.')
            else:
                ecolget = [ecol]
            errs = _dict_get(lcdict, ecolget)

            # normalize here if not using special normalization
            if normfunc is None:
                ntimes, nmags = normalize_magseries(
                    times, mags, magsarefluxes=magsarefluxes)

                times, mags, errs = ntimes, nmags, errs

            # make sure we have finite values
            finind = np.isfinite(times) & np.isfinite(mags) & np.isfinite(errs)

            # make sure we have enough finite values
            if mags[finind].size < mindet:

                LOGINFO('not enough LC points: %s in normalized %s LC: %s' %
                        (mags[finind].size, mcol, os.path.basename(lcfile)))
                resultdict[mcol] = None

            else:

                # get the features for this magcol
                lcfeatures = varfeatures.all_nonperiodic_features(
                    times, mags, errs)
                resultdict[mcol] = lcfeatures

        # now that we've collected all the magcols, we can choose which is the
        # "best" magcol. this is defined as the magcol that gives us the
        # smallest LC MAD.

        try:
            magmads = np.zeros(len(magcols))
            for mind, mcol in enumerate(magcols):
                magmads[mind] = resultdict[mcol]['mad']

            # index of the smallest MAD; np.argmin returns a scalar that can
            # index the magcols list directly
            bestmagcolind = np.argmin(magmads)
            resultdict['bestmagcol'] = magcols[bestmagcolind]

        except Exception:
            resultdict['bestmagcol'] = None

        outfile = os.path.join(
            outdir, 'varfeatures-%s.pkl' %
            squeeze(resultdict['objectid']).replace(' ', '-'))

        with open(outfile, 'wb') as outfd:
            pickle.dump(resultdict, outfd, protocol=4)

        return outfile

    except Exception as e:

        LOGEXCEPTION('failed to get LC features for %s because: %s' %
                     (os.path.basename(lcfile), e))
        return None
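
A hypothetical call to the function above; the paths are placeholders and assume the 'hat-sql' format was registered with `lcproc.register_lcformat` beforehand.

pklpath = get_varfeatures('/data/lcs/HAT-123-0001234-lc.pkl',
                          '/data/varfeatures',
                          mindet=1000,
                          lcformat='hat-sql')
# -> '/data/varfeatures/varfeatures-<objectid>.pkl', or None on failure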
Exemplo n.º 27
0
def validate_input_password(email,
                            password,
                            min_length=12,
                            max_match_threshold=50):
    '''This validates user input passwords.

    1. must be at least min_length characters (we'll truncate the password at
       1024 characters since we don't want to store entire novels)
    2. must not match within max_match_threshold of their email
    3. must not match within max_match_threshold of the site's FQDN
    4. must not have a single case-folded character take up more than 20% of the
       length of the password
    5. must not be completely numeric

    '''

    messages = []

    # we'll ignore any repeated white space and fail immediately if the password
    # is all white space
    if len(squeeze(password.strip())) < min_length:

        LOGGER.warning('password for new account: %s is too short' % email)
        messages.append('Your password is too short. '
                        'It must have at least %s characters.' % min_length)
        passlen_ok = False
    else:
        passlen_ok = True

    # check the fuzzy match against the FQDN and email address
    fqdn = socket.getfqdn()
    fqdn_match = UQRatio(password, fqdn)
    email_match = UQRatio(password, email)

    fqdn_ok = fqdn_match < max_match_threshold
    email_ok = email_match < max_match_threshold

    if not fqdn_ok or not email_ok:
        LOGGER.warning('password for new account: %s matches FQDN '
                       '(similarity: %s) or their email address '
                       '(similarity: %s)' % (email, fqdn_match, email_match))
        messages.append('Your password is too similar to either '
                        'the domain name of this LCC-Server or your '
                        'own email address.')

    # next, check if the password is complex enough
    histogram = {}
    for char in password:
        if char.lower() not in histogram:
            histogram[char.lower()] = 1
        else:
            histogram[char.lower()] = histogram[char.lower()] + 1

    hist_ok = True

    for h in histogram:
        if (histogram[h]/len(password)) > 0.2:
            hist_ok = False
            LOGGER.warning('one character is more than '
                           '0.2 x length of the password')
            messages.append(
                'Your password is not complex enough. '
                'One or more characters appear too frequently.'
            )
            break

    # check if the password is all numeric
    if password.isdigit():
        numeric_ok = False
        messages.append('Your password cannot be all numbers.')
    else:
        numeric_ok = True

    return (
        (passlen_ok and email_ok and fqdn_ok and hist_ok and numeric_ok),
        messages
    )
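
A hedged usage sketch of the validator above (the email address and password are placeholders):

ok, messages = validate_input_password('user@example.com',
                                       'correct horse battery staple',
                                       min_length=12,
                                       max_match_threshold=50)
if not ok:
    print('\n'.join(messages))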
Exemplo n.º 28
0
 def test_squeeze(self):
     self.assertEqual(
         squeeze(u"sequences     of    whitespace   chars"),
         u"sequences of whitespace chars",
     )
Exemplo n.º 29
0
def timebinlc(lcfile,
              binsizesec,
              outdir=None,
              lcformat='hat-sql',
              lcformatdir=None,
              timecols=None,
              magcols=None,
              errcols=None,
              minbinelems=7):
    '''This bins the given light curve file in time using the specified bin size.

    Parameters
    ----------

    lcfile : str
        The file name to process.

    binsizesec : float
        The time bin-size in seconds.

    outdir : str or None
        If this is a str, the output LC will be written to `outdir`. If this is
        None, the output LC will be written to the same directory as `lcfile`.

    lcformat : str
        This is the `formatkey` associated with your light curve format, which
        you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curve file.

    lcformatdir : str or None
        If this is provided, gives the path to a directory where you've stored
        your lcformat description JSONs, other than the usual directories lcproc
        knows to search for them in. Use this along with `lcformat` to specify
        an LC format JSON file that's not currently registered with lcproc.

    timecols,magcols,errcols : lists of str
        The keys in the lcdict produced by your light curve reader function that
        correspond to the times, mags/fluxes, and associated measurement errors
        that will be used as inputs to the binning process. If these are None,
        the default values for `timecols`, `magcols`, and `errcols` for your
        light curve format will be used here.

    minbinelems : int
        The minimum number of time-bin elements required to accept a time-bin as
        valid for the output binned light curve.

    Returns
    -------

    str
        The name of the output pickle file with the binned LC.

        Writes the output binned light curve to a pickle that contains the
        lcdict with an added `lcdict['binned'][magcol]` key, which contains the
        binned times, mags/fluxes, and errs as
        `lcdict['binned'][magcol]['times']`, `lcdict['binned'][magcol]['mags']`,
        and `lcdict['binned'][magcol]['errs']` for each `magcol` provided in the
        input or default `magcols` value for this light curve format.

    '''

    try:
        formatinfo = get_lcformat(lcformat, use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc, dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception as e:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    # override the default timecols, magcols, and errcols
    # using the ones provided to the function
    if timecols is None:
        timecols = dtimecols
    if magcols is None:
        magcols = dmagcols
    if errcols is None:
        errcols = derrcols

    # get the LC into a dict
    lcdict = readerfunc(lcfile)

    # this should handle lists/tuples being returned by readerfunc
    # we assume that the first element is the actual lcdict
    # FIXME: figure out how to not need this assumption
    if ((isinstance(lcdict, (list, tuple))) and (isinstance(lcdict[0], dict))):
        lcdict = lcdict[0]

    # skip already binned light curves
    if 'binned' in lcdict:
        LOGERROR('this light curve appears to be binned already, skipping...')
        return None

    lcdict['binned'] = {}

    for tcol, mcol, ecol in zip(timecols, magcols, errcols):

        # dereference the columns and get them from the lcdict
        if '.' in tcol:
            tcolget = tcol.split('.')
        else:
            tcolget = [tcol]
        times = _dict_get(lcdict, tcolget)

        if '.' in mcol:
            mcolget = mcol.split('.')
        else:
            mcolget = [mcol]
        mags = _dict_get(lcdict, mcolget)

        if '.' in ecol:
            ecolget = ecol.split('.')
        else:
            ecolget = [ecol]
        errs = _dict_get(lcdict, ecolget)

        # normalize here if not using special normalization
        if normfunc is None:
            ntimes, nmags = normalize_magseries(times,
                                                mags,
                                                magsarefluxes=magsarefluxes)

            times, mags, errs = ntimes, nmags, errs

        # now bin the mag series as requested
        binned = time_bin_magseries_with_errs(times,
                                              mags,
                                              errs,
                                              binsize=binsizesec,
                                              minbinelems=minbinelems)

        # put this into the special binned key of the lcdict
        lcdict['binned'][mcol] = {
            'times': binned['binnedtimes'],
            'mags': binned['binnedmags'],
            'errs': binned['binnederrs'],
            'nbins': binned['nbins'],
            'timebins': binned['jdbins'],
            'binsizesec': binsizesec
        }

    # done with binning for all magcols, now generate the output file
    # this will always be a pickle

    if outdir is None:
        outdir = os.path.dirname(lcfile)

    outfile = os.path.join(
        outdir, '%s-binned%.1fsec-%s.pkl' %
        (squeeze(lcdict['objectid']).replace(' ', '-'), binsizesec, lcformat))

    with open(outfile, 'wb') as outfd:
        pickle.dump(lcdict, outfd, protocol=pickle.HIGHEST_PROTOCOL)

    return outfile
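
A hypothetical call to the function above, binning a registered-format light curve into 300-second bins (all paths are placeholders):

binned_pkl = timebinlc('/data/lcs/HAT-123-0001234-lc.pkl',
                       300.0,
                       outdir='/data/binned-lcs',
                       minbinelems=7)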
Exemplo n.º 30
0
def doc_render_worker(docpage,
                      basedir,
                      serverindex,
                      siteindex):
    '''This is a worker that renders Markdown to HTML markup.

    Works in a background Executor.

    serverindex and siteindex are the dicts containing server and site doc page
    titles and doc page names.

    '''

    # check for shady doc pages
    if '.' in docpage:
        return None, None
    if '/' in docpage:
        return None, None
    if len(docpage) != len(squeeze(docpage).strip().replace(' ','')):
        return None, None

    # find the doc page requested
    if docpage in serverindex:
        page_title = serverindex[docpage]
        doc_md_file = os.path.join(os.path.dirname(__file__),
                                   '..',
                                   'server-docs',
                                   '%s.md' % docpage)
    elif docpage in siteindex:
        page_title = siteindex[docpage]
        doc_md_file = os.path.join(basedir,'docs',
                                   '%s.md' % docpage)

    # if the doc page is not found in either index, then it doesn't exist
    else:
        return None, None

    # check for some more shenanigans
    if not os.path.exists(doc_md_file):
        return None, None

    doc_md_dir_abspath = os.path.dirname(os.path.abspath(doc_md_file))

    if docpage in serverindex:
        doc_dir_abspath = os.path.abspath(
            os.path.join(os.path.dirname(__file__),
                         '..',
                         'server-docs')
        )
    elif docpage in siteindex:
        doc_dir_abspath = os.path.abspath(os.path.join(basedir,'docs'))

    if (doc_md_dir_abspath != doc_dir_abspath):
        return None, None

    # we'll open in 'r' mode since we want unicode for markdown
    with open(doc_md_file,'r') as infd:
        doc_markdown = infd.read()

    LOGGER.info('read %s for requested docs page: %s...' %
                (doc_md_file, docpage))

    # render the markdown to HTML
    doc_html = markdown.markdown(
        doc_markdown,
        output_format='html5',
        extensions=['markdown.extensions.extra',
                    'markdown.extensions.codehilite',
                    'markdown.extensions.toc',
                    'markdown.extensions.tables'],
        extension_configs={
            'markdown.extensions.codehilite':{
                'guess_lang': False
            },
            'markdown.extensions.toc':{
                'anchorlink': True
            },
        }
    )

    return doc_html, page_title
Exemplo n.º 31
0
def get_starfeatures(lcfile,
                     outdir,
                     kdtree,
                     objlist,
                     lcflist,
                     neighbor_radius_arcsec,
                     deredden=True,
                     custom_bandpasses=None,
                     lcformat='hat-sql',
                     lcformatdir=None):
    '''This runs the functions from :py:func:`astrobase.varclass.starfeatures`
    on a single light curve file.

    Parameters
    ----------

    lcfile : str
        This is the LC file to extract star features for.

    outdir : str
        This is the directory to write the output pickle to.

    kdtree: scipy.spatial.cKDTree
        This is a `scipy.spatial.KDTree` or `cKDTree` used to calculate neighbor
        proximity features. This is for the light curve catalog this object is
        in.

    objlist : np.array
        This is a Numpy array of object IDs in the same order as the
        `kdtree.data` np.array. This is for the light curve catalog this object
        is in.

    lcflist : np.array
        This is a Numpy array of light curve filenames in the same order as
        `kdtree.data`. This is for the light curve catalog this object is in.

    neighbor_radius_arcsec : float
        This indicates the radius in arcsec to search for neighbors for this
        object using the light curve catalog's `kdtree`, `objlist`, `lcflist`,
        and in GAIA.

    deredden : bool
        This controls if the colors and any color classifications will be
        dereddened using 2MASS DUST.

    custom_bandpasses : dict or None
        This is a dict used to define any custom bandpasses in the
        `in_objectinfo` dict you want to make this function aware of and
        generate colors for. Use the format below for this dict::

            {
            '<bandpass_key_1>':{'dustkey':'<twomass_dust_key_1>',
                                'label':'<band_label_1>',
                                'colors':[['<bandkey1>-<bandkey2>',
                                           '<BAND1> - <BAND2>'],
                                          ['<bandkey3>-<bandkey4>',
                                           '<BAND3> - <BAND4>']]},
            .
            ...
            .
            '<bandpass_key_N>':{'dustkey':'<twomass_dust_key_N>',
                                'label':'<band_label_N>',
                                'colors':[['<bandkey1>-<bandkey2>',
                                           '<BAND1> - <BAND2>'],
                                          ['<bandkey3>-<bandkey4>',
                                           '<BAND3> - <BAND4>']]},
            }

        Where:

        `bandpass_key` is a key to use to refer to this bandpass in the
        `objectinfo` dict, e.g. 'sdssg' for SDSS g band

        `twomass_dust_key` is the key to use in the 2MASS DUST result table for
        reddening per band-pass. For example, given the following DUST result
        table (using http://irsa.ipac.caltech.edu/applications/DUST/)::

            |Filter_name|LamEff |A_over_E_B_V_SandF|A_SandF|A_over_E_B_V_SFD|A_SFD|
            |char       |float  |float             |float  |float           |float|
            |           |microns|                  |mags   |                |mags |
             CTIO U       0.3734              4.107   0.209            4.968 0.253
             CTIO B       0.4309              3.641   0.186            4.325 0.221
             CTIO V       0.5517              2.682   0.137            3.240 0.165
            .
            .
            ...

        The `twomass_dust_key` for 'vmag' would be 'CTIO V'. If you want to
        skip DUST lookup and want to pass in a specific reddening magnitude
        for your bandpass, use a float for the value of
        `twomass_dust_key`. If you want to skip DUST lookup entirely for
        this bandpass, use None for the value of `twomass_dust_key`.

        `band_label` is the label to use for this bandpass, e.g. 'W1' for
        WISE-1 band, 'u' for SDSS u, etc.

        The 'colors' list contains color definitions for all colors you want
        to generate using this bandpass. this list contains elements of the
        form::

            ['<bandkey1>-<bandkey2>','<BAND1> - <BAND2>']

        where the first item is the bandpass keys making up this color,
        and the second item is the label for this color to be used by the
        frontends. An example::

            ['sdssu-sdssg','u - g']

    lcformat : str
        This is the `formatkey` associated with your light curve format, which
        you previously passed in to the `lcproc.register_lcformat`
        function. This will be used to look up how to find and read the light
        curve file.

    lcformatdir : str or None
        If this is provided, gives the path to a directory where you've stored
        your lcformat description JSONs, other than the usual directories lcproc
        knows to search for them in. Use this along with `lcformat` to specify
        an LC format JSON file that's not currently registered with lcproc.

    Returns
    -------

    str
        Path to the output pickle containing all of the star features for this
        object.

    '''

    try:
        formatinfo = get_lcformat(lcformat, use_lcformat_dir=lcformatdir)
        if formatinfo:
            (dfileglob, readerfunc, dtimecols, dmagcols, derrcols,
             magsarefluxes, normfunc) = formatinfo
        else:
            LOGERROR("can't figure out the light curve format")
            return None
    except Exception as e:
        LOGEXCEPTION("can't figure out the light curve format")
        return None

    try:

        # get the LC into a dict
        lcdict = readerfunc(lcfile)

        # this should handle lists/tuples being returned by readerfunc
        # we assume that the first element is the actual lcdict
        # FIXME: figure out how to not need this assumption
        if ((isinstance(lcdict, (list, tuple)))
                and (isinstance(lcdict[0], dict))):
            lcdict = lcdict[0]

        resultdict = {
            'objectid': lcdict['objectid'],
            'info': lcdict['objectinfo'],
            'lcfbasename': os.path.basename(lcfile)
        }

        # run the coord features first
        coordfeat = starfeatures.coord_features(lcdict['objectinfo'])

        # next, run the color features
        colorfeat = starfeatures.color_features(
            lcdict['objectinfo'],
            deredden=deredden,
            custom_bandpasses=custom_bandpasses)

        # run a rough color classification
        colorclass = starfeatures.color_classification(colorfeat, coordfeat)

        # finally, run the neighbor features
        nbrfeat = starfeatures.neighbor_gaia_features(lcdict['objectinfo'],
                                                      kdtree,
                                                      neighbor_radius_arcsec)

        # get the objectids of the neighbors found if any
        if nbrfeat['nbrindices'].size > 0:
            nbrfeat['nbrobjectids'] = objlist[nbrfeat['nbrindices']]
            nbrfeat['closestnbrobjectid'] = objlist[
                nbrfeat['closestdistnbrind']]
            nbrfeat['closestnbrlcfname'] = lcflist[
                nbrfeat['closestdistnbrind']]

        else:
            nbrfeat['nbrobjectids'] = np.array([])
            nbrfeat['closestnbrobjectid'] = np.array([])
            nbrfeat['closestnbrlcfname'] = np.array([])

        # update the result dict
        resultdict.update(coordfeat)
        resultdict.update(colorfeat)
        resultdict.update(colorclass)
        resultdict.update(nbrfeat)

        outfile = os.path.join(
            outdir, 'starfeatures-%s.pkl' %
            squeeze(resultdict['objectid']).replace(' ', '-'))

        with open(outfile, 'wb') as outfd:
            pickle.dump(resultdict, outfd, protocol=4)

        return outfile

    except Exception as e:

        LOGEXCEPTION('failed to get star features for %s because: %s' %
                     (os.path.basename(lcfile), e))
        return None
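
A concrete (hypothetical) `custom_bandpasses` dict following the format documented above; the 'SDSS u' dustkey value is illustrative and should be checked against the actual 2MASS DUST result table.

example_custom_bandpasses = {
    'sdssu': {
        'dustkey': 'SDSS u',   # key in the 2MASS DUST table (illustrative)
        'label': 'u',          # label used for this band by frontends
        'colors': [['sdssu-sdssg', 'u - g']],  # one color using this band
    },
}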
Exemplo n.º 32
0
def tag_local_authors(arxiv_date,
                      database=None,
                      match_threshold=0.93,
                      update_db=False):
    '''
    This finds all local authors for all papers on the date arxiv_date and tags
    the rows for them in the DB.

    '''

    # open the database if needed and get a cursor
    if not database:
        database, cursor = opendb()
        closedb = True
    else:
        cursor = database.cursor()
        closedb = False

    # get all local authors first
    query = 'select author from local_authors'
    cursor.execute(query)
    rows = cursor.fetchall()
    if rows and len(rows) > 0:
        # zip(*rows)[0] is not subscriptable in Python 3; use a list comp
        local_authors = [x[0] for x in rows]
        local_authors = [x.lower() for x in local_authors]
        local_authors = [x.replace('.',' ') for x in local_authors]
        local_authors = [squeeze(x) for x in local_authors]

        # this contains firstinitial-lastname pairs
        local_author_fnames = [x.split() for x in local_authors]
        local_author_fnames = [''.join([x[0][0],x[-1]])
                               for x in local_author_fnames]
        local_authors = [x.replace(' ','') for x in local_authors]

    else:
        local_authors = []

    if len(local_authors) > 0:

        # get all the authors for this date
        query = 'select arxiv_id, authors from arxiv where utcdate = date(?)'
        query_params = (arxiv_date,)
        cursor.execute(query, query_params)
        rows = cursor.fetchall()

        if rows and len(rows) > 0:

            local_author_articles = []

            for row in rows:

                paper_authors = row[1]
                paper_authors = (paper_authors.split(': ')[-1]).split(',')

                # normalize these names so we can compare them more robustly
                # to the local authors
                paper_authors = [x.lower().strip() for x in paper_authors]
                paper_authors = [x.split('(')[0] for x in paper_authors]
                paper_authors = [x.strip() for x in paper_authors if len(x) > 1]
                paper_authors = [x.replace('.',' ') for x in paper_authors]
                paper_authors = [squeeze(x) for x in paper_authors]

                paper_author_fnames = [x.split() for x in paper_authors]
                paper_author_fnames = [''.join([x[0][0],x[-1]]) for x
                                       in paper_author_fnames]
                paper_authors = [x.replace(' ','') for x in paper_authors]

                # match to the flastname first, then if that works, try another
                # match with fullname. if both work, then we accept this as a
                # local author match
                for paper_author, paper_fname in zip(paper_authors,
                                                     paper_author_fnames):

                    matched_author_fname = difflib.get_close_matches(
                        paper_fname,
                        local_author_fnames,
                        n=1,
                        cutoff=match_threshold
                    )

                    if matched_author_fname:

                        # this is a bit lower to allow looser matches between
                        # f. m. lastname in the paper authors list and first
                        # lastname pairs in the local authors list
                        matched_author_full = difflib.get_close_matches(
                            paper_author,
                            local_authors,
                            n=1,
                            cutoff=0.7
                        )

                        if matched_author_full:

                            local_author_articles.append(row[0])
                            print('%s: %s, matched paper author: %s '
                                  'to local author: %s' % (row[0],
                                                           paper_authors,
                                                           paper_author,
                                                           matched_author_full))

                            if update_db:
                                cursor.execute('update arxiv '
                                               'set local_authors = ? where '
                                               'arxiv_id = ?', (True, row[0],))

                            break

            if update_db:
                database.commit()

            returnval = local_author_articles

        else:

            print('no articles for this date')
            returnval = False

    else:

        print('no local authors defined')
        returnval = False

    # at the end, close the cursor and DB connection
    if closedb:
        cursor.close()
        database.close()

    return returnval
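
For context, here is a minimal standalone sketch of the two-stage matching used above: normalize names to a squeezed lowercase form plus a firstinitial-lastname key, then fuzzy-match with difflib. The sample names and the 0.9 stand-in for match_threshold are made up for illustration.

import difflib

from tornado.escape import squeeze

def normalize(name):
    # lowercase, turn initials' dots into spaces, collapse whitespace runs
    name = squeeze(name.lower().replace('.', ' ')).strip()
    parts = name.split()
    # return (squeezed full name, firstinitial-lastname key)
    return name.replace(' ', ''), parts[0][0] + parts[-1]

local_full, local_fname = normalize('Jane M. Doe')    # 'janemdoe', 'jdoe'
paper_full, paper_fname = normalize('J. M. Doe')      # 'jmdoe', 'jdoe'

# stage 1: strict match on the firstinitial-lastname key
stage1 = difflib.get_close_matches(paper_fname, [local_fname], n=1, cutoff=0.9)

# stage 2: looser match on the full squeezed name
stage2 = difflib.get_close_matches(paper_full, [local_full], n=1, cutoff=0.7)

print(stage1 and stage2)  # truthy -> accept as a local-author match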
Exemplo n.º 33
0
def create_new_user(
    payload: dict,
    min_pass_length: int = 12,
    max_unsafe_similarity: int = 33,
    override_authdb_path: str = None,
    raiseonfail: bool = False,
    config: SimpleNamespace = None,
) -> dict:
    """Makes a new user.

    Parameters
    ----------

    payload : dict
        This is a dict with the following required keys:

        - full_name: str. Full name for the user

        - email: str. User's email address

        - password: str. User's password.

        Optional payload items include:

        - extra_info: dict. optional dict to add any extra
          info for this user, will be stored as JSON in the DB

        - verify_retry_wait: int, default: 6. This sets the amount of time, in
          hours, that a user must wait before retrying a failed verification
          action, i.e., one where they did not respond with the correct
          verification token before it expired.

        - system_id: str. If this is provided, must be a unique string that will
          serve as the system_id for the user. This ID is safe to share with
          client JS, etc., as opposed to the user_id primary key for the
          user. If not provided, a UUIDv4 will be generated and used for the
          system_id.

        - public_suffix_list: list of str. If this is provided as a payload
          item, it must be a list of domain name suffixes sourced from the
          Mozilla Public Suffix list: https://publicsuffix.org/list/. This is
          used to check if the full name of the user may be a spam link that
          would be spread when the authnzerver emails out verification tokens
          for new users. If the full name contains a suffix in this list, the
          user creation request will fail. If this item is not provided in the
          payload, this function will look for the list in the current
          process's namespace and use it from there if present. If the public
          suffix list can't be found in either place, new user creation will
          fail.

        In addition to these items received from an authnzerver client, the
        payload must also include the following keys (usually added in by a
        wrapping function):

        - reqid: int or str
        - pii_salt: str

    min_pass_length : int
        The minimum required character length of the password.

    max_unsafe_similarity : int
        The maximum fuzzy-match similarity (an integer percentage) allowed
        between the input password and the server's domain name, the user's
        email address, or their full name.

    override_authdb_path : str or None
        If given as a str, is the alternative path to the auth DB.

    raiseonfail : bool
        If True, will raise an Exception if something goes wrong.

    config : SimpleNamespace object or None
        An object containing systemwide config variables as attributes. This is
        useful when the wrapping function needs to pass in some settings
        directly from environment variables.

    Returns
    -------

    dict
        Returns a dict with the user's user_id and user_email, and a boolean for
        send_verification.

    Notes
    -----

    If the email address already exists in the database, then either the user
    has forgotten that they have an account or someone else is being
    annoying. In this case, if is_active is True, we'll tell the user that we've
    sent an email but won't do anything. If is_active is False and
    emailverify_sent_datetime is at least *payload['verify_retry_wait']* hours
    in the past, we'll send a new email verification email and update the
    emailverify_sent_datetime. In this case, we'll just tell the user that we've
    sent the email but won't tell them if their account exists.

    Only after the user verifies their email, is_active will be set to True and
    user_role will be set to 'authenticated'.

    """

    engine, meta, permjson, dbpath = get_procdb_permjson(
        override_authdb_path=override_authdb_path,
        override_permissions_json=None,
        raiseonfail=raiseonfail,
    )

    for key in ("reqid", "pii_salt"):
        if key not in payload:
            LOGGER.error(
                "Missing %s in payload dict. Can't process this request." %
                key)
            return {
                "success": False,
                "user_email": None,
                "user_id": None,
                "send_verification": False,
                "failure_reason": (
                    "invalid request: missing '%s' in request" % key
                ),
                "messages": ["Invalid user creation request."],
            }

    for key in ("full_name", "email", "password"):

        if key not in payload:

            LOGGER.error("[%s] Invalid user creation request, missing %s." %
                         (payload["reqid"], key))

            return {
                "success": False,
                "user_email": None,
                "user_id": None,
                "send_verification": False,
                "failure_reason": (
                    "invalid request: missing '%s' in request" % key
                ),
                "messages": ["Invalid user creation request."],
            }

    #
    # validate the email provided
    #

    # check for Unicode confusables and dangerous usernames
    email_confusables_ok = validators.validate_confusables_email(
        payload["email"])

    # check if the email is a valid one according to HTML5 specs
    email_regex_ok = validators.validate_email_address(payload["email"])

    # check if the email domain is not a disposable email address
    if email_confusables_ok and email_regex_ok:
        email_domain = payload["email"].split("@")[1].casefold()
        email_domain_not_disposable = (
            email_domain not in validators.DISPOSABLE_EMAIL_DOMAINS)
    else:
        email_domain_not_disposable = False

    # if all of the tests above pass, the email is OK
    email_ok = (email_regex_ok and email_confusables_ok
                and email_domain_not_disposable)

    if not email_ok:

        LOGGER.error("[%s] User creation request failed for "
                     "email: %s. "
                     "The email address provided is not valid." % (
                         payload["reqid"],
                         pii_hash(payload["email"], payload["pii_salt"]),
                     ))

        return {
            "success": False,
            "user_email": None,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "invalid email",
            "messages": [
                "The email address provided doesn't "
                "seem to be a valid email address and cannot be used "
                "to sign up for an account on this server."
            ],
        }

    email = validators.normalize_value(payload["email"])
    full_name = validators.normalize_value(payload["full_name"],
                                           casefold=False)

    # sanitize the full name
    full_name = squeeze(xhtml_escape(full_name))
    if "http" in full_name.casefold() or "://" in full_name:
        LOGGER.error(f"[{payload['reqid']}] Full name provided contains "
                     f"a link or is close to one: {full_name} "
                     f"and is likely suspicious.")
        return {
            "success": False,
            "user_email": None,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "invalid full name",
            "messages": [
                "The full name provided appears to contain "
                "an HTTP link, and cannot be used "
                "to sign up for an account on this server."
            ],
        }

    # check if the full name contains a valid public suffix domain
    # it's probably suspicious if so
    currproc = mp.current_process()
    public_suffix_list = getattr(currproc, "public_suffix_list", None)
    if not public_suffix_list:
        public_suffix_list = payload.get("public_suffix_list", None)

    if not public_suffix_list:
        LOGGER.error(
            f"[{payload['reqid']}] Could not validate full name "
            f"because the public suffix list is not provided in "
            f"either the payload or in the current process namespace.")
        return {
            "success": False,
            "user_email": None,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "public suffix list not present",
            "messages": [
                "Full name could not be validated "
                "because of an internal server error."
            ],
        }

    for domain_suffix in public_suffix_list:
        if domain_suffix in full_name.casefold():
            LOGGER.error(f"[{payload['reqid']}] Full name provided contains "
                         f"a link or is close to one: {full_name} "
                         f"and is likely suspicious.")
            return {
                "success": False,
                "user_email": None,
                "user_id": None,
                "send_verification": False,
                "failure_reason": "invalid full name",
                "messages": [
                    "The full name provided appears to contain "
                    "an HTTP link, and cannot be used "
                    "to sign up for an account on this server."
                ],
            }

    # get the password
    password = payload["password"]

    #
    # optional items
    #

    # 1. get extra info if any
    extra_info = payload.get("extra_info", None)

    # 2. get the verify_retry_wait time
    verify_retry_wait = payload.get("verify_retry_wait", 6)
    try:
        verify_retry_wait = int(verify_retry_wait)
    except Exception:
        verify_retry_wait = 6

    if verify_retry_wait < 1:
        verify_retry_wait = 1

    # 3. generate or get a system_id for this user
    if "system_id" in payload and isinstance(payload["system_id"], str):
        system_id = payload["system_id"]
    else:
        system_id = str(uuid.uuid4())

    #
    # proceed to processing
    #

    users = meta.tables["users"]

    # the password is restricted to 256 characters since that should be enough
    # (for 2020), and we don't want to kill our own server by hashing absurdly
    # long passwords through Argon2id.
    input_password = password[:256]

    # hash the user's password
    hashed_password = pass_hasher.hash(input_password)

    # validate the input password to see if it's OK
    # do this here to make sure the password hash completes at least once
    passok, messages = validate_input_password(
        full_name,
        email,
        input_password,
        payload["pii_salt"],
        payload["reqid"],
        min_pass_length=min_pass_length,
        max_unsafe_similarity=max_unsafe_similarity,
        config=config,
    )

    if not passok:

        LOGGER.error("[%s] User creation request failed for "
                     "email: %s. "
                     "The password provided is not secure." % (
                         payload["reqid"],
                         pii_hash(payload["email"], payload["pii_salt"]),
                     ))

        return {
            "success": False,
            "user_email": email,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "invalid password",
            "messages": messages,
        }

    # insert stuff into the user's table, set is_active = False, user_role =
    # 'locked', the emailverify_sent_datetime to datetime.utcnow()
    new_user_dict = None

    try:

        if not extra_info:
            extra_info = {
                "provenance": "request-created",
                "type": "normal-user",
                "verify_retry_wait": verify_retry_wait,
            }
        else:
            extra_info.update({
                "provenance": "request-created",
                "type": "normal-user",
                "verify_retry_wait": verify_retry_wait,
            })

        new_user_dict = {
            "full_name": full_name,
            "system_id": system_id,
            "password": hashed_password,
            "email": email,
            "email_verified": False,
            "is_active": False,
            "emailverify_sent_datetime": datetime.utcnow(),
            "created_on": datetime.utcnow(),
            "user_role": "locked",
            "last_updated": datetime.utcnow(),
            "extra_info": extra_info,
        }

        with engine.begin() as conn:
            ins = insert(users).values(new_user_dict)
            conn.execute(ins)

        user_added = True

    # this will catch stuff like people trying to sign up again with their email
    # address
    except Exception:

        user_added = False

    with engine.begin() as conn:

        # get back the user ID
        sel = (select(
            users.c.email,
            users.c.user_id,
            users.c.system_id,
            users.c.is_active,
            users.c.emailverify_sent_datetime,
        ).select_from(users).where(users.c.email == email))
        result = conn.execute(sel)
        rows = result.first()

    # if the user was added successfully, tell the frontend all is good and to
    # send a verification email
    if user_added and rows:

        LOGGER.info("[%s] User creation request succeeded for "
                    "email: %s. New user_id: %s" % (
                        payload["reqid"],
                        pii_hash(payload["email"], payload["pii_salt"]),
                        pii_hash(rows.user_id, payload["pii_salt"]),
                    ))

        messages.append(
            "User account created. Please verify your email address to log in."
        )

        return {
            "success": True,
            "user_email": rows.email,
            "user_id": rows.user_id,
            "system_id": rows.system_id,
            "send_verification": True,
            "messages": messages,
        }

    # if the user wasn't added successfully, then they exist in the DB already
    elif (not user_added) and rows:

        LOGGER.error("[%s] User creation request failed for "
                     "email: %s. "
                     "The email provided probably exists in the DB already. " %
                     (
                         payload["reqid"],
                         pii_hash(payload["email"], payload["pii_salt"]),
                     ))

        # check the timedelta between now and the emailverify_sent_datetime
        verification_timedelta = (datetime.utcnow() -
                                  rows.emailverify_sent_datetime)

        # this sets whether we should resend the verification email
        resend_verification = (not rows.is_active) and (
            verification_timedelta > timedelta(hours=verify_retry_wait))
        LOGGER.warning(
            "[%s] Existing user_id = %s for new user creation "
            "request with email = %s, is_active = %s. "
            "Email verification originally sent at = %sZ, "
            "verification timedelta: %s, verify_retry_wait = %s hours. "
            "Will resend verification = %s" % (
                payload["reqid"],
                pii_hash(rows.user_id, payload["pii_salt"]),
                pii_hash(payload["email"], payload["pii_salt"]),
                rows.is_active,
                rows.emailverify_sent_datetime.isoformat(),
                verification_timedelta,
                verify_retry_wait,
                resend_verification,
            ))

        if resend_verification:

            # if we're going to resend the verification, update the users
            # table with the latest info sent by the user (they might've
            # changed their password in the meantime)
            if new_user_dict is not None:

                del new_user_dict["created_on"]
                del new_user_dict["system_id"]

                with engine.begin() as conn:

                    upd = (users.update().where(
                        users.c.user_id == rows.user_id).values(new_user_dict))
                    conn.execute(upd)

                    # get back the updated user row
                    sel = (select(
                        users.c.email,
                        users.c.user_id,
                        users.c.system_id,
                        users.c.is_active,
                        users.c.emailverify_sent_datetime,
                    ).select_from(users).where(users.c.email == email))
                    result = conn.execute(sel)
                    rows = result.first()

            LOGGER.warning(
                "[%s] Resending verification to user: %s because timedelta "
                "between original sign up and retry: %s > "
                "verify_retry_wait: %s hours. "
                "User information has been updated "
                "with their latest provided sign-up info." % (
                    payload["reqid"],
                    pii_hash(rows.user_id, payload["pii_salt"]),
                    verification_timedelta,
                    verify_retry_wait,
                ))

        messages.append(
            "User account created. Please verify your email address to log in."
        )
        return {
            "success": False,
            "user_email": rows.email,
            "user_id": rows.user_id,
            "system_id": rows.system_id,
            "send_verification": resend_verification,
            "failure_reason": "user exists",
            "messages": messages,
        }

    # otherwise, the user wasn't added successfully and they don't already exist
    # in the database so something else went wrong.
    else:

        LOGGER.error("[%s] User creation request failed for email: %s. "
                     "Could not add row to the DB." % (
                         payload["reqid"],
                         pii_hash(payload["email"], payload["pii_salt"]),
                     ))

        messages.append(
            "User account could not be created because of an internal "
            "server error. Please try again later."
        )
        return {
            "success": False,
            "user_email": None,
            "user_id": None,
            "send_verification": False,
            "failure_reason": "DB issue with user creation",
            "messages": messages,
        }
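
A minimal sketch of how a wrapping frontend function might call create_new_user(). All values here are made-up placeholders: the authdb path, reqid, pii_salt, and the tiny stand-in public suffix list are illustrative only, and reqid/pii_salt would normally be filled in by the request-handling wrapper.

payload = {
    "full_name": "Example Person",
    "email": "person@example.com",
    "password": "correct-horse-battery-staple-2024",
    "extra_info": {"signup_source": "web"},
    "verify_retry_wait": 6,
    # normally the full Mozilla PSL; shortened stand-in here
    "public_suffix_list": ["com", "org", "net"],
    # added by the wrapping function
    "reqid": "req-00000001",
    "pii_salt": "random-per-deployment-salt",
}

result = create_new_user(
    payload,
    min_pass_length=12,
    max_unsafe_similarity=33,
    override_authdb_path="sqlite:///path/to/auth-db.sqlite",
)

if result["success"] and result["send_verification"]:
    # the caller is responsible for emailing a verification token
    print("created user_id: %s" % result["user_id"])
else:
    print("failed: %s" % result.get("failure_reason"))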
Exemplo n.º 34
0
# -*- coding: utf-8 -*-
#
# Copyright(c) 2015 http://feilong.me
#
# @author: Felinx Lee <*****@*****.**>
#

from tornado.httpclient import HTTPClient, HTTPRequest
from tornado import escape
import hashlib

cookie = "csrftoken=NVddnpsZKq1XJHzdVb5vzv4nMULCUI6r" # Your current cookie 
cookie = escape.squeeze(cookie)
agent = "Instagram 6.14.0 (iPhone7,1; iPhone OS 8_3; zh_CN;zh-Hans) AppleWebKit/420+"
url = "https://i.instagram.com/api/v1/feed/user/1419637354" #  USER_ID should be replaced by real ID like 12345678

def main():
    client = HTTPClient()
    request = HTTPRequest(url, headers={"Cookie": cookie, "User-Agent": agent})
    response = client.fetch(request)
    
    resp = escape.json_decode(response.body)
    items = resp.get("items", [])
    for item in items:
        imgurl = item["image_versions2"]["candidates"][0]["url"]
        # md5() needs bytes on Python 3, so encode the URL first
        urlmd5 = hashlib.md5(imgurl.encode("utf-8")).hexdigest()
        # print a wget command, or fetch the image directly from Python
        print("wget '%s' -O %s.jpg" % (imgurl, urlmd5))

if __name__ == '__main__':
    main()
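
Instead of printing wget commands, the same HTTPClient can fetch each image and write it to disk. A hypothetical variant under the same assumptions (a valid cookie and the response shape shown above):

def download_images():
    client = HTTPClient()
    request = HTTPRequest(url, headers={"Cookie": cookie, "User-Agent": agent})
    response = client.fetch(request)

    for item in escape.json_decode(response.body).get("items", []):
        imgurl = item["image_versions2"]["candidates"][0]["url"]
        name = hashlib.md5(imgurl.encode("utf-8")).hexdigest()
        img = client.fetch(imgurl)  # plain GET for the image bytes
        with open("%s.jpg" % name, "wb") as fd:
            fd.write(img.body)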
Exemplo n.º 35
0
def _write_checkplot_picklefile(checkplotdict,
                                outfile=None,
                                protocol=None,
                                outgzip=False):

    '''This writes the checkplotdict to a (gzipped) pickle file.

    Parameters
    ----------

    checkplotdict : dict
        This the checkplotdict to write to the pickle file.

    outfile : None or str
        The path to the output pickle file to write. If `outfile` is None,
        writes a (gzipped) pickle file of the form:

        checkplot-{objectid}.pkl(.gz)

        to the current directory.

    protocol : int
        This sets the pickle file protocol to use when writing the pickle:

        If None, will choose a protocol using the following rules:

        - 4 -> default in Python >= 3.4 - fast but incompatible with Python 2
        - 3 -> default in Python 3.0-3.3 - mildly fast
        - 2 -> default in Python 2 - very slow, but compatible with Python 2/3

        The default protocol kwarg is None, this will make an automatic choice
        for pickle protocol that's best suited for the version of Python in
        use. Note that this will make pickles generated by Py3 incompatible with
        Py2.

    outgzip : bool
        If this is True, will gzip the output file. Note that if the `outfile`
        str ends in a gzip, this will be automatically turned on.

    Returns
    -------

    str
        The absolute path to the written checkplot pickle file. None if writing
        fails.

    '''

    # use protocol v4 by default (the default for Python >= 3.4); compare
    # against None explicitly so an explicit protocol=0 isn't overridden
    if protocol is None:
        protocol = 4

    if outgzip:

        if not outfile:

            outfile = (
                'checkplot-{objectid}.pkl.gz'.format(
                    objectid=squeeze(checkplotdict['objectid']).replace(' ','-')
                )
            )

        with gzip.open(outfile,'wb') as outfd:
            pickle.dump(checkplotdict,outfd,protocol=protocol)

    else:

        if not outfile:

            outfile = (
                'checkplot-{objectid}.pkl'.format(
                    objectid=squeeze(checkplotdict['objectid']).replace(' ','-')
                )
            )

        # make sure to do the right thing if '.gz' is in the filename but
        # outgzip was False
        if outfile.endswith('.gz'):

            LOGWARNING('output filename ends with .gz but kwarg outgzip=False. '
                       'will use gzip to compress the output pickle')
            with gzip.open(outfile,'wb') as outfd:
                pickle.dump(checkplotdict,outfd,protocol=protocol)

        else:
            with open(outfile,'wb') as outfd:
                pickle.dump(checkplotdict,outfd,protocol=protocol)

    return os.path.abspath(outfile)
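
A quick round-trip sketch for this writer. The checkplotdict below is a made-up minimal stand-in; real checkplot dicts carry period-finder results, finder charts, light curves, and more.

import gzip
import pickle

cpd = {'objectid': 'HAT 579-0001',
       'objectinfo': {'ra': 289.1, 'decl': 41.2}}

# writes checkplot-HAT-579-0001.pkl.gz to the current directory
cpf = _write_checkplot_picklefile(cpd, outgzip=True)

# read it back to verify
with gzip.open(cpf, 'rb') as infd:
    cpd2 = pickle.load(infd)

assert cpd2['objectid'] == cpd['objectid']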
Exemplo n.º 36
0
    def _FormatConversationEmail(cls, client, recipient_id, viewpoint,
                                 activity):
        """Constructs an email which alerts the recipient that they have access to a new
    conversation, either due to a share_new operation, or to an add_followers operation.
    The email includes a clickable link to the conversation on the web site.
    """
        from viewfinder.backend.db.identity import Identity
        from viewfinder.backend.db.photo import Photo
        from viewfinder.backend.db.user import User

        # Get email address of recipient.
        recipient_user = yield gen.Task(User.Query, client, recipient_id, None)
        if recipient_user.email is None:
            # No email address associated with user, so can't send email.
            raise gen.Return(None)

        identity_key = 'Email:%s' % recipient_user.email

        # Create ShortURL that sets prospective user cookie and then redirects to the conversation.
        viewpoint_url = yield AlertManager._CreateViewpointURL(
            client, recipient_user, identity_key, viewpoint)

        sharer = yield gen.Task(User.Query, client, activity.user_id, None)
        sharer_name = AlertManager._GetNameFromUser(sharer,
                                                    prefer_given_name=False)

        # Create the cover photo ShortURL by appending a "next" query parameter to the viewpoint ShortURL.
        cover_photo_url = None
        cover_photo_height = None
        cover_photo_width = None
        if viewpoint.cover_photo is not None:
            next_url = '/episodes/%s/photos/%s.f' % (
                viewpoint.cover_photo['episode_id'],
                viewpoint.cover_photo['photo_id'])
            cover_photo_url = "%s?%s" % (viewpoint_url,
                                         urlencode(dict(next=next_url)))

            photo = yield gen.Task(Photo.Query, client,
                                   viewpoint.cover_photo['photo_id'], None)

            if photo.aspect_ratio < 1:
                cover_photo_height = AlertManager._MAX_COVER_PHOTO_DIM
                cover_photo_width = int(AlertManager._MAX_COVER_PHOTO_DIM *
                                        photo.aspect_ratio)
            else:
                cover_photo_width = AlertManager._MAX_COVER_PHOTO_DIM
                cover_photo_height = int(AlertManager._MAX_COVER_PHOTO_DIM /
                                         photo.aspect_ratio)

        email_args = {
            'from': EmailManager.Instance().GetInfoAddress(),
            'to': recipient_user.email,
            'subject': '%s added you to a conversation' % sharer_name
        }
        util.SetIfNotEmpty(email_args, 'toname', recipient_user.name)
        if sharer_name:
            email_args['fromname'] = '%s via Viewfinder' % sharer_name

        # Create the unsubscribe URL.
        unsubscribe_cookie = User.CreateUnsubscribeCookie(
            recipient_id, AccountSettings.EMAIL_ALERTS)
        unsubscribe_url = 'https://%s/unsubscribe?%s' % (
            options.options.domain, urlencode(dict(cookie=unsubscribe_cookie)))

        # Set viewpoint title.
        viewpoint_title = viewpoint.title if viewpoint is not None else None

        fmt_args = {
            'cover_photo_url': cover_photo_url,
            'cover_photo_height': cover_photo_height,
            'cover_photo_width': cover_photo_width,
            'viewpoint_url': viewpoint_url,
            'unsubscribe_url': unsubscribe_url,
            'sharer_name': sharer_name,
            'viewpoint_title': viewpoint_title,
            'toname': recipient_user.name
        }

        resources_mgr = ResourcesManager.Instance()

        email_args['html'] = escape.squeeze(
            resources_mgr.GenerateTemplate('alert_conv_base.email',
                                           is_html=True,
                                           **fmt_args))
        email_args['text'] = resources_mgr.GenerateTemplate(
            'alert_conv_base.email', is_html=False, **fmt_args)

        raise gen.Return(email_args)
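
The escape.squeeze() call on the rendered HTML collapses runs of whitespace so the generated email body stays compact; the plain-text variant is left as-is, presumably because its line breaks matter. A tiny illustration of the squeeze step:

from tornado import escape

html = """
<p>
    Hello,   world
</p>
"""
print(escape.squeeze(html))  # -> '<p> Hello, world </p>'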
Exemplo n.º 37
0
    def test_squeeze(self):
        self.assertEqual(
            squeeze(u"sequences     of    whitespace   chars"),
            u"sequences of whitespace chars",
        )
Exemplo n.º 38
0
  def _FormatConversationEmail(cls, client, recipient_id, viewpoint, activity):
    """Constructs an email which alerts the recipient that they have access to a new
    conversation, either due to a share_new operation, or to an add_followers operation.
    The email includes a clickable link to the conversation on the web site.
    """
    from viewfinder.backend.db.identity import Identity
    from viewfinder.backend.db.photo import Photo
    from viewfinder.backend.db.user import User

    # Get email address of recipient.
    recipient_user = yield gen.Task(User.Query, client, recipient_id, None)
    if recipient_user.email is None:
      # No email address associated with user, so can't send email.
      raise gen.Return(None)

    identity_key = 'Email:%s' % recipient_user.email

    # Create ShortURL that sets prospective user cookie and then redirects to the conversation.
    viewpoint_url = yield AlertManager._CreateViewpointURL(client, recipient_user, identity_key, viewpoint)

    sharer = yield gen.Task(User.Query, client, activity.user_id, None)
    sharer_name = AlertManager._GetNameFromUser(sharer, prefer_given_name=False)

    # Create the cover photo ShortURL by appending a "next" query parameter to the viewpoint ShortURL.
    cover_photo_url = None
    cover_photo_height = None
    cover_photo_width = None
    if viewpoint.cover_photo is not None:
      next_url = '/episodes/%s/photos/%s.f' % (viewpoint.cover_photo['episode_id'], viewpoint.cover_photo['photo_id'])
      cover_photo_url = "%s?%s" % (viewpoint_url, urlencode(dict(next=next_url)))

      photo = yield gen.Task(Photo.Query, client, viewpoint.cover_photo['photo_id'], None)

      if photo.aspect_ratio < 1:
        cover_photo_height = AlertManager._MAX_COVER_PHOTO_DIM
        cover_photo_width = int(AlertManager._MAX_COVER_PHOTO_DIM * photo.aspect_ratio)
      else:
        cover_photo_width = AlertManager._MAX_COVER_PHOTO_DIM
        cover_photo_height = int(AlertManager._MAX_COVER_PHOTO_DIM / photo.aspect_ratio)

    email_args = {'from': EmailManager.Instance().GetInfoAddress(),
                  'to': recipient_user.email,
                  'subject': '%s added you to a conversation' % sharer_name}
    util.SetIfNotEmpty(email_args, 'toname', recipient_user.name)
    if sharer_name:
      email_args['fromname'] = '%s via Viewfinder' % sharer_name

    # Create the unsubscribe URL.
    unsubscribe_cookie = User.CreateUnsubscribeCookie(recipient_id, AccountSettings.EMAIL_ALERTS)
    unsubscribe_url = 'https://%s/unsubscribe?%s' % (options.options.domain,
                                                     urlencode(dict(cookie=unsubscribe_cookie)))

    # Set viewpoint title.
    viewpoint_title = viewpoint.title if viewpoint is not None else None

    fmt_args = {'cover_photo_url': cover_photo_url,
                'cover_photo_height': cover_photo_height,
                'cover_photo_width': cover_photo_width,
                'viewpoint_url': viewpoint_url,
                'unsubscribe_url': unsubscribe_url,
                'sharer_name': sharer_name,
                'viewpoint_title': viewpoint_title,
                'toname': recipient_user.name}

    resources_mgr = ResourcesManager.Instance()

    email_args['html'] = escape.squeeze(resources_mgr.GenerateTemplate('alert_conv_base.email',
                                                                       is_html=True,
                                                                       **fmt_args))
    email_args['text'] = resources_mgr.GenerateTemplate('alert_conv_base.email',
                                                        is_html=False,
                                                        **fmt_args)

    raise gen.Return(email_args)
Exemplo n.º 39
0
    def test_squeeze(self):
        self.assertEqual(squeeze(u'sequences     of    whitespace   chars'),
                         u'sequences of whitespace chars')
Exemplo n.º 40
0
def validate_input_password(full_name,
                            email,
                            password,
                            min_length=12,
                            max_match_threshold=20):
    '''This validates user input passwords.

    1. must be at least min_length characters (we'll truncate the password at
       1024 characters since we don't want to store entire novels)
    2. must not match within max_match_threshold of their email or full_name
    3. must not match within max_match_threshold of the site's FQDN
    4. must not have a single case-folded character take up more than 20% of the
       length of the password
    5. must not be completely numeric
    6. must not be in the top 10k passwords list

    '''

    messages = []

    # we'll ignore any repeated white space and fail immediately if the password
    # is all white space
    if len(squeeze(password.strip())) < min_length:

        LOGGER.warning('password for new account: %s is too short' % email)
        messages.append('Your password is too short. '
                        'It must have at least %s characters.' % min_length)
        passlen_ok = False
    else:
        passlen_ok = True

    # check if the password is straight-up dumb
    if password.casefold() in validators.TOP_10K_PASSWORDS:
        LOGGER.warning('password for new account: %s is '
                       'in top 10k passwords list' % email)
        messages.append('Your password is on the list of the '
                        'most common passwords and is vulnerable to guessing.')
        tenk_ok = False
    else:
        tenk_ok = True

    # FIXME: also add fuzzy matching to top 10k passwords list to avoid stuff
    # like 'passwordpasswordpassword'

    # check the fuzzy match against the FQDN and email address
    fqdn = socket.getfqdn()
    fqdn_match = UQRatio(password.casefold(), fqdn.casefold())
    email_match = UQRatio(password.casefold(), email.casefold())
    name_match = UQRatio(password.casefold(), full_name.casefold())

    fqdn_ok = fqdn_match < max_match_threshold
    email_ok = email_match < max_match_threshold
    name_ok = name_match < max_match_threshold

    if not fqdn_ok or not email_ok or not name_ok:
        LOGGER.warning('password for new account: %s matches the FQDN '
                       '(similarity: %s), their email address '
                       '(similarity: %s), or their full name '
                       '(similarity: %s)' %
                       (email, fqdn_match, email_match, name_match))
        messages.append('Your password is too similar to either '
                        'the domain name of this server or your '
                        'own name or email address.')

    # next, check if the password is complex enough
    histogram = {}
    for char in password:
        key = char.lower()
        histogram[key] = histogram.get(key, 0) + 1

    hist_ok = True

    for h in histogram:
        if (histogram[h] / len(password)) > 0.2:
            hist_ok = False
            LOGGER.warning('one character is more than '
                           '0.2 x length of the password')
            messages.append(
                'Your password is not complex enough. '
                'One or more characters appear too frequently.')
            break

    # check if the password is all numeric
    if password.isdigit():
        numeric_ok = False
        messages.append('Your password cannot be all numbers.')
    else:
        numeric_ok = True

    return ((passlen_ok and email_ok and name_ok and fqdn_ok and hist_ok
             and numeric_ok and tenk_ok), messages)
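
A standalone feel for the fuzzy-similarity gate, using difflib's ratio as a stand-in for fuzzywuzzy's UQRatio (both report a 0-100-style similarity; the password and threshold here are illustrative only):

import difflib
from collections import Counter

def similarity(a, b):
    # 0-100 similarity score, standing in for UQRatio
    matcher = difflib.SequenceMatcher(None, a.casefold(), b.casefold())
    return int(100 * matcher.ratio())

password = 'example.com-admin-2020'
fqdn = 'example.com'  # the function above uses socket.getfqdn()

print(similarity(password, fqdn))  # ~66, well over a threshold of 20

# the complexity (histogram) check from above, compactly
counts = Counter(password.lower())
hist_ok = max(counts.values()) / len(password) <= 0.2
print(hist_ok)  # True: no single character dominates the password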