示例#1
0
def apply_auth_defaults(params):
    """Return a copy of ``params`` with default email settings filled in.

    Defaults applied when the key is absent:

    * ``platform``: ``config.platform_name``.
    * ``from_name``: ``'PERTS'`` for the ``'neptune'`` platform, ``'Copilot'``
      for ``'triton'``; left unset for any other platform.
    * ``template_content``: an empty dict.
    * ``template_content['contact_email_address']``: ``params['from_address']``
      if given, otherwise ``config.from_server_email_address``.
    * ``domain``: ``util.get_domain()``. A caller-supplied domain is passed to
      ``check_domain_allowed`` instead.

    The resulting domain is always mirrored into
    ``template_content['domain']`` so email templates can use it.

    Neither ``params`` nor its nested ``template_content`` dict is mutated.
    """
    params = params.copy()

    # dict.copy() is shallow, so take a private copy of the nested dict as
    # well; otherwise the writes below would leak into the caller's
    # template_content.
    params['template_content'] = dict(params.get('template_content', {}))
    template_content = params['template_content']

    if 'platform' not in params:
        # Default to the platform this server is configured for.
        params['platform'] = config.platform_name

    if 'from_name' not in params:
        if params['platform'] == 'neptune':
            params['from_name'] = 'PERTS'
        elif params['platform'] == 'triton':
            params['from_name'] = 'Copilot'

    if 'contact_email_address' not in template_content:
        template_content['contact_email_address'] = \
            params.get('from_address', config.from_server_email_address)

    if 'domain' in params:
        check_domain_allowed(params['domain'])
    else:
        # Default to the domain reported by util.get_domain().
        params['domain'] = util.get_domain()
    # Either way, also make it available to email templates
    template_content['domain'] = params['domain']

    return params
示例#2
0
def setup_agent4(hostname=None, domain=None, pc="1", agent_conf="files/puppet-agent.conf", puppetserver=None, proxy_url=None, hosts_file=None):
    """Setup Puppet 4 agent

    Installs the puppetlabs release package and the "puppet-agent" package,
    uploads ``agent_conf`` as /etc/puppetlabs/puppet/puppet.conf, points the
    agent at the Puppet server, sets the node's hostname and hosts entries,
    and finally runs the agent with noop="True".

    hostname / domain: detected with util.get_hostname() / util.get_domain()
        when not given.
    pc, proxy_url: passed to install_puppetlabs_release_package().
    puppetserver: server name for the agent. When empty, the "puppetserver"
        value of the "puppet" config section is used; if that lookup fails,
        "puppet.<domain>" is used.
    hosts_file: passed to util.add_host_entries().
    """
    import package, util, config

    if not hostname:
        hostname = util.get_hostname()
    if not domain:
        domain = util.get_domain()

    install_puppetlabs_release_package(pc, proxy_url=proxy_url)
    package.install("puppet-agent")

    # Use puppetserver value from setting.ini file if none is given on the
    # command-line. If that fails use the default.
    if not puppetserver:
        try:
            puppetserver = config.get("puppet", "puppetserver")
        except Exception:
            # Best-effort lookup: a missing section/option just means "use the
            # default". Unlike a bare except, this lets SystemExit and
            # KeyboardInterrupt propagate.
            puppetserver = None

    # Add a customized puppet.conf
    util.put_and_chown(agent_conf, "/etc/puppetlabs/puppet/puppet.conf")
    if puppetserver:
        server = puppetserver
    else:
        server = "puppet.%s" % domain
    sudo("puppet config set --section agent server %s" % server)

    util.set_hostname(hostname + "." + domain)
    util.add_host_entry(util.get_ip(), hostname, domain)

    # Optionally add hosts from a separate file. This is useful when the IP of
    # the puppetmaster as seen from the Puppet agent node does not match its
    # name in DNS.
    util.add_host_entries(hosts_file)
    util.add_to_path("/opt/puppetlabs/bin")
    run_agent(noop="True", onlychanges="False")
示例#3
0
def aggregate_investor(source_investor):
    """Resolve a source_investor row to an investor id, creating one if needed.

    If the row already carries an ``investorId`` it is returned unchanged.
    Otherwise an existing investor is looked up by name, then by website,
    then by domain (as returned by ``util.get_domain``). When none matches, a
    new investor row is inserted from the source row. In both cases the
    source_investor row is updated with the resolved id, which is returned.
    """
    investor_id = source_investor["investorId"]
    if investor_id is not None:
        return investor_id

    name = source_investor["name"]
    website = source_investor["website"]
    # A website containing "无" is treated as "no website". Check for None
    # first: the lookups below already allow for a missing website, but
    # calling a string method on None would raise AttributeError.
    if website is not None and "无" in website:
        website = None
    domain = util.get_domain(website)

    # Match in order of decreasing specificity: name, website, domain.
    investor = conn.get("select * from investor where name=%s", name)
    if investor is None and website is not None and website != "":
        investor = conn.get("select * from investor where website=%s", website)
    if investor is None and domain is not None and domain != "":
        investor = conn.get("select * from investor where domain=%s", domain)

    if investor is None:
        # NOTE(review): 10020 is stored in the "type" column; its meaning is
        # not visible from here.
        investor_id = conn.insert(
            "insert investor(name,website,domain,\
                            description,logo,stage,field,type,\
                            active,createTime,modifyTime) \
                            values(%s,%s,%s,\
                            %s,%s,%s,%s,%s,\
                            'Y',now(),now())", source_investor["name"],
            website, domain, source_investor["description"],
            source_investor["logo"], source_investor["stage"],
            source_investor["field"], 10020)
    else:
        investor_id = investor["id"]
    conn.update("update source_investor set investorId=%s where id=%s",
                investor_id, source_investor["id"])

    return investor_id
示例#4
0
def calc_EIstep(X_init, Y_init, batchsize, normalize, savepath, kernel):
    """Suggest the next batch of points using EI with local penalization.

    Builds a GPyOpt ModularBayesianOptimization object from the initial data,
    asks it for the next locations, pickles the optimizer object to
    ``<savepath>/model/EI_j.pkl`` and returns the suggested locations.

    X_init, Y_init: initial inputs/outputs handed to GPyOpt. X_init must have
        a ``shape`` attribute; ``shape[1]`` is used as the RBF input dimension.
    batchsize: batch size for the LocalPenalization evaluator.
    normalize: forwarded to get_domain(); when truthy the suggested points
        are passed through rescale() before being returned.
    savepath: directory that contains a ``model`` subdirectory.
    kernel: "RBF" or "matern52".

    Raises ValueError for any other ``kernel`` value.
    """
    # Fail fast on an unsupported kernel. Without this check no model would be
    # built and the function would fail later with an UnboundLocalError.
    if kernel not in ("RBF", "matern52"):
        raise ValueError(
            "unsupported kernel %r (expected 'RBF' or 'matern52')" % (kernel,))

    space = GPyOpt.core.task.space.Design_space(get_domain(normalize=normalize), None)
    if kernel == "RBF":
        model_gp = GPyOpt.models.GPModel(kernel=GPy.kern.RBF(input_dim=X_init.shape[1], ARD=True),ARD=True,verbose=False)
    else:
        # No kernel is passed here, so GPModel's own default is used
        # (presumably Matern 5/2, given the option name; confirm against
        # the GPyOpt version in use).
        model_gp = GPyOpt.models.GPModel(ARD=True, verbose=False)
    objective = GPyOpt.core.task.SingleObjective(None)
    acquisition_optimizer = GPyOpt.optimization.AcquisitionOptimizer(space)
    acquisition_EI = GPyOpt.acquisitions.AcquisitionEI(model_gp, space, acquisition_optimizer, jitter=0)
    acquisition = GPyOpt.acquisitions.LP.AcquisitionLP(model_gp, space, acquisition_optimizer,acquisition_EI)
    evaluator = GPyOpt.core.evaluators.LocalPenalization(acquisition, batch_size=batchsize)

    bo_EI = GPyOpt.methods.ModularBayesianOptimization(
        model=model_gp,
        space=space,
        objective=objective,
        acquisition=acquisition,
        evaluator=evaluator,
        X_init=X_init,
        Y_init=Y_init,
        normalize_Y=True
    )

    nextX = bo_EI.suggest_next_locations()

    if normalize:
        nextX = rescale(nextX)

    # Persist the optimizer object alongside the results.
    with open( savepath+"/model/EI_j"+".pkl", "wb") as f:
        pickle.dump(bo_EI, f, protocol=2)

    return nextX
示例#5
0
def main():
    """Print noun frequencies for the pages listed in sample.json.

    Loads sample.json, skips pages whose URL contains "/privacy/" or "/en/",
    tokenizes each remaining page's content, counts the surface forms of
    tokens whose part of speech starts with "名詞", and pretty-prints the
    totals from most to least common. Finally prints the result of
    get_domain() for a sample URL.
    """
    with open("sample.json", encoding="utf-8") as fp:
        pages = json.load(fp)

    # Page-level filtering
    contents = []
    for page in pages:
        if "/privacy/" in page["url"] or "/en/" in page["url"]:
            continue
        contents.append(page["content"])

    tokenizer = Tokenizer()
    totals = Counter()
    for text in contents:
        print("=" * 50)
        nouns = []
        for tok in tokenizer.tokenize(text):
            if tok.part_of_speech.startswith("名詞"):
                nouns.append(tok.surface)

        # Pages that mention JavaScript together with "無効" or "enable" are
        # left out of the totals.
        mentions_js_notice = "JavaScript" in nouns and (
            "無効" in nouns or "enable" in nouns)
        if not mentions_js_notice:
            totals += Counter(nouns)

    pprint(totals.most_common())

    from util import get_domain
    print(get_domain("https://news.yahoo.co.jp/"))
示例#6
0
def setup_agent5(hostname=None, domain=None, pc="1", agent_conf="files/puppet-agent5.conf", puppetserver=None,
                 proxy_url=None, hosts_file=None):
    """Setup Puppet 5 agent

    Installs the puppetlabs release package and the "puppet-agent" package,
    uploads ``agent_conf`` as /etc/puppetlabs/puppet/puppet.conf, points the
    agent at the Puppet server, sets the node's hostname and hosts entries,
    and finally runs the agent with noop="False".

    hostname / domain: detected with util.get_hostname() / util.get_domain()
        when not given.
    pc, proxy_url: passed to install_puppetlabs_release_package().
    puppetserver: server name for the agent. When empty, the "puppetserver"
        value of the "puppet" config section is used; if that lookup fails,
        "puppet.<domain>" is used.
    hosts_file: passed to util.add_host_entries().
    """
    import package, util, config
    if not hostname:
        hostname = util.get_hostname()
    if not domain:
        domain = util.get_domain()
    install_puppetlabs_release_package(pc, proxy_url=proxy_url)
    package.install("puppet-agent")
    # Use puppetserver value from setting.ini file if none is given on the command-line. If that fails use the default.
    if not puppetserver:
        try:
            puppetserver = config.get("puppet", "puppetserver")
        except Exception:
            # Best-effort lookup: a missing section/option just means "use the
            # default". Unlike a bare except, this lets SystemExit and
            # KeyboardInterrupt propagate.
            puppetserver = None
    # Add a customized puppet.conf
    util.put_and_chown(agent_conf, "/etc/puppetlabs/puppet/puppet.conf")
    if puppetserver:
        server = puppetserver
    else:
        server = "puppet.%s" % domain
    sudo("puppet config set --section agent server %s" % server)
    util.set_hostname(hostname + "." + domain)
    util.add_host_entry(util.get_ip(), hostname, domain)
    # Optionally add hosts from a separate file. This is useful when the IP of the puppetmaster as seen from the Puppet agent
    # node does not match its name in DNS.
    util.add_host_entries(hosts_file)
    util.add_to_path("/opt/puppetlabs/bin")
    run_agent(noop="False", onlychanges="False")
示例#7
0
def setup_agent4(hostname=None,
                 domain=None,
                 pc="1",
                 agent_conf="files/puppet-agent.conf",
                 proxy_url=None,
                 hosts_file=None):
    """Install and configure a Puppet 4 agent, then run it in noop mode.

    Missing ``hostname`` / ``domain`` values are filled in from
    util.get_hostname() / util.get_domain(). ``agent_conf`` is uploaded as
    the agent's puppet.conf and ``hosts_file`` is handed to
    util.add_host_entries().
    """
    import package, util

    # Fall back to detected values for anything the caller left empty.
    hostname = hostname or util.get_hostname()
    domain = domain or util.get_domain()
    fqdn = hostname + "." + domain

    # Packages first, then the agent configuration.
    install_puppetlabs_release_package(pc, proxy_url=proxy_url)
    package.install("puppet-agent")
    util.put_and_chown(agent_conf, "/etc/puppetlabs/puppet/puppet.conf")

    # Name the node and map that name to 127.0.1.1 in the hosts entries.
    util.set_hostname(fqdn)
    util.add_host_entry("127.0.1.1", hostname, domain)

    # Extra host entries can come from a separate file, which helps when the
    # puppetmaster's IP does not match its DNS name.
    util.add_host_entries(hosts_file)
    util.add_to_path("/opt/puppetlabs/bin")
    run_agent(noop="True", onlychanges="False")
示例#8
0
def mandrill_send(template_data=None, **kwargs):
    """Render an email and hand it to the sender for the current environment.

    template_data: optional dict of values for rendering the subject and
        body. It is copied, so the caller's dict is never modified.

    Keyword arguments read here:
        subject (required): template string rendered with ``template_data``.
        to_address (required): recipient, also exposed to the body template.
        html / body / template: source of the HTML body, checked in that
            order. ``html`` is used verbatim, ``body`` is rendered as a
            template string, ``template`` is rendered via render_template().
        text: optional plain-text body.
        mandrill_template, mandrill_template_content, cc_address,
        bcc_address: passed through to the sender (None when absent).
        from_address, reply_to, from_name: passed through only when given.

    Returns None without sending when running in development with
    ``config.should_deliver_smtp_dev`` disabled; otherwise returns whatever
    the selected sender returns.
    """
    # Determine if message should send
    if util.is_development() and not config.should_deliver_smtp_dev:
        logging.info('Email not sent, check config!')
        return None

    # Work on a private copy. The previous mutable default ({}) was shared
    # between calls, so the defaults added below (e.g. the last recipient's
    # to_address) leaked into the next call's subject rendering.
    template_data = dict(template_data) if template_data else {}

    # The subject is rendered before the default template data is added.
    subject = render(kwargs['subject'], **template_data)

    # Add in default template data
    template_data['to_address'] = kwargs.get('to_address', None)
    template_data['domain'] = util.get_domain()
    # Python keeps time to the microsecond, but we don't need it, and
    # it's easier to render as ISO 8601 without it.
    template_data['server_time'] = datetime.datetime.today().replace(microsecond=0)
    template_data['contact_email_address'] = config.from_server_email_address

    # Determine if using html string or a template
    html_body = None
    if 'html' in kwargs:
        html_body = kwargs['html']
    elif 'body' in kwargs:
        html_body = render(kwargs['body'], **template_data)
    elif 'template' in kwargs:
        html_body = render_template(kwargs['template'], **template_data)

    text_body = kwargs.get('text', None)

    # Pick the delivery function for the environment we are running in.
    if util.is_localhost() or util.is_testing():
        sender = _send_localhost_and_testing
    elif util.is_development():
        sender = _send_development
    else:
        sender = _send_production

    optional_mandrill_keys = ('from_address', 'reply_to', 'from_name')
    optional_mandrill_kwargs = {k: kwargs[k] for k in optional_mandrill_keys
                                if k in kwargs}

    return sender(kwargs['to_address'], subject, html_body, text_body,
                  kwargs.get('mandrill_template', None),
                  kwargs.get('mandrill_template_content', None),
                  kwargs.get('cc_address', None),
                  kwargs.get('bcc_address', None),
                  **optional_mandrill_kwargs)
示例#9
0
文件: puppet.py 项目: mattock/fabric
def setup_agent4(hostname=None, domain=None, pc="1", agent_conf="files/puppet-agent.conf", proxy_url=None, hosts_file=None):
    """Setup Puppet 4 agent.

    Installs the puppetlabs release package and "puppet-agent", uploads
    ``agent_conf`` as puppet.conf, sets the hostname and hosts entries, and
    runs the agent with noop="True". ``hostname`` and ``domain`` default to
    the values reported by util.get_hostname() / util.get_domain().
    """
    import package, util

    # Detect whatever the caller did not supply.
    hostname = hostname if hostname else util.get_hostname()
    domain = domain if domain else util.get_domain()

    install_puppetlabs_release_package(pc, proxy_url=proxy_url)
    package.install("puppet-agent")

    puppet_conf = "/etc/puppetlabs/puppet/puppet.conf"
    util.put_and_chown(agent_conf, puppet_conf)

    util.set_hostname("%s.%s" % (hostname, domain))
    util.add_host_entry("127.0.1.1", hostname, domain)

    # Hosts from a separate file are useful when the puppetmaster's IP does
    # not match its name in DNS.
    util.add_host_entries(hosts_file)
    util.add_to_path("/opt/puppetlabs/bin")
    run_agent(noop="True", onlychanges="False")
示例#10
0
文件: puppet.py 项目: mattock/fabric
def setup_server4(hostname=None, domain=None, pc="1", forge_modules=["puppetlabs/stdlib", "puppetlabs/concat", "puppetlabs/firewall", "puppetlabs/apt"]):
    """Setup Puppet 4 server

    Uploads the local configuration files under /etc/puppetlabs, installs
    "puppetserver", adds the requested Puppet Forge modules, puts the
    configuration tree under Git, links the testing environment's hieradata
    and manifests to production, then starts puppetserver and runs the agent.

    hostname / domain: detected with util.get_hostname() / util.get_domain()
        when not given.
    pc: passed to install_puppetlabs_release_package().
    forge_modules: module names passed one by one to add_forge_module().
        The default list is only iterated, never modified.

    Exits the process with status 1 if files/puppet-master.conf or
    files/hiera.yaml cannot be opened locally.
    """
    import package, util, git, service

    # Local files to copy over
    basedir = "/etc/puppetlabs"
    local_master_conf = "files/puppet-master.conf"
    remote_master_conf = basedir+"/puppet/puppet.conf"
    local_hiera_yaml = "files/hiera.yaml"
    remote_hiera_yaml = basedir+"/code/hiera.yaml"
    local_fileserver_conf = "files/fileserver.conf"
    remote_fileserver_conf = basedir+"/puppet/fileserver.conf"
    local_environments = "files/environments"
    remote_codedir = basedir+"/code"
    local_gitignore = "files/gitignore"
    remote_gitignore = basedir+"/.gitignore"
    modules_dir = basedir+"/code/environments/production/modules"

    # Verify that the mandatory local files are in place. Each file is opened
    # in a "with" block so the handle is closed again immediately instead of
    # being leaked.
    try:
        for required_file in (local_master_conf, local_hiera_yaml):
            with open(required_file):
                pass
    except IOError:
        print("ERROR: some local config files were missing!")
        sys.exit(1)

    # Autodetect hostname and domain from env.host, if they're not overridden
    # with method parameters
    if not hostname:
        hostname = util.get_hostname()
    if not domain:
        domain = util.get_domain()

    # Ensure that clock is correct before doing anything else, like creating SSL
    # certificates.
    util.set_clock()

    # Start the install
    install_puppetlabs_release_package(pc)
    package.install("puppetserver")
    util.put_and_chown(local_master_conf, remote_master_conf)
    util.put_and_chown(local_hiera_yaml, remote_hiera_yaml)
    util.put_and_chown(local_fileserver_conf, remote_fileserver_conf)
    util.put_and_chown(local_gitignore, remote_gitignore)
    util.add_to_path("/opt/puppetlabs/bin")
    util.set_hostname(hostname + "." + domain)
    # "facter fqdn" returns a silly name on EC2 without this
    util.add_host_entry("127.0.1.1", hostname, domain)

    # Copy over template environments
    util.put_and_chown(local_environments, remote_codedir)

    # Add modules from Puppet Forge. These should in my experience be limited to
    # those which provide new types and providers. In particular puppetlabs'
    # modules which control some daemon (puppetdb, postgresql, mysql) are
    # extremely complex, very prone to breakage and nasty to debug.
    for module in forge_modules:
        add_forge_module(module)

    # Git setup
    git.install()
    git.init(basedir)
    if not exists(modules_dir):
        sudo("mkdir "+modules_dir)
    git.init(modules_dir)
    git.add_submodules(basedir=modules_dir)
    git.add_all(basedir)
    git.commit(basedir, "Initial commit")

    # Link hieradata and manifests from production to testing. This keeps the
    # testing environment identical to the production environment. The modules
    # directory in testing is separate and may (or may not) contain modules that
    # override or complement those in production.
    util.symlink(remote_codedir+"/environments/production/hieradata", remote_codedir+"/environments/testing/hieradata")
    util.symlink(remote_codedir+"/environments/production/manifests", remote_codedir+"/environments/testing/manifests")

    # Start puppetserver to generate the CA and server certificates/keys
    service.start("puppetserver")
    run_agent(noop="False")
示例#11
0
def display_company_profile():
    ''' Build the JSON profile for the company selected in the submitted form.

    Reads 'selectedCompanyId' from the request form, collects the company's
    Crunchbase data (funding rounds, markets and two chart datasets) and its
    FullContact data. If no FCCompany row exists yet for the company, the
    FullContact data is fetched through the u helpers, stored, and read back.

    Returns the jsonified dict with the keys 'comp_funding_rounds_data',
    'mrkt_funding_research', 'crunchbase' and 'fullcontact'.

    NOTE(review): a company id that matches no CBCompany row, or a company
    without funding rounds, is not handled here and raises
    (AttributeError / IndexError). '''

    selected_cb_comp_id = int(request.form.get('selectedCompanyId'))

    def query_fc_company():
        ''' Fetch the selected company's FCCompany row (or None) with its
        related rows eagerly loaded. '''
        return (
            m.FCCompany.query.
            options(joinedload(m.FCCompany.social_media)).
            options(joinedload(m.FCCompany.company_links)).
            options(joinedload(m.FCCompany.industries).
                joinedload(m.CompanyIndustry.industry_type)).
            options(joinedload(m.FCCompany.cb_company)).
            filter(m.FCCompany.cb_company_id == selected_cb_comp_id).
            first()
            )

    # create a company info dict to store cb & fc info into that can be jsonified
    selected_comp_info_dict = {}

    joined_cb_data = (
        m.CBCompany.query.
        options(joinedload(m.CBCompany.funding_rounds).
            joinedload(m.FundingRound.funding_type)).
        options(joinedload(m.CBCompany.company_markets).
            joinedload(m.CompanyMarket.market_type)).
        filter(m.CBCompany.cb_company_id == selected_cb_comp_id).
        first()
        )

    # create list of funding rounds
    funding_rounds_lst = []
    for item in joined_cb_data.funding_rounds:
        funding_round_dict = {
            'round_name': item.funding_type.funding_type_name.capitalize() + ' ' + item.funding_type.funding_type_code,
            'funded_amount': item.funded_amt,
            'funded_date': item.funded_date,
            'funding_type_id': item.funding_type_id,
        }
        funding_rounds_lst.append(funding_round_dict)

    # sort the list of funding dicts by date
    funding_rounds_lst.sort(key=lambda x: x['funded_date'])

    # data formatting to pass into dictionary for chart rendering
    color_lst = ['#e83e8c', '#20c997', '#6f42c1', '#fd7e14', '#2AA198', '#6610f2',
        '#CB4B16', '#268BD2', '#fff']
    funding_round_labels = [
        item['round_name'] + ': $' + '{:,}'.format(int(item['funded_amount']))
        for item in funding_rounds_lst
        ]
    funding_round_numbers = [item['funded_amount'] for item in funding_rounds_lst]

    selected_comp_info_dict['comp_funding_rounds_data'] = {
        'labels': funding_round_labels,
        'datasets': [
            {'data': funding_round_numbers,
            'backgroundColor': color_lst[:len(funding_round_numbers)],
            'hoverBackgroundColor': color_lst[:len(funding_round_numbers)]}
            ]
        }

    # getting the funding rounds for similar market type/funding round companies
    # figure out latest round funding type id (the list is sorted by date)
    funding_type_id = funding_rounds_lst[-1]['funding_type_id']
    # figure out market type id
    market_types_lst = joined_cb_data.company_markets
    # get list of the market names for crunchbase info dict
    market_type_name_lst = [item.market_type.market_type for item in market_types_lst]

    # all funding rounds of the same type that have a funded amount
    same_funding_type_rounds = (
        m.FundingRound.query.
        options(
            joinedload(m.FundingRound.cb_company).
            joinedload(m.CBCompany.company_markets).
            joinedload(m.CompanyMarket.market_type)).
            filter(m.FundingRound.funding_type_id == funding_type_id,
                m.FundingRound.funded_amt != '').
        all())

    def num_of_months(d1, d2):
        ''' calculate the number of months between funding rounds '''
        return((d1.year - d2.year) * 12 + d1.month - d2.month)

    market_ids_set = set()
    for item in market_types_lst:
        market_ids_set.add(item.market_type_id)

    same_market_and_funding_types = []

    # check for same market type in same_funding_type_round list
    # NOTE(review): a round is appended once per matching market, so a company
    # sharing several markets with the selected one contributes duplicate
    # points. Kept as-is; confirm whether that is intended.
    for item in same_funding_type_rounds:
        item_markets = item.cb_company.company_markets
        for market in item_markets:
            if market.market_type_id in market_ids_set:
                same_market_and_funding_types.append(item)

    # get data in format for scatter chart
    funding_and_market_research_data = []
    for funding_round in same_market_and_funding_types:
        months_since_first_funding = num_of_months(funding_round.funded_date, funding_round.cb_company.first_funding)
        # rounds dated in the same month as the first funding are left out
        if months_since_first_funding != 0:
            funding_and_market_research_data.append({'x': months_since_first_funding, 'y': funding_round.funded_amt})

    # The original literal listed 'labels' twice (['Scatter'] first, then []);
    # Python keeps only the last value, so the effective value [] is the one
    # written here.
    selected_comp_info_dict['mrkt_funding_research'] = {
        'labels': [],
        'datasets': [{
            'label': 'Total {} funding raised since founding by similar market types'.format(funding_rounds_lst[-1]['round_name']),
            'data': funding_and_market_research_data,
            'fill': False,
            'backgroundColor': 'rgba(75,192,192,0.4)',
            'pointBorderColor': 'rgba(75,192,192,1)',
            'pointBackgroundColor': '#fff',
            'pointBorderWidth': 1,
            'pointHoverRadius': 5,
            'pointHoverBackgroundColor': 'rgba(75,192,192,1)',
            'pointHoverBorderColor': 'rgba(220,220,220,1)',
            'pointHoverBorderWidth': 2,
            'pointRadius': 1,
            'pointHitRadius': 10,
            }],
        }

    # add crunchbase info to selected_comp_info_dict
    selected_comp_info_dict['crunchbase'] = [
        {'cb_comp_name': joined_cb_data.cb_company_name},
        {'comp_url': joined_cb_data.cb_url},
        {'state': joined_cb_data.state_code},
        {'city': joined_cb_data.city_name},
        {'funding_rounds': funding_rounds_lst},
        {'total_funding': '{:,}'.format(joined_cb_data.total_funding)},
        {'markets': ', '.join(market_type_name_lst)}
        ]

    # check to see if fc info already stored in db
    fc_company = query_fc_company()

    # function that takes in the joined fc company objects and turns into a lst of dicts
    def create_fc_comp_info_lst(comp_obj):

        social_media_lst = []
        company_links_lst = []
        industry_lst = []

        for item in comp_obj.social_media:
            social_media_site = [
                {'site_name': item.sm_name},
                {'site_url': item.sm_site_url},
                {'site_bio': item.sm_bio}
                ]
            social_media_lst.append(social_media_site)

        # NOTE(review): company_links_lst is built but never added to the
        # returned list; kept so the response stays unchanged.
        for item in comp_obj.company_links:
            company_link_item = [
                {'link_type': item.link_type},
                {'link_url': item.link_url}
            ]
            company_links_lst.append(company_link_item)

        for item in comp_obj.industries:
            company_industry_item = [
                {'industry_type': item.industry_type.industry_name}
            ]
            industry_lst.append(company_industry_item)

        fc_comp_info_lst = [
        {'fc_comp_name': comp_obj.fc_company_name},
        {'comp_domain': comp_obj.fc_company_domain},
        {'company_bio': comp_obj.fc_company_bio},
        {'logo_url': comp_obj.logo_image_url},
        {'founded': comp_obj.founded},
        {'employees': '{:,}'.format(int(comp_obj.num_employees))},
        {'social_media': social_media_lst},
        {'industries': industry_lst}
        ]

        return fc_comp_info_lst

    # if the fc company is not in the db yet, call the api, store the result
    # and read it back
    if fc_company is None:
        comp_domain = u.get_domain(joined_cb_data)

        api_comp_info = u.fetch_fc_company(comp_domain)

        # add to db
        u.load_fc_industry_types(api_comp_info)
        u.load_fc_company(api_comp_info, comp_domain, joined_cb_data.cb_company_id)
        u.db.session.commit()

        fc_company = query_fc_company()

    selected_comp_info_dict['fullcontact'] = create_fc_comp_info_lst(fc_company)

    return jsonify(selected_comp_info_dict)
示例#12
0
def aggregate(source_company_id):
    logger.info("source_company_id: %s" % source_company_id)
    s = conn.get("select * from source_company where id=%s", source_company_id)
    if s == None:
        return

    company_id = find_company(s)

    #company
    if company_id is not None:
        logger.info("Update company: %s" % s["fullName"])
        conn.update("update source_company set companyId=%s where id=%s",
                    company_id, source_company_id)

        company = conn.get("select * from company where id=%s", company_id)
        if company["code"] is None or company["code"] == "":
            code = aggregator_util.get_company_code(company["name"])
            conn.update("update company set code=%s where id=%s", code,
                        company_id)

        # 来自不同的源或不同项目, 如何合并?
        css = conn.query(
            "select * from source_company where companyId=%s and companyStatus!=2020",
            company_id)

        rank = sys.maxint
        selected = None
        if len(css) > 1:
            for cs in css:
                if cs["companyStatus"] == 2020:
                    continue
                if cs["description"] is None or cs["description"] == "":
                    continue

                sa = conn.query(
                    "select * from source_artifact where sourceCompanyId=%s and type=4010",
                    cs["id"])
                for a in sa:
                    domain = util.get_domain(a["link"])
                    if domain is None or domain == "":
                        continue

                    myRank = None
                    if a["rank"] is not None:
                        diff = datetime.datetime.today() - a["rankDate"]
                        if diff.days <= 3:
                            myRank = a["rank"]

                    if myRank is None:
                        alex = trends_tool.get_alexa(domain)
                        logger.info("%s, %s, %s" %
                                    (cs["name"], domain, alex["global_rank"]))
                        try:
                            if alex["global_rank"] == "-":
                                myRank = -1
                            else:
                                myRank = int(alex["global_rank"].replace(
                                    ",", ""))
                            conn.update(
                                "update source_artifact set rank=%s, rankDate=now() where id=%s",
                                myRank, a["id"])
                        except:
                            continue

                    if myRank != -1 and myRank < rank:
                        rank = myRank
                        selected = cs

            if selected is None:
                selected = s  # TODO
        else:
            selected = css[0]

        if selected is not None and selected["companyStatus"] != 2020:
            logger.info("selected=%s" % selected["name"])
            sql = "update company set \
                name=%s,fullName=%s,description=%s,brief=%s,\
                productDesc=%s, modelDesc=%s, operationDesc=%s, teamDesc=%s, marketDesc=%s, compititorDesc=%s, advantageDesc=%s, planDesc=%s, \
                round=%s,roundDesc=%s,companyStatus=%s,fundingType=%s,preMoney=%s,currency=%s,\
                locationId=%s,address=%s,phone=%s,establishDate=%s,logo=%s,\
                headCountMin=%s,headCountMax=%s,\
                modifyTime=now() \
                where id=%s"

            conn.update(
                sql, selected["name"], selected["fullName"],
                selected["description"], selected["brief"],
                selected.get("productDesc"), selected.get("modelDesc"),
                selected.get("operationDesc"), selected.get("teamDesc"),
                selected.get("marketDesc"), selected.get("compititorDesc"),
                selected.get("advantageDesc"), selected.get("planDesc"),
                selected["round"], selected["roundDesc"],
                selected["companyStatus"], selected["fundingType"],
                selected["preMoney"], selected["currency"],
                selected["locationId"], selected["address"], selected["phone"],
                selected["establishDate"], selected["logo"],
                selected["headCountMin"], selected["headCountMax"], company_id)
    else:
        logger.info("New company: %s" % s["fullName"])
        if s["companyStatus"] != 2020:
            code = aggregator_util.get_company_code(s["name"])
            sql = "insert company(code,name,fullName,description,brief,\
                productDesc, modelDesc, operationDesc, teamDesc, marketDesc, compititorDesc, advantageDesc, planDesc, \
                round,roundDesc,companyStatus,fundingType,preMoney,currency,\
                locationId,address,phone,establishDate,logo,\
                headCountMin,headCountMax,\
                active,createTime,modifyTime) \
                values(%s,%s,%s,%s,%s,\
                    %s,%s,%s,%s,%s,%s,%s,%s, \
                    %s,%s,%s,%s,%s,%s,\
                    %s,%s,%s,%s,%s,\
                    %s,%s,\
                    %s,now(),now())"

            company_id = conn.insert(sql, code, s["name"], s["fullName"],
                                     s["description"], s["brief"],
                                     s.get("productDesc"), s.get("modelDesc"),
                                     s.get("operationDesc"), s.get("teamDesc"),
                                     s.get("marketDesc"),
                                     s.get("compititorDesc"),
                                     s.get("advantageDesc"), s.get("planDesc"),
                                     s["round"], s["roundDesc"],
                                     s["companyStatus"], s["fundingType"],
                                     s["preMoney"], s["currency"],
                                     s["locationId"], s["address"], s["phone"],
                                     s["establishDate"], s["logo"],
                                     s["headCountMin"], s["headCountMax"], 'Y')

            conn.update("update source_company set companyId=%s where id=%s",
                        company_id, source_company_id)
        else:
            return

    # company_alias
    add_company_alias(company_id, s["fullName"])

    # domain & company_alias
    source_domains = conn.query(
        "select * from source_domain where sourceCompanyId=%s",
        source_company_id)
    for sd in source_domains:
        if sd["organizerType"] == "企业":
            add_company_alias(company_id, sd["organizer"])

        if sd["organizer"] is not None:
            domain = conn.get(
                "select * from domain where companyId=%s and domain=%s and organizer=%s",
                company_id, sd["domain"], sd["organizer"])
        else:
            domain = conn.get(
                "select * from domain where companyId=%s and domain=%s limit 1",
                company_id, sd["domain"])
        if domain is None:
            sql = "insert domain(companyId,domain,organizer,organizerType,beianhao,mainBeianhao,\
                    websiteName,homepage,beianDate,expire,\
                    active,createTime,modifyTime)\
                    values(%s,%s,%s,%s,%s,%s,\
                    %s,%s,%s,%s,\
                    'Y',now(),now())"
            conn.insert(sql,
                    company_id,
                    sd["domain"],sd["organizer"],sd["organizerType"],sd["beianhao"],sd["mainBeianhao"],\
                    sd["websiteName"],sd["homepage"],sd["beianDate"],sd["expire"]
                    )

    # footprint
    source_footprints = conn.query(
        "select * from source_footprint where sourceCompanyId=%s and footprintId is null",
        source_company_id)
    for sf in source_footprints:
        fp = conn.get(
            "select * from footprint where companyId=%s and footDate=%s and description=%s",
            company_id, sf["footDate"], sf["description"])
        if fp is None:
            sql = "insert footprint(companyId,footDate,description,active,createTime,modifyTime) \
                values(%s,%s,%s,'Y',now(),now())"

            footprint_id = conn.insert(sql, company_id, sf["footDate"],
                                       sf["description"])
        else:
            footprint_id = fp["id"]
        conn.update("update source_footprint set footprintId=%s where id=%s",
                    footprint_id, sf["id"])

    # establishDate
    company1 = conn.get("select * from company where id=%s", company_id)
    if company1["establishDate"] is None:
        gongshang = conn.get(
            "select g.* from gongshang_base g join company_alias a on g.companyAliasId=a.id \
                             join company c on a.companyId=c.id where c.id=%s order by g.establishTime limit 1",
            company_id)
        if gongshang is not None:
            conn.update("update company set establishDate=%s where id=%s",
                        gongshang["establishTime"], company_id)
        else:
            fp = conn.get(
                "select * from footprint where companyId=%s order by footDate limit 1",
                company_id)
            if fp is not None:
                conn.update("update company set establishDate=%s where id=%s",
                            fp["footDate"], company_id)

    # member
    rels = conn.query(
        "select * from source_company_member_rel where sourceCompanyId=%s",
        source_company_id)
    for rel in rels:
        if rel["companyMemberRelId"] is not None:
            # 已匹配
            continue

        source_member_id = rel["sourceMemberId"]
        source_member = conn.get("select * from source_member where id=%s",
                                 source_member_id)
        if source_member is None:
            continue

        member_id = source_member["memberId"]
        if member_id is None:
            member_id = aggregate_member(company_id, source_member)

        cmrel = conn.get(
            "select * from company_member_rel where companyId=%s and memberId=%s",
            company_id, member_id)
        if cmrel is None:
            cmrelId = conn.insert(
                "insert company_member_rel(\
                companyId,memberId,position,joinDate,leaveDate,type,\
                active,createTime,modifyTime) \
                values(%s,%s,%s,%s,%s,%s,'Y',now(),now())", company_id,
                member_id, rel["position"], rel["joinDate"], rel["leaveDate"],
                rel["type"])
        else:
            cmrelId = cmrel["id"]

        conn.update(
            "update source_company_member_rel set companyMemberRelId=%s where id=%s",
            cmrelId, rel["id"])

    # funding & investor
    sfs = conn.query("select * from source_funding where sourceCompanyId=%s",
                     source_company_id)
    for sf in sfs:
        if sf["fundingId"] is None:
            #f = conn.get("select * from funding where companyId=%s and round=%s and roundDesc=%s",
            #             company_id, sf["round"], sf["roundDesc"])
            f = conn.get(
                "select * from funding where companyId=%s and round=%s limit 1",
                company_id, sf["round"])
            if f is None:
                sql = "insert funding(companyId,preMoney,postMoney,investment,\
                            round,roundDesc,currency,precise,fundingDate,fundingType,\
                            active,createTime,modifyTime) \
                        values(%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,'Y',now(),now())"

                fundingId = conn.insert(sql, company_id, sf["preMoney"],
                                        sf["postMoney"], sf["investment"],
                                        sf["round"], sf["roundDesc"],
                                        sf["currency"], sf["precise"],
                                        sf["fundingDate"], 8030)
            else:
                fundingId = f["id"]
            conn.update("update source_funding set fundingId=%s where id=%s",
                        fundingId, sf["id"])
        else:
            fundingId = sf["fundingId"]

        sfirs = conn.query(
            "select * from source_funding_investor_rel where sourceFundingId=%s",
            sf["id"])
        for sfir in sfirs:
            if sfir["fundingInvestorRelId"] is not None:
                continue

            source_investor = conn.get(
                "select * from source_investor where id=%s",
                sfir["sourceInvestorId"])
            if source_investor is None:
                continue
            investor_id = aggregate_investor(source_investor)

            funding_investor_rel = conn.get(
                "select * from funding_investor_rel \
                                where investorId=%s and fundingId=%s",
                investor_id, fundingId)
            if funding_investor_rel is None:
                sql = "insert funding_investor_rel(fundingId, investorId, currency, investment,\
                        precise,active,createTime,modifyTime) \
                        values(%s,%s,%s,%s,%s,'Y',now(),now())"

                fundingInvestorRelId = conn.insert(sql, fundingId, investor_id,
                                                   sfir["currency"],
                                                   sfir["investment"],
                                                   sfir["precise"])
            else:
                fundingInvestorRelId = funding_investor_rel["id"]

            conn.update(
                "update source_funding_investor_rel set fundingInvestorRelId=%s where id=%s",
                fundingInvestorRelId, sfir["id"])

    # update company stage
    funding = conn.get(
        "select * from funding where companyId=%s order by round desc, fundingDate desc limit 1",
        company_id)
    if funding is not None:
        conn.update("update company set round=%s, roundDesc=%s where id=%s",
                    funding["round"], funding["roundDesc"], company_id)

    # artifact
    sas = conn.query("select * from source_artifact where sourceCompanyId=%s",
                     source_company_id)
    for sa in sas:
        if sa["artifactId"] is not None:
            continue
        if sa["type"] == 4010:
            continue
            '''
            if sa["link"] is not None and sa["link"] != "":
                link = util.norm_url(sa["link"])
                a = conn.get("select * from artifact where type=4010 and (name=%s or link=%s)", sa["name"], link)
                if a is None:
                    sql = "insert artifact(companyId,name,description,link,type,active,createTime,modifyTime) \
                            values(%s,%s,%s,%s,4010,'Y',now(),now())"
                    artifact_id = conn.insert(sql,
                            company_id,sa["name"],sa["description"],link
                                )
                else:
                    artifact_id = a["id"]
                conn.update("update source_artifact set artifactId=%s where id=%s",
                            artifact_id, sa["id"])
            '''
        else:
            a = conn.get("select * from artifact where type=%s and name=%s",
                         sa["type"], sa["name"])
            if a is None:
                sql = "insert artifact(companyId,name,description,link,type,active,createTime,modifyTime) \
                        values(%s,%s,%s,%s,%s,'Y',now(),now())"

                artifact_id = conn.insert(sql, company_id, sa["name"],
                                          sa["description"], sa["link"],
                                          sa["type"])
            else:
                artifact_id = a["id"]
            conn.update("update source_artifact set artifactId=%s where id=%s",
                        artifact_id, sa["id"])

    domains = conn.query("select * from domain where companyId=%s", company_id)
    for domain in domains:
        str = domain["homepage"]
        if str is None:
            continue
        homepages = str.split(",")
        for h in homepages:
            logger.info(h)
            homepage = conn.get(
                "select * from homepage where originalHomepage=%s", h)

            lastHomepage = None
            tags = None
            desc = None
            if homepage is None:
                url = "http://" + h
                (flag, r) = my_request.get_no_sesion(logger, url)
                if flag == -1:
                    conn.insert(
                        "insert homepage(companyId,originalHomepage,status,createTime,modifyTime) \
                            values(%s,%s,%s,now(),now())", company_id, h, -1)
                else:
                    logger.info("status=%s, url=%s" % (r.status_code, r.url))
                    netloc = urlsplit(r.url).netloc
                    conn.insert(
                        "insert homepage(companyId,originalHomepage,lastHomepage,status,createTime,modifyTime) \
                            values(%s,%s,%s,%s,now(),now())", company_id, h,
                        netloc, r.status_code)
                    lastHomepage = "http://" + netloc
                    r.encoding = r.apparent_encoding
                    d = pq(r.text)
                    tags = d('meta[name="keywords"]').attr('content')
                    desc = d('meta[name="description"]').attr('content')
            else:
                if homepage["lastHomepage"] is not None:
                    lastHomepage = "http://" + homepage["lastHomepage"]

            if lastHomepage is not None:
                a = conn.get(
                    "select * from artifact where type=4010 and link=%s",
                    lastHomepage)
                if a is None:
                    sql = "insert artifact(companyId,name,link,type,active,createTime,modifyTime,domain,alexa,tags,description) \
                            values(%s,null,%s,4010,'Y',now(),now(),%s,'Y',%s,%s)"

                    artifact_id = conn.insert(sql, company_id, lastHomepage,
                                              util.get_domain(lastHomepage),
                                              tags, desc)
                else:
                    sql = "update artifact set domain=%s, alexa='Y',tags=%s, description=%s where id=%s"
                    conn.update(sql, util.get_domain(lastHomepage), tags, desc,
                                a["id"])

    # news
    aggregator_util.merge_news(source_company_id, company_id, conn)

    # job
    aggregator_util.merge_job(source_company_id, company_id, conn)

    # others
    company = conn.get('select * from company where id = %s', company_id)
    full_name = company['fullName']
    name = company['name']

    # aggregator_util.merge_weibo(company_id, name, full_name, conn)
    # aggregator_util.merge_wechat(company_id, name, full_name, conn)

    # result = trends_tool.haosou_news(result['name'])

    msg = {"type": "company", "id": company_id}
    flag = False
    while flag == False:
        try:
            kafkaProducer.send_messages("aggregator_v2", json.dumps(msg))
            flag = True
        except Exception, e:
            logger.exception(e)
            time.sleep(60)
示例#13
0
        html_parsed= app.get("html_parsed")
        if html_parsed is None:
            continue
        json_content = app.get("json")
        if json_content is None:
            continue

        urls = html_parsed.get('urls')
        if urls == None:
            continue

        companyId = app.get("companyId")
        if companyId is None:
            for url in urls:
                try:
                    domain = util.get_domain(url)
                except:
                    continue

                if domain is None:
                    continue

                d = conn.get("select * from domain where domain=%s limit 1",domain)
                if d is not None:
                    companyId = d["companyId"]
                    break

        if companyId is not None:
            logger.info("companyId=%s, app=%s" % (companyId,app["url"]))
            a = conn.get("select * from artifact where type=4040 and domain=%s limit 1", app["appId"])
            if a is None:
示例#14
0
def setup_server4(hostname=None,
                  domain=None,
                  pc="1",
                  forge_modules=None):
    """Setup Puppet 4 server.

    Args:
        hostname: Short hostname for the server. When falsy it is taken
            from util.get_hostname().
        domain: Domain for the server. When falsy it is taken from
            util.get_domain().
        pc: Passed through to install_puppetlabs_release_package().
        forge_modules: Iterable of module names; each one is handed to
            add_forge_module(). None selects the default set
            (puppetlabs stdlib, concat, firewall and apt).

    Exits the process with status 1 when the local master config or
    hiera.yaml cannot be opened.
    """
    import package, util, git, service

    # Build the default per call instead of sharing one list object
    # between all invocations through the signature.
    if forge_modules is None:
        forge_modules = [
            "puppetlabs/stdlib", "puppetlabs/concat",
            "puppetlabs/firewall", "puppetlabs/apt"
        ]

    # Local files to copy over
    basedir = "/etc/puppetlabs"
    local_master_conf = "files/puppet-master.conf"
    remote_master_conf = basedir + "/puppet/puppet.conf"
    local_hiera_yaml = "files/hiera.yaml"
    remote_hiera_yaml = basedir + "/code/hiera.yaml"
    local_fileserver_conf = "files/fileserver.conf"
    remote_fileserver_conf = basedir + "/puppet/fileserver.conf"
    local_environments = "files/environments"
    remote_codedir = basedir + "/code"
    local_gitignore = "files/gitignore"
    remote_gitignore = basedir + "/.gitignore"
    modules_dir = basedir + "/code/environments/production/modules"

    # Verify that the mandatory local files are in place. Only the master
    # config and hiera.yaml are checked; each handle is closed right away
    # so the probe does not leak file descriptors.
    try:
        for required_file in (local_master_conf, local_hiera_yaml):
            with open(required_file):
                pass
    except IOError:
        print("ERROR: some local config files were missing!")
        sys.exit(1)

    # Autodetect hostname and domain from env.host, if they're not overridden
    # with method parameters
    if not hostname:
        hostname = util.get_hostname()
    if not domain:
        domain = util.get_domain()

    # Ensure that clock is correct before doing anything else, like creating SSL
    # certificates.
    util.set_clock()

    # Start the install
    install_puppetlabs_release_package(pc)
    package.install("puppetserver")
    util.put_and_chown(local_master_conf, remote_master_conf)
    util.put_and_chown(local_hiera_yaml, remote_hiera_yaml)
    util.put_and_chown(local_fileserver_conf, remote_fileserver_conf)
    util.put_and_chown(local_gitignore, remote_gitignore)
    util.add_to_path("/opt/puppetlabs/bin")
    util.set_hostname(hostname + "." + domain)
    # "facter fqdn" return a silly name on EC2 without this
    util.add_host_entry("127.0.1.1", hostname, domain)

    # Copy over template environments
    util.put_and_chown(local_environments, remote_codedir)

    # Add modules from Puppet Forge. These should in my experience be limited to
    # those which provide new types and providers. In particular puppetlabs'
    # modules which control some daemon (puppetdb, postgresql, mysql) are
    # extremely complex, very prone to breakage and nasty to debug.
    for module in forge_modules:
        add_forge_module(module)

    # Git setup
    git.install()
    git.init(basedir)
    if not exists(modules_dir):
        sudo("mkdir " + modules_dir)
    git.init(modules_dir)
    git.add_submodules(basedir=modules_dir)
    git.add_all(basedir)
    git.commit(basedir, "Initial commit")

    # Link hieradata and manifests from production to testing. This keeps the
    # testing environment identical to the production environment. The modules
    # directory in testing is separate and may (or may not) contain modules that
    # override or complement those in production.
    util.symlink(remote_codedir + "/environments/production/hieradata",
                 remote_codedir + "/environments/testing/hieradata")
    util.symlink(remote_codedir + "/environments/production/manifests",
                 remote_codedir + "/environments/testing/manifests")

    # Start puppetserver to generate the CA and server certificates/keys
    service.start("puppetserver")
    run_agent(noop="False")
示例#15
0
 def get_view_fullurl(self):
   """Return this object's view URL joined onto util.get_domain()."""
   base = util.get_domain()
   view_path = self.get_view_url()
   return urlparse.urljoin(base, view_path)
def aggregate(source_company_id):
    """Merge one source_company row and its related rows into the main tables.

    Loads the source_company, resolves the matching company through
    find_company() (inserting a new company when none matches), stores the
    resulting company id back on source_company, then merges the source's
    domains and artifacts. Finally a "company" message is published on the
    "aggregator_v2" Kafka topic, retrying every 60 seconds until it is sent.

    Args:
        source_company_id: id of the source_company row to aggregate.

    Returns:
        None. Returns early when the source row does not exist, or when no
        existing company matches and the source's companyStatus is 2020.
    """
    logger.info("source_company_id: %s", source_company_id)
    s = conn.get("select * from source_company where id=%s", source_company_id)
    if s is None:
        return

    company_id = find_company(s)

    # company
    if company_id is not None:
        logger.info("Update company: %s", s["name"])
    else:
        logger.info("New company: %s", s["name"])
        # NOTE(review): sources with companyStatus 2020 never create a new
        # company; the meaning of that code is not visible here.
        if s["companyStatus"] == 2020:
            return

        code = get_company_code(s["name"])
        sql = "insert company(code,name,fullName,description,brief,\
                productDesc, modelDesc, operationDesc, teamDesc, marketDesc, compititorDesc, advantageDesc, planDesc, \
                round,roundDesc,companyStatus,fundingType,preMoney,currency,\
                locationId,address,phone,establishDate,logo,type,\
                headCountMin,headCountMax,\
                active,createTime,modifyTime) \
                values(%s,%s,%s,%s,%s,\
                    %s,%s,%s,%s,%s,%s,%s,%s, \
                    %s,%s,%s,%s,%s,%s,\
                    %s,%s,%s,%s,%s,41020,\
                    %s,%s,\
                    %s,now(),now())"

        company_id = conn.insert(sql, code, s["name"], s["fullName"],
                                 s["description"], s["brief"],
                                 s.get("productDesc"), s.get("modelDesc"),
                                 s.get("operationDesc"), s.get("teamDesc"),
                                 s.get("marketDesc"),
                                 s.get("compititorDesc"),
                                 s.get("advantageDesc"), s.get("planDesc"),
                                 s["round"], s["roundDesc"],
                                 s["companyStatus"], s["fundingType"],
                                 s["preMoney"], s["currency"],
                                 s["locationId"], s["address"], s["phone"],
                                 s["establishDate"], s["logo"],
                                 s["headCountMin"], s["headCountMax"], 'Y')

    logger.info("companyId=%s", company_id)
    conn.update("update source_company set companyId=%s where id=%s",
                company_id, source_company_id)

    # company_alias
    add_company_alias(company_id, s["fullName"])

    # domain & company_alias
    source_domains = conn.query(
        "select * from source_domain where sourceCompanyId=%s",
        source_company_id)
    for sd in source_domains:
        if sd["organizerType"] == "企业":
            add_company_alias(company_id, sd["organizer"])

        # Match on organizer as well when the source row has one; otherwise
        # any domain row of this company with the same domain counts.
        if sd["organizer"] is not None:
            existing_domain = conn.get(
                "select * from domain where companyId=%s and domain=%s and organizer=%s",
                company_id, sd["domain"], sd["organizer"])
        else:
            existing_domain = conn.get(
                "select * from domain where companyId=%s and domain=%s limit 1",
                company_id, sd["domain"])
        if existing_domain is None:
            sql = "insert domain(companyId,domain,organizer,organizerType,beianhao,mainBeianhao,\
                    websiteName,homepage,beianDate,expire,\
                    active,createTime,modifyTime)\
                    values(%s,%s,%s,%s,%s,%s,\
                    %s,%s,%s,%s,\
                    'Y',now(),now())"
            conn.insert(sql, company_id, sd["domain"], sd["organizer"],
                        sd["organizerType"], sd["beianhao"],
                        sd["mainBeianhao"], sd["websiteName"],
                        sd["homepage"], sd["beianDate"], sd["expire"])
        # TODO: handle expire

    # artifact
    sas = conn.query("select * from source_artifact where sourceCompanyId=%s",
                     source_company_id)
    for sa in sas:
        if sa["artifactId"] is not None:
            # Already linked to an artifact.
            continue
        if sa["type"] == 4010:  # website
            if sa["link"] is not None and sa["link"] != "":
                link = util.norm_url(sa["link"])
                try:
                    link_domain = util.get_domain(link)
                except Exception:
                    # Skip links whose domain cannot be extracted, without
                    # also swallowing KeyboardInterrupt/SystemExit.
                    continue
                a = conn.get(
                    "select * from artifact where companyId=%s and type=4010 and (name=%s or link=%s) limit 1",
                    company_id, sa["name"], link)
                if a is None:
                    sql = "insert artifact(companyId,name,description,link,domain,type,active,createTime,modifyTime) \
                            values(%s,%s,%s,%s,%s,4010,'Y',now(),now())"

                    artifact_id = conn.insert(sql, company_id, sa["name"],
                                              sa["description"], link,
                                              link_domain)
                else:
                    artifact_id = a["id"]
                conn.update(
                    "update source_artifact set artifactId=%s where id=%s",
                    artifact_id, sa["id"])
        elif sa["type"] == 4040:  # itunes
            result = util.re_get_result(r'id(\d*)', sa["link"])
            if result is None:
                continue
            app_id, = result

            # For this type the artifact's domain column holds the app id.
            a = conn.get(
                "select * from artifact where type=4040 and domain=%s", app_id)
            if a is None:
                sql = "insert artifact(companyId,name,description,link,domain,type,active,createTime,modifyTime) \
                        values(%s,%s,%s,%s,%s,4040,'Y',now(),now())"

                artifact_id = conn.insert(sql, company_id, sa["name"],
                                          sa["description"], sa["link"],
                                          app_id)
            else:
                artifact_id = a["id"]
            conn.update("update source_artifact set artifactId=%s where id=%s",
                        artifact_id, sa["id"])
        elif sa["type"] == 4050:  # android
            # Only the market code is needed; the first element is unused.
            _, market = util.get_market(sa["link"])
            if market == 16030:  # wandoujia
                result = util.re_get_result('wandoujia.com/apps/(.*)',
                                            sa["link"])
            elif market == 16040:
                result = util.re_get_result('apkName=(.*)', sa["link"])
            else:
                continue
            if result is None:
                continue
            package_name, = result

            # For this type the artifact's domain column holds the package
            # name.
            a = conn.get(
                "select * from artifact where type=4050 and domain=%s",
                package_name)
            if a is None:
                sql = "insert artifact(companyId,name,description,link,domain,type,active,createTime,modifyTime) \
                        values(%s,%s,%s,%s,%s,4050,'Y',now(),now())"

                artifact_id = conn.insert(sql, company_id, sa["name"],
                                          sa["description"], sa["link"],
                                          package_name)
            else:
                artifact_id = a["id"]
            conn.update("update source_artifact set artifactId=%s where id=%s",
                        artifact_id, sa["id"])

    # Publish the company id; keep retrying once a minute until the send
    # succeeds.
    msg = {"type": "company", "id": company_id}
    while True:
        try:
            kafkaProducer.send_messages("aggregator_v2", json.dumps(msg))
            break
        except Exception as e:
            logger.exception(e)
            time.sleep(60)