def apply_auth_defaults(params):
    """Fill in platform / sender / template defaults for an auth email.

    Returns a new dict; the caller's ``params`` — including any nested
    ``template_content`` dict it passed in — is left unmodified.
    """
    params = params.copy()
    # Copy the nested dict as well: the original shallow copy meant the
    # defaults written below mutated the caller's template_content in place.
    params['template_content'] = dict(params.get('template_content', {}))
    if 'platform' not in params:
        # Default to neptune
        params['platform'] = config.platform_name
    if 'from_name' not in params:
        # The two platforms are mutually exclusive, so elif is equivalent
        # to the original back-to-back ifs.
        if params['platform'] == 'neptune':
            params['from_name'] = 'PERTS'
        elif params['platform'] == 'triton':
            params['from_name'] = 'Copilot'
    if 'contact_email_address' not in params['template_content']:
        params['template_content']['contact_email_address'] = \
            params.get('from_address', config.from_server_email_address)
    if 'domain' in params:
        check_domain_allowed(params['domain'])
    else:
        # Default the domain to neptune.
        params['domain'] = util.get_domain()
    # Either way, also make it available to email templates.
    params['template_content']['domain'] = params['domain']
    return params
def setup_agent4(hostname=None, domain=None, pc="1",
                 agent_conf="files/puppet-agent.conf",
                 puppetserver=None, proxy_url=None, hosts_file=None):
    """Setup Puppet 4 agent.

    Installs the puppet-agent package, drops in a customized puppet.conf,
    points the agent at the puppetserver, fixes up hostname/hosts entries
    and finishes with a no-op agent run.
    """
    import package, util, config
    if not hostname:
        hostname = util.get_hostname()
    if not domain:
        domain = util.get_domain()
    install_puppetlabs_release_package(pc, proxy_url=proxy_url)
    package.install("puppet-agent")
    # Use puppetserver value from setting.ini file if none is given on the
    # command-line. If that fails use the default.
    if not puppetserver:
        try:
            puppetserver = config.get("puppet", "puppetserver")
        # Narrowed from a bare `except:` — that also swallowed SystemExit
        # and KeyboardInterrupt. Missing config still falls back to default.
        except Exception:
            puppetserver = None
    # Add a customized puppet.conf
    util.put_and_chown(agent_conf, "/etc/puppetlabs/puppet/puppet.conf")
    if puppetserver:
        server = puppetserver
    else:
        server = "puppet.%s" % domain
    sudo("puppet config set --section agent server %s" % server)
    util.set_hostname(hostname + "." + domain)
    util.add_host_entry(util.get_ip(), hostname, domain)
    # Optionally add hosts from a separate file. This is useful when the IP of
    # the puppetmaster as seen from the Puppet agent node does not match its
    # name in DNS.
    util.add_host_entries(hosts_file)
    util.add_to_path("/opt/puppetlabs/bin")
    run_agent(noop="True", onlychanges="False")
def aggregate_investor(source_investor):
    """Resolve a source_investor row to an investor record.

    Matches by name, then website, then domain; inserts a new investor row
    when nothing matches. Writes the resolved id back onto the
    source_investor row and returns it.
    """
    investor_id = source_investor["investorId"]
    if investor_id is not None:
        # Already aggregated on a previous run.
        return investor_id
    name = source_investor["name"]
    website = source_investor["website"]
    # "无" means "none" in the source data; treat such values as missing.
    # Also guard against a NULL website — the original crashed calling
    # .find() on None.
    if website is not None and "无" in website:
        website = None
    domain = util.get_domain(website) if website else None
    # Try to match an existing investor by name, then website, then domain.
    investor = conn.get("select * from investor where name=%s", name)
    if investor is None and website is not None and website != "":
        investor = conn.get("select * from investor where website=%s", website)
    if investor is None and domain is not None and domain != "":
        investor = conn.get("select * from investor where domain=%s", domain)
    if investor is None:
        # No match found: create a new active investor row (type 10020).
        investor_id = conn.insert(
            "insert investor(name,website,domain,\
            description,logo,stage,field,type,\
            active,createTime,modifyTime) \
            values(%s,%s,%s,\
            %s,%s,%s,%s,%s,\
            'Y',now(),now())",
            source_investor["name"], website, domain,
            source_investor["description"], source_investor["logo"],
            source_investor["stage"], source_investor["field"], 10020)
    else:
        investor_id = investor["id"]
    conn.update("update source_investor set investorId=%s where id=%s",
                investor_id, source_investor["id"])
    return investor_id
def calc_EIstep(X_init, Y_init, batchsize, normalize, savepath, kernel):
    """Suggest the next batch of points via Expected Improvement.

    Builds a GPyOpt modular Bayesian-optimization object around the
    observed data (X_init, Y_init), uses EI wrapped in Local Penalization
    to propose `batchsize` points at once, pickles the BO object under
    `savepath`, and returns the suggested locations.

    NOTE(review): `kernel` values other than "RBF"/"matern52" leave
    model_gp undefined and would raise NameError — confirm callers only
    pass these two values.
    """
    # Search space comes from the module-level get_domain() helper; when
    # `normalize` is set the suggestions are mapped back via rescale() below.
    space = GPyOpt.core.task.space.Design_space(get_domain(normalize=normalize), None)
    if kernel == "RBF":
        # Explicit ARD RBF kernel: one lengthscale per input dimension.
        model_gp = GPyOpt.models.GPModel(kernel=GPy.kern.RBF(input_dim=X_init.shape[1], ARD=True),ARD=True,verbose=False)
    elif kernel == "matern52":
        # No explicit kernel: GPyOpt's GPModel defaults to Matern52.
        model_gp = GPyOpt.models.GPModel(ARD=True, verbose=False)
    # Objective is None: function evaluations happen externally; this
    # object is only used to suggest the next locations.
    objective = GPyOpt.core.task.SingleObjective(None)
    acquisition_optimizer = GPyOpt.optimization.AcquisitionOptimizer(space)
    # jitter=0: pure (non-exploration-boosted) Expected Improvement.
    acquisition_EI = GPyOpt.acquisitions.AcquisitionEI(model_gp, space, acquisition_optimizer, jitter=0)
    # Local Penalization wraps EI so a batch of distinct points is chosen.
    acquisition = GPyOpt.acquisitions.LP.AcquisitionLP(model_gp, space, acquisition_optimizer,acquisition_EI)
    evaluator = GPyOpt.core.evaluators.LocalPenalization(acquisition, batch_size=batchsize)
    bo_EI = GPyOpt.methods.ModularBayesianOptimization(
        model=model_gp, space=space, objective=objective,
        acquisition=acquisition, evaluator=evaluator,
        X_init=X_init, Y_init=Y_init, normalize_Y=True
    )
    nextX = bo_EI.suggest_next_locations()
    if normalize:
        # Map normalized suggestions back to the original input scale.
        nextX = rescale(nextX)
    # protocol=2 keeps the pickle loadable from Python 2 readers.
    with open( savepath+"/model/EI_j"+".pkl", "wb") as f:
        pickle.dump(bo_EI, f, protocol=2)
    return nextX
def main():
    """Count noun frequencies in crawled page contents from sample.json."""
    with open("sample.json", encoding="utf-8") as f:
        input_json = json.load(f)
    # Page-level filtering: skip privacy-policy pages and English pages.
    contents = [
        d["content"]
        for d in input_json
        if "/privacy/" not in d["url"] and "/en/" not in d["url"]
    ]
    tokenizer = Tokenizer()
    word_count = Counter()
    for sentence in contents:
        print("=" * 50)
        # print(sentence)
        # Keep only nouns ("名詞" is the Japanese part-of-speech tag for noun).
        li = [
            token.surface
            for token in tokenizer.tokenize(sentence)
            if token.part_of_speech.startswith("名詞")
        ]
        # Skip "please enable JavaScript" boilerplate pages
        # ("無効" = "disabled").
        if "JavaScript" in li and ("無効" in li or "enable" in li):
            continue
        c = Counter(li)
        word_count += c
    pprint(word_count.most_common())
    from util import get_domain
    print(get_domain("https://news.yahoo.co.jp/"))
def setup_agent5(hostname=None, domain=None, pc="1",
                 agent_conf="files/puppet-agent5.conf",
                 puppetserver=None, proxy_url=None, hosts_file=None):
    """Setup Puppet 5 agent.

    Installs the puppet-agent package, drops in a customized puppet.conf,
    points the agent at the puppetserver, fixes up hostname/hosts entries
    and finishes with a real (non-noop) agent run.
    """
    import package, util, config
    if not hostname:
        hostname = util.get_hostname()
    if not domain:
        domain = util.get_domain()
    install_puppetlabs_release_package(pc, proxy_url=proxy_url)
    package.install("puppet-agent")
    # Use puppetserver value from setting.ini file if none is given on the
    # command-line. If that fails use the default.
    if not puppetserver:
        try:
            puppetserver = config.get("puppet", "puppetserver")
        # Narrowed from a bare `except:` — that also swallowed SystemExit
        # and KeyboardInterrupt. Missing config still falls back to default.
        except Exception:
            puppetserver = None
    # Add a customized puppet.conf
    util.put_and_chown(agent_conf, "/etc/puppetlabs/puppet/puppet.conf")
    if puppetserver:
        server = puppetserver
    else:
        server = "puppet.%s" % domain
    sudo("puppet config set --section agent server %s" % server)
    util.set_hostname(hostname + "." + domain)
    util.add_host_entry(util.get_ip(), hostname, domain)
    # Optionally add hosts from a separate file. This is useful when the IP
    # of the puppetmaster as seen from the Puppet agent node does not match
    # its name in DNS.
    util.add_host_entries(hosts_file)
    util.add_to_path("/opt/puppetlabs/bin")
    run_agent(noop="False", onlychanges="False")
def setup_agent4(hostname=None, domain=None, pc="1",
                 agent_conf="files/puppet-agent.conf", proxy_url=None,
                 hosts_file=None):
    """Setup Puppet 4 agent"""
    import package, util

    # Fall back to autodetected host and domain names when not supplied.
    hostname = hostname or util.get_hostname()
    domain = domain or util.get_domain()

    # Install the agent from the Puppetlabs release repository.
    install_puppetlabs_release_package(pc, proxy_url=proxy_url)
    package.install("puppet-agent")

    # Drop in the customized agent configuration.
    util.put_and_chown(agent_conf, "/etc/puppetlabs/puppet/puppet.conf")

    fqdn = "%s.%s" % (hostname, domain)
    util.set_hostname(fqdn)
    util.add_host_entry("127.0.1.1", hostname, domain)

    # Optionally add hosts from a separate file. This is useful when the IP of
    # the puppetmaster does not match its name in DNS.
    util.add_host_entries(hosts_file)
    util.add_to_path("/opt/puppetlabs/bin")
    run_agent(noop="True", onlychanges="False")
def mandrill_send(template_data=None, **kwargs):
    """Render and send an email through the appropriate sender backend.

    Notable kwargs: 'subject' (required); one of 'html'/'body'/'template'
    for the message body; optional 'text', 'to_address', 'cc_address',
    'bcc_address', 'mandrill_template', 'mandrill_template_content', and
    the Mandrill sender fields 'from_address'/'reply_to'/'from_name'.
    Returns whatever the selected sender returns, or None when delivery
    is suppressed by config.
    """
    # Determine if message should send
    if util.is_development() and not config.should_deliver_smtp_dev:
        logging.info('Email not sent, check config!')
        return None

    # Copy instead of using a mutable default argument: the original
    # `template_data={}` was one shared dict, so the defaults written below
    # leaked across calls (and mutated any dict the caller passed in).
    template_data = dict(template_data) if template_data else {}

    subject = render(kwargs['subject'], **template_data)

    # Add in default template data
    template_data['to_address'] = kwargs.get('to_address', None)
    template_data['domain'] = util.get_domain()
    # Python keeps time to the microsecond, but we don't need it, and
    # it's easier to render as ISO 8601 without it.
    template_data['server_time'] = datetime.datetime.today().replace(microsecond=0)
    template_data['contact_email_address'] = config.from_server_email_address

    # Determine if using html string or a template
    html_body = None
    if 'html' in kwargs:
        html_body = kwargs['html']
    elif 'body' in kwargs:
        html_body = render(kwargs['body'], **template_data)
    elif 'template' in kwargs:
        html_body = render_template(kwargs['template'], **template_data)

    text_body = kwargs.get('text', None)

    # Pick the sender backend for the current environment.
    if util.is_localhost() or util.is_testing():
        sender = _send_localhost_and_testing
    elif util.is_development():
        sender = _send_development
    else:
        sender = _send_production

    # Only forward the optional Mandrill fields the caller actually set.
    optional_mandrill_keys = ('from_address', 'reply_to', 'from_name')
    optional_mandrill_kwargs = {k: kwargs[k] for k in optional_mandrill_keys
                                if k in kwargs}

    return sender(kwargs['to_address'], subject, html_body, text_body,
                  kwargs.get('mandrill_template', None),
                  kwargs.get('mandrill_template_content', None),
                  kwargs.get('cc_address', None),
                  kwargs.get('bcc_address', None),
                  **optional_mandrill_kwargs)
def setup_server4(hostname=None, domain=None, pc="1",
                  forge_modules=["puppetlabs/stdlib", "puppetlabs/concat",
                                 "puppetlabs/firewall", "puppetlabs/apt"]):
    """Setup Puppet 4 server.

    Installs puppetserver, copies over master/hiera/fileserver configs and
    template environments, adds Forge modules, puts /etc/puppetlabs under
    git, links the testing environment to production, then starts
    puppetserver and runs the agent once.

    NOTE(review): forge_modules is a mutable default argument — harmless
    here since it is only iterated, but a tuple would be safer.
    """
    import package, util, git, service

    # Local files to copy over
    basedir = "/etc/puppetlabs"
    local_master_conf = "files/puppet-master.conf"
    remote_master_conf = basedir+"/puppet/puppet.conf"
    local_hiera_yaml = "files/hiera.yaml"
    remote_hiera_yaml = basedir+"/code/hiera.yaml"
    local_fileserver_conf = "files/fileserver.conf"
    remote_fileserver_conf = basedir+"/puppet/fileserver.conf"
    local_environments = "files/environments"
    remote_codedir = basedir+"/code"
    local_gitignore = "files/gitignore"
    remote_gitignore = basedir+"/.gitignore"
    modules_dir = basedir+"/code/environments/production/modules"

    # Verify that all the local files are in place
    try:
        open(local_master_conf)
        open(local_hiera_yaml)
    except IOError:
        print "ERROR: some local config files were missing!"
        sys.exit(1)

    # Autodetect hostname and domain from env.host, if they're not overridden
    # with method parameters
    if not hostname:
        hostname = util.get_hostname()
    if not domain:
        domain = util.get_domain()

    # Ensure that clock is correct before doing anything else, like creating SSL
    # certificates.
    util.set_clock()

    # Start the install
    install_puppetlabs_release_package(pc)
    package.install("puppetserver")
    util.put_and_chown(local_master_conf, remote_master_conf)
    util.put_and_chown(local_hiera_yaml, remote_hiera_yaml)
    util.put_and_chown(local_fileserver_conf, remote_fileserver_conf)
    util.put_and_chown(local_gitignore, remote_gitignore)
    util.add_to_path("/opt/puppetlabs/bin")
    util.set_hostname(hostname + "." + domain)
    # "facter fqdn" return a silly name on EC2 without this
    util.add_host_entry("127.0.1.1", hostname, domain)

    # Copy over template environments
    util.put_and_chown(local_environments, remote_codedir)

    # Add modules from Puppet Forge. These should in my experience be limited to
    # those which provide new types and providers. In particular puppetlabs'
    # modules which control some daemon (puppetdb, postgresql, mysql) are
    # extremely complex, very prone to breakage and nasty to debug.
    for module in forge_modules:
        add_forge_module(module)

    # Git setup
    git.install()
    git.init(basedir)
    if not exists(modules_dir):
        sudo("mkdir "+modules_dir)
    git.init(modules_dir)
    git.add_submodules(basedir=modules_dir)
    git.add_all(basedir)
    git.commit(basedir, "Initial commit")

    # Link hieradata and manifests from production to testing. This keeps the
    # testing environment identical to the production environment. The modules
    # directory in testing is separate and may (or may not) contain modules that
    # override or complement those in production.
    util.symlink(remote_codedir+"/environments/production/hieradata",
                 remote_codedir+"/environments/testing/hieradata")
    util.symlink(remote_codedir+"/environments/production/manifests",
                 remote_codedir+"/environments/testing/manifests")

    # Start puppetserver to generate the CA and server certificates/keys
    service.start("puppetserver")
    run_agent(noop="False")
def display_company_profile():
    '''
    Check if the selected company exists in FCCompany already.
    If not, calls API. Returns all data on company as JSON, including
    Chart.js-ready funding-round and market-comparison datasets.
    '''
    selected_cb_comp_id = int(request.form.get('selectedCompanyId'))
    # create a company info dict to store cb & fc info into that can be jsonified
    selected_comp_info_dict = {}
    # Eager-load funding rounds (with their funding type) and company
    # markets (with their market type) for the selected Crunchbase company.
    joined_cb_data = (
        m.CBCompany.query.
        options(joinedload(m.CBCompany.funding_rounds).
                joinedload(m.FundingRound.funding_type)).
        options(joinedload(m.CBCompany.company_markets).
                joinedload(m.CompanyMarket.market_type)).
        filter(m.CBCompany.cb_company_id == selected_cb_comp_id).
        first()
    )
    funding_rounds_lst = []
    # create list of funding rounds
    for item in joined_cb_data.funding_rounds:
        funding_round_dict = {
            'round_name': item.funding_type.funding_type_name.capitalize() + ' ' + item.funding_type.funding_type_code,
            'funded_amount': item.funded_amt,
            'funded_date': item.funded_date,
            'funding_type_id': item.funding_type_id,
        }
        funding_rounds_lst.append(funding_round_dict)
    # sort the list of funding dicts by date
    funding_rounds_lst.sort(key=lambda x: x['funded_date'])
    # data formatting to pass into dictionary for chart rendering
    color_lst = ['#e83e8c', '#20c997', '#6f42c1', '#fd7e14', '#2AA198',
                 '#6610f2', '#CB4B16', '#268BD2', '#fff']
    funding_round_labels = []
    funding_round_numbers =[]
    for item in funding_rounds_lst:
        funding_round_labels.append(item['round_name'] + ': $' + '{:,}'.format(int(item['funded_amount'])))
        funding_round_numbers.append(item['funded_amount'])
    selected_comp_info_dict['comp_funding_rounds_data'] = {
        'labels': funding_round_labels,
        'datasets': [
            {'data': funding_round_numbers,
             'backgroundColor': color_lst[:len(funding_round_numbers)],
             'hoverBackgroundColor': color_lst[:len(funding_round_numbers)]}
        ]
    }
    # getting the funding rounds for similar market type/funding round companies
    # figure out latest round funding type id
    # NOTE(review): funding_rounds_lst[-1] raises IndexError when the
    # company has no funding rounds — presumably callers only select
    # funded companies; verify.
    funding_type_id = funding_rounds_lst[-1]['funding_type_id']
    # figure out market type id
    market_types_lst = joined_cb_data.company_markets
    # get list of the market names for crunchbase info dict
    market_type_name_lst = []
    for item in market_types_lst:
        market_type_name_lst.append(item.market_type.market_type)
    # query all rounds of the same funding type (with a non-empty amount),
    # eager-loading each round's company and its markets.
    same_funding_type_rounds = (
        m.FundingRound.query.
        options(
            joinedload(m.FundingRound.cb_company).
            joinedload(m.CBCompany.company_markets).
            joinedload(m.CompanyMarket.market_type)).
        filter(m.FundingRound.funding_type_id == funding_type_id,
               m.FundingRound.funded_amt != '').
        all())

    def num_of_months(d1, d2):
        ''' calculate the number of months between funding rounds '''
        return((d1.year - d2.year) * 12 + d1.month - d2.month)

    # Collect the selected company's market-type ids for fast membership tests.
    selected_comp_markets = joined_cb_data.company_markets
    market_ids_set = set()
    for item in selected_comp_markets:
        market_ids_set.add(item.market_type_id)
    same_market_and_funding_types = []
    # check for same market type in same_funding_type_round list
    # NOTE(review): a round sharing several markets with the selected
    # company is appended once per shared market (no break) — confirm
    # duplicates are acceptable for the scatter chart.
    for item in same_funding_type_rounds:
        item_markets = item.cb_company.company_markets
        for market in item_markets:
            if market.market_type_id in market_ids_set:
                same_market_and_funding_types.append(item)
    # print(len(same_market_and_funding_types))
    # get data in format for scatter chart: x = months since the company's
    # first funding, y = amount raised in that round.
    funding_and_market_research_data = []
    for funding_round in same_market_and_funding_types:
        months_since_first_funding = num_of_months(funding_round.funded_date,
                                                   funding_round.cb_company.first_funding)
        if months_since_first_funding != 0:
            funding_and_market_research_data.append({'x': months_since_first_funding,
                                                     'y': funding_round.funded_amt})
    # NOTE(review): this literal defines 'labels' twice; the second
    # ([] at the bottom) wins, so 'labels': ['Scatter'] is dead — likely
    # unintentional.
    selected_comp_info_dict['mrkt_funding_research'] = {
        'labels': ['Scatter'],
        'datasets': [{
            'label': 'Total {} funding raised since founding by similar market types'.format(funding_rounds_lst[-1]['round_name']),
            'data': funding_and_market_research_data,
            'fill': False,
            'backgroundColor': 'rgba(75,192,192,0.4)',
            'pointBorderColor': 'rgba(75,192,192,1)',
            'pointBackgroundColor': '#fff',
            'pointBorderWidth': 1,
            'pointHoverRadius': 5,
            'pointHoverBackgroundColor': 'rgba(75,192,192,1)',
            'pointHoverBorderColor': 'rgba(220,220,220,1)',
            'pointHoverBorderWidth': 2,
            'pointRadius': 1,
            'pointHitRadius': 10,
        }],
        'labels': []
    }
    # add crunchbase info to selected_comp_info_dict
    selected_comp_info_dict['crunchbase'] = [
        {'cb_comp_name': joined_cb_data.cb_company_name},
        {'comp_url': joined_cb_data.cb_url},
        {'state': joined_cb_data.state_code},
        {'city': joined_cb_data.city_name},
        {'funding_rounds': funding_rounds_lst},
        {'total_funding': '{:,}'.format(joined_cb_data.total_funding)},
        {'markets': ', '.join(market_type_name_lst)}
    ]
    # check to see if fc info already stored in db
    check_fc_comp_db = (
        m.FCCompany.query.
        options(joinedload(m.FCCompany.social_media)).
        options(joinedload(m.FCCompany.company_links)).
        options(joinedload(m.FCCompany.industries).
                joinedload(m.CompanyIndustry.industry_type)).
        options(joinedload(m.FCCompany.cb_company)).
        filter(m.FCCompany.cb_company_id == selected_cb_comp_id).
        first()
    )

    # function that takes in the joined fc company objects and turns into a lst of dicts
    def create_fc_comp_info_lst(comp_obj):
        social_media_lst = []
        company_links_lst = []
        industry_lst = []
        for item in comp_obj.social_media:
            social_media_site = [
                {'site_name': item.sm_name},
                {'site_url': item.sm_site_url},
                {'site_bio': item.sm_bio}
            ]
            social_media_lst.append(social_media_site)
        for item in comp_obj.company_links:
            company_link_item = [
                {'link_type': item.link_type},
                {'link_url': item.link_url}
            ]
            company_links_lst.append(company_link_item)
        for item in comp_obj.industries:
            company_industry_item = [
                {'industry_type': item.industry_type.industry_name}
            ]
            industry_lst.append(company_industry_item)
        fc_comp_info_lst = [
            {'fc_comp_name': comp_obj.fc_company_name},
            {'comp_domain': comp_obj.fc_company_domain},
            {'company_bio': comp_obj.fc_company_bio},
            {'logo_url': comp_obj.logo_image_url},
            {'founded': comp_obj.founded},
            {'employees': '{:,}'.format(int(comp_obj.num_employees))},
            {'social_media': social_media_lst},
            {'industries': industry_lst}
        ]
        return fc_comp_info_lst

    # checks if fc company exists in db or calls api and adds fc info to selected_comp_info_dict
    if check_fc_comp_db != None:
        joined_fc_data = create_fc_comp_info_lst(check_fc_comp_db)
        selected_comp_info_dict['fullcontact'] = joined_fc_data
    else:
        # Fetch from the FullContact API, persist, then re-query with the
        # same eager-loads so create_fc_comp_info_lst sees loaded relations.
        comp_domain = u.get_domain(joined_cb_data)
        api_comp_info = u.fetch_fc_company(comp_domain)
        # add to db
        u.load_fc_industry_types(api_comp_info)
        u.load_fc_company(api_comp_info, comp_domain, joined_cb_data.cb_company_id)
        u.db.session.commit()
        get_fc_comp_obj = (
            m.FCCompany.query.
            options(joinedload(m.FCCompany.social_media)).
            options(joinedload(m.FCCompany.company_links)).
            options(joinedload(m.FCCompany.industries).
                    joinedload(m.CompanyIndustry.industry_type)).
            options(joinedload(m.FCCompany.cb_company)).
            filter(m.FCCompany.cb_company_id == selected_cb_comp_id).
            first()
        )
        joined_fc_data = create_fc_comp_info_lst(get_fc_comp_obj)
        selected_comp_info_dict['fullcontact'] = joined_fc_data
    # print(selected_comp_info_dict)
    return jsonify(selected_comp_info_dict)
def aggregate(source_company_id):
    """Merge one source_company row into the canonical company tables.

    Resolves (or creates) the company record, then merges aliases, domains,
    footprints, members, fundings/investors, artifacts and homepages from
    the source tables, and finally publishes a Kafka message so downstream
    consumers reprocess the company. Python 2 code (sys.maxint,
    `except Exception, e`).
    """
    logger.info("source_company_id: %s" % source_company_id)
    s = conn.get("select * from source_company where id=%s", source_company_id)
    if s == None:
        return
    company_id = find_company(s)
    #company
    if company_id is not None:
        # Existing company: link the source row and backfill the code.
        logger.info("Update company: %s" % s["fullName"])
        conn.update("update source_company set companyId=%s where id=%s",
                    company_id, source_company_id)
        company = conn.get("select * from company where id=%s", company_id)
        if company["code"] is None or company["code"] == "":
            code = aggregator_util.get_company_code(company["name"])
            conn.update("update company set code=%s where id=%s", code, company_id)
        # Data comes from different sources or different projects — how to
        # merge? Pick the source whose homepage has the best Alexa rank.
        # (2020 appears to be a "dead/closed" companyStatus code —
        # presumably; verify against the status table.)
        css = conn.query(
            "select * from source_company where companyId=%s and companyStatus!=2020",
            company_id)
        rank = sys.maxint
        selected = None
        if len(css) > 1:
            for cs in css:
                if cs["companyStatus"] == 2020:
                    continue
                if cs["description"] is None or cs["description"] == "":
                    continue
                # type=4010 artifacts are the company homepages.
                sa = conn.query(
                    "select * from source_artifact where sourceCompanyId=%s and type=4010",
                    cs["id"])
                for a in sa:
                    domain = util.get_domain(a["link"])
                    if domain is None or domain == "":
                        continue
                    myRank = None
                    # Reuse a cached rank if it is at most 3 days old.
                    if a["rank"] is not None:
                        diff = datetime.datetime.today() - a["rankDate"]
                        if diff.days <= 3:
                            myRank = a["rank"]
                    if myRank is None:
                        # Otherwise fetch a fresh Alexa rank and cache it.
                        alex = trends_tool.get_alexa(domain)
                        logger.info("%s, %s, %s" % (cs["name"], domain, alex["global_rank"]))
                        try:
                            if alex["global_rank"] == "-":
                                myRank = -1
                            else:
                                myRank = int(alex["global_rank"].replace(",", ""))
                            conn.update(
                                "update source_artifact set rank=%s, rankDate=now() where id=%s",
                                myRank, a["id"])
                        except:
                            continue
                    # Lower Alexa rank = more popular; -1 means "unranked".
                    if myRank != -1 and myRank < rank:
                        rank = myRank
                        selected = cs
            if selected is None:
                selected = s # TODO
        else:
            # NOTE(review): if css is empty (all sources have status 2020)
            # css[0] raises IndexError — confirm this cannot happen here.
            selected = css[0]
        if selected is not None and selected["companyStatus"] != 2020:
            # Overwrite the canonical company row with the selected source.
            logger.info("selected=%s" % selected["name"])
            sql = "update company set \
                name=%s,fullName=%s,description=%s,brief=%s,\
                productDesc=%s, modelDesc=%s, operationDesc=%s, teamDesc=%s, marketDesc=%s, compititorDesc=%s, advantageDesc=%s, planDesc=%s, \
                round=%s,roundDesc=%s,companyStatus=%s,fundingType=%s,preMoney=%s,currency=%s,\
                locationId=%s,address=%s,phone=%s,establishDate=%s,logo=%s,\
                headCountMin=%s,headCountMax=%s,\
                modifyTime=now() \
                where id=%s"
            conn.update(
                sql, selected["name"], selected["fullName"],
                selected["description"], selected["brief"],
                selected.get("productDesc"), selected.get("modelDesc"),
                selected.get("operationDesc"), selected.get("teamDesc"),
                selected.get("marketDesc"), selected.get("compititorDesc"),
                selected.get("advantageDesc"), selected.get("planDesc"),
                selected["round"], selected["roundDesc"],
                selected["companyStatus"], selected["fundingType"],
                selected["preMoney"], selected["currency"],
                selected["locationId"], selected["address"], selected["phone"],
                selected["establishDate"], selected["logo"],
                selected["headCountMin"], selected["headCountMax"],
                company_id)
    else:
        # No existing company matched: insert a new one (unless dead/2020).
        logger.info("New company: %s" % s["fullName"])
        if s["companyStatus"] != 2020:
            code = aggregator_util.get_company_code(s["name"])
            sql = "insert company(code,name,fullName,description,brief,\
                productDesc, modelDesc, operationDesc, teamDesc, marketDesc, compititorDesc, advantageDesc, planDesc, \
                round,roundDesc,companyStatus,fundingType,preMoney,currency,\
                locationId,address,phone,establishDate,logo,\
                headCountMin,headCountMax,\
                active,createTime,modifyTime) \
                values(%s,%s,%s,%s,%s,\
                %s,%s,%s,%s,%s,%s,%s,%s, \
                %s,%s,%s,%s,%s,%s,\
                %s,%s,%s,%s,%s,\
                %s,%s,\
                %s,now(),now())"
            company_id = conn.insert(
                sql, code, s["name"], s["fullName"], s["description"], s["brief"],
                s.get("productDesc"), s.get("modelDesc"), s.get("operationDesc"),
                s.get("teamDesc"), s.get("marketDesc"), s.get("compititorDesc"),
                s.get("advantageDesc"), s.get("planDesc"),
                s["round"], s["roundDesc"], s["companyStatus"], s["fundingType"],
                s["preMoney"], s["currency"],
                s["locationId"], s["address"], s["phone"], s["establishDate"],
                s["logo"], s["headCountMin"], s["headCountMax"], 'Y')
            conn.update("update source_company set companyId=%s where id=%s",
                        company_id, source_company_id)
        else:
            return
    # company_alias
    add_company_alias(company_id, s["fullName"])
    # domain & company_alias
    source_domains = conn.query(
        "select * from source_domain where sourceCompanyId=%s", source_company_id)
    for sd in source_domains:
        # "企业" = "enterprise": registrant is a company, so its name is an alias.
        if sd["organizerType"] == "企业":
            add_company_alias(company_id, sd["organizer"])
        if sd["organizer"] is not None:
            domain = conn.get(
                "select * from domain where companyId=%s and domain=%s and organizer=%s",
                company_id, sd["domain"], sd["organizer"])
        else:
            domain = conn.get(
                "select * from domain where companyId=%s and domain=%s limit 1",
                company_id, sd["domain"])
        if domain is None:
            # beianhao = Chinese ICP registration number fields.
            sql = "insert domain(companyId,domain,organizer,organizerType,beianhao,mainBeianhao,\
                websiteName,homepage,beianDate,expire,\
                active,createTime,modifyTime)\
                values(%s,%s,%s,%s,%s,%s,\
                %s,%s,%s,%s,\
                'Y',now(),now())"
            conn.insert(sql, company_id, sd["domain"],sd["organizer"],sd["organizerType"],sd["beianhao"],sd["mainBeianhao"],\
                        sd["websiteName"],sd["homepage"],sd["beianDate"],sd["expire"]
                        )
    # footprint
    source_footprints = conn.query(
        "select * from source_footprint where sourceCompanyId=%s and footprintId is null",
        source_company_id)
    for sf in source_footprints:
        fp = conn.get(
            "select * from footprint where companyId=%s and footDate=%s and description=%s",
            company_id, sf["footDate"], sf["description"])
        if fp is None:
            sql = "insert footprint(companyId,footDate,description,active,createTime,modifyTime) \
                values(%s,%s,%s,'Y',now(),now())"
            footprint_id = conn.insert(sql, company_id, sf["footDate"], sf["description"])
        else:
            footprint_id = fp["id"]
        conn.update("update source_footprint set footprintId=%s where id=%s",
                    footprint_id, sf["id"])
    # establishDate: backfill from business-registration data (gongshang),
    # falling back to the earliest footprint date.
    company1 = conn.get("select * from company where id=%s", company_id)
    if company1["establishDate"] is None:
        gongshang = conn.get(
            "select g.* from gongshang_base g join company_alias a on g.companyAliasId=a.id \
            join company c on a.companyId=c.id where c.id=%s order by g.establishTime limit 1",
            company_id)
        if gongshang is not None:
            conn.update("update company set establishDate=%s where id=%s",
                        gongshang["establishTime"], company_id)
        else:
            fp = conn.get(
                "select * from footprint where companyId=%s order by footDate limit 1",
                company_id)
            if fp is not None:
                conn.update("update company set establishDate=%s where id=%s",
                            fp["footDate"], company_id)
    # member
    rels = conn.query(
        "select * from source_company_member_rel where sourceCompanyId=%s",
        source_company_id)
    for rel in rels:
        if rel["companyMemberRelId"] is not None:
            # already matched
            continue
        source_member_id = rel["sourceMemberId"]
        source_member = conn.get("select * from source_member where id=%s",
                                 source_member_id)
        if source_member is None:
            continue
        member_id = source_member["memberId"]
        if member_id is None:
            member_id = aggregate_member(company_id, source_member)
        cmrel = conn.get(
            "select * from company_member_rel where companyId=%s and memberId=%s",
            company_id, member_id)
        if cmrel is None:
            cmrelId = conn.insert(
                "insert company_member_rel(\
                companyId,memberId,position,joinDate,leaveDate,type,\
                active,createTime,modifyTime) \
                values(%s,%s,%s,%s,%s,%s,'Y',now(),now())",
                company_id, member_id, rel["position"], rel["joinDate"],
                rel["leaveDate"], rel["type"])
        else:
            cmrelId = cmrel["id"]
        conn.update(
            "update source_company_member_rel set companyMemberRelId=%s where id=%s",
            cmrelId, rel["id"])
    # funding & investor
    sfs = conn.query("select * from source_funding where sourceCompanyId=%s",
                     source_company_id)
    for sf in sfs:
        if sf["fundingId"] is None:
            #f = conn.get("select * from funding where companyId=%s and round=%s and roundDesc=%s",
            #    company_id, sf["round"], sf["roundDesc"])
            f = conn.get(
                "select * from funding where companyId=%s and round=%s limit 1",
                company_id, sf["round"])
            if f is None:
                # fundingType 8030 — presumably a source/type constant; verify.
                sql = "insert funding(companyId,preMoney,postMoney,investment,\
                    round,roundDesc,currency,precise,fundingDate,fundingType,\
                    active,createTime,modifyTime) \
                    values(%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,'Y',now(),now())"
                fundingId = conn.insert(sql, company_id, sf["preMoney"],
                                        sf["postMoney"], sf["investment"],
                                        sf["round"], sf["roundDesc"],
                                        sf["currency"], sf["precise"],
                                        sf["fundingDate"], 8030)
            else:
                fundingId = f["id"]
            conn.update("update source_funding set fundingId=%s where id=%s",
                        fundingId, sf["id"])
        else:
            fundingId = sf["fundingId"]
        sfirs = conn.query(
            "select * from source_funding_investor_rel where sourceFundingId=%s",
            sf["id"])
        for sfir in sfirs:
            if sfir["fundingInvestorRelId"] is not None:
                continue
            source_investor = conn.get(
                "select * from source_investor where id=%s", sfir["sourceInvestorId"])
            if source_investor is None:
                continue
            investor_id = aggregate_investor(source_investor)
            funding_investor_rel = conn.get(
                "select * from funding_investor_rel \
                where investorId=%s and fundingId=%s", investor_id, fundingId)
            if funding_investor_rel is None:
                sql = "insert funding_investor_rel(fundingId, investorId, currency, investment,\
                    precise,active,createTime,modifyTime) \
                    values(%s,%s,%s,%s,%s,'Y',now(),now())"
                fundingInvestorRelId = conn.insert(sql, fundingId, investor_id,
                                                   sfir["currency"],
                                                   sfir["investment"],
                                                   sfir["precise"])
            else:
                fundingInvestorRelId = funding_investor_rel["id"]
            conn.update(
                "update source_funding_investor_rel set fundingInvestorRelId=%s where id=%s",
                fundingInvestorRelId, sfir["id"])
    # update company stage from the latest/highest funding round
    funding = conn.get(
        "select * from funding where companyId=%s order by round desc, fundingDate desc limit 1",
        company_id)
    if funding is not None:
        conn.update("update company set round=%s, roundDesc=%s where id=%s",
                    funding["round"], funding["roundDesc"], company_id)
    # artifact (non-homepage artifacts; homepages/type 4010 are handled below)
    sas = conn.query("select * from source_artifact where sourceCompanyId=%s",
                     source_company_id)
    for sa in sas:
        if sa["artifactId"] is not None:
            continue
        if sa["type"] == 4010:
            continue
            '''
            if sa["link"] is not None and sa["link"] != "":
                link = util.norm_url(sa["link"])
                a = conn.get("select * from artifact where type=4010 and (name=%s or link=%s)",
                    sa["name"], link)
                if a is None:
                    sql = "insert artifact(companyId,name,description,link,type,active,createTime,modifyTime) \
                        values(%s,%s,%s,%s,4010,'Y',now(),now())"
                    artifact_id = conn.insert(sql, company_id,sa["name"],sa["description"],link
                                              )
                else:
                    artifact_id = a["id"]
                conn.update("update source_artifact set artifactId=%s where id=%s",
                            artifact_id, sa["id"])
            '''
        else:
            a = conn.get("select * from artifact where type=%s and name=%s",
                         sa["type"], sa["name"])
            if a is None:
                sql = "insert artifact(companyId,name,description,link,type,active,createTime,modifyTime) \
                    values(%s,%s,%s,%s,%s,'Y',now(),now())"
                artifact_id = conn.insert(sql, company_id, sa["name"],
                                          sa["description"], sa["link"], sa["type"])
            else:
                artifact_id = a["id"]
            conn.update("update source_artifact set artifactId=%s where id=%s",
                        artifact_id, sa["id"])
    # Resolve each registered homepage (following redirects) and store it
    # as a type-4010 artifact with meta keywords/description.
    domains = conn.query("select * from domain where companyId=%s", company_id)
    for domain in domains:
        # NOTE(review): `str` shadows the builtin for the rest of the loop.
        str = domain["homepage"]
        if str is None:
            continue
        homepages = str.split(",")
        for h in homepages:
            logger.info(h)
            homepage = conn.get(
                "select * from homepage where originalHomepage=%s", h)
            lastHomepage = None
            tags = None
            desc = None
            if homepage is None:
                url = "http://" + h
                (flag, r) = my_request.get_no_sesion(logger, url)
                if flag == -1:
                    # Request failed entirely: record status -1.
                    conn.insert(
                        "insert homepage(companyId,originalHomepage,status,createTime,modifyTime) \
                        values(%s,%s,%s,now(),now())", company_id, h, -1)
                else:
                    logger.info("status=%s, url=%s" % (r.status_code, r.url))
                    # Record where redirects finally landed.
                    netloc = urlsplit(r.url).netloc
                    conn.insert(
                        "insert homepage(companyId,originalHomepage,lastHomepage,status,createTime,modifyTime) \
                        values(%s,%s,%s,%s,now(),now())",
                        company_id, h, netloc, r.status_code)
                    lastHomepage = "http://" + netloc
                    r.encoding = r.apparent_encoding
                    d = pq(r.text)
                    tags = d('meta[name="keywords"]').attr('content')
                    desc = d('meta[name="description"]').attr('content')
            else:
                if homepage["lastHomepage"] is not None:
                    lastHomepage = "http://" + homepage["lastHomepage"]
            if lastHomepage is not None:
                a = conn.get(
                    "select * from artifact where type=4010 and link=%s", lastHomepage)
                if a is None:
                    sql = "insert artifact(companyId,name,link,type,active,createTime,modifyTime,domain,alexa,tags,description) \
                        values(%s,null,%s,4010,'Y',now(),now(),%s,'Y',%s,%s)"
                    artifact_id = conn.insert(sql, company_id, lastHomepage,
                                              util.get_domain(lastHomepage), tags, desc)
                else:
                    sql = "update artifact set domain=%s, alexa='Y',tags=%s, description=%s where id=%s"
                    conn.update(sql, util.get_domain(lastHomepage), tags, desc, a["id"])
    # news
    aggregator_util.merge_news(source_company_id, company_id, conn)
    # job
    aggregator_util.merge_job(source_company_id, company_id, conn)
    # others
    company = conn.get('select * from company where id = %s', company_id)
    full_name = company['fullName']
    name = company['name']
    # aggregator_util.merge_weibo(company_id, name, full_name, conn)
    # aggregator_util.merge_wechat(company_id, name, full_name, conn)
    # result = trends_tool.haosou_news(result['name'])
    # Publish to Kafka, retrying forever (once a minute) on failure.
    msg = {"type": "company", "id": company_id}
    flag = False
    while flag == False:
        try:
            kafkaProducer.send_messages("aggregator_v2", json.dumps(msg))
            flag = True
        except Exception, e:
            logger.exception(e)
            time.sleep(60)
html_parsed= app.get("html_parsed") if html_parsed is None: continue json_content = app.get("json") if json_content is None: continue urls = html_parsed.get('urls') if urls == None: continue companyId = app.get("companyId") if companyId is None: for url in urls: try: domain = util.get_domain(url) except: continue if domain is None: continue d = conn.get("select * from domain where domain=%s limit 1",domain) if d is not None: companyId = d["companyId"] break if companyId is not None: logger.info("companyId=%s, app=%s" % (companyId,app["url"])) a = conn.get("select * from artifact where type=4040 and domain=%s limit 1", app["appId"]) if a is None:
def setup_server4(hostname=None, domain=None, pc="1",
                  forge_modules=("puppetlabs/stdlib", "puppetlabs/concat",
                                 "puppetlabs/firewall", "puppetlabs/apt")):
    """Setup Puppet 4 server.

    :param hostname: short hostname; autodetected via util.get_hostname()
        when not given
    :param domain: DNS domain; autodetected via util.get_domain() when not
        given
    :param pc: Puppet Collection identifier passed to
        install_puppetlabs_release_package()
    :param forge_modules: Puppet Forge modules to install. The default is a
        tuple rather than a list to avoid the mutable-default-argument
        pitfall (the previous list default was shared across calls).
    """
    import package, util, git, service

    # Local files to copy over
    basedir = "/etc/puppetlabs"
    local_master_conf = "files/puppet-master.conf"
    remote_master_conf = basedir + "/puppet/puppet.conf"
    local_hiera_yaml = "files/hiera.yaml"
    remote_hiera_yaml = basedir + "/code/hiera.yaml"
    local_fileserver_conf = "files/fileserver.conf"
    remote_fileserver_conf = basedir + "/puppet/fileserver.conf"
    local_environments = "files/environments"
    remote_codedir = basedir + "/code"
    local_gitignore = "files/gitignore"
    remote_gitignore = basedir + "/.gitignore"
    modules_dir = basedir + "/code/environments/production/modules"

    # Verify that all the local files are in place. Close the probe handles
    # instead of leaking them (previously the files were opened and never
    # closed).
    try:
        open(local_master_conf).close()
        open(local_hiera_yaml).close()
    except IOError:
        print("ERROR: some local config files were missing!")
        sys.exit(1)

    # Autodetect hostname and domain from env.host, if they're not overridden
    # with method parameters
    if not hostname:
        hostname = util.get_hostname()
    if not domain:
        domain = util.get_domain()

    # Ensure that clock is correct before doing anything else, like creating
    # SSL certificates.
    util.set_clock()

    # Start the install
    install_puppetlabs_release_package(pc)
    package.install("puppetserver")
    util.put_and_chown(local_master_conf, remote_master_conf)
    util.put_and_chown(local_hiera_yaml, remote_hiera_yaml)
    util.put_and_chown(local_fileserver_conf, remote_fileserver_conf)
    util.put_and_chown(local_gitignore, remote_gitignore)
    util.add_to_path("/opt/puppetlabs/bin")
    util.set_hostname(hostname + "." + domain)
    # "facter fqdn" return a silly name on EC2 without this
    util.add_host_entry("127.0.1.1", hostname, domain)

    # Copy over template environments
    util.put_and_chown(local_environments, remote_codedir)

    # Add modules from Puppet Forge. These should in my experience be limited
    # to those which provide new types and providers. In particular
    # puppetlabs' modules which control some daemon (puppetdb, postgresql,
    # mysql) are extremely complex, very prone to breakage and nasty to
    # debug.
    for module in forge_modules:
        add_forge_module(module)

    # Git setup
    git.install()
    git.init(basedir)
    if not exists(modules_dir):
        sudo("mkdir " + modules_dir)
    git.init(modules_dir)
    git.add_submodules(basedir=modules_dir)
    git.add_all(basedir)
    git.commit(basedir, "Initial commit")

    # Link hieradata and manifests from production to testing. This keeps the
    # testing environment identical to the production environment. The
    # modules directory in testing is separate and may (or may not) contain
    # modules that override or complement those in production.
    util.symlink(remote_codedir + "/environments/production/hieradata",
                 remote_codedir + "/environments/testing/hieradata")
    util.symlink(remote_codedir + "/environments/production/manifests",
                 remote_codedir + "/environments/testing/manifests")

    # Start puppetserver to generate the CA and server certificates/keys
    service.start("puppetserver")
    run_agent(noop="False")
def get_view_fullurl(self): return urlparse.urljoin(util.get_domain(), self.get_view_url())
def aggregate(source_company_id):
    """Merge one `source_company` row into the canonical company tables.

    Steps, in order:
      1. Find an existing company via find_company(), or insert a new one
         (rows with companyStatus == 2020 are skipped entirely).
      2. Write the company id back onto the source row.
      3. Add company aliases and domains from `source_domain`.
      4. Link `source_artifact` rows (website 4010 / iTunes 4040 /
         Android 4050) to canonical `artifact` rows, inserting them when
         missing.
      5. Publish {"type": "company", "id": company_id} to the
         "aggregator_v2" Kafka topic, retrying every 60s until it succeeds.

    NOTE(review): this module is Python 2 (`except Exception, e` below).
    """
    logger.info("source_company_id: %s" % source_company_id)
    s = conn.get("select * from source_company where id=%s", source_company_id)
    if s is None:
        # Unknown source row; nothing to aggregate.
        return
    company_id = find_company(s)
    #company
    if company_id is not None:
        logger.info("Update company: %s" % s["name"])
    else:
        logger.info("New company: %s" % s["name"])
        # Status 2020 is deliberately not created -- presumably a "do not
        # aggregate" state; confirm against the status-code table.
        if s["companyStatus"] != 2020:
            code = get_company_code(s["name"])
            # 41020 is a hard-coded company `type`; 'Y' fills `active`.
            sql = "insert company(code,name,fullName,description,brief,\
productDesc, modelDesc, operationDesc, teamDesc, marketDesc, compititorDesc, advantageDesc, planDesc, \
round,roundDesc,companyStatus,fundingType,preMoney,currency,\
locationId,address,phone,establishDate,logo,type,\
headCountMin,headCountMax,\
active,createTime,modifyTime) \
values(%s,%s,%s,%s,%s,\
%s,%s,%s,%s,%s,%s,%s,%s, \
%s,%s,%s,%s,%s,%s,\
%s,%s,%s,%s,%s,41020,\
%s,%s,\
%s,now(),now())"
            company_id = conn.insert(
                sql, code, s["name"], s["fullName"], s["description"], s["brief"],
                s.get("productDesc"), s.get("modelDesc"), s.get("operationDesc"),
                s.get("teamDesc"), s.get("marketDesc"), s.get("compititorDesc"),
                s.get("advantageDesc"), s.get("planDesc"),
                s["round"], s["roundDesc"], s["companyStatus"], s["fundingType"],
                s["preMoney"], s["currency"],
                s["locationId"], s["address"], s["phone"], s["establishDate"],
                s["logo"], s["headCountMin"], s["headCountMax"], 'Y')
        else:
            return
    logger.info("companyId=%s", company_id)
    conn.update("update source_company set companyId=%s where id=%s",
                company_id, source_company_id)

    # company_alias
    add_company_alias(company_id, s["fullName"])

    # domain & company_alias
    source_domains = conn.query(
        "select * from source_domain where sourceCompanyId=%s",
        source_company_id)
    for sd in source_domains:
        # "企业" is "enterprise": the registrant name is then usable as a
        # company alias.
        if sd["organizerType"] == "企业":
            add_company_alias(company_id, sd["organizer"])
        if sd["organizer"] is not None:
            domain = conn.get(
                "select * from domain where companyId=%s and domain=%s and organizer=%s",
                company_id, sd["domain"], sd["organizer"])
        else:
            domain = conn.get(
                "select * from domain where companyId=%s and domain=%s limit 1",
                company_id, sd["domain"])
        if domain is None:
            sql = "insert domain(companyId,domain,organizer,organizerType,beianhao,mainBeianhao,\
websiteName,homepage,beianDate,expire,\
active,createTime,modifyTime)\
 values(%s,%s,%s,%s,%s,%s,\
%s,%s,%s,%s,\
'Y',now(),now())"
            conn.insert(sql, company_id, sd["domain"], sd["organizer"],
                        sd["organizerType"], sd["beianhao"], sd["mainBeianhao"],
                        sd["websiteName"], sd["homepage"], sd["beianDate"],
                        sd["expire"])
        #TODO handle `expire` (original note: "expire处理")

    # artifact
    sas = conn.query("select * from source_artifact where sourceCompanyId=%s",
                     source_company_id)
    for sa in sas:
        if sa["artifactId"] is not None:
            # Already linked to a canonical artifact.
            continue
        if sa["type"] == 4010: #website
            if sa["link"] is not None and sa["link"] != "":
                link = util.norm_url(sa["link"])
                try:
                    domain = util.get_domain(link)
                except:
                    # NOTE(review): bare except silently drops links whose
                    # domain cannot be parsed.
                    continue
                a = conn.get(
                    "select * from artifact where companyId=%s and type=4010 and (name=%s or link=%s) limit 1",
                    company_id, sa["name"], link)
                if a is None:
                    sql = "insert artifact(companyId,name,description,link,domain,type,active,createTime,modifyTime) \
values(%s,%s,%s,%s,%s,4010,'Y',now(),now())"
                    artifact_id = conn.insert(sql, company_id, sa["name"],
                                              sa["description"], link, domain)
                else:
                    artifact_id = a["id"]
                conn.update(
                    "update source_artifact set artifactId=%s where id=%s",
                    artifact_id, sa["id"])
        elif sa["type"] == 4040: #itunes
            # The iTunes app id is the digit run after "id" in the store URL;
            # it is stored in artifact.domain.
            result = util.re_get_result('id(\d*)', sa["link"])
            if result is None:
                continue
            app_id, = result
            a = conn.get(
                "select * from artifact where type=4040 and domain=%s", app_id)
            if a is None:
                sql = "insert artifact(companyId,name,description,link,domain,type,active,createTime,modifyTime) \
values(%s,%s,%s,%s,%s,4040,'Y',now(),now())"
                artifact_id = conn.insert(sql, company_id, sa["name"],
                                          sa["description"], sa["link"], app_id)
            else:
                artifact_id = a["id"]
            conn.update("update source_artifact set artifactId=%s where id=%s",
                        artifact_id, sa["id"])
        elif sa["type"] == 4050: #android
            package = None
            # NOTE(review): `type` shadows the builtin; only `market` is used
            # below.
            type, market = util.get_market(sa["link"])
            if market == 16030: #wandoujia
                result = util.re_get_result('wandoujia.com/apps/(.*)', sa["link"])
                if result is None:
                    continue
                package, = result
            elif market == 16040:
                result = util.re_get_result('apkName=(.*)', sa["link"])
                if result is None:
                    continue
                package, = result
            else:
                # Unsupported Android market; skip this artifact.
                continue
            a = conn.get(
                "select * from artifact where type=4050 and domain=%s", package)
            if a is None:
                sql = "insert artifact(companyId,name,description,link,domain,type,active,createTime,modifyTime) \
values(%s,%s,%s,%s,%s,4050,'Y',now(),now())"
                artifact_id = conn.insert(sql, company_id, sa["name"],
                                          sa["description"], sa["link"], package)
            else:
                artifact_id = a["id"]
            conn.update("update source_artifact set artifactId=%s where id=%s",
                        artifact_id, sa["id"])

    # Notify the downstream aggregator; retry forever, sleeping 60s after
    # each failure.
    msg = {"type": "company", "id": company_id}
    flag = False
    while flag == False:
        try:
            kafkaProducer.send_messages("aggregator_v2", json.dumps(msg))
            flag = True
        except Exception, e:
            logger.exception(e)
            time.sleep(60)