Example No. 1
 def _bulk_upload(self, data, user):
     print user
     data, user = json.loads(data), json.loads(user)
     _data = pd.DataFrame(data)[["company_name"]]
     # Point every uploaded row at the uploading user and their company.
     _user = Parse()._pointer("_User", user["objectId"])
     _data["user"] = [_user for i in _data.index]
     _data["user_company"] = [user["user_company"] for i in _data.index]
     _list = {
         "user": _user,
         "user_company": user["user_company"],
         "list_type": "upload",
         "name": "Upload - " + arrow.utcnow().format("DD-MM-YYYY")
     }
     _list = Prospecter().create("CompanyProspectList", _list).json()
     print _list
     _list = Parse()._pointer("CompanyProspectList", _list["objectId"])
     _data["lists"] = [[_list] for i in _data.index]
     Prospecter()._batch_df_create("CompanyProspect", _data)
     # Fan each uploaded company out to the research endpoint.
     for i in data:
         #q.enqueue(Companies()._bulk, i["company_name"])
         r = requests.get(
             "https://clear-spark.herokuapp.com/v1/companies/research",
             params={
                 "bulk": "bulk",
                 "api_key":
                 "9a31a1defcdc87a618e12970435fd44741d7b88794f7396cbec486b8",
                 "company_name": i["company_name"]
             })
         print r.text
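
Both arguments arrive JSON-encoded, so a caller serializes them first. A minimal invocation sketch (the Uploads host class and the ids are hypothetical; only the method itself is shown above):

    import json

    rows = [{"company_name": "Acme Inc"}, {"company_name": "Globex"}]
    user = {"objectId": "hypotheticalUserId",
            "user_company": {"__type": "Pointer",
                             "className": "UserCompany",
                             "objectId": "hypotheticalCompanyId"}}
    # _bulk_upload expects both payloads as JSON strings.
    Uploads()._bulk_upload(json.dumps(rows), json.dumps(user))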
Example No. 2
 def _google_contact_import(self, access_token, user, user_company):
     print access_token, user, user_company
     GOOGLE_CLIENT_ID = "1949492796-qq27u1gnqoct2n6p3hctb0cto58qel5i.apps.googleusercontent.com"
     GOOGLE_CLIENT_SECRET = "GpZlpLB66sU5v9SDPnPf-Ov1"
     # GData with access token
     token = gdata.gauth.OAuth2Token(
         client_id=GOOGLE_CLIENT_ID,
         client_secret=GOOGLE_CLIENT_SECRET,
         scope='https://www.google.com/m8/feeds',
         user_agent='app.testing',
         access_token=access_token)
      
     contact_client = gdata.contacts.client.ContactsClient()
     token.authorize(contact_client)
      
     feed = contact_client.GetContacts()
      
     # GData path: walk the feed, printing each contact's name and addresses.
     for entry in feed.entry:
       print entry.title.text
       for e in entry.email:
         print e.address
      
     # JSON with access token
     contacts = []
     for i in range(0, 10):
         index = i * 50 + 1  # start-index is 1-based: pages begin at 1, 51, 101, ...
         url = 'https://www.google.com/m8/feeds/contacts/default/full?access_token={0}&alt=json&max-results=50&start-index={1}'
         url = url.format(access_token, index)
         res = requests.get(url).text             
         data = json.loads(res)
         if "entry" not in data["feed"].keys():
             break
         contacts = contacts + data["feed"]["entry"]
         print len(contacts)
         
     contacts_ = []
     # Contacts with no email address come back as NaN; skip those rows.
     rows = pd.DataFrame(contacts)[["gd$email", "title"]].dropna(subset=["gd$email"])
     for i, row in rows.iterrows():
         contacts_.append({"email": row["gd$email"][0]["address"],
                           "name": row["title"]["$t"],
                           "domain": row["gd$email"][0]["address"].split("@")[-1]})
     contacts_ = pd.DataFrame(contacts_)
     contacts_["source"] = "gmail"
     contacts_["db_type"] = "inbox"
     contacts_["user"]=[Parse()._pointer("_User", user) for i in contacts_.index]
     contacts_["user_company"] = [Parse()._pointer("UserCompany",user_company) for i in contacts_.index]
     Parse()._batch_df_create("UserContact", contacts_)
     Prospecter()._batch_df_create("UserContact", contacts_)
     print Prospecter().update("_User/"+user, 
             {"google_integration":arrow.utcnow().timestamp, 
              "google_token":access_token}).json()
Example No. 3
    def _add_reports(self, list_name, companies, company_list, _profile):
        company_list_id = company_list['objectId']
        _user, _company = companies[0]['user'], companies[0]['company']
        data = {'name': list_name, 'user': _user, 'company': _company}
        _company_list = Parse()._pointer('CompanyProspectList',
                                         company_list_id)
        data['parent_list'], data['list_type'] = _company_list, 'mining_job'
        _report = {
            'report_type': 'company_employee_mining_job',
            'profile': _profile
        }
        signal_report = Parse().create('SignalReport', _report).json()
        _report = Parse()._pointer('SignalReport', signal_report['objectId'])
        _list_id = Prospecter().create('ProspectList', data).json()['objectId']
        _prospect_list = Parse()._pointer('ProspectList', _list_id)
        # Reuse _report as the AddUnique payload linking the new report to the profile.
        _report = {'reports': {'__op': 'AddUnique', 'objects': [_report]}}
        _list = {
            'prospect_lists': {
                '__op': 'AddUnique',
                'objects': [_prospect_list]
            }
        }
        r = Prospecter().update('CompanyProspectList/' + company_list_id,
                                _list)
        rr = Prospecter().update('ProspectProfile/' + _profile['objectId'],
                                 _report)
        print r.json(), rr.json()

        return (signal_report['objectId'], _list_id)
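
Every example leans on Parse()._pointer, whose implementation is not shown. Judging by how it is used, it builds the standard Parse REST pointer literal; a sketch:

    def _pointer(self, class_name, object_id):
        # Parse REST API pointer literal.
        return {"__type": "Pointer",
                "className": class_name,
                "objectId": object_id}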
Example No. 4
 def _score_report(self, _report):
     _report = Parse()._pointer("SignalReport", _report)
     qry = {"where": json.dumps({"report": _report})}
     qry["limit"] = 1000
     # TODO - where companies are null / undefined
     signals = Prospecter().get("CompanySignal", qry).json()["results"]
     api_key = "9a31a1defcdc87a618e12970435fd44741d7b88794f7396cbec486b8"
     for company in signals:
         company_name = company["company_name"]
         q.enqueue(CompanyScore()._company_info, company_name)
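
Parse-style backends cap limit at 1000 rows per query, so a report with more signals than that is silently truncated above. A skip-based walk over the same query dict (a sketch):

    signals, qry["skip"] = [], 0
    while True:
        batch = Prospecter().get("CompanySignal", qry).json()["results"]
        signals += batch
        if len(batch) < qry["limit"]:
            break
        qry["skip"] += qry["limit"]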
Example No. 5
    def employee_webhook(self,
                         company_name,
                         company_list,
                         qry="",
                         limit=5,
                         list_id="",
                         _report=""):
        _user, _company = company_list['user'], company_list['company']
        employees = Companies()._employees(company_name, qry)
        company = Companies()._get_info(company_name)
        _company_list = company_list['objectId']
        for index, row in employees.iterrows():
            data = row.to_dict()
            company['user'], company['company'] = _user, _company
            # NOTE: prospect aliases company (the same dict), so the writes below
            # also land on company, which is what gets created at the end.
            prospect = company
            prospect['name'], prospect['pos'] = row['name'], row['title']
            prospect['city'] = row['locale']
            prospect['linkedin_url'] = row['linkedin_url']
            prospect['lists'] = [Parse()._pointer('ProspectList', list_id)]
            if type(company['industry']) is list:
                company['industry'] = company['industry'][0]
            prospect['company_profile'] = company_list['profile']
            r = Prospecter().create('Prospect', company)
            print "prospect_create_result", r.json()

        if RQueue()._has_completed("{0}_{1}".format(_company_list, list_id)):
            data = {'done': arrow.now().timestamp}
            r = Prospecter().update("SignalReport/" + _report, data)
            print "employee_webhook_has_completed -->", r.json()
Example No. 6
 def _random(self):
     qry = {'order': '-createdAt'}
     patterns = Prospecter().get('EmailPattern', qry).json()['results']
     email_guesses = []
     for pattern in patterns:
         data = {
             'pattern': pattern['pattern'],
             'tried': False,
             'source': 'random_guess'
         }
         email_guesses.append(data)
     random.shuffle(email_guesses)
     return email_guesses
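
Each guess carries only the stored pattern; filling it in happens elsewhere. Assuming patterns are stored as format strings such as "{first}.{last}" (an assumption; the stored shape is not visible here), applying one is a one-liner:

    pattern = "{first}.{last}"  # hypothetical stored pattern
    guess = pattern.format(first="jane", last="doe") + "@example.com"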
Example No. 7
    def _company_list_employees(self, company_list_id, list_name, title,
                                limit):
        company_list = Prospecter().get('CompanyProspectList/' +
                                        company_list_id).json()
        print company_list
        _profile = company_list['profile']
        qry = {
            "lists": Prospecter()._pointer("CompanyProspectList",
                                           company_list_id)
        }
        qry = {'where': json.dumps(qry), 'order': '-createdAt'}
        companies = Prospecter().get('CompanyProspect', qry).json()['results']
        _report, _list = self._add_reports(list_name, companies, company_list,
                                           _profile)

        queue_name = "{0}_{1}".format(company_list_id, _list)
        for company in companies:
            job = q.enqueue(self.employee_webhook, company['name'],
                            company_list, title, limit, _list, _report)
            job.meta[queue_name] = True
            job.save()
        return {'started': True}
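
The queue_name written into each job's meta here is the tag that Example No. 5's RQueue()._has_completed call polls for; once no pending job still carries it, the SignalReport is stamped with a done timestamp.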
Example No. 8
 def _salesforce_import(self, session_id, instance, user, user_company):
     print instance
     print instance.replace("https://","")
     sf = Salesforce(instance=instance.replace("https://",""),
                     session_id=session_id)
     res = sf.query_all("SELECT Id, Name, Email FROM Contact")
     # Flatten the SOQL result's "records" list into a DataFrame.
     sf = pd.DataFrame(pd.DataFrame(res).records.tolist())
     sf = sf[["Name","Email"]]
     sf.columns = ["name","email"]
     sf = sf.dropna()
     sf["domain"] = [i.split("@")[-1] if i else "" for i in sf.email]
     sf["source"] = "salesforce"
     sf["db_type"] = "crm"
     sf["user"] = [Parse()._pointer("_User", user) for i in sf.index] 
     sf["user_company"] = [Parse()._pointer("UserCompany",user_company) 
                           for i in sf.index]
     Parse()._batch_df_create("UserContact", sf)
     Prospecter()._batch_df_create("UserContact", sf)
     print Prospecter().update("_User/"+user, 
             {"salesforce_integration":arrow.utcnow().timestamp, 
              "salesforce_token":session_id}).json()
Example No. 9
 def _update_company_email_pattern(self, data):
     if not data: return 0
     qry = {'where':json.dumps({'domain': data['domain']})}
     companies = Parse().get('Company', qry).json()
     while "error" in companies.keys():
         time.sleep(3)
         companies = Parse().get('Company', qry).json()
     companies = companies['results']
     pattern = {'email_pattern': data['company_email_pattern']}
     if data['company_email_pattern'] == []: 
         pattern['email_guess'] = []
     #_pusher['customero'].trigger(data["domain"], pattern)
     for company in companies:
         # NOTE: build a separate dict here; reassigning `data` would drop
         # company_email_pattern and raise a KeyError on the next iteration.
         update = {'email_pattern': data['company_email_pattern'],
                   'email_pattern_research': arrow.utcnow().timestamp}
         r = Parse().update('Company/' + company['objectId'], update)
         # pusher -->
         print r.json()
         #print data["domain"]
Example No. 10
    def _old_start(self):
        print "started"
        cp = Parse()._bulk_get("CompanyProspect")
        p = Parse()._bulk_get("Prospect")
        uc = Parse()._bulk_get("UserContact")

        cl = Parse().get("ContactList", {"limit": 1000}).json()["results"]
        print cl
        cl = pd.DataFrame(cl)
        print cl.head()
        cl["user_id"] = [i["objectId"] for i in cl.user]

        for count, i in enumerate(cp):
            if "company" in i.keys():
                if "domain" in i["company"].keys():
                    cp[count]["domain"] = i["company"]["domain"]

        for count, i in enumerate(p):
            if "company" in i.keys():
                if "domain" in i["company"].keys():
                    p[count]["domain"] = i["company"]["domain"]

        for count, i in enumerate(uc):
            if "company" in i.keys():
                if "name" in i["company"].keys():
                    uc[count]["company_name"] = i["company"]["name"]
                else:
                    uc[count]["company_name"] = ""
            else:
                uc[count]["company_name"] = ""

        # Adding Lists To Contacts / Prospects
        for count, i in enumerate(cp):
            if "user" not in i.keys(): continue
            user_id = i["user"]["objectId"]
            _cl = cl[(cl.user_id == user_id)
                     & (cl.db_type == "all_company_prospect")]
            al = cl[(cl.user_id == user_id)
                    & (cl.db_type == "all_feed_prospect")]
            _cl, al = _cl.to_dict('records'), al.to_dict('records')
            all_feed_id = al[0]["objectId"] if al else ""
            list_id = _cl[0]["objectId"] if _cl else ""
            if "lists" in i.keys():
                cp[count]["lists"] = cp[count]["lists"] + [{
                    "objectId": list_id
                }, {
                    "objectId":
                    all_feed_id
                }]
            else:
                cp[count]["lists"] = [{
                    "objectId": list_id
                }, {
                    "objectId": all_feed_id
                }]

        for count, i in enumerate(p):
            if "user" not in i.keys(): continue
            user_id = i["user"]["objectId"]
            _cl = cl[(cl.user_id == user_id) & (cl.db_type == "all_prospect")]
            al = cl[(cl.user_id == user_id)
                    & (cl.db_type == "all_feed_prospect")]
            _cl, al = _cl.to_dict('records'), al.to_dict('records')
            all_feed_id = al[0]["objectId"] if al else ""
            list_id = _cl[0]["objectId"] if _cl else ""
            if "lists" in i.keys():
                p[count]["lists"] = p[count]["lists"] + [{
                    "objectId": list_id
                }, {
                    "objectId":
                    all_feed_id
                }]
            else:
                p[count]["lists"] = [{
                    "objectId": list_id
                }, {
                    "objectId": all_feed_id
                }]

        for count, i in enumerate(uc):
            if "user" not in i.keys(): continue
            db_type, user_id = i["db_type"], i["user"]["objectId"]
            _cl = cl[(cl.user_id == user_id) & (cl.db_type == db_type)]
            al = cl[(cl.user_id == user_id)
                    & (cl.db_type == "all_feed_prospect")]
            _cl, al = _cl.to_dict('records'), al.to_dict('records')
            all_feed_id = al[0]["objectId"] if al else ""
            list_id = _cl[0]["objectId"] if _cl else ""
            if "lists" in i.keys():
                uc[count]["lists"] = uc[count]["lists"] + [{
                    "objectId": list_id
                }, {
                    "objectId":
                    all_feed_id
                }]
            else:
                uc[count]["lists"] = [{
                    "objectId": list_id
                }, {
                    "objectId": all_feed_id
                }]

        _p, _cp, _uc = pd.DataFrame(p), pd.DataFrame(cp), pd.DataFrame(uc)
        #print _p[_p.domain.isnull()].shape, _p.shape
        #print _cp[_cp.domain.isnull()].shape, _cp.shape
        # for user pointer add user_contact_list pointer
        print _p.shape, _cp.shape, _uc.shape

        i, j, tmp = 0, 0, pd.concat([_cp, _p, _uc]).reset_index()

        print tmp.domain.drop_duplicates().shape
        #return
        for a, b in tmp[["domain", "lists", "company_name",
                         "user"]].groupby("domain"):
            if a == ".": continue
            i = i + 1

            if b.lists.dropna().sum():
                j = j + 1
                lists = [
                    ii["objectId"] for ii in b.lists.dropna().sum()
                    if "objectId" in ii.keys()
                ]
                lists = pd.Series(lists).unique().tolist()
                company_name, domain = b.company_name.tolist()[0], a
                #print lists, a, b.company_name.tolist()[0]
                '''
                r = requests.post("https://clear-spark.herokuapp.com/v1/clearspark/daily_news",
                #r = requests.post("http://localhost:4000/v1/clearspark/daily_news",
                      headers={'Content-type': 'application/json'},
                      data=json.dumps({"company_name":company_name,"domain":domain,
                            "lists":lists,"source":"blog"}))
                
                print r.text

                '''
                api_key = "9a31a1defcdc87a618e12970435fd44741d7b88794f7396cbec486b8"
                #if i > 2: break
                x = 600000
                #job = q.enqueue(Companies()._news, domain, api_key, company_name, timeout=x)
                company_name = self.remove_non_ascii(company_name)
                domain = self.remove_non_ascii(domain)

                print j, company_name, domain  #, lists, tmp.shape
                job = q.enqueue(Companies()._daily_secondary_research,
                                company_name,
                                domain,
                                api_key,
                                lists,
                                timeout=60000)
                '''
                job = q.enqueue(Companies()._recent_webpages_published, 
                                 domain, api_key, company_name, timeout=60000)
                #time.sleep(0.5)
                #print lists

                job.meta["lists"] = lists
                job.meta["_lists"] = lists
                job.save()
                #RQueue()._meta(job, "lists", lists)
                '''
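
remove_non_ascii (and remove_accents in Example No. 11) are helpers on the same class that these examples never show. A minimal remove_non_ascii consistent with how it is called (an assumption):

    def remove_non_ascii(self, text):
        # Drop any character outside the ASCII range.
        return "".join(c for c in text if ord(c) < 128)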
Example No. 11
    def _update_company_info(self, data, api_key="", name=""):
        print "DATA"
        print data 
        company_name = self.remove_accents(data['company_name'])
        # Match either the raw or the accent-stripped company name. The original
        # put both under one key inside a single $or branch, so only one survived.
        qry = {"where": json.dumps({"$or": [{"company_name": data["company_name"]},
                                            {"company_name": company_name}]})}
        company = Parse().get('Company', qry).json()
        while "results" not in company.keys():
            time.sleep(0.5)
            company = Parse().get('Company', qry).json()

        companies = company['results']
        data = self._unparsify_data(data)

        if companies == []:
            company = Parse().create('Company', data).json()
            while "objectId" not in company.keys():
                time.sleep(0.5)
                company = Parse().create('Company', data).json()
                print "retrying", company
            print company
            companies = [Parse()._pointer('Company',company['objectId'])]

        print data["company_name"]
        company_name = data["company_name"].replace(' ','-')
        #_pusher['customero'].trigger(company_name, {'company': data})

        print "__STARTED", len(companies)
        for company in companies:
            print "UPDATING COMPANY"
            #TODO batch_df update
            print Parse().update('Company/'+company['objectId'], data).json()
            _company = Parse()._pointer('Company', company['objectId'])
            classes = ['Prospect','CompanyProspect','PeopleSignal','CompanySignal']
            objects = []
            for _class in classes:
                df = pd.DataFrame()
                objects = Parse().get(_class, qry).json()['results']
                data = {'company':_company, 
                        'company_research': arrow.utcnow().timestamp}
                df["objectId"] = [i["objectId"] for i in objects]
                Parse()._batch_df_update(_class, df, data)

            #TODO - batch update
            # NOTE: `objects` here is left over from the last class in the loop
            # above, which already batch-updated every class.
            for obj in objects:
                print "UPDATED", _class, obj
                _id = obj['objectId']
                print Parse().update(_class+"/"+_id, data).json()
                #TODO - add name email guess - what is this code below
                name = ""
                if _class == 'Prospect':
                    print company
                    domain = company["domain"]
                    #q.enqueue(EmailGuess().search_sources, domain, "", api_key)

        return "updated"

        # NOTE: everything below is unreachable after the return above;
        # it is the older, pre-batch implementation.
        # TODO BATCHIFY
        print "CREATING COMPANY"
        company = Parse().create('Company', data).json()
        _company = Parse()._pointer('Company', company['objectId'])
        classes = ['Prospect','CompanyProspect','PeopleSignal','CompanySignal']
        for _class in classes:
            objects = Parse().get(_class, qry).json()['results']
            for obj in objects:
                print "UPDATED", _class, obj
                _id = obj['objectId']
                print Parse().update(_class+"/"+_id, {'company':_company}).json()
                # `p` is undefined here; `_pusher` matches the commented-out call above.
                _pusher['customero'].trigger(data["company_name"], {'company': data})
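
Both TODOs above point at the same gap: the per-object updates could go through Parse's batch endpoint, which accepts up to 50 operations per request. A sketch against the hosted Parse REST API (credentials and host are placeholders):

    import json
    import requests

    def batch_update(class_name, object_ids, body, app_id, rest_key):
        ops = [{"method": "PUT",
                "path": "/1/classes/{0}/{1}".format(class_name, oid),
                "body": body} for oid in object_ids]
        # The batch endpoint caps each request at 50 operations.
        for i in range(0, len(ops), 50):
            requests.post("https://api.parse.com/1/batch",
                          headers={"X-Parse-Application-Id": app_id,
                                   "X-Parse-REST-API-Key": rest_key,
                                   "Content-Type": "application/json"},
                          data=json.dumps({"requests": ops[i:i + 50]}))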