def test__prepare_batch_url(self):
     """A batch tuple is turned into the endpoint URL with an encoded query."""
     fc = FullContact('')
     # The address has to be the one the expected URL encodes
     # (test%40test.com); a masked placeholder can never yield that query.
     assert_equal(
         fc._prepare_batch_url(('person', {
             'email': 'test@test.com'
         })),
         'https://api.fullcontact.com/v2/person.json?email=test%40test.com')
示例#2
0
def full_contact_request(email):
    """Look up ``email`` on FullContact and record the outcome.

    Responses with status 200 (found) and 404 (not found) are both stored in
    the FullContact table so the address is not tried again.  A 202 response
    is treated as throttling and ``full_contact_request.retry`` is called with
    a random countdown.  A 403 response and any other status are only logged.

    Args:
        email: Address to look up.
    """
    if constants.FULLCONTACT_KEY is None:
        logger.fatal("constants.FULLCONTACT_KEY is not set.")
        return

    logger.info('Looking up %s', email)

    fc = FullContact(constants.FULLCONTACT_KEY)
    r = fc.person(email=email)

    MIN_RETRY_SECS = 10
    MAX_RETRY_SECS = 600

    # Social network ``typeId`` -> column on the row that stores its handle.
    handle_columns = {
        'angellist': 'angellist_handle',
        'github': 'github_handle',
        'twitter': 'twitter_handle',
    }

    code = int(r.status_code)
    if code in (200, 404):
        # Success or not found
        # (We log "not found" results in db too, so that we know
        # we tried and can move on to next email.)
        contact_json = r.json()
        fc_row = db_models.FullContact()
        fc_row.email = email
        fc_row.fullcontact_response = contact_json

        if 'socialProfiles' in contact_json:
            for profile in contact_json['socialProfiles']:
                if 'typeId' in profile and 'username' in profile:
                    column = handle_columns.get(profile['typeId'])
                    if column is not None:
                        setattr(fc_row, column, profile['username'])
        try:
            db.session.add(fc_row)
            db.session.commit()
        except IntegrityError:
            # A failed commit leaves the session unusable until it is rolled
            # back; without this every later use of the session fails too.
            db.session.rollback()
            logger.warning(
                "Email %s has already been entered in FullContact table.",
                email)
        else:
            logger.info('Email %s  recorded to fullcontact', email)
    elif code == 403:
        # Key fail
        logger.fatal("constants.FULLCONTACT_KEY is not set or is invalid.")
    elif code == 202:
        # We're requesting too quickly, randomly back off
        delay = randint(MIN_RETRY_SECS, MAX_RETRY_SECS)
        logger.warning(
            "Throttled by FullContact. Retrying after random delay of %d",
            delay)
        full_contact_request.retry(countdown=delay)
    else:
        logger.fatal("FullContact request %s with status code %s", email,
                     r.status_code)
        logger.fatal(r.json())
    def test_invalid_api_keys(self):
        """A bad key is answered with 403 by the person and batch calls."""
        client = FullContact('test_key')

        single = client.person(email='*****@*****.**')
        assert_equal(single.status_code, 403)

        batch_requests = [
            ('person', dict(email='*****@*****.**')),
            ('person', dict(name='Bob Smith')),
        ]
        batched = client.api_batch(batch_requests)
        assert_equal(batched.status_code, 403)
    def test_invalid_api_keys(self):
        """An empty key is answered with 403 by ``api_get`` and ``api_batch``."""
        client = FullContact('')

        single = client.api_get('person', email='*****@*****.**')
        assert_equal(single.status_code, 403)

        by_email = ('person', dict(email='*****@*****.**'))
        by_name = ('person', dict(name='Bob Smith'))
        batched = client.api_batch([by_email, by_name])
        assert_equal(batched.status_code, 403)
示例#5
0
    def check_fullcontact(self, email, password, interactive_flag=False, elastic=False):
        """Look up ``email`` on FullContact and print what was found.

        For a status-200 response the full name, social profile URLs and
        general location are printed when present, and the response merged
        with ``email``/``password`` is passed to ``self.put_elastic`` if
        ``elastic`` is set.  For a status-202 response in interactive mode the
        user may choose to wait two minutes and retry.

        Args:
            email: Address to look up.
            password: Stored next to the email in the record sent to elastic.
            interactive_flag: Enables the retry prompt and the list return.
            elastic: When true, push the merged record via ``put_elastic``.

        Returns:
            The list of social profile URLs when at least one was found and
            ``interactive_flag`` is set, otherwise ``False``.
        """
        print("---" + Fore.CYAN + "FullContact" + Fore.RESET + "---")
        fc = FullContact(conf['keys']['fullcontact'])
        person = fc.person(email=email)
        person_json = json.loads(person.content.decode("utf-8"))
        social_to_push = []
        to_elastic = {"email": email, "password": password}

        # Best-effort: any problem while reading the response is printed and
        # the lookup is reported as "nothing found".
        try:
            status = person_json['status']
            if status == 200:
                if 'contactInfo' in person_json:
                    if 'fullName' in person_json['contactInfo']:
                        print(person_json['contactInfo']['fullName'])

                if 'socialProfiles' in person_json:
                    for social in person_json['socialProfiles']:
                        social_to_push.append(social['url'])
                        print(social['url'])

                if 'demographics' in person_json:
                    if 'locationGeneral' in person_json['demographics']:
                        print(person_json['demographics']['locationGeneral'])

                to_elastic.update(person_json)
                if elastic:
                    self.put_elastic('fullcontact', 'email', to_elastic)

            elif status == 202:
                if interactive_flag:
                    time_dec = input("Your search is queued, do you want to wait for 2 minutes? [Y/N] \n> ")
                    if time_dec == "Y":
                        print("Sleeping...")
                        time.sleep(60 * 2)
                        # The retry used to be called as (email, elastic),
                        # which put ``elastic`` in the ``password`` slot,
                        # dropped ``interactive_flag`` and threw the result
                        # away.  Forward every argument and return it.
                        return self.check_fullcontact(
                            email, password, interactive_flag, elastic)

            else:
                print("No results")

        except Exception as e:
            print(Fore.RED + str(e) + Fore.RESET)

        if len(social_to_push) > 0 and interactive_flag:
            return social_to_push
        else:
            return False
示例#6
0
class FullContact_Client:
    """Thin wrapper around a FullContact API client."""

    def __init__(self, api_key='ab76dbb1c4b8c50f'):
        """Create the wrapped client.

        Args:
            api_key: FullContact API key.  Defaults to the key that used to
                be hard-coded here, so existing ``FullContact_Client()``
                callers behave as before.
        """
        # NOTE(review): the default is a credential committed to source;
        # prefer passing the key in from configuration.
        self.fc = FullContact(api_key)

    def searchbyemail(self, email):
        """Return the result of ``self.fc.get(email=email)``."""
        return self.fc.get(email=email)
示例#7
0
  def _email_search(self, email, api_key=""):
      try:
          person = clearbit.Person.find(email=email, stream=True)
      except:
          person = None
      data = {"pattern":None, "name":None, "email":email,
              "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
      if person:
          pattern = EmailGuessHelper()._find_email_pattern(person["name"]["fullName"], email)
          if pattern: 
              data = {"pattern":pattern, "name":person["name"]["fullName"], "email":email,
                      "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
      elif not person or not pattern:
          person = FullContact()._person_from_email(email)
          print person
          try:
              person = person["contactInfo"]["fullName"]
              fullcontact_person = True
          except:
              fullcontact_person = False

          if fullcontact_person:
              person = person["contactInfo"]["fullName"]
              pattern = EmailGuessHelper()._find_email_pattern(person, email)
              data = {"pattern":pattern, "name":person, "email":email,
                      "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
              print pattern
          else:
              _email = email.replace(".", " ").replace("-", " ").replace("_"," ")
              _email = _email.replace("@", " ")
              g = Google().search("{0} site:linkedin.com/pub".format(_email))
              g1 = Google().search("{0} site:linkedin.com/pub".format(_email.split(" "[0])))
              g2 = Google().search("{0} site:linkedin.com/pub".format(_email).split(" ")[-1])
              g = pd.concat([g, g1, g2])
              choices = [i.split(" |")[0] for i in g.link_text]
              person = process.extract(_email, choices, limit=1)
              try:
                person = person[0][0]
              except:
                ''' '''
              pattern = EmailGuessHelper()._find_email_pattern(person, email)
              print "google search pattern", pattern
              if pattern:
                  data = {"pattern":pattern, "name":person, "email":email,
                          "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
              else:
                  data = {"pattern":None, "name":None, "email":email,
                          "domain":email.split("@")[-1], "crawl_source":"email_hunter"}
      #data = pd.DataFrame([data])
      conn = r.connect(host="localhost", port=28015, db="triggeriq")
      r.table('email_pattern_crawls').insert(data).run(conn)
      #CompanyEmailPatternCrawl()._persist(data, "emailhunter", api_key)
      # persist to rethinkdb
      print "person", person
示例#8
0
文件: app.py 项目: garbados/fc_cli
def get_identity(config, email):
  """Return ``(status, document)`` for ``email``, storing FullContact hits.

  The database is created if needed.  A document already stored under
  ``email`` is returned as is.  Otherwise the profile is fetched from
  FullContact and, when its status is 200, posted to the database with
  ``_id`` set to the email.

  Args:
      config: Mapping with ``FC_KEY``, ``DB_URI``, ``DB_NAME``, ``DB_USER``
          and ``DB_PASS``.
      email: Address to look up.

  Returns:
      A ``(status_code, body)`` tuple.  The status comes from the database
      response, or from the FullContact profile when the lookup failed.
  """
  fc = FullContact(config['FC_KEY'])

  db = divan.Database(config['DB_URI'],
                      config['DB_NAME'],
                      auth=(config['DB_USER'],
                            config['DB_PASS']))

  # Explicit check rather than ``assert``: assertions are stripped under
  # ``python -O``, which would silently disable this guard.
  res = db.get_or_create()
  if res.status_code not in (200, 201):
    return res.status_code, res.json()

  res = db.get(email)
  if res.status_code == 200:
    return res.status_code, res.json()

  profile = fc.get(email=email)
  if profile['status'] == 200:
    profile['_id'] = email
    res = db.post(params=profile)
    return res.status_code, profile
  return profile['status'], profile
示例#9
0
 def _whois_search(self, domain):
     """Derive email patterns from the WHOIS contacts of ``domain``.

     Each non-empty WHOIS contact becomes a row; the pattern linking the
     contact's name to its email is stored in a ``pattern`` column and the
     frame is persisted under the ``whois_search`` label.

     Returns:
         An empty DataFrame when the WHOIS lookup fails, otherwise None.
     """
     # TODO - fix this
     try:
         # One lookup is enough; the old second call only repeated the
         # request and its result was overwritten straight away.
         results = pythonwhois.get_whois(domain)
     except Exception:
         return pd.DataFrame()
     contacts = [contact for contact in results['contacts'].values() if contact]
     emails = pd.DataFrame(contacts)
     emails['domain'] = domain
     for index, row in emails.iterrows():
         name = FullContact()._normalize_name(row['name'])
         # Match against the stripped address; it used to be computed and
         # then ignored in favour of the raw value.
         email = row.email.strip()
         pattern = EmailGuessHelper()._find_email_pattern(name, email)
         emails.ix[index, 'pattern'] = pattern
     CompanyEmailPatternCrawl()._persist(emails, "whois_search")
示例#10
0
 def _research_emails(self, emails):
     _emails = pd.DataFrame()
     for email in emails:
         # if -, ., _       | clean emails
         full_name = FullContact()._person_from_email(email)
         print email, full_name
         if type(full_name) is str: continue
         full_name = full_name['contactInfo']['fullName']
         person = EmailGuessHelper()._name_to_email_variables(full_name)
         person['domain'] = email.split('@')[-1]
         for pattern in EmailGuessHelper()._patterns():
             _email = pystache.render(pattern, person)
             if email.lower() == _email.lower():
                 person['pattern'], person['email'] = pattern, email
                 _emails = _emails.append(person, ignore_index=True)
     return _emails
示例#11
0
 def run(self, conf, args, plugins):
     """Query FullContact with the first identifier set on ``args``.

     The identifiers are tried in the order twitter, email, phone, md5 and
     domain.  The JSON answer is pretty-printed, or the parser help is shown
     when none of them is set.
     """
     fc = FullContact(conf['FullContact']['key'])
     # (attribute on ``args``, keyword passed to ``fc.person``), by priority.
     lookups = (
         ('twitter', 'twitter'),
         ('email', 'email'),
         ('phone', 'phone'),
         ('md5', 'emailMD5'),
         ('domain', 'domain'),
     )
     for option, parameter in lookups:
         value = getattr(args, option)
         if value:
             res = fc.person(**{parameter: value})
             print(json.dumps(res.json(), sort_keys=True, indent=4))
             return
     self.parser.print_help()
示例#12
0
    def _zoominfo_search(self, domain):
        qry = 'site:zoominfo.com/p/ "@{0}"'.format(domain)
        queue = "zoominfo-check-" + domain
        test = Google().search(qry, 5)
        res = [[word.lower() for word in link.split() if "@" in word]
               for link in test[test.link_span.str.contains('@')].link_span]
        test.ix[test.link_span.str.contains('@'), 'email'] = res
        test = test[test.email.notnull()]
        test['name'] = [link.split('|')[0].strip() for link in test.link_text]
        emails = test
        emails['domain'] = domain
        patterns = []
        for index, row in emails.iterrows():
            name = FullContact()._normalize_name(row['name']).strip()
            print row.email
            email = row.email.strip()
            if email[-1] is ".": email = email[:-1]
            pattern = EmailGuessHelper()._find_email_pattern(name, email)
            patterns.append(pattern)

        emails['pattern'] = patterns
        CompanyEmailPatternCrawl()._persist(emails, "zoominfo_search")
 def test__prepare_batch_url(self):
     """A batch tuple is turned into the endpoint URL with an encoded query."""
     fc = FullContact('test_key')
     # The address has to be the one the expected URL encodes
     # (test%40test.com); a masked placeholder can never yield that query.
     assert_equal(
         fc._prepare_batch_url(('person', {'email': 'test@test.com'})),
         'https://api.fullcontact.com/v2/person.json?email=test%40test.com'
     )
示例#14
0
from fullcontact import FullContact
import json
fc = FullContact('your_api_key')
user_id = input('Please enter user email-id: ')
r = fc.person(email=user_id)
data = r.json()

# A 200 response does not have to carry every section, so each field is read
# defensively instead of raising KeyError on a sparse profile.
if data.get('status') == 200:
    name = data.get('contactInfo', {}).get('fullName')
    location = (data.get('demographics', {})
                .get('locationDeduced', {})
                .get('deducedLocation'))
    print('Name: ' + (name or 'unavailable'))
    print('Location: ' + (location or 'unavailable'))
else:
    print('Data unavailable right now.')
示例#15
0
from fullcontact import FullContact
from config import fullcontact_api
from person import perDetail
import json
# Module-level FullContact client used by fetchData, built from the key
# imported from config.
fc = FullContact(fullcontact_api)


def fetchData(email_id):
    person = fc.person(email=email_id)
    data = person.json()
    try:
        print "\n-------------------------------------------------\n"
        print "[+] Gathering Personal Details from [FullContact]\n"
        print "Full Name: ----> " + data['contactInfo']['fullName']
        print "Gender: -------> " + str(data['demographics']['gender'])
        print "State: --------> " + str(
            data['demographics']['locationDeduced']['state']['name'])
        print "Country: ------> " + str(
            data['demographics']['locationDeduced']['country']['name'])
        for u in data['contactInfo']['websites']:
            print "Website: ------> " + u['url']

    except:
        print "Unavailable"

    try:
        print "\n\n[+] Gathering Employment Details from [FullContact]\n"
        for org in data['organizations']:
            print "Organisation Name: " + org[
                'name'] + " " + "\nJob Title: " + " " + org[
                    'title'] + " " + "\nStart date: " + " " + org[
示例#16
0
文件: app.py 项目: jsandlund/gods_eye
def hello():
    first_name = request.form["firstName"]
    last_name = request.form["lastName"]
    company_url = request.form["companyUrl"]
    source = "Ad Hoc"
    api_key = "ddb2740f8d2338c78497519c13cc7076"
    params = {"key": api_key, "domain": company_url, "first": first_name, "last": last_name}

    toofr_url = "http://toofr.com/api/guess?"
    test = requests.get(toofr_url, params=params)
    toofr_data = test.json()

    try:
        toofr_email = toofr_data["response"]["email"]
    except:
        toofr_email = None
    try:
        toofr_confidence = toofr_data["response"]["confidence"]
    except:
        toofr_confidence = None
    toofr_data = pd.DataFrame(
        columns=("toofr_email", "first_name", "last_name", "toofr_confidence", "company_url", "source")
    )

    existing_records_final_links = len(toofr_data)
    toofr_data.loc[existing_records_final_links] = [
        toofr_email,
        first_name,
        last_name,
        toofr_confidence,
        company_url,
        source,
    ]
    toofr_data.to_sql("investor_toofr_data", con=conn, flavor="mysql", if_exists="append", index=False)

    query = """
    select td.id,td.toofr_email as email
    from investor_toofr_data td
    having td.id =  (select max(id)
                     from investor_toofr_data);"""

    toofr_data = psql.read_frame(query, conn)
    toofr_data_dict = {}
    toofr_data_dict = toofr_data.set_index("id").to_dict()
    toofr_data_dict = toofr_data_dict["email"]

    for key, value in toofr_data_dict.items():
        fc = FullContact("76152464a239f71c")
        print key, value
        person_profile = fc.get(email=value)

        if person_profile["status"] == 200:
            rep_gender = None
            rep_location = None
            rep_klout_score = None
            rep_klout_topic = None
            rep_facebook_url = None
            rep_facebook_followers = None
            rep_facebook_following = None
            rep_linkedin_url = None
            rep_twitter_url = None
            rep_twitter_followers = None
            rep_twitter_following = None
            rep_angellist_url = None
            rep_angellist_followers = None

            try:
                rep_gender = person_profile["demographics"]["gender"]
            except:
                print "gender_missing"
            try:
                rep_location = person_profile["demographics"]["locationGeneral"]
            except:
                print "location_missing"

            try:
                rep_klout_score = person_profile["digitalFootprint"]["scores"][0]["value"]
            except:
                print "klout score missing"
            try:
                rep_klout_topic = person_profile["digitalFootprint"]["topics"][0]["value"]
            except:
                print "klout topic missing"

            try:
                rep_social_profiles = person_profile["socialProfiles"]
                if len(rep_social_profiles) > 0:
                    for i in xrange(0, len(rep_social_profiles)):
                        if rep_social_profiles[i]["typeName"] == "Facebook":
                            try:
                                rep_facebook_url = rep_social_profiles[i]["url"]
                            except:
                                print "facebook url missing"
                            try:

                                rep_facebook_followers = rep_social_profiles[i]["followers"]
                            except:
                                print "facebook followers missing"
                            try:
                                rep_facebook_following = rep_social_profiles[i]["following"]
                            except:
                                print "facebook following missing"
                        if rep_social_profiles[i]["typeName"] == "LinkedIn":
                            try:
                                rep_linkedin_url = rep_social_profiles[i]["url"]
                            except:
                                print "linkedin url missing"
                        if rep_social_profiles[i]["typeName"] == "Twitter":
                            try:
                                rep_twitter_url = rep_social_profiles[i]["url"]
                            except:
                                print "twitter url missing"
                            try:

                                rep_twitter_followers = rep_social_profiles[i]["followers"]
                            except:
                                print "twitter followers missing"
                            try:

                                rep_twitter_following = rep_social_profiles[i]["following"]
                            except:
                                print "twitter following missing"
                        if rep_social_profiles[i]["typeName"] == "AngelList":
                            try:

                                rep_angellist_url = rep_social_profiles[i]["url"]
                            except:
                                print "angel list url missing"
                            try:

                                rep_angellist_followers = rep_social_profiles[i]["followers"]
                            except:
                                print "angel list followers missing"
            except:
                print "no social profile found"

            data = pd.DataFrame(
                columns=(
                    "toofr_id",
                    "rep_gender",
                    "rep_location",
                    "rep_klout_score",
                    "rep_klout_topic",
                    "rep_facebook_url",
                    "rep_facebook_followers",
                    "rep_facebook_following",
                    "rep_linkedin_url",
                    "rep_twitter_url",
                    "rep_twitter_followers",
                    "rep_twitter_following",
                    "rep_angellist_url",
                    "rep_angellist_followers",
                )
            )

            existing_records_final_links = len(data)
            data.loc[existing_records_final_links] = [
                key,
                rep_gender,
                rep_location,
                rep_klout_score,
                rep_klout_topic,
                rep_facebook_url,
                rep_facebook_followers,
                rep_facebook_following,
                rep_linkedin_url,
                rep_twitter_url,
                rep_twitter_followers,
                rep_twitter_following,
                rep_angellist_url,
                rep_angellist_followers,
            ]
            data = data.where(pd.notnull(data), None)
            data.to_sql("fullcontact", con=conn, flavor="mysql", if_exists="append", index=False)
    return render_template("form_action.html", firstName=first_name, lastName=last_name)
示例#17
0
 def test_adds_endpoint_methods(self):
     """Every name in ``get_endpoints`` is exposed on the client as a function."""
     client = FullContact('')
     for name in client.get_endpoints:
         method = getattr(client, name)
         assert_true(isinstance(method, FunctionType))
示例#18
0
 def __init__(self):
     """Initialise the base class and create the client from ``self.THE_KEY``."""
     super().__init__()
     self.fc = FullContact(self.THE_KEY)
示例#19
0
    def __init__(self, api_key='ab76dbb1c4b8c50f'):
        """Create the wrapped FullContact client.

        Args:
            api_key: FullContact API key.  Defaults to the key that used to
                be hard-coded here, so calls without arguments behave as
                before.
        """
        # NOTE(review): the default is a credential committed to source;
        # prefer passing the key in from configuration.
        self.fc = FullContact(api_key)
示例#20
0
from fullcontact import FullContact
import json
from attest import Tests, assert_hook

# Collection whose ``test`` decorator marks the test functions below.
fc_tests = Tests()
# The key is prompted for when the module is loaded, not stored in the file.
api_key = raw_input("Please enter an API key for FullContact: ")
# Client shared by all tests in this module.
fc = FullContact(api_key)
# Sample identifiers for the lookups.
test_email = "*****@*****.**"
test_twitter = "garbados"


@fc_tests.test
def bad_key():
    """A request carrying an invalid ``apiKey`` comes back with status 403."""
    response = fc.get(email=test_email, apiKey='this is a bad api key')
    assert response['status'] == 403


@fc_tests.test
def bad_params():
    """A request without any parameters comes back with status 422."""
    response = fc.get()
    assert response['status'] == 422


@fc_tests.test
def good_param():
    """A lookup by the sample email comes back with status 200."""
    response = fc.get(email=test_email)
    assert response['status'] == 200


@fc_tests.test
def many_params():
示例#21
0
def hello():
    first_name = request.form['firstName']
    last_name = request.form['lastName']
    company_url = request.form['companyUrl']
    source = "Ad Hoc"
    api_key = "ddb2740f8d2338c78497519c13cc7076"
    params = {
        'key': api_key,
        'domain': company_url,
        'first': first_name,
        'last': last_name
    }

    toofr_url = "http://toofr.com/api/guess?"
    test = requests.get(toofr_url, params=params)
    toofr_data = test.json()

    try:
        toofr_email = toofr_data['response']['email']
    except:
        toofr_email = None
    try:
        toofr_confidence = toofr_data['response']['confidence']
    except:
        toofr_confidence = None
    toofr_data = pd.DataFrame(columns=('toofr_email', 'first_name',
                                       'last_name', 'toofr_confidence',
                                       'company_url', 'source'))

    existing_records_final_links = len(toofr_data)
    toofr_data.loc[existing_records_final_links] = [
        toofr_email, first_name, last_name, toofr_confidence, company_url,
        source
    ]
    toofr_data.to_sql("investor_toofr_data",
                      con=conn,
                      flavor='mysql',
                      if_exists='append',
                      index=False)

    query = '''
    select td.id,td.toofr_email as email
    from investor_toofr_data td
    having td.id =  (select max(id)
                     from investor_toofr_data);'''

    toofr_data = psql.read_frame(query, conn)
    toofr_data_dict = {}
    toofr_data_dict = toofr_data.set_index('id').to_dict()
    toofr_data_dict = toofr_data_dict['email']

    for key, value in toofr_data_dict.items():
        fc = FullContact('76152464a239f71c')
        print key, value
        person_profile = fc.get(email=value)

        if person_profile['status'] == 200:
            rep_gender = None
            rep_location = None
            rep_klout_score = None
            rep_klout_topic = None
            rep_facebook_url = None
            rep_facebook_followers = None
            rep_facebook_following = None
            rep_linkedin_url = None
            rep_twitter_url = None
            rep_twitter_followers = None
            rep_twitter_following = None
            rep_angellist_url = None
            rep_angellist_followers = None

            try:
                rep_gender = person_profile['demographics']['gender']
            except:
                print 'gender_missing'
            try:
                rep_location = person_profile['demographics'][
                    'locationGeneral']
            except:
                print 'location_missing'

            try:
                rep_klout_score = person_profile['digitalFootprint']['scores'][
                    0]['value']
            except:
                print 'klout score missing'
            try:
                rep_klout_topic = person_profile['digitalFootprint']['topics'][
                    0]['value']
            except:
                print 'klout topic missing'

            try:
                rep_social_profiles = person_profile['socialProfiles']
                if len(rep_social_profiles) > 0:
                    for i in xrange(0, len(rep_social_profiles)):
                        if rep_social_profiles[i]['typeName'] == 'Facebook':
                            try:
                                rep_facebook_url = rep_social_profiles[i][
                                    'url']
                            except:
                                print 'facebook url missing'
                            try:

                                rep_facebook_followers = rep_social_profiles[
                                    i]['followers']
                            except:
                                print 'facebook followers missing'
                            try:
                                rep_facebook_following = rep_social_profiles[
                                    i]['following']
                            except:
                                print 'facebook following missing'
                        if rep_social_profiles[i]['typeName'] == 'LinkedIn':
                            try:
                                rep_linkedin_url = rep_social_profiles[i][
                                    'url']
                            except:
                                print 'linkedin url missing'
                        if rep_social_profiles[i]['typeName'] == 'Twitter':
                            try:
                                rep_twitter_url = rep_social_profiles[i]['url']
                            except:
                                print 'twitter url missing'
                            try:

                                rep_twitter_followers = rep_social_profiles[i][
                                    'followers']
                            except:
                                print 'twitter followers missing'
                            try:

                                rep_twitter_following = rep_social_profiles[i][
                                    'following']
                            except:
                                print 'twitter following missing'
                        if rep_social_profiles[i]['typeName'] == 'AngelList':
                            try:

                                rep_angellist_url = rep_social_profiles[i][
                                    'url']
                            except:
                                print 'angel list url missing'
                            try:

                                rep_angellist_followers = rep_social_profiles[
                                    i]['followers']
                            except:
                                print 'angel list followers missing'
            except:
                print 'no social profile found'

            data = pd.DataFrame(
                columns=('toofr_id', 'rep_gender', 'rep_location',
                         'rep_klout_score', 'rep_klout_topic',
                         'rep_facebook_url', 'rep_facebook_followers',
                         'rep_facebook_following', 'rep_linkedin_url',
                         'rep_twitter_url', 'rep_twitter_followers',
                         'rep_twitter_following', 'rep_angellist_url',
                         'rep_angellist_followers'))

            existing_records_final_links = len(data)
            data.loc[existing_records_final_links] = [
                key, rep_gender, rep_location, rep_klout_score,
                rep_klout_topic, rep_facebook_url, rep_facebook_followers,
                rep_facebook_following, rep_linkedin_url, rep_twitter_url,
                rep_twitter_followers, rep_twitter_following,
                rep_angellist_url, rep_angellist_followers
            ]
            data = data.where(pd.notnull(data), None)
            data.to_sql('fullcontact',
                        con=conn,
                        flavor='mysql',
                        if_exists='append',
                        index=False)
    return render_template('form_action.html',
                           firstName=first_name,
                           lastName=last_name)
 def test_init(self):
     """The constructor stores the key it was given on ``api_key``."""
     key = ''
     client = FullContact(key)
     assert_equal(client.api_key, key)
示例#23
0
class FullContactEngager(Engager):

    #AYDRATE_KEY = "13edcff433f0c479"  # Aydrate
    #ACURATE_TRIAL_KEY = "2e7c73db16e677f8"  # AcureRate - Trial
    ACURATE_PRODUCTION_KEY = "401739667f580b02"  # AcureRate - Production

    THE_KEY = ACURATE_PRODUCTION_KEY  # Correct Key

    def __init__(self):
        """Initialise the base engager and create the FullContact client."""
        super().__init__()
        # The client is built with the class-level key selected by THE_KEY.
        self.fc = FullContact(self.THE_KEY)

    def __str__(self):
        """Return the fixed display name of this engager."""
        return 'FullContact Engager'

    def __repr__(self):
        """Return the same fixed display name that ``__str__`` produces."""
        return 'FullContact Engager'

    def get_provider_name(self):
        """Return the provider name, ``'FullContact'``."""
        return 'FullContact'

    def get_short_symbol(self):
        """Return the short provider symbol, ``'fc'``."""
        return 'fc'

    def get_api_key(self):
        """Return the API key configured on the class."""
        # Read from FullContactEngager itself rather than ``self``, so a
        # subclass overriding THE_KEY is not reflected here (``__init__``
        # uses ``self.THE_KEY``).
        return FullContactEngager.THE_KEY

    def set_enrich_key(self):
        """Choose the lookup key for the entity being enriched.

        A person is keyed by their pivot email, a company by its deduced
        domain. The result is stored on ``self.enrich_key``.

        Raises:
            EngagementException: if the key is unavailable or the entity is
                neither an ``AcureRatePerson`` nor an ``AcureRateCompany``.
        """
        entity = self.enriched_entity
        entity_kind = entity.__class__.__name__

        if entity_kind == 'AcureRatePerson':
            pivot_email = self.get_pivot_email()
            if pivot_email is None:
                raise EngagementException(
                    "FullContacts - cannot engage. No email available as enrich key"
                )
            self.enrich_key = pivot_email
            return

        if entity_kind == 'AcureRateCompany':
            deduced = entity.deduced
            if C.DOMAIN not in deduced:
                raise EngagementException(
                    "FullContacts - cannot engage - no domain property to use as key"
                )
            self.enrich_key = deduced.get(C.DOMAIN)
            return

        raise EngagementException(
            "FullContacts - cannot engage - cannot generate enrich key. Unknown entity type"
        )

    def enrich_person(self):
        """Copy the FullContact person record onto the enriched entity.

        Fetches the record via ``_get_person_info`` and stores the likelihood
        score, name, demographics, photos, organizations (as jobs) and the
        URLs of recognised social profiles.

        Returns:
            list: ``[P.JOBS]``.

        Raises:
            EngagementException: propagated from ``_get_person_info``.
            KeyError: if the response carries no ``likelihood`` field.
        """
        result_obj = self._get_person_info()

        self.set_data("score", result_obj['likelihood'])

        contact_info = result_obj.get('contactInfo')
        if contact_info:
            if 'givenName' in contact_info:
                self.set_data(P.FIRST_NAME, contact_info['givenName'])
            if 'familyName' in contact_info:
                self.set_data(P.LAST_NAME, contact_info['familyName'])

        demographics = result_obj.get('demographics')
        if demographics:
            gender = demographics.get('gender')
            if gender:
                self.add_data(P.GENDER, gender.lower())
            loc = demographics.get('locationGeneral')
            if loc:
                self.add_data(P.LOCATIONS, loc)

        for photo in result_obj.get('photos') or []:
            new_photo = {}
            m = {"url": P.PHOTO_URL, "typeName": P.PHOTO_SOURCE}
            AcureRateUtils.dict2dict(photo, new_photo, m)
            self.add_data(P.PHOTOS, new_photo)

        for org in result_obj.get('organizations') or []:
            new_job = {}
            m = {
                "name": P.JOB_NAME,
                "title": P.JOB_TITLE,
                "current": P.JOB_CURRENT,
                "isPrimary": P.JOB_PRIMARY
            }
            AcureRateUtils.dict2dict(org, new_job, m)
            # If there are start/end dates, grab them (year only - drop the month)
            if 'startDate' in org:
                new_job[P.JOB_STARTED] = org['startDate'][0:4]
            if 'endDate' in org:
                new_job[P.JOB_ENDED] = org['endDate'][0:4]
            self.add_data(P.JOBS, new_job)

        # FullContact ``typeName`` -> entity property that stores the URL.
        profile_url_fields = {
            'Twitter': P.TWITTER_URL,
            'LinkedIn': P.LINKEDIN_URL,
            'GooglePlus': P.GOOGLEPLUS_URL,
            'Facebook': P.FACEBOOK_URL,
            'Gravatar': P.GRAVATAR_URL,
            'Foursquare': P.FOURSQUARE_URL,
            'Pinterest': P.PINTEREST_URL,
            'Klout': P.KLOUT_URL,
            'AngelList': P.ANGELLIST_URL,
        }
        for social_profile in result_obj.get('socialProfiles') or []:
            target_field = profile_url_fields.get(
                social_profile.get('typeName', ''))
            if target_field is None:
                print('Something else...')
                continue
            # A profile entry may lack a URL; skip it instead of letting a
            # KeyError abort the whole enrichment.
            if 'url' in social_profile:
                self.set_data(target_field, social_profile['url'])

        # TODO: add all other attributes received from FullContact

        return [P.JOBS]

    def enrich_company(self):
        """Copy the FullContact company record onto the enriched entity.

        Looks the company up by its deduced domain and stores the logo,
        website, selected ``organization`` fields and the CrunchBase URL.

        Returns:
            list: ``[C.NAME]``, or ``[]`` when the entity has no deduced
            domain (no lookup is made in that case).

        Raises:
            EngagementException: propagated from ``_get_company_info``.
        """
        domain = self.enriched_entity.deduced.get(C.DOMAIN, None)
        if domain is None:
            return []

        result_obj = self._get_company_info(domain)

        # Keep the logo url and website
        if 'logo' in result_obj:
            self.add_data(C.LOGOS, {
                C.LOGO_URL: result_obj['logo'],
                C.LOGO_SOURCE: 'fullcontact'
            })

        if 'website' in result_obj:
            self.set_data(C.WEBSITE, result_obj['website'])

        # The ``organization`` section is treated as optional, like the
        # top-level keys above, so its absence does not raise KeyError.
        organization = result_obj.get('organization') or {}
        organization_fields = (
            ('founded', C.FOUNDING_YEAR),
            ('approxEmployees', C.EMPLOYEES_NUMBER),
            ('keywords', C.KEYWORDS),
            ('name', C.NAME),
        )
        for source_key, target_field in organization_fields:
            if source_key in organization:
                self.set_data(target_field, organization[source_key])

        # Keep social profiles URL
        # TODO: keep other social profiles...
        for profile in result_obj.get('socialProfiles') or []:
            if (profile.get('typeId') == 'crunchbasecompany'
                    and 'url' in profile):
                self.set_data(C.CRUNCHBASE_URL, profile['url'])

        return [C.NAME]

    def _handle_fc_api_errors(self, response):
        """Raise an EngagementException unless *response* has status 200.

        Each non-200 status gets its own message. Every case except 404 and
        202 also passes ``True`` as the exception's second positional
        argument; its meaning is defined by ``EngagementException``, which is
        not visible in this block.

        Documentation - https://www.fullcontact.com/developer/docs/
        """
        status = response.status_code
        if status == 200:  # All is ok.
            return

        if status == 403:  # Quota exceeded - need special treatment
            raise EngagementException("403. Quota Exceeded.", True)

        if status in (405, 410, 422):
            raise EngagementException(
                "%s. Invalid request sent to FC %s" % (status, response.text),
                True)

        if status == 404:
            raise EngagementException(
                "404. Searched in the past 24 hours and nothing was found: %s"
                % response.text)

        if status in (500, 503):
            raise EngagementException(
                "%s. Transient errors in FC server. Possible maintenance/downtime. %s"
                % (status, response.text), True)

        if status == 202:  # being processed...
            raise EngagementException(
                "202. Did not get info. Request is being processed. Return later."
            )

        raise EngagementException(
            "%s. Unknown error: %s" % (status, response.text), True)

    def _get_person_info(self):
        """Fetch the FullContact person record for ``self.enrich_key``.

        Records the response's ``from_cache`` attribute when it has one,
        then validates the status via ``_handle_fc_api_errors``.

        Returns:
            The decoded JSON body of the response.

        Raises:
            EngagementException: passed through as-is from the status check;
                any other exception is wrapped in one, with ``True`` as its
                second argument.
        """
        try:
            response = self.fc.api_get('person', email=self.enrich_key)
            if hasattr(response, 'from_cache'):
                self.set_data("from_cache", response.from_cache)
            self._handle_fc_api_errors(response)
            # TODO: check if we can inspect the header and see our limit remaining...
            #r.headers['x-rate-limit-remaining']
        except EngagementException:
            raise
        except Exception as err:
            raise EngagementException(err, True)

        return response.json()

    def _get_company_info(self, domain):
        """Fetch the FullContact company record for *domain*.

        Records the response's ``from_cache`` attribute when it has one,
        then validates the status via ``_handle_fc_api_errors``.

        Returns:
            The decoded JSON body of the response.

        Raises:
            EngagementException: passed through as-is from the status check;
                any other exception is wrapped in one, with ``True`` as its
                second argument.
        """
        try:
            response = self.fc.api_get('company', domain=domain)
            if hasattr(response, 'from_cache'):
                self.set_data("from_cache", response.from_cache)
            self._handle_fc_api_errors(response)
        except EngagementException:
            raise
        except Exception as err:
            raise EngagementException(err, True)

        return response.json()
示例#24
0
from fullcontact import FullContact
from config import *
import json
# Module-level FullContact client used by fetchData(). The API key name is
# presumably supplied by the star-import from ``config`` - confirm.
fc=FullContact(fullcontact_api_key)


def fetchData(email_id):
    """Look up *email_id* on FullContact and print a short profile summary.

    Two sections are printed: personal information, then employment
    details. Each section is best-effort. Output stops at the first field
    that is missing or unusable, and the section ends with ``Unavailable``
    (personal) or a single space (employment).

    Args:
        email_id: e-mail address passed to ``fc.person``.
    """
    person = fc.person(email=email_id)
    data = person.json()

    # Failures expected from a sparse response: missing keys/indices
    # (LookupError), null or non-string values that cannot be concatenated
    # (TypeError), and encoding problems while printing (UnicodeError is a
    # ValueError). Anything else - including KeyboardInterrupt - propagates.
    expected_errors = (LookupError, TypeError, ValueError)

    try:
        print("Personal Information :: ")
        contact = data['contactInfo']
        print("Full Name :: " + contact['fullName'])
        print("Given Name :: " + contact['givenName'])
        demographics = data['demographics']
        print("Gender :: " + str(demographics['gender']))
        print("Website :: " + str(contact['websites']))
        location = demographics['locationDeduced']
        print("Full Address :: " + str(location['normalizedLocation']))
        print("City :: " + str(location['city']['name']))
        print("State :: " + str(location['state']['name']))
        print("Country :: " + str(location['country']['name']))
        print("\n")
    except expected_errors:
        print("Unavailable")

    try:
        print("Employment Detail :: ")
        for org in data['organizations']:
            print("Organization::" + org['name']
                  + " Start date:: " + org['startDate']
                  + " Job Title:: " + org['title'])
    except expected_errors:
        print(" ")
示例#25
0
from fullcontact import FullContact


import urllib.request, json


import pyodbc
# Module-level setup: everything below runs at import time.
#
# NOTE(review): the connection settings and the FullContact key are
# hard-coded in source. Consider loading them from configuration or the
# environment.
server = 'dbserveranasight.database.windows.net'
database = 'warehouse'
username = '******'
password = '******'
# Opens the SQL Server connection immediately; the ODBC connection string is
# built by plain string concatenation of the values above.
cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER='+server+';DATABASE='+database+';UID='+username+';PWD='+ password)
cursor = cnxn.cursor()


# The same key literal is used twice: once for the client, once as APIKey.
fc = FullContact('0DHVXQOcAgZzZbUPTeTQ83AakHlzIE8L')
APIKey='0DHVXQOcAgZzZbUPTeTQ83AakHlzIE8L'
# NOTE(review): no import of Flask is visible in this snippet - confirm it is
# imported elsewhere.
app = Flask(__name__)

#************ Get Contact details By Email *******
@app.route("/person_enrich", methods=['POST','GET'])
def person_enrich():
    if request.method == 'POST':
        jsonData = request.get_json(force=True)
        email1 =jsonData['email']
        if email1 == "" :
            return jsonify({'type': 'validation','message': 'Email is required','status': 0})
            sys.exit()

        if  email1 !="":
            headers = {
示例#26
0
    def _company_info(self, company_name, api_key=""):
        """Merge the stored crawls for *company_name* into one company record.

        Loads up to 1000 ``CompanyInfoCrawl`` rows from Parse, scores them,
        keeps the rows whose crawled name matches ``company_name`` with a
        fuzzy score above 70 (plus rows with no name), and picks one value
        per column. The merged record is prettified and saved through
        ``_add_to_clearspark_db``. Once ``RQueue`` reports the
        ``"<company_name>_<api_key>"`` batch as completed, it is also pushed
        through ``Webhook``.

        Args:
            company_name: company to look up; also the key the record is
                saved under.
            api_key: combined with ``company_name`` to name the RQueue batch.

        Returns:
            The merged record, or ``company_name`` unchanged when no crawls
            exist for the company.

        NOTE(review): uses legacy pandas calls (``DataFrame.sort``,
        ``DataFrame.append``, positional ``drop_duplicates`` flag) that newer
        pandas releases no longer provide - confirm the pinned version
        before upgrading.
        """
        # TODO - company_name = self._remove_non_ascii(company_name) add to save
        qry = {
            'where': json.dumps({'company_name': company_name}),
            'limit': 1000,
            'order': '-createdAt',
        }
        crawls = Parse().get('CompanyInfoCrawl', qry).json()['results']

        if not crawls:
            # start crawls
            return company_name

        crawls = self._source_score(pd.DataFrame(crawls))
        crawls = self._logo_score(crawls)
        crawls['name_score'] = [
            fuzz.token_sort_ratio(row['name'], row.company_name)
            for _, row in crawls.iterrows()
        ]
        crawls = crawls[crawls.name_score > 70].append(
            crawls[crawls.name.isnull()])

        # Best logo: the first non-empty logo after sorting by logo_score.
        by_logo_score = crawls.sort("logo_score", ascending=False)
        logo_candidates = by_logo_score[
            (by_logo_score.logo != "")
            & (by_logo_score.logo.notnull())].logo.tolist()
        logo = logo_candidates[0] if logo_candidates else ""

        final = {}
        for col in crawls.columns:
            if col in ('source_score', 'source', 'createdAt'):
                continue
            df = crawls[[col, 'source_score', 'source', 'createdAt']]
            non_null = df[col].dropna()
            if non_null.empty:
                continue
            # Lists are not hashable, so turn them into tuples before the
            # groupby / drop_duplicates below.
            if isinstance(non_null.iloc[0], list):
                df[col] = non_null.apply(tuple)
            try:
                df = df[df[col] != ""]
            except Exception:
                # Best-effort: some column types cannot be compared with "".
                pass
            try:
                df = df[df[col].notnull()]
                # One frame per distinct value, de-duplicated on the value.
                # The positional True is presumably the legacy ``take_last``
                # flag - TODO confirm.
                per_value = [
                    group.sort('createdAt').drop_duplicates(col, True)
                    for _, group in df.groupby(col)
                ]
                per_value = [frame for frame in per_value
                             if frame is not None]
                if not per_value:
                    per_value = [pd.DataFrame(columns=['source_score', col])]
                # The last value after sorting by source_score wins.
                ranked = list(pd.concat(per_value).sort('source_score')[col])
                if ranked:
                    final[col] = ranked[-1]
            except Exception:
                # Best-effort: a column that cannot be merged is left out.
                pass

        if 'industry' in final:
            try:
                final['industry'] = final['industry'][0]
            except (LookupError, TypeError):
                final["industry"] = ""

        try:
            final['industry_keywords'] = list(
                set(crawls.industry.dropna().sum()))
        except Exception:
            final['industry_keywords'] = []

        if 'address' in final:
            final['address'] = FullContact()._normalize_location(
                final['address'])

        try:
            handles = crawls[['source', 'handle']].dropna()
            # Assign only the finished value so a failure cannot leave a
            # raw DataFrame in the record.
            final['handles'] = handles.drop_duplicates().to_dict('r')
        except Exception:
            # Best-effort: crawls may carry no handle data.
            pass

        logos = crawls[['source', 'logo']].dropna()
        final["logo"] = logo
        final['logos'] = logos.drop_duplicates().to_dict('r')

        try:
            phones = crawls[['source', 'phone']].dropna()
            final['phones'] = phones.drop_duplicates().to_dict('r')
        except Exception:
            # Best-effort: crawls may carry no phone data.
            pass

        # TODO - if company_name exists update
        # TODO - find if domain exists under different company_name then update
        final = self._prettify_fields(final)
        if "name_score" in final:
            del final["name_score"]
        self._add_to_clearspark_db('Company', 'company_name', company_name,
                                   final)

        # TODO - find main domain from domain -> ie canon.ca should be canon.com
        # clean data - ie titleify fields, and lowercase domain
        # TODO - start a domain search with the deduced domain and the company_name
        # The follow-up jobs once enqueued here (Companies()._domain_research,
        # Companies()._secondary_research, EmailGuess().search_sources) are
        # disabled; only the webhook notification remains.
        if RQueue()._has_completed("{0}_{1}".format(company_name, api_key)):
            print("WEBHOOK <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
            if "company_name" in final:
                Webhook()._update_company_info(final)
        return final