示例#1
0
    def linkedin(self):
        """Collect LinkedIn data for the account recorded in ``self.data``.

        Reads the profile URL and nickname from
        ``self.data['sites']['linkedin.com']``, logs in with the
        ``LINKEDIN_LOGIN`` / ``LINKEDIN_PASSWORD`` environment variables and
        merges the profile, contact info and network info into one dict.
        Sections merged later overwrite keys from earlier ones. The skills
        list is stored under ``'skills'`` and the URL under ``'url'``.
        """
        site = self.data['sites']['linkedin.com']
        url = site['url']
        nickname = site['nickname']

        api = Linkedin(
            os.environ.get('LINKEDIN_LOGIN'),
            os.environ.get('LINKEDIN_PASSWORD'),
        )

        # Fetch in the same order as the merge: profile, contact, network.
        merged = {}
        for fetch in (api.get_profile,
                      api.get_profile_contact_info,
                      api.get_profile_network_info):
            merged.update(fetch(nickname))

        merged['skills'] = api.get_profile_skills(nickname)
        merged['url'] = url
        return merged
示例#2
0
class LinkedinEngine:
    """Small wrapper around a logged-in ``Linkedin`` API client."""

    def __init__(self):
        """Log in with the module-level LinkedIn user name and password."""
        self.api = Linkedin(LINKEDIN_USER_NAME, LINKEDIN_PASSWORD)

    def search_company(self, company_name):
        """Return the people-search results for ``company_name`` as keywords."""
        matches = self.api.search_people(keywords=company_name)
        return matches

    def get_profile(self, user_name):
        """Return contact info and profile data for ``user_name`` merged together.

        Sleeps a random whole number of seconds (0 to 3) before querying, and
        passes ``user_name`` through ``url_parse`` before using it as the
        profile identifier.
        """
        pause_seconds = random.randint(0, 3)
        time.sleep(pause_seconds)

        profile_key = url_parse(user_name)
        contact_details = self.api.get_profile_contact_info(profile_key)
        profile_details = self.api.get_profile(profile_key)
        return toolz.merge(contact_details, profile_details)
示例#3
0
import json
from linkedin_api import Linkedin

# Read the login details from credentials.json next to the script.
with open('credentials.json', 'r') as f:
    credentials = json.load(f)

# A falsy credentials document skips all API work.
if credentials:
    linkedin = Linkedin(credentials['username'], credentials['password'])

    # The same identifier is used for the profile and the contact-info lookup.
    _profile_ref = 'ACoAABQ11fIBQLGQbB1V1XPBZJsRwfK5r1U2Rzw'
    profile = linkedin.get_profile(_profile_ref)
    contact_details = linkedin.get_profile_contact_info(_profile_ref)
    profile['contact_info'] = contact_details

    # Connections are looked up by the id returned inside the profile.
    connections = linkedin.get_profile_connections(profile['profile_id'])
示例#4
0
def linkedinextract(id):
    """Render the LinkedIn comparison page for the user with the given ``id``.

    Query-string values (applicant fields ``a*`` and document fields ``d*``)
    are passed through to the ``linkedin.html`` template together with data
    fetched from LinkedIn. If the LinkedIn lookup fails, ``'cannot connect'``
    is printed and the page is rendered with empty LinkedIn fields.

    On POST, the submitted email and password are inserted into ``cv_reg``
    for this ``id`` and the client is redirected to ``clogin``.
    """
    args = request.args
    url11 = args.get('url11')
    anna = args.get('aname')
    anem = args.get('aemail')
    anad = args.get('aaddress')
    anph = args.get('aphone')
    anni = args.get('anic')
    skills1 = args.get('dskills')
    name1 = args.get('dname')
    projects1 = args.get('dproject')
    degree1 = args.get('ddeg')
    univercity1 = args.get('duni')
    experience11 = args.get('dexp')
    phone1 = args.get('dmobile')
    address1 = args.get('daddress')
    # NOTE(review): reads the same key as address1; this looks like a
    # copy/paste slip. Confirm the intended NIC parameter name.
    nic1 = args.get('daddress')

    print(url11)

    # Fallback values used when the LinkedIn lookup below fails.
    lname = []
    lskills = []
    lexperience = []
    lluniversity = json.dumps([])
    lldegree = json.dumps([])
    llskills = json.dumps([])
    lemail1 = []
    lmobile = []

    # NOTE(review): this hard-coded profile overrides the 'url11' request
    # value, so every request scrapes the same profile. Confirm whether this
    # is a debugging leftover.
    url11 = 'ravindu-landekumbura-19950214'
    try:
        # NOTE(review): credentials are hard-coded in source; they should be
        # loaded from configuration instead.
        linkedin1 = Linkedin('*****@*****.**', 'net@telecom')
        linkprofile = linkedin1.get_profile(url11)
        print(linkprofile)

        contact = linkedin1.get_profile_contact_info(url11)
        print(contact)

        # Each scraped value is JSON-encoded individually for the template.
        scraped_name = [json.dumps(linkprofile['firstName'])]
        scraped_skills = [
            json.dumps(skill['name']) for skill in linkprofile['skills']
        ]
        scraped_experience = [
            json.dumps((ex['companyName'], ex['title']))
            for ex in linkprofile['experience']
        ]

        university1 = linkprofile['education']
        print(university1)
        schools = []
        degrees = []
        for sch in university1:
            school = sch['school']
            print(school)
            schools.append(school['schoolName'])
            degrees.append(sch['degreeName'])

        scraped_email = json.dumps(contact['email_address'])
        scraped_mobile = json.dumps(contact['phone_numbers'])
    except Exception as error:
        # Only report a connection problem when one actually happened.
        print('cannot connect')
        print(error)
    else:
        # Assign together so a partial failure never leaves mixed results.
        lname = scraped_name
        lskills = scraped_skills
        lexperience = scraped_experience
        lluniversity = json.dumps(schools)
        lldegree = json.dumps(degrees)
        llskills = json.dumps(scraped_skills)
        lemail1 = scraped_email
        lmobile = scraped_mobile

    # Default keeps the template render from failing with an unbound name
    # when no user row matches ``id``.
    email1 = ''
    mycursor.execute("SELECT email from user where id=%s;", [id])
    rows = mycursor.fetchall()
    for ele in rows:
        email1 = json.dumps(ele[0]).replace('[]', "")

    if request.method == 'POST':
        uid = id
        uemail = request.form['uemail']
        upassword = request.form['upassword']

        # NOTE(review): the password is stored as submitted and then placed
        # in the redirect URL; consider hashing it and not echoing it.
        sql2 = "INSERT INTO cv_reg (id,email,password) VALUES (%s, %s, %s)"
        val = (uid, uemail, upassword)
        mycursor.execute(sql2, val)
        mydb.commit()
        return redirect(
            url_for('clogin', id1=uid, nameu=uemail, passu=upassword))

    return render_template('linkedin.html',
                           urlx=url11,
                           skillsx=skills1,
                           namex=name1,
                           emailx=email1,
                           projectsx=projects1,
                           degreesx=degree1,
                           universityx=univercity1,
                           experiencex=experience11,
                           mobilex=phone1,
                           addressx=address1,
                           linkedinx=url11,
                           nicx=nic1,
                           namexx=lname,
                           skillsxx=llskills,
                           experiencexx=lexperience,
                           unversityxx=lluniversity,
                           degreexx=lldegree,
                           emailxx=lemail1,
                           mobilexx=lmobile,
                           aname=anna,
                           aemail=anem,
                           aaddress=anad,
                           aphone=anph,
                           anic=anni,
                           ski=lskills,
                           len=len(lskills))
示例#5
0
import sys
import json
from traitlets import link
import os
from linkedin_api import Linkedin
import pandas as pd
import time
# Log in with debug output enabled and cookies refreshed.
linkedin_api = Linkedin("*****@*****.**","password", refresh_cookies=True, debug=True)
comapany = linkedin_api.get_company(public_id="google")
# The numeric company id is taken from the last '/'-separated segment of the company URL.
comapanyid = int(comapany['url'].split('/')[len(comapany['url'].split('/')) - 1])
# NOTE(review): search_people1 is presumably a locally modified variant of
# search_people; confirm it exists in the installed client.
results = linkedin_api.search_people1(start=0,limit=10,current_company=comapanyid,regions="us:49",keywords="Software Engineer")
print(len(results))

search_results = pd.DataFrame()
for result in results:
        # Contact info is looked up by public id, the profile by URN id.
        contact_info = linkedin_api.get_profile_contact_info(public_id=result['public_id'])
        profile = linkedin_api.get_profile(urn_id=result['urn_id'])
        data_firstname = profile['firstName']
        data_lastname = profile['lastName']
        data_url = "https://www.linkedin.com/in/%s" % \
                   result['public_id']
        # Optional profile fields fall back to a single space when absent.
        data_location = profile['locationName']   if "locationName" in profile else " "
        data_country = profile['geoCountryName']   if "geoCountryName" in profile else " "
        data_jobpost = profile['headline']   if "headline" in profile else " "
        data_exp = ""
        for exp in profile['experience']:
            data_exp += "["
            # In each line below the conditional covers the whole
            # `value + separator` expression: a missing key appends " ".
            data_exp += exp['locationName']  + "|" if "locationName" in exp else " "
            data_exp += exp['companyName'] + "|" if "companyName" in exp else " "
            data_exp += str(exp['timePeriod']['startDate']['month']) + " " if "timePeriod" in exp and "startDate" in exp['timePeriod'] and "month" in exp['timePeriod']['startDate']  else " "
            data_exp += str(exp['timePeriod']['startDate']['year']) + "|" if "timePeriod" in exp and "startDate" in exp['timePeriod'] and "year" in exp['timePeriod']['startDate']  else " "
            # NOTE(review): the "[" opened above is never closed and
            # search_results is never filled in the visible code; the snippet
            # appears to be truncated here.
示例#6
0
import urllib
import requests
import json
from linkedin_api import Linkedin

if __name__ == '__main__':
    import os

    # Credentials come from the environment so no password lives in source.
    # A missing variable raises KeyError instead of attempting a None login.
    api = Linkedin(os.environ['LINKEDIN_LOGIN'],
                   os.environ['LINKEDIN_PASSWORD'])

    public_id = 'venkata-ratnadeep-suri'
    profile = api.get_profile(public_id)
    contact_info = api.get_profile_contact_info(public_id)
    # Connections are looked up by the profile's URN id (returned by
    # get_profile as 'profile_id'), not by the public id.
    connections = api.get_profile_connections(profile['profile_id'])

    # Each result goes to its own file. Previously contact info and
    # connections shared one path, so the contact info was overwritten.
    with open(public_id + '.txt', 'w') as p:
        json.dump(profile, p)

    with open(public_id + '-contact.txt', 'w') as c:
        json.dump(contact_info, c)

    with open(public_id, 'w') as connect:
        json.dump(connections, connect)
示例#7
0
import json
from linkedin_api import Linkedin

# Read the login details from credentials.json next to the script.
with open("credentials.json", "r") as f:
    credentials = json.load(f)

# A falsy credentials document skips all API work.
if credentials:
    linkedin = Linkedin(credentials["username"], credentials["password"])

    # One identifier serves both the profile and the contact-info lookup.
    _profile_ref = "ACoAABQ11fIBQLGQbB1V1XPBZJsRwfK5r1U2Rzw"
    profile = linkedin.get_profile(_profile_ref)
    contact_details = linkedin.get_profile_contact_info(_profile_ref)
    profile["contact_info"] = contact_details

    # Connections are looked up by the id returned inside the profile.
    connections = linkedin.get_profile_connections(profile["profile_id"])
示例#8
0
class Session:
    def __init__(self):
        """Set default session state, then create the GUI and load history."""
        self.version = '1.3.1'
        # credentials are filled in by sign_in(); authenticated is set to
        # True by authenticate() on a successful login
        self.username = None
        self.password = None
        self.authenticated = False

        # sheet properties
        # sheet_type is compared against 'csv' and 'excel' when reading and
        # writing the sheet
        self.sheet_path = None
        self.sheet_type = None
        self.default_sheet_type = 'excel'

        # keep track of parse counts in memory
        # both counters are incremented by store_profile(); total_parsed is
        # also reset by load_sheet_length()
        self.total_parsed = 0
        self.parsed = 0

        # additional options
        # ignore_duplicates is passed to history.add() in store_profile();
        # debug makes linkedin_api_call() use a sample profile instead of
        # calling the API
        self.log_filename = 'liscrape-log.log'
        self.ignore_duplicates = False
        self.debug = False

        # gui
        # GUI receives this session while it is still being constructed, so
        # the attributes above must already be set at this point
        self.gui = GUI(self)

        # history, load validity
        self.history = History(self)
        self.history.history = self.history.load()
        self.history.check_validity()

    def start_log(self):
        """Configure root logging to write DEBUG and above to ``self.log_filename``."""
        log_settings = {
            'filename': self.log_filename,
            'level': logging.DEBUG,
            'format': '%(asctime)s %(message)s',
            'datefmt': '%d/%m/%Y %H:%M:%S',
        }
        logging.basicConfig(**log_settings)

    def get_log_length(self):
        if not os.path.isfile(self.log_filename):
            return 0

        with open(self.log_filename, 'r') as log_file:
            return sum(1 for row in log_file)

    def load_log(self):
        if self.get_log_length() == 0:
            return '-- Log is empty --\n'

        with open(session.log_filename, 'r') as log_file:
            return log_file.read()

    def clear_log(self):
        """Delete the log file, restart logging and refresh the log widgets.

        Logging is shut down first. If that raises, the traceback is shown in
        a popup, the error is logged and nothing is removed. When there is no
        log file, a popup says so and nothing else happens.
        """
        try:
            logging.shutdown()
        except Exception as e:
            sg.popup(traceback.format_exc())
            logging.exception(f'Exception attempting to shutdown logging: {e}')
            return

        if not os.path.isfile(self.log_filename):
            sg.popup('Nothing to remove!')
            return

        os.remove(self.log_filename)
        sg.popup(f'Log file {self.log_filename} successfully removed!')

        # restart log, then refresh the length label and the output pane
        self.start_log()
        length_label = self.gui.window['log_length']
        length_label.update(f'Log file length: {self.get_log_length()} lines')
        output_pane = self.gui.window['output_window']
        output_pane.update(self.load_log())

    def remove_contacts(self):
        if os.path.isfile('linkedin_scrape.xlsx'):
            os.remove('linkedin_scrape.xlsx')
            sg.popup('Contacts file linkedin_scrape.xlsx removed!')

        if os.path.isfile('linkedin_scrape.csv'):
            os.remove('linkedin_scrape.csv')
            sg.popup('Contacts file linkedin_scrape.xlsx csv!')

    def clear_config(self):
        self.history.history = {}
        if os.path.isfile('config.json'):
            with open('config.json', 'r') as config_file:
                config = json.load(config_file)
                users = config['users']

            with open('config.json', 'w') as config_file:
                config = {'users': users, 'history': {}, 'theme': None}
                json.dump(config, config_file, indent=4)
                sg.popup('Configuration file cleared!')

    def load_sheet_length(self):
        if not os.path.isfile(self.sheet_path):
            logging.info(
                f'Sheet {self.sheet_path} does not exist: returning total_parsed=0'
            )
            self.total_parsed = 0
        else:
            logging.info(f'Sheet {self.sheet_path} exists: getting length.')
            if self.sheet_type == 'csv':
                with open(self.sheet_path, 'r') as csv_file:
                    csv_reader = csv.reader(csv_file)
                    self.total_parsed = sum(1 for row in csv_reader)
            elif self.sheet_type == 'excel':
                df = pd.read_excel(self.sheet_path)
                self.total_parsed = len(df.index)

        return self.total_parsed

    def load_configuration(self):
        if not os.path.isfile('config.json'):
            return []

        with open('config.json', 'r') as config_file:
            try:
                config = json.load(config_file)
            except Exception as error:
                logging.exception(error)
                os.remove('config.json')
                return ()

            return tuple(config['users'].keys()) if len(
                config['users'].keys()) > 0 else ()

    def load_theme(self):
        if not os.path.isfile('config.json'):
            return 'SystemDefault'

        with open('config.json', 'r') as config_file:
            try:
                config = json.load(config_file)
            except Exception as error:
                logging.exception(error)
                os.remove('config.json')
                return ()

            try:
                return config[
                    'theme'] if config['theme'] != None else 'SystemDefault'
            except KeyError:
                with open('config.json', 'r') as config_file:
                    config = json.load(config_file)

                config['theme'] = None
                with open('config.json', 'w') as config_file:
                    json.dump(config, config_file, indent=4)

                return self.load_theme()

    def save_theme(self, theme):
        if not os.path.isfile('config.json'):
            with open('config.json', 'w') as config_file:
                config = {'users': {}, 'history': {}, 'theme': None}
        else:
            with open('config.json', 'r') as config_file:
                config = json.load(config_file)

        config['theme'] = theme
        with open('config.json', 'w') as config_file:
            json.dump(config, config_file, indent=4)

        return True

    def load_password_from_config(self, username):
        with open('config.json', 'r') as config_file:
            config = json.load(config_file)

        try:
            return config['users'][username]
        except:
            sg.popup('Error finding password from configuration!',
                     title='Error',
                     keep_on_top=True)
            raise Exception('Error finding password from configuration!')

    def store_login(self, username, password):
        if not os.path.isfile('config.json'):
            with open('config.json', 'w') as config_file:
                config = {'users': {}, 'history': {}, 'theme': None}
        else:
            with open('config.json', 'r') as config_file:
                config = json.load(config_file)

        config['users'][username] = password
        with open('config.json', 'w') as config_file:
            json.dump(config, config_file, indent=4)

        return True

    def sign_in(self, username, password, remember_login, refresh_cookies):
        """Record the credentials, authenticate, and optionally save the login.

        The login is written to the config file only when authentication
        succeeded and ``remember_login`` is truthy. Returns whatever
        ``authenticate`` returned.
        """
        self.username, self.password = username, password
        auth_success = self.authenticate(refresh_cookies)

        should_store = self.authenticated and remember_login
        if should_store and self.store_login(username, password):
            print('Login stored into config file successfully!')

        return auth_success

    def authenticate(self, refresh_cookies):
        """Create the ``Linkedin`` client from the stored credentials.

        On success the client is kept in ``self.application``,
        ``self.authenticated`` becomes ``True`` and ``True`` is returned.
        On any exception the error is logged, a popup matching the failure
        is shown and ``False`` is returned.
        """
        try:
            self.application = Linkedin(self.username,
                                        self.password,
                                        debug=True,
                                        refresh_cookies=refresh_cookies)
        except Exception as error:
            logging.exception(error)

            # (marker looked up in error.args, popup text, popup title)
            known_failures = (
                ('BAD_EMAIL', 'Incorrect email: try again.',
                 'Incorrect email'),
                ('CHALLENGE', 'Error: LinkedIn requires a sign-in challenge.',
                 'Linkedin error'),
                ('Expecting value: line 1 column 1 (char 0)',
                 'Linkedin is refusing to sign in. Please try again later.',
                 'Unable to sign in'),
            )
            for marker, message, title in known_failures:
                if marker in error.args:
                    sg.popup(message, title=title, keep_on_top=True)
                    break
            else:
                # No known marker matched: show the raw details.
                sg.popup(
                    f'Error arguments: {error.args}\n{traceback.format_exc()}',
                    title='Unhandled exception',
                    keep_on_top=True)

            return False

        self.authenticated = True
        return True

    # perform the API calls
    def linkedin_api_call(self, queue, event):
        """Take profile URLs from ``queue`` and store each fetched profile.

        Loops while ``event`` is not set or ``queue`` still has items. In
        normal mode each URL costs two API requests (profile and contact
        info); in debug mode a sample profile is used instead. Every result
        is handed to ``store_profile``.

        Returns ``None`` early, ending the loop, when a profile cannot be
        loaded. A contact-info failure is logged and replaced by ``{}``.
        """
        # NOTE(review): queue.get() is called without a timeout; presumably
        # it blocks if the queue is empty while the event is still unset.
        # Confirm against the queue type the caller passes.
        while not event.is_set() or not queue.empty():
            profile_url = queue.get()
            if not self.debug:
                try:
                    # two API requests: profile and contact info
                    profile = self.application.get_profile(profile_url)
                except Exception as error:
                    logging.exception(f'Error loading profile: {error}')
                    logging.info(traceback.format_exc())
                    # a failed profile stops the loop; remaining queue items
                    # are not processed by this call
                    return None
                try:
                    contact_info = self.application.get_profile_contact_info(
                        profile_url)
                except Exception as error:
                    logging.exception(f'Error loading contact info: {error}')
                    logging.info(traceback.format_exc())
                    # the profile is still stored, just without contact details
                    contact_info = {}
            else:
                try:
                    # a sample profile for debugging purposes
                    # NOTE(review): the sample 'industryName' text below is
                    # offensive and ends up in the stored sheet in debug mode;
                    # consider replacing it with neutral placeholder text.
                    profile = {
                        'lastName': 'SquarePants',
                        'firstName': 'SpongeBob',
                        'industryName': 'Professional retard',
                        'profile_id': f'DEBUG-{random.randint(0,99999)}'
                    }
                    contact_info = {
                        'email_address': '*****@*****.**',
                        'websites': ['*****@*****.**'],
                        'twitter': '@pants',
                        'phone_numbers': ['+001']
                    }
                except Exception as error:
                    logging.exception(f'Error loading profile: {error}')
                    print(f'⛔️ Error loading profile: {error}')
                    return None

            self.store_profile(profile, contact_info)

    def store_profile(self, profile, contact_info):
        def set_diff(dict, full_set):
            '''
			Calculate the difference between the full key set and the provided key set.
			Return the keys that exist in the dictionary, so the missing ones can be
			set to Nonetypes.
			'''
            ignored_keys = {key for key in full_set if key not in dict.keys()}
            return full_set.difference(ignored_keys)

        # the full set of keys a complete profile would have
        profile_keys_full = {
            'firstName', 'lastName', 'profile_id', 'headline', 'summary',
            'industryName', 'geoCountryName', 'languages'
        }

        contact_keys_full = {'birthdate', 'email_address', 'phone_numbers'}

        # if the profile is lacking keys, replace their values with Nonetypes
        profile_keys = set_diff(profile, profile_keys_full)
        contact_keys = set_diff(contact_info, contact_keys_full)

        # map profile keys to CRM-compatible column names
        column_map = {
            'firstName': 'First name',
            'lastName': 'Last name',
            'profile_id': 'Linkedin profile ID',
            'headline': 'Linkedin headline',
            'summary': 'Linkedin summary',
            'industryName': 'Industry',
            'geoCountryName': 'Location',
            'languages': 'Languages',
            'birthdate': 'Birthday',
            'email_address': 'Email address',
            'phone_numbers': 'Phone number'
        }

        # generate the profile: this is stored later
        profile_dict = {}

        # generate the profile_dict: map API resp. keys to column names, add Nonetypes
        for key in profile_keys_full:
            if key == 'languages' and key in profile_keys:
                # languages: a list of dictionaries with name:value
                try:
                    if type(profile['languages']) == list:
                        if len(profile['languages']) != 0:
                            language_string = ''
                            for dict in profile['languages']:
                                language_string += dict['name']
                                language_string += ', '

                            profile['languages'] = language_string[0:-2]
                        else:
                            profile['languages'] = ''

                except Exception as e:
                    profile['languages'] = ''
                    logging.exception(f'Error setting language: {e}')
                    logging.info(traceback.format_exc())

            if key in profile_keys:
                profile_dict[column_map[key]] = profile[key]
            else:
                profile_dict[column_map[key]] = ''

        # same as above, but for contact keys
        for key in contact_keys_full:
            if key == 'phone_numbers' and key in contact_keys:
                try:
                    for val in contact_info['phone_numbers']:
                        if len(contact_info['phone_numbers']) > 0:
                            numbers = ''
                            for dict in contact_info['phone_numbers']:
                                numbers += dict['number']
                                numbers += f' ({dict["type"]})'
                                numbers += ', '

                    contact_info['phone_numbers'] = numbers[0:-2]
                except:
                    contact_info['phone_numbers'] = ''

            if key in contact_keys:
                profile_dict[column_map[key]] = contact_info[key]
            else:
                profile_dict[column_map[key]] = ''

        logging.info(f'profile_dict generated: {profile_dict}')

        # if this contact is not a duplicate, or we are ignoring duplicates, continue: else, return
        if not self.history.add(profile_dict['Linkedin profile ID'],
                                self.ignore_duplicates):
            #sg.popup('This profile has already been added: avoiding duplicate.', font=('Helvetica', 11), title='Duplicate', keep_on_top=True)
            print(
                f'⚠️ Duplicate detected ({profile_dict["Linkedin profile ID"]})\n'
            )
            return

        if self.sheet_type == 'csv':
            field_names = profile_dict.keys()
            if not os.path.isfile(
                    self.sheet_path) and self.sheet_type == 'csv':
                with open(self.sheet_path, 'w', newline='') as csv_file:
                    csv.DictWriter(csv_file,
                                   fieldnames=field_names).writeheader()
                    print(f'Created file: {self.sheet_path}')

            with open(self.sheet_path, 'a', newline='') as csv_file:
                csv.DictWriter(csv_file,
                               fieldnames=field_names).writerow(profile_dict)

        elif self.sheet_type == 'excel':
            # convert dictionary to a dataframe
            for key, val in profile_dict.items():
                profile_dict[key] = [val]
            try:
                df = pd.DataFrame(profile_dict, columns=column_map.values())
            except Exception as error:
                logging.exception(f'Exception creating df: {error}')
                logging.info(traceback.format_exc())

            # store (file exists)
            if os.path.isfile(self.sheet_path):
                try:
                    book = load_workbook(self.sheet_path)
                    with pd.ExcelWriter(self.sheet_path,
                                        engine='openpyxl') as writer:
                        writer.book = book
                        writer.sheets = {
                            ws.title: ws
                            for ws in book.worksheets
                        }
                        for sheetname in writer.sheets:
                            df.to_excel(
                                writer,
                                sheet_name=sheetname,
                                startrow=writer.sheets[sheetname].max_row,
                                index=False,
                                header=False)

                except Exception as e:
                    logging.exception(f'Error storing profile in file: {e}')
                    logging.info(traceback.format_exc())
            else:
                try:
                    with pd.ExcelWriter(self.sheet_path,
                                        engine='openpyxl') as writer:
                        df.to_excel(writer,
                                    sheet_name='Sheet1',
                                    index=False,
                                    header=True)

                except Exception as e:
                    logging.exception(
                        f'Error storing first profile in file: {e}')
                    logging.info(traceback.format_exc())

        print(
            f'✅ Stored profile {profile_dict["Linkedin profile ID"]} to {self.sheet_path}\n'
        )
        logging.info(
            f'Stored profile {profile_dict["Linkedin profile ID"]} to {self.sheet_path}'
        )

        self.parsed += 1
        self.total_parsed += 1