class SalesforceConnector:
    def __init__(self, **kwargs):
        self.sf_version = kwargs.get('version', '29.0')
        self.sandbox = kwargs.get('sandbox', False)
        self.proxies = kwargs.get('proxies')
        self.domain = kwargs.get('domain', None)

        try:
            credentials = [elem for elem in credential_files if os.path.exists(elem)][0]
        except IndexError:
            raise ValueError('No credentials found')
        # Load credentials from file
        if os.path.exists(credentials):
            with open(credentials, 'rb') as f:
                creds = pickle.load(f)
            username = creds['username']
            password = creds['password']
            security_token = creds['security_token']
            self.session_id, self.sf_instance = SalesforceLogin(
                username=username,
                password=password,
                security_token=security_token,
                sandbox=self.sandbox,
                sf_version=self.sf_version,
                proxies=self.proxies,
                domain=self.domain)
            print('read credentials')
        ##Read credentials from arguments
        elif 'username' in kwargs and 'password' in kwargs and 'security_token' in kwargs:
            self.auth_type = "password"
            username = kwargs['username']
            password = kwargs['password']
            security_token = kwargs['security_token']
            self.session_id, self.sf_instance = SalesforceLogin(
                username=username,
                password=password,
                security_token=security_token,
                sandbox=self.sandbox,
                sf_version=self.sf_version,
                proxies=self.proxies,
                domain=self.domain)
            self.saveLogin(username, password, security_token)
        else:
            raise TypeError(
                'You must provide login information or an instance and token'
            )
        print(self.sf_instance)
        self.bulk = SalesforceBulk(sessionId=self.session_id, host=self.sf_instance)

    def saveLogin(self, username, password, security_token):
        # Persist credentials with pickle (binary mode required)
        with open(credentials, "wb") as f:
            f.write(pickle.dumps(
                dict(password=password,
                     username=username,
                     security_token=security_token)))

    # Returns a list of dicts, one per row, mapping column_header -> row_value
    def query(self, sObject, queryString, contentType):
        job_id = self.bulk.create_query_job(sObject, contentType=contentType)
        batch_id = self.bulk.query(job_id, queryString)
        self.bulk.wait_for_batch(job_id, batch_id, timeout=120)
        self.bulk.close_job(job_id)
        print('job closed')
        result_id = self.bulk.get_batch_result_ids(batch_id, job_id)[0]
        result = [row for row in self.bulk.get_batch_results(
            batch_id=batch_id, result_id=result_id, job_id=job_id, parse_csv=True)]
        csv_dict = [dict(zip(result[0], row)) for row in result[1:]]
        return csv_dict
    
    def update(self, sObject, data, contentType):
        job_id = self.bulk.create_update_job(sObject, contentType='CSV')
        csv_iter = CsvDictsAdapter(iter(data))
        batch_id = self.bulk.post_bulk_batch(job_id, csv_iter)
        self.bulk.wait_for_batch(job_id, batch_id, timeout=120)
        self.bulk.close_job(job_id)
        print('done')
        return

    def get_query_records_dict(self, db_table, soql_query):
        """Execute bulk Salesforce soql queries and return results as generator of dictionaries.

        :param db_table: Database table name
        :param soql_query: Soql queries
        :return: If success, List of result record dictionaries; Else empty list
        """
        self.bulk = SalesforceBulk(sessionId=self.session_id,
                                   host=self.sf_instance)
        job = self.bulk.create_query_job(db_table, contentType="JSON")
        batch = self.bulk.query(job, soql_query)
        self.bulk.close_job(job)
        while not self.bulk.is_batch_done(batch):
            print("Waiting for batch query to complete")
            sleep(10)

        dict_records = []
        rec_count = 0
        print("Iterating through batch result set")
        for result in self.bulk.get_all_results_for_query_batch(batch):
            result = json.load(IteratorBytesIO(result))
            for row in result:
                rec_count += 1
                dict_records.append(row)
            print("Current fetched record count: ", rec_count)

        return dict_records
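A minimal usage sketch for the connector above, assuming the module-level credential_files list resolves to a valid pickled credentials file; the object and field names are illustrative only, not part of the original example:

# Hypothetical usage of SalesforceConnector; sObject/field names are examples.
connector = SalesforceConnector(version='39.0', sandbox=True)

# CSV bulk query returns a list of {column_header: value} dicts
contacts = connector.query('Contact', 'SELECT Id, Email FROM Contact', contentType='CSV')

# JSON bulk query via get_query_records_dict returns a list of record dicts
records = connector.get_query_records_dict('Contact', 'SELECT Id, Email FROM Contact')

# Bulk update: each dict needs the record Id plus the fields to change
updates = [{'Id': row['Id'], 'Email': row['Email'].lower()}
           for row in contacts if row.get('Email')]
connector.update('Contact', updates, contentType='CSV')
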
Example #3
 def setUp(self):
     request_patcher = mock.patch('simple_salesforce.api.requests')
     self.mockrequest = request_patcher.start()
     self.addCleanup(request_patcher.stop)
     self.sessionId = '12345'
     self.host = 'https://example.com'
     self.bulk = SalesforceBulk(self.sessionId, self.host)
Example #4
    def __init__(self, config_path):
        """
        Bootstrap a fetcher class
        :param config_path: Path to the configuration file to use for this instance
        """
        # Get settings
        with open(config_path, 'r') as f:
            self.settings = yaml.safe_load(f)

        # Configure the logger
        log_level = (logging.WARN, logging.DEBUG)[self.settings['debug']]
        LOG_FORMAT = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        logger = logging.getLogger("salesforce-fetcher")
        logger.setLevel(log_level)

        ch = logging.StreamHandler()
        ch.setFormatter(LOG_FORMAT)
        logger.addHandler(ch)

        logger.debug("Logging is set to DEBUG level")
        # let's not output the password
        #logger.debug("Settings: %s" % self.settings)

        self.logger = logger
        self.salesforce = Salesforce(**self.settings['salesforce']['auth'])
        self.salesforce_bulk = SalesforceBulk(
            **self.settings['salesforce']['auth'], API_version='46.0')

        # Make sure output dir is created
        output_directory = self.settings['output_dir']
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)
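A sketch of the settings structure the constructor above reads, written as the equivalent Python dict; the keys are inferred from this snippet and the auth values are placeholders for whatever keyword arguments Salesforce/SalesforceBulk accept:

# Hypothetical settings (normally loaded from the YAML file at config_path); values are placeholders.
settings = {
    'debug': False,
    'output_dir': '/tmp/salesforce-fetcher',
    'salesforce': {
        'auth': {
            'username': 'user@example.com',
            'password': '********',
            'security_token': '********',
        },
    },
}
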
Example #5
 def __init__(self, connector_param):
     self.connector_param = connector_param
     self.instance_url = 'https://' + connector_param.url_prefix + 'salesforce.com'
     self.token_url = 'https://' + connector_param.url_prefix + 'salesforce.com/services/oauth2/token'
     self.access_token = None
     self.access_token = self.get_token()
     self.bulk = SalesforceBulk(sessionId=self.access_token, host=urlparse(self.instance_url).hostname)
Example #6
 def __init__(self):
     if BulkHelper.__instance != None:
         raise Exception("BulkHelper class is a singleton!")
     else:
         BulkHelper.__instance = self
         self.__bulk = SalesforceBulk(username=Config.USERNAME,
                                      password=Config.PASSWORD,
                                      security_token=Config.SECURITY_TOKEN,
                                      sandbox=Config.IS_SANDBOX,
                                      API_version=Config.API_VERSION)
Example #7
    def test_upload(self):
        bulk = SalesforceBulk(self.sessionId, self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_insert_job("Contact", contentType=self.contentType)
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_ids = []
        data = [
            {
                'FirstName': 'BulkTestFirst%s' % i,
                'LastName': 'BulkLastName',
                'Phone': '555-555-5555',
            } for i in range(50)
        ]
        for i in range(2):
            content = self.generate_content(data)
            batch_id = bulk.post_batch(job_id, content)
            self.assertIsNotNone(re.match("\w+", batch_id))
            batch_ids.append(batch_id)

        bulk.close_job(job_id)

        for batch_id in batch_ids:
            bulk.wait_for_batch(job_id, batch_id, timeout=120)

        for batch_id in batch_ids:
            results = bulk.get_batch_results(batch_id)

            print(results)
            self.assertTrue(len(results) > 0)
            self.assertTrue(isinstance(results, list))
            self.assertTrue(isinstance(results[0], UploadResult))
            self.assertEqual(len(results), 50)
    def test_query_pk_chunk(self):
        bulk = SalesforceBulk(self.sessionId, self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_query_job("Contact", contentType=self.contentType, pk_chunking=True)
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        query = "Select Id,Name,Email from Contact"
        batch_id = bulk.query(job_id, query)
        self.assertIsNotNone(re.match("\w+", batch_id))

        try:
            i = 0
            while not bulk.is_batch_done(batch_id):
                print("Job not done yet...")
                print(bulk.batch_status(batch_id))
                time.sleep(2)
                i += 1
                if i == 20:
                    raise Exception
        except BulkBatchFailed as e:
            if e.state != bulk_states.NOT_PROCESSED:
                raise

        batches = bulk.get_batch_list(job_id)
        print (batches)
        batch_ids = [x['id'] for x in batches if x['state'] != bulk_states.NOT_PROCESSED]
        requests = [bulk.get_query_batch_request(x, job_id) for x in batch_ids]
        print (requests)
        for request in requests:
            self.assertTrue(request.startswith(query))

        all_results = []

        i = 0
        while not all(bulk.is_batch_done(j, job_id) for j in batch_ids):
            print("Job not done yet...")
            print(bulk.batch_status(batch_id, job_id))
            time.sleep(2)
            i += 1
            if i == 20:
                raise Exception

        for batch_id in batch_ids:
            results = bulk.get_all_results_for_query_batch(batch_id, job_id)
            for result in results:
                all_results.extend(self.parse_results(result))

            self.assertTrue(len(all_results) > 0)
            self.assertEqual(
                sorted(all_results[0].keys()),
                ['Email', 'Id', 'Name']
            )
Example #9
    def setUp(self):
        login = salesforce_oauth_request.login(
            username=USERNAME,
            password=PASSWORD,
            token=SECURITY_TOKEN,
            client_id=CONSUMER_KEY,
            client_secret=CONSUMER_SECRET,
            cache_session=False,
            sandbox=True,
        )

        self.bulk = SalesforceBulk(login['access_token'], login['endpoint'])
        self.jobs = []
Example #10
    def test_raw_query(self):
        bulk = SalesforceBulk(self.sessionId, self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_query_job("Contact")
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_id = bulk.query(job_id, "Select Id,Name,Email from Contact Limit 1000")
        self.assertIsNotNone(re.match("\w+", batch_id))

        while not bulk.is_batch_done(job_id, batch_id):
            print "Job not done yet..."
            print bulk.batch_status(job_id, batch_id)
            time.sleep(2)

        self.results = ""
        def save_results(tfile, **kwargs):
            print "in save results"
            self.results = tfile.read()

        flag = bulk.get_batch_results(job_id, batch_id, callback = save_results)
        self.assertTrue(flag)
        self.assertTrue(len(self.results) > 0)
        self.assertIn('"', self.results)
Example #11
def bulkUpdate(sObject):

    sfBulk = SalesforceBulk(username=username,
                            password=password,
                            security_token=security_token)
    job = sfBulk.create_insert_job(sObject,
                                   contentType='CSV',
                                   concurrency='Parallel')

    dir = "c:/kenandy/python/stageCSV/"
    stageCSV = dir + sObject + '.csv'
    print(stageCSV)

    with open(stageCSV) as csvfile:
        reader = csv.DictReader(csvfile)
        #print (reader.fieldnames)
        rows = []

        for row in reader:
            print("row****", dict(row))
            #print(row['Id'], row['Name'])
            # print(row['Id'], row['Name'])
            rows.append(dict(row))
            #print("rows****", rows)

        csv_iter = CsvDictsAdapter(iter(rows))
        #print("csv_iter**** ", csv_iter)
        print("rows****", rows)
        batch = sfBulk.post_batch(job, csv_iter)
        sfBulk.wait_for_batch(job, batch)
        sfBulk.close_job(job)
        print("Done. Data Uploaded.")
Example #12
def sfBulkUpdate(namespace,sObject):

    myObject =sObject
    if len(namespace) > 0:
        myObject = namespace.upper() + '__' + sObject

    stageCSV = stageCSVDir + myObject  + '_stg.csv'
    print(stageCSV)
    #print (sObject)

    sfBulk = SalesforceBulk(username=username_loc, password=password_loc, security_token=security_token_loc)
    job = sfBulk.create_insert_job(myObject, contentType='CSV', concurrency='Parallel')


    with open(stageCSV) as csvfile:
        reader = csv.DictReader(csvfile)
        #print (reader.fieldnames)
        rows = []

        for row in reader:
            print("row****", dict(row))
            rows.append(dict(row))

        csv_iter = CsvDictsAdapter(iter(rows))
        print("rows****", rows)
        batch = sfBulk.post_batch(job, csv_iter)
        sfBulk.wait_for_batch(job, batch)
        sfBulk.close_job(job)
        print("Done. Data Uploaded.")
Example #13
    def test_csv_query(self):
        bulk = SalesforceBulk(self.sessionId, self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_query_job("Account")
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_id = bulk.query(job_id, "Select Id,Name,Description from Account Limit 10000")
        self.assertIsNotNone(re.match("\w+", batch_id))
        bulk.wait_for_batch(job_id, batch_id, timeout=120)

        self.results = None
        def save_results1(rows, **kwargs):
            self.results = rows

        flag = bulk.get_batch_results(job_id, batch_id, callback = save_results1, parse_csv=True)
        self.assertTrue(flag)
        results = self.results
        self.assertTrue(len(results) > 0)
        self.assertTrue(isinstance(results,list))
        self.assertEqual(results[0], ['Id','Name','Description'])
        self.assertTrue(len(results) > 3)

        self.results = None
        self.callback_count = 0
        def save_results2(rows, **kwargs):
            self.results = rows
            print(rows)
            self.callback_count += 1

        batch = len(results) // 3
        self.callback_count = 0
        flag = bulk.get_batch_results(job_id, batch_id, callback=save_results2, parse_csv=True, batch_size=batch)
        self.assertTrue(self.callback_count >= 3)
    def test_upload(self):
        bulk = SalesforceBulk(self.sessionId, self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_insert_job("Contact", contentType=self.contentType)
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_ids = []
        data = [
            {
                'FirstName': 'BulkTestFirst%s' % i,
                'LastName': 'BulkLastName',
                'Phone': '555-555-5555',
            } for i in range(50)
        ]
        for i in range(2):
            content = self.generate_content(data)
            batch_id = bulk.post_batch(job_id, content)
            self.assertIsNotNone(re.match("\w+", batch_id))
            batch_ids.append(batch_id)

        bulk.close_job(job_id)

        for batch_id in batch_ids:
            bulk.wait_for_batch(job_id, batch_id, timeout=120)

        for batch_id in batch_ids:
            results = bulk.get_batch_results(batch_id)

            print(results)
            self.assertTrue(len(results) > 0)
            self.assertTrue(isinstance(results, list))
            self.assertTrue(isinstance(results[0], UploadResult))
            self.assertEqual(len(results), 50)
def login():
    global bulk
    logging.info('logging in...')
    # domain passed to SalesforceBulk should be 'test' or 'login' or 'something.my'
    bulk = SalesforceBulk(username=os.environ['ORG_USERNAME'], password=os.environ['ORG_PASSWORD'],
                          security_token=os.environ['ORG_SECURITY_TOKEN'], domain=os.environ['ORG_DOMAIN'])
    logging.info('login successful !')
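A hedged sketch of the environment login() above expects, based on the os.environ keys it reads; values are placeholders:

# Hypothetical environment setup for login(); values are examples only.
import os
os.environ['ORG_USERNAME'] = 'user@example.com'
os.environ['ORG_PASSWORD'] = '********'
os.environ['ORG_SECURITY_TOKEN'] = '********'
os.environ['ORG_DOMAIN'] = 'login'  # 'test' for a sandbox, or 'yourcompany.my' for My Domain
login()
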
 def setUp(self):
     request_patcher = mock.patch('simple_salesforce.api.requests')
     self.mockrequest = request_patcher.start()
     self.addCleanup(request_patcher.stop)
     self.sessionId = '12345'
     self.host = 'https://example.com'
     self.bulk = SalesforceBulk(self.sessionId, self.host)
Example #17
def _init_bulk(sf, org_config):
    from salesforce_bulk import SalesforceBulk

    return SalesforceBulk(
        host=org_config.instance_url.replace("https://", "").rstrip("/"),
        sessionId=org_config.access_token,
        API_version=sf.sf_version,
    )
Example #18
    def test_query(self):
        bulk = SalesforceBulk(self.sessionId, self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_query_job("Contact", contentType=self.contentType)
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_id = bulk.query(job_id, "Select Id,Name,Email from Contact Limit 1000")
        self.assertIsNotNone(re.match("\w+", batch_id))

        while not bulk.is_batch_done(batch_id):
            print("Job not done yet...")
            print(bulk.batch_status(batch_id))
            time.sleep(2)

        all_results = []
        results = bulk.get_all_results_for_query_batch(batch_id)
        for result in results:
            all_results.extend(self.parse_results(result))

        self.assertTrue(len(all_results) > 0)
        self.assertEqual(
            sorted(all_results[0].keys()),
            ['Email', 'Id', 'Name']
        )
Example #19
 def _init_bulk(self):
     version = self.api_version or self.project_config.project__package__api_version
     if not version:
         raise ConfigError("Cannot find Salesforce version")
     return SalesforceBulk(
         host=self.org_config.instance_url.replace("https://", "").rstrip("/"),
         sessionId=self.org_config.access_token,
         API_version=version,
     )
Example #20
    def request(self, data=()):
        # use csv iterator
        csv_iter = CsvDictsAdapter(iter(data))

        bulk = SalesforceBulk(username=self.username,
                              password=self.password,
                              organizationId=self.organizationId)

        job = bulk.create_insert_job('SamanageCMDB__AgentPost__c',
                                     contentType='CSV')
        batch = bulk.post_batch(job, csv_iter)
        bulk.wait_for_batch(job, batch)
        bulk.close_job(job)

        while not bulk.is_batch_done(batch):
            sleep(10)
    def test_query(self):
        bulk = SalesforceBulk(self.sessionId, self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_query_job("Contact", contentType=self.contentType)
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_id = bulk.query(job_id, "Select Id,Name,Email from Contact Limit 1000")
        self.assertIsNotNone(re.match("\w+", batch_id))

        while not bulk.is_batch_done(batch_id):
            print("Job not done yet...")
            print(bulk.batch_status(batch_id))
            time.sleep(2)

        all_results = []
        results = bulk.get_all_results_for_query_batch(batch_id)
        for result in results:
            all_results.extend(self.parse_results(result))

        self.assertTrue(len(all_results) > 0)
        self.assertEqual(
            sorted(all_results[0].keys()),
            ['Email', 'Id', 'Name']
        )
Example #22
    def test_csv_upload(self):
        bulk = SalesforceBulk(SALESFORCE_API_VERSION, self.sessionId,
                              self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_insert_job("Contact")
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_ids = []
        content = open("example.csv").read()
        for i in range(5):
            batch_id = bulk.query(job_id, content)
            self.assertIsNotNone(re.match("\w+", batch_id))
            batch_ids.append(batch_id)

        for batch_id in batch_ids:
            bulk.wait_for_batch(job_id, batch_id, timeout=120)

        self.results = None

        def save_results1(rows, failed, remaining):
            self.results = rows

        for batch_id in batch_ids:
            flag = bulk.get_upload_results(job_id,
                                           batch_id,
                                           callback=save_results1)
            self.assertTrue(flag)
            results = self.results
            self.assertTrue(len(results) > 0)
            self.assertTrue(isinstance(results, list))
            self.assertEqual(results[0],
                             UploadResult('Id', 'Success', 'Created', 'Error'))
            self.assertEqual(len(results), 3)

        self.results = None
        self.callback_count = 0

        def save_results2(rows, failed, remaining):
            self.results = rows
            self.callback_count += 1

        batch = len(results) // 3
        self.callback_count = 0
        flag = bulk.get_upload_results(job_id,
                                       batch_id,
                                       callback=save_results2,
                                       batch_size=batch)
        self.assertTrue(self.callback_count >= 3)
    def setUp(self):
        login = salesforce_oauth_request.login(
            username=USERNAME,
            password=PASSWORD,
            token=SECURITY_TOKEN,
            client_id=CONSUMER_KEY,
            client_secret=CONSUMER_SECRET,
            cache_session=False,
            sandbox=True,
        )

        self.bulk = SalesforceBulk(login['access_token'], login['endpoint'])
        self.jobs = []
    def setUpClass(cls):
        username = os.environ.get('SALESFORCE_BULK_TEST_USERNAME')
        password = os.environ.get('SALESFORCE_BULK_TEST_PASSWORD')
        security_token = os.environ.get('SALESFORCE_BULK_TEST_SECURITY_TOKEN')
        sandbox = os.environ.get('SALESFORCE_BULK_TEST_SANDBOX')

        if not all(x for x in [username, password, security_token]):
            raise unittest.SkipTest('Missing Configuration for logged in tests')

        sessionId, endpoint = SalesforceBulk.login_to_salesforce(
            username, password, sandbox, security_token)

        cls.endpoint = endpoint
        cls.sessionId = sessionId
Example #25
    def setUpClass(cls):
        username = os.environ.get('SALESFORCE_BULK_TEST_USERNAME')
        password = os.environ.get('SALESFORCE_BULK_TEST_PASSWORD')
        security_token = os.environ.get('SALESFORCE_BULK_TEST_SECURITY_TOKEN')
        domain = os.environ.get('SALESFORCE_BULK_TEST_DOMAIN')

        if not all(x for x in [username, password, security_token]):
            raise unittest.SkipTest('Missing Configuration for logged in tests')

        sessionId, endpoint = SalesforceBulk.login_to_salesforce(
            username, password, domain, security_token)

        cls.endpoint = endpoint
        cls.sessionId = sessionId
Example #26
 def __init__(self, username, password, security_token, sandbox=True):
     """
     :param username:
     :type username: str
     :param password:
     :type password: str
     :param security_token:
     :type security_token: str
     :param sandbox: Whether the Salesforce instance is Production or Sandbox. Default value is True (Sandbox).
     :type sandbox: bool
     """
     # Logging setup
     self.log = logging.getLogger(__name__)
     self.log.info('Signing into Salesforce.')
     try:
         self.bulk = SalesforceBulk(username=username,
                                    password=password,
                                    security_token=security_token,
                                    sandbox=sandbox)
         self.log.info(
             f'Successfully connected to Salesforce as "{username}".')
     except Exception as auth_err:
         self.log.exception(f'Failed to connect to Salesforce: {auth_err}')
         raise
    def __init__(self, **kwargs):
        self.sf_version = kwargs.get('version', '29.0')
        self.sandbox = kwargs.get('sandbox', False)
        self.proxies = kwargs.get('proxies')
        self.domain = kwargs.get('domain', None)

        try:
            credentials = [elem for elem in credential_files if os.path.exists(elem)][0]
        except IndexError:
            raise ValueError('No credentials found')
        # Load credentials from file
        if os.path.exists(credentials):
            with open(credentials, 'rb') as f:
                creds = pickle.load(f)
            username = creds['username']
            password = creds['password']
            security_token = creds['security_token']
            self.session_id, self.sf_instance = SalesforceLogin(
                username=username,
                password=password,
                security_token=security_token,
                sandbox=self.sandbox,
                sf_version=self.sf_version,
                proxies=self.proxies,
                domain=self.domain)
            print('read credentials')
        ##Read credentials from arguments
        elif 'username' in kwargs and 'password' in kwargs and 'security_token' in kwargs:
            self.auth_type = "password"
            username = kwargs['username']
            password = kwargs['password']
            security_token = kwargs['security_token']
            self.session_id, self.sf_instance = SalesforceLogin(
                username=username,
                password=password,
                security_token=security_token,
                sandbox=self.sandbox,
                sf_version=self.sf_version,
                proxies=self.proxies,
                domain=self.domain)
            self.saveLogin(username, password, security_token)
        else:
            raise TypeError(
                'You must provide login information or an instance and token'
            )
        print(self.sf_instance)
        self.bulk = SalesforceBulk(sessionId=self.session_id, host=self.sf_instance)
Example #28
def upload_table(sessionId, hostname, tablename, connection_string):
    schema, table = tablename.split('.')

    log.debug('%s, %s, %s, %s, %s, %s', sessionId, hostname, tablename, connection_string, schema, table)

    bulk = SalesforceBulk(
        sessionId=sessionId,
        host=hostname)

    engine = create_engine(connection_string)

    result = engine.execute(text('select column_name from information_schema.columns where table_name = :table and table_schema = :schema'), {'table': table, 'schema': schema})
    exclude = ['sfid', 'id', 'systemmodstamp', 'isdeleted']
    columns = [x[0] for x in result if not x[0].startswith('_') and x[0].lower() not in exclude]

    log.debug('columns: %s', columns)
    column_select = ','.join('"%s"' % x for x in columns)

    result = engine.execute('select %s from %s' % (column_select, tablename))

    dict_iter = (dict(zip(columns, row_modifier(row))) for row in result)
    dict_iter = list(dict_iter)
    log.debug('Sending rows: %s', [x['name'] for x in dict_iter])
    csv_iter = CsvDictsAdapter(iter(dict_iter))

    job = bulk.create_insert_job(table.capitalize(), contentType='CSV')
    batch = bulk.post_bulk_batch(job, csv_iter)

    bulk.wait_for_batch(job, batch)

    bulk_result = []

    def save_results(rows, failed, remaining):
        bulk_result[:] = [rows, failed, remaining]

    flag = bulk.get_upload_results(job, batch, callback=save_results)

    bulk.close_job(job)

    log.debug('results: %s, %s', flag, bulk_result)

    return bulk_result
    def test_csv_upload(self):
        bulk = SalesforceBulk(self.sessionId, self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_insert_job("Contact")
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_ids = []
        content = open("example.csv").read()
        for i in range(5):
            batch_id = bulk.query(job_id, content)
            self.assertIsNotNone(re.match("\w+", batch_id))
            batch_ids.append(batch_id)

        for batch_id in batch_ids:
            bulk.wait_for_batch(job_id, batch_id, timeout=120)

        self.results = None
        def save_results1(rows, failed, remaining):
            self.results = rows

        for batch_id in batch_ids:
            flag = bulk.get_upload_results(job_id, batch_id, callback = save_results1)
            self.assertTrue(flag)
            results = self.results
            self.assertTrue(len(results) > 0)
            self.assertTrue(isinstance(results,list))
            self.assertEqual(results[0], UploadResult('Id','Success','Created','Error'))
            self.assertEqual(len(results), 3)

        self.results = None
        self.callback_count = 0
        def save_results2(rows, failed, remaining):
            self.results = rows
            self.callback_count += 1

        batch = len(results) // 3
        self.callback_count = 0
        flag = bulk.get_upload_results(job_id, batch_id, callback = save_results2, batch_size=batch)
        self.assertTrue(self.callback_count >= 3)
Example #30
 def _init_bulk(self):
     return SalesforceBulk(
         host=self.org_config.instance_url.replace("https://",
                                                   "").rstrip("/"),
         sessionId=self.org_config.access_token,
     )
def sfBulk_Login(username, password, security_token):

    sfBulk = SalesforceBulk(username=username,
                            password=password,
                            security_token=security_token)
    return sfBulk
Example #32
import csv
from salesforce_bulk import SalesforceBulk
from salesforce_bulk import CsvDictsAdapter


username = '******'
password = '******'
security_token = 'HxK2ciSHbsjN5PvAE8psL9w9F'


bulk = SalesforceBulk(username=username, password=password, security_token=security_token)
job = bulk.create_insert_job("account", contentType='CSV', concurrency='Parallel')


rootDir = "c:/python/kenandy/stageCSV/"
objectName = "Account"
stageCSV = rootDir + objectName + '.csv'
print (stageCSV)
with open(stageCSV) as csvfile:

    reader = csv.DictReader(csvfile)
    account = [dict(Name="Account%d" % idx) for idx in range(5)]
    #disbursals = []
    #for row in reader:
    #    disbursals.append(row)
    #print (disbursals)
    print (account)
    csv_iter = CsvDictsAdapter(iter(account))
    
    #csv_iter = CsvDictsAdapter(iter(disbursals))
    batch = bulk.post_batch(job, csv_iter)
import json
from salesforce_bulk import SalesforceBulk
from salesforce_bulk.util import IteratorBytesIO
from time import sleep
from salesforce_bulk import CsvDictsAdapter
import pandas as pd
import config as cfg
#Authentication

bulk = SalesforceBulk(username=cfg.USERNAME,
                      password=cfg.PASSWORD,
                      security_token=cfg.SECURITY_KEY,
                      sandbox=True)

#Source CSV File path for Account
input_file = "/home/baadmin/NCT_ETL/input_files/pg_extract_prd/staging2_payment.csv"

#Target SFDC Object name
target_obj = "cpm__Payment__c"

# Mapping of Input csv Fields to SalesForce Fields

sf_mapping = {
    'paymentkey': 'Payment_Key__c',
    'accountkey': 'Account_key__c',
    'contactkey': 'Contact_Key__c',
    'installmentkey': 'Installment_Key__c',
    'mandatekey': 'Mandate_Key__c',
    'paymentprofilekey': 'Payment_Profile_Key__c',
    'installment': 'cpm__Installment__c',
    'paymentprofile': 'cpm__Payment_Profile__c',
Example #34
    "USERNAME": os.getenv("SALESFORCE_USERNAME"),
    "PASSWORD": os.getenv("SALESFORCE_PASSWORD"),
    "HOST": os.getenv("SALESFORCE_HOST"),
    "TOKEN": os.getenv("SALESFORCE_TOKEN"),
    "CLIENT_ID": os.getenv("SALESFORCE_CLIENT_ID"),
    "CLIENT_SECRET": os.getenv("SALESFORCE_CLIENT_SECRET"),
}

USER = SALESFORCE["USERNAME"]
PASS = SALESFORCE["PASSWORD"]
TOKEN = SALESFORCE["TOKEN"]
HOST = SALESFORCE["HOST"]

sf = Salesforce(username=USER, password=PASS, security_token=TOKEN)

bulk = SalesforceBulk(sessionId=sf.session_id, host=HOST)

job = bulk.create_query_job("Contact", contentType="CSV")

batch = bulk.query(job, query)
while not bulk.is_batch_done(job, batch):
    sleep(3)
bulk.close_job(job)

rows = bulk.get_batch_result_iter(job, batch, parse_csv=True)
bulk_email = list(rows)
email_list = []
emails_sf = [x[COMBINED_EMAIL_FIELD] for x in bulk_email]
print ("The following email addresses appear in Stripe but not Salesforce: \n")
for field in emails_sf:
    for email in field.split(","):
    "USERNAME": os.getenv('SALESFORCE_USERNAME'),
    "PASSWORD": os.getenv('SALESFORCE_PASSWORD'),
    "HOST": os.getenv("SALESFORCE_HOST"),
    "TOKEN": os.getenv("SALESFORCE_TOKEN"),
    "CLIENT_ID": os.getenv("SALESFORCE_CLIENT_ID"),
    "CLIENT_SECRET": os.getenv("SALESFORCE_CLIENT_SECRET"),
}

USER = SALESFORCE['USERNAME']
PASS = SALESFORCE['PASSWORD']
TOKEN = SALESFORCE['TOKEN']
HOST = SALESFORCE['HOST']

sf = Salesforce(username=USER, password=PASS, security_token=TOKEN)

bulk = SalesforceBulk(sessionId=sf.session_id, host=HOST)

job = bulk.create_query_job("Contact", contentType='CSV')

batch = bulk.query(job, query)
while not bulk.is_batch_done(job, batch):
    sleep(3)
bulk.close_job(job)

rows = bulk.get_batch_result_iter(job, batch, parse_csv=True)
bulk_email = list(rows)
email_list = []
emails_sf = [x[COMBINED_EMAIL_FIELD] for x in bulk_email]
print ("The following email addresses appear in Stripe but not Salesforce: \n")
for field in emails_sf:
    for email in field.split(','):
Example #36
def sf_data(query):
    """
    Get opportunity data using supplied query.
    Get account data.

    Return both as dataframes.

    """

    USER = SALESFORCE['USERNAME']
    PASS = SALESFORCE['PASSWORD']
    TOKEN = SALESFORCE['TOKEN']
    HOST = SALESFORCE['HOST']

    sf = Salesforce(username=USER, password=PASS, security_token=TOKEN)

    bulk = SalesforceBulk(sessionId=sf.session_id, host=HOST)

    print "Creating Opportunity job..."
    job = bulk.create_query_job("Opportunity", contentType='CSV')
    print "Issuing query..."

    batch = bulk.query(job, query)
    while not bulk.is_batch_done(job, batch):
        print "waiting for query to complete..."
        sleep(3)
    bulk.close_job(job)

    rows = bulk.get_batch_result_iter(job, batch, parse_csv=True)
    all_rows = list(rows)

    opps = DataFrame.from_dict(all_rows)

    job = bulk.create_query_job("Account", contentType='CSV')
    print("Creating Account job...")

    batch = bulk.query(job,
            "SELECT Id, Website, Text_For_Donor_Wall__c FROM Account")
    print "Issuing query..."
    while not bulk.is_batch_done(job, batch):
        print "waiting for query to complete..."
        sleep(3)
    bulk.close_job(job)

    rows = bulk.get_batch_result_iter(job, batch, parse_csv=True)

    accts = DataFrame.from_dict(list(rows))
    accts.rename(columns={'Id': 'AccountId'}, inplace=True)

    return opps, accts
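A hedged usage sketch for sf_data above; the SOQL string and merge key are illustrative and assume the Opportunity query selects AccountId:

# Hypothetical call to sf_data(); the query text is an example only.
opps, accts = sf_data("SELECT Id, AccountId, Amount, CloseDate FROM Opportunity WHERE IsWon = true")
merged = opps.merge(accts, on='AccountId', how='left')
print(merged.head())
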
Example #37
class SalesforceFetcher(object):
    """
    Class that encapsulates all the fetching logic for SalesForce.
    """
    def __init__(self, config_path):
        """
        Bootstrap a fetcher class
        :param config_path: Path to the configuration file to use for this instance
        """
        # Get settings
        with open(config_path, 'r') as f:
            self.settings = yaml.safe_load(f)

        # Configure the logger
        log_level = (logging.WARN, logging.DEBUG)[self.settings['debug']]
        LOG_FORMAT = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        logger = logging.getLogger("salesforce-fetcher")
        logger.setLevel(log_level)

        ch = logging.StreamHandler()
        ch.setFormatter(LOG_FORMAT)
        logger.addHandler(ch)

        logger.debug("Logging is set to DEBUG level")
        # let's not output the password
        #logger.debug("Settings: %s" % self.settings)

        self.logger = logger
        self.salesforce = Salesforce(**self.settings['salesforce']['auth'])
        self.salesforce_bulk = SalesforceBulk(
            **self.settings['salesforce']['auth'], API_version='46.0')

        # Make sure output dir is created
        output_directory = self.settings['output_dir']
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

    def fetch_all(self, fetch_only, airflow_date, fetch_method, days_lookback):
        """
        Fetch any reports or queries, writing them out as files in the output_dir
        """
        queries = self.load_queries()
        for name, query in queries.items():
            if fetch_only and name != fetch_only:
                self.logger.debug(
                    "'--fetch-only %s' specified. Skipping fetch of %s" %
                    (fetch_only, name))
                continue
            #if name == 'contacts' or name == 'opportunity':
            if fetch_method and fetch_method == 'bulk':
                self.fetch_soql_query_bulk(name, query, airflow_date)
            else:
                self.fetch_soql_query(name, query, airflow_date)

        reports = self.settings['salesforce']['reports']
        for name, report_url in reports.items():
            if fetch_only and name != fetch_only:
                self.logger.debug(
                    "'--fetch-only %s' specified. Skipping fetch of %s" %
                    (fetch_only, name))
                continue
            self.fetch_report(name, report_url, airflow_date)

        if fetch_only:
            if fetch_only == 'contact_deletes':
                self.fetch_contact_deletes(days=days_lookback,
                                           airflow_date=airflow_date)
        else:
            self.fetch_contact_deletes(days=days_lookback,
                                       airflow_date=airflow_date)

        self.logger.info("Job Completed")

    def fetch_contact_deletes(self, days=29, airflow_date=None):
        """
        Fetches all deletes from Contact for X days
        :param days: Fetch deletes from this number of days to present
        :return:
        """
        path = self.create_output_path('contact_deletes',
                                       airflow_date=airflow_date)
        end = datetime.datetime.now(
            pytz.UTC)  # we need to use UTC as salesforce API requires this!
        records = self.salesforce.Contact.deleted(
            end - datetime.timedelta(days=days), end)
        data_list = records['deletedRecords']
        if len(data_list) > 0:
            fieldnames = list(data_list[0].keys())
            with open(path, 'w') as f:
                writer = DictWriter(f,
                                    fieldnames=fieldnames,
                                    quoting=QUOTE_ALL)
                writer.writeheader()
                for delta_record in data_list:
                    writer.writerow(delta_record)

    def fetch_report(self, name, report_url, airflow_date=None):
        """
        Fetches a single prebuilt Salesforce report via an HTTP request
        :param name: Name of the report to fetch
        :param report_url: Base URL for the report
        :return:
        """

        self.logger.info("Fetching report - %s" % name)
        sf_host = self.settings['salesforce']['host']
        url = "%s%s?view=d&snip&export=1&enc=UTF-8&xf=csv" % (sf_host,
                                                              report_url)

        resp = requests.get(url,
                            headers=self.salesforce.headers,
                            cookies={'sid': self.salesforce.session_id},
                            stream=True)

        path = self.create_output_path(name, airflow_date=airflow_date)
        with open(path, 'w+') as f:
            # Write the full contents
            f.write(resp.text.replace("\"", ""))

            # Remove the Salesforce footer (last 7 lines)
            f.seek(0, os.SEEK_END)
            pos = f.tell() - 1

            count = 0
            while pos > 0 and count < 7:
                pos -= 1
                f.seek(pos, os.SEEK_SET)
                if f.read(1) == "\n":
                    count += 1

            # So long as we're not at the start of the file, delete all the characters ahead of this position
            if pos > 0:
                # preserve the last newline then truncate the file
                pos += 1
                f.seek(pos, os.SEEK_SET)
                f.truncate()

    def fetch_soql_query_bulk(self, name, query, airflow_date=None):
        self.logger.info("BULK Executing %s" % name)
        self.logger.info("BULK Query is: %s" % query)
        if name == 'contacts' or name == 'contact_updates':
            table_name = 'Contact'
        elif name == 'opportunity' or name == 'opportunity_updates':
            table_name = 'Opportunity'
        job = self.salesforce_bulk.create_query_job(table_name,
                                                    contentType='CSV',
                                                    pk_chunking=True,
                                                    concurrency='Parallel')
        self.logger.info("job: %s" % job)
        batch = self.salesforce_bulk.query(job, query)
        #        job = '7504O00000LUxuCQAT'
        #        batch = '7514O00000TvapeQAB'
        self.logger.info("Bulk batch created: %s" % batch)

        while True:
            batch_state = self.salesforce_bulk.batch_state(
                batch, job_id=job, reload=True).lower()
            if batch_state == 'notprocessed':
                self.logger.info("master batch is done")
                break
            elif batch_state == 'aborted' or batch_state == 'failed':
                self.logger.error("master batch failed")
                self.logger.error(
                    self.salesforce_bulk.batch_status(batch_id=batch,
                                                      job_id=job,
                                                      reload=True))
                raise Exception("master batch failed")
            self.logger.info("waiting for batch to be done. status=%s" %
                             batch_state)
            time.sleep(10)

        count = 0
        downloaded = {}

        pool = mp.Pool(5)

        while True:
            stats = {}
            batch_count = 0
            all_batches = self.salesforce_bulk.get_batch_list(job)
            for batch_info in all_batches:
                batch_count += 1

                batch_state = batch_info['state'].lower()
                if batch_state in stats:
                    stats[batch_state] += 1
                else:
                    stats[batch_state] = 1

                if batch_info['id'] == batch:
                    #self.logger.debug("skipping the master batch id")
                    continue
                elif batch_info['id'] in downloaded:
                    #self.logger.debug("batch %s already downloaded" % batch_info['id'])
                    continue

                if batch_state == 'completed':
                    self.logger.debug(
                        "batch %s (%s of %s)" %
                        (batch_info['id'], batch_count, len(all_batches)))

                    for result_id in self.salesforce_bulk.get_query_batch_result_ids(
                            batch_info['id'], job_id=job):
                        self.logger.debug("result_id: %s" % result_id)
                        path = self.create_output_path(
                            name, result_id, airflow_date=airflow_date)
                        pool.apply_async(
                            get_and_write_bulk_results,
                            args=(batch_info['id'], result_id, job,
                                  self.salesforce_bulk.endpoint,
                                  self.salesforce_bulk.headers(), path))

                    downloaded[batch_info['id']] = 1

                elif batch_state == 'failed':
                    downloaded[batch_info['id']] = 1
                    self.logger.error("batch %s failed!" % batch_info['id'])
                    self.logger.error(
                        self.salesforce_bulk.batch_status(
                            batch_id=batch_info['id'], job_id=job,
                            reload=True))

            if 'completed' in stats and stats['completed'] + 1 == batch_count:
                self.logger.info("all batches retrieved")
                break
            elif 'failed' in stats and stats['failed'] + 1 == batch_count:
                self.logger.error("NO batches retrieved")
                self.logger.error(
                    self.salesforce_bulk.batch_status(batch_id=batch,
                                                      job_id=job,
                                                      reload=True))
                raise Exception("NO batches retrieved")
            elif 'failed' in stats and stats['failed'] + stats[
                    'completed'] == batch_count:
                self.logger.warning("all batches WITH SOME FAILURES")
                break
            else:
                self.logger.info(stats)
                time.sleep(5)

        try:
            self.salesforce_bulk.close_job(job)
        except Exception:
            pass
        pool.close()
        pool.join()

    def fetch_soql_query(self, name, query, airflow_date=None):
        self.logger.info("Executing %s" % name)
        self.logger.info("Query is: %s" % query)
        path = self.create_output_path(name, airflow_date=airflow_date)
        result = self.salesforce.query(query)
        self.logger.info("First result set received")
        batch = 0
        count = 0
        if result['records']:
            fieldnames = list(result['records'][0].keys())
            fieldnames.pop(0)  # get rid of attributes
            with open(path, 'w') as f:
                writer = DictWriter(f,
                                    fieldnames=fieldnames,
                                    quoting=QUOTE_ALL)
                writer.writeheader()

                while True:
                    batch += 1
                    for row in result['records']:
                        # each row has a strange attributes key we don't want
                        row.pop('attributes', None)
                        out_dict = {}
                        for key, value in row.items():
                            if type(value) is collections.OrderedDict:
                                out_dict[key] = json.dumps(value)
                            else:
                                out_dict[key] = value
                        writer.writerow(out_dict)
                        count += 1
                        if count % 100000 == 0:
                            self.logger.debug("%s rows fetched" % count)

                    # fetch next batch if we're not done else break out of loop
                    if not result['done']:
                        result = self.salesforce.query_more(
                            result['nextRecordsUrl'], True)
                    else:
                        break

        else:
            self.logger.warn("No results returned for %s" % name)

    def create_output_path(self, name, filename='output', airflow_date=None):
        output_dir = self.settings['output_dir']
        if airflow_date:
            date = airflow_date
        else:
            date = time.strftime("%Y-%m-%d")
        child_dir = os.path.join(output_dir, name, date)
        if not os.path.exists(child_dir):
            os.makedirs(child_dir)

        filename = filename + ".csv"
        file_path = os.path.join(child_dir, filename)
        self.logger.info("Writing output to %s" % file_path)
        return file_path

    def create_custom_query(self,
                            table_name='Contact',
                            dir='/usr/local/salesforce_fetcher/queries',
                            updates_only=False):
        """
        The intention is to have Travis upload the "contact_fields.yaml" file
        to a bucket where it can be pulled down dynamically by this script
        and others (instead of having to rebuild the image on each change)
        """

        fields_file_name = table_name.lower() + '_fields.yaml'
        fields_file = os.path.join(dir, fields_file_name)
        if not os.path.exists(fields_file):
            return
        with open(fields_file, 'r') as stream:
            columns = yaml.safe_load(stream)

        query = "SELECT "
        for field in columns['fields']:
            query += next(iter(field)) + ', '

        query = query[:-2] + " FROM " + table_name
        if updates_only:
            query += " WHERE LastModifiedDate >= LAST_N_DAYS:3"

        return query

    def load_queries(self):
        """
        load queries from an external directory
        :return: a dict containing all the SOQL queries to be executed
        """
        queries = {}

        query_dir = self.settings['salesforce']['query_dir']
        for file in os.listdir(query_dir):
            if file.endswith(".soql"):
                name, ext = os.path.splitext(file)
                query_file = os.path.join(query_dir, file)
                with open(query_file, 'r') as f:
                    queries[name] = f.read().strip().replace('\n', ' ')

        # explicitly add the non-file queries
        queries['contacts'] = self.create_custom_query(table_name='Contact',
                                                       dir=query_dir)
        queries['contact_updates'] = self.create_custom_query(
            table_name='Contact', dir=query_dir, updates_only=True)
        queries['opportunity'] = self.create_custom_query(
            table_name='Opportunity', dir=query_dir)
        queries['opportunity_updates'] = self.create_custom_query(
            table_name='Opportunity', dir=query_dir, updates_only=True)

        return queries
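A hedged driver sketch for the SalesforceFetcher class above; the config path and argument values are illustrative and follow the fetch_all signature shown earlier:

# Hypothetical driver for SalesforceFetcher; path and arguments are examples only.
fetcher = SalesforceFetcher('/etc/salesforce-fetcher/config.yaml')
fetcher.fetch_all(fetch_only=None, airflow_date='2021-01-01',
                  fetch_method='bulk', days_lookback=29)
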
def load_records(test=False, target="Contact1000__c", count=10, batch_size=100000,
                username="******", password=None, token=None,
                sessionId=None, endpoint=None,
                return_records=False, field_spec = None):
    if not test:
        if username and password:
            sf = SalesforceBatch(username=username, password=password, token=token)
        else:
            sf = SalesforceBatch(sessionId=sessionId, endpoint=endpoint)

        user_ids = [r.Id for r in sf.query_salesforce("User", ["Id"], where="ReceivesAdminInfoEmails=true", limit=20).records]
        print "User ids: " + str(user_ids)

        bulk = SalesforceBulk(sessionId=sf.sessionId, host=sf.host)

        job = bulk.create_insert_job(target, concurrency="Parallel")
    else:
        user_ids = [1, 2, 3]

    record_generator.define_lookup("UserId", random_choices=user_ids)
    record_generator.define_lookup("Industry", random_choices=["Finance","Agriculture","Technology","Banking","Chemicals"])
    record_generator.define_lookup("account_type", random_choices=["Analyst","Competitor","Customer","Integrator","Partner"])


    global indexer
    indexer = 0

    def gen_index():
        global indexer
        indexer += 1
        return "{0} {1}".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), indexer)

    record_generator.define_lookup("counter", callable=gen_index)

    output = open("records_{0}.json".format(target), "a")

    total = count
    batches = []
    all_records = []

    while count > 0:
        if field_spec:
            records = record_generator.mock_records(field_spec, count=batch_size)
        else:
            if 'Contact' in target:
                records = gen_Contact(min(count,batch_size))
            else:
                records = gen_Account(min(count,batch_size))
        if test:
            return list(records)

        print "Made batch of size {}".format(batch_size)

        if return_records:
            records = list(records)
            all_records += records
            records = iter(records)

        if total < 1000:
            # Use SOAP
            sf.insert_salesforce(target, records)
            count = 0
        else:
            csv_gen = CsvDictsAdapter(records)

            print "Posting batch to BULK API"
            batch = bulk.post_bulk_batch(job, csv_gen)
            print "Posted: %s" % batch
            batches.append(batch)
            count -= batch_size

        for r in records:
            output.write(json.dumps(r))
            output.write("\n")

    for b in batches:
        print "Waiting for %s" % b
        bulk.wait_for_batch(job, b)

    bulk.close_job(job)

    print "DONE!"
    if return_records:
        return all_records
Example #39
def sf_Bulk_Login(username, password,security_token):

    sfBulk = SalesforceBulk(username=username, password=password,security_token=security_token)
    print('login successful')
    return sfBulk
Example #40
class RESTConnector:
    def __init__(self, connector_param):
        self.connector_param = connector_param
        self.instance_url = 'https://' + connector_param.url_prefix + 'salesforce.com'
        self.token_url = 'https://' + connector_param.url_prefix + 'salesforce.com/services/oauth2/token'
        self.access_token = None
        self.get_token()
        self.bulk = SalesforceBulk(sessionId=self.access_token,
                                   host=urlparse(self.instance_url).hostname)

    def check_token(self):
        try:
            job = self.bulk.create_query_job('Account', contentType='CSV')
            test_query = 'SELECT ID FROM Account LIMIT 1'
            batch = self.bulk.query(job, test_query)
            self.connector_wait(job, batch, 'Query done')
            self.bulk.close_job(job)
            return True
        except Exception:
            return False

    def get_token(self):
        if self.access_token == None:
            cached_token = self.get_cached_token()
            if cached_token:
                self.access_token = cached_token
                if not self.check_token():
                    self.get_oauth2_token()
            else:
                self.get_oauth2_token()
        else:
            self.get_oauth2_token()
        return self.access_token

    def get_oauth2_token(self):
        req_param = {
            'grant_type': 'password',
            'client_id': self.connector_param.consumer_key,
            'client_secret': self.connector_param.consumer_secret,
            'username': self.connector_param.username,
            'password': self.connector_param.password
        }
        result = requests.post(
            self.token_url,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data=req_param)
        result_dict = loads(result.content)
        if 'access_token' in result_dict.keys():
            self.access_token = result_dict['access_token']
            self.save_token()
            return result_dict['access_token']
        else:
            print(result_dict)
            return None

    def get_cached_token(self):
        try:
            tokens_dict = load(open(session_file, 'r'))
        except:
            return None
        if self.connector_param.username in tokens_dict.keys():
            return tokens_dict[self.connector_param.username]
        else:
            return None

    def save_token(self):
        tokens_dict = {}
        try:
            tokens_dict = load(open(session_file, 'r'))
        except:
            pass
        tokens_dict[self.connector_param.username] = self.access_token
        dump(tokens_dict, open(session_file, 'w'))

    def remove_token(self):
        tokens_dict = load(open(session_file, 'r'))
        tokens_dict.pop(self.connector_param.username, None)
        dump(tokens_dict, open(session_file, 'w'))

    def bulk_load(self, object, soql, header_columns=None, csv_file=None):
        try:
            job = self.bulk.create_query_job(object, contentType='CSV')
        except:
            self.access_token = None
            self.get_oauth2_token()
            job = self.bulk.create_query_job(object, contentType='CSV')
        batch = self.bulk.query(job, soql)
        self.connector_wait(job, batch, 'Query done')
        self.bulk.close_job(job)

        if csv_file:
            open_mode = 'w'
            with open(csv_file, open_mode) as f_csv:
                writer = csv.DictWriter(f_csv, fieldnames=header_columns)
                writer.writeheader()
                for row in self.bulk.get_batch_result_iter(job,
                                                           batch,
                                                           parse_csv=True):
                    writer.writerow(row)
        else:
            data = []
            for row in self.bulk.get_batch_result_iter(job,
                                                       batch,
                                                       parse_csv=True):
                data.append(row)
            return data

    def bulk_insert(self, object, data):
        job = self.bulk.create_insert_job(object, contentType='CSV')
        csv_iter = CsvDictsAdapter(iter(data))
        batch = self.bulk.post_bulk_batch(job, csv_iter)
        self.connector_wait(job, batch, 'bulk insert done')
        # Does not work as-is; should return the Ids of the created records
        # res = self.bulk.get_batch_result_iter(job, batch, parse_csv=False)
        self.bulk.close_job(job)

    def bulk_update(self, object, data):
        job = self.bulk.create_update_job(object, contentType='CSV')
        csv_iter = CsvDictsAdapter(iter(data))
        batch = self.bulk.post_bulk_batch(job, csv_iter)
        self.connector_wait(job, batch, 'bulk update done')
        # Does not work as-is; should return the Ids of the updated records
        self.bulk.close_job(job)
        rows = []
        for row in self.get_batch_result_iter(job, batch, parse_csv=False):
            rows.append(row)
        return rows

    def bulk_delete(self, object, where):
        delete_job = self.bulk.create_delete_job(object_name=object)
        delete_batch = self.bulk.bulk_delete(delete_job, object, where)
        self.bulk.wait_for_batch(delete_job, delete_batch)
        print('deletion done')

    def bulk_upsert(self, object, external_id_name, data):
        job = self.bulk.create_upsert_job(object_name=object,
                                          external_id_name=external_id_name)
        csv_iter = CsvDictsAdapter(iter(data))
        batch = self.bulk.post_bulk_batch(job, csv_iter)
        self.connector_wait(job, batch, 'upserting done')
        self.bulk.close_job(job)
        rows = []
        for row in self.get_batch_result_iter(job, batch, parse_csv=False):
            rows.append(row)
        return rows

    def connector_wait(self, job, batch, ending_message=''):
        wait_message = 'Wait for job done'
        clock = 0
        while True:
            if clock == 10:
                clock = 0
                if self.bulk.is_batch_done(job, batch):
                    break
            sleep(0.5)
            clock = clock + 1
            spin(wait_message)
        print('\r' + ending_message.ljust(
            len(ending_message) if len(ending_message) > len(wait_message) +
            4 else len(wait_message) + 4))
        self.bulk.wait_for_batch(job, batch)

    def get_batch_result_iter(self,
                              job_id,
                              batch_id,
                              parse_csv=False,
                              logger=None):
        """

        **** This code snippet was taken from salesforce bulk library ****

        Return a line interator over the contents of a batch result document. If
        csv=True then parses the first line as the csv header and the iterator
        returns dicts.
        """
        status = self.bulk.batch_status(job_id, batch_id)
        if status['state'] != 'Completed':
            return None
        elif logger:
            if 'numberRecordsProcessed' in status:
                logger("Bulk batch %d processed %s records" %
                       (batch_id, status['numberRecordsProcessed']))
            if 'numberRecordsFailed' in status:
                failed = int(status['numberRecordsFailed'])
                if failed > 0:
                    logger("Bulk batch %d had %d failed records" %
                           (batch_id, failed))
        print(self.bulk.headers())
        uri = self.bulk.endpoint + \
            "/job/%s/batch/%s/result" % (job_id, batch_id)
        r = requests.get(uri, headers=self.bulk.headers(), stream=True)

        # print(type(r))
        # print(r.text)
        # print(r.keys())
        # result_id = r.text.split("<result>")[1].split("</result>")[0]

        # uri = self.bulk.endpoint + \
        #     "/job/%s/batch/%s/result/%s" % (job_id, batch_id, result_id)
        # r = requests.get(uri, headers=self.bulk.headers(), stream=True)

        if parse_csv:
            return csv.DictReader(r.iter_lines(chunk_size=2048),
                                  delimiter=",",
                                  quotechar='"')
        else:
            return r.iter_lines(chunk_size=2048)
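
A minimal usage sketch for RESTConnector; the ConnectorParam container and all credential values are assumptions for illustration, and the snippet relies on the same module-level names (session_file, load, dump, requests) that the class itself uses:

from collections import namedtuple

# Hypothetical parameter container -- the field names match the attributes the class reads
ConnectorParam = namedtuple(
    'ConnectorParam',
    'url_prefix username password consumer_key consumer_secret')

params = ConnectorParam(
    url_prefix='yourInstance.my.',   # prefix so both the token URL and the Bulk host resolve
    username='user@example.com',
    password='password',             # the password grant may also need the security token appended
    consumer_key='<connected app consumer key>',
    consumer_secret='<connected app consumer secret>')

connector = RESTConnector(params)
accounts = connector.bulk_load('Account', 'SELECT Id, Name FROM Account LIMIT 10')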
from salesforce_bulk import SalesforceBulk, CsvDictsAdapter
import json

with open('data.json', 'r') as myfile:
    data = json.loads(myfile.read())

username = data["user"]
password = data["password"]
instance = data["instance"]
security_token = data["token"]

try:
    bulk = SalesforceBulk(username=username,
                          password=password,
                          security_token=security_token)
    job = bulk.create_insert_job("Account", contentType='CSV')
    accounts = [dict(Name="Account%d" % idx) for idx in range(5, 10)]
    csv_iter = CsvDictsAdapter(iter(accounts))
    batch = bulk.post_batch(job, csv_iter)
    bulk.wait_for_batch(job, batch)
    bulk.close_job(job)
    result = bulk.get_batch_results(batch, job)
    jsonString = json.dumps(result)
    print(jsonString)
except Exception as e:
    print(e)
class SalesforceBulkTestCase(unittest.TestCase):

    def setUp(self):
        login = salesforce_oauth_request.login(
            username=USERNAME,
            password=PASSWORD,
            token=SECURITY_TOKEN,
            client_id=CONSUMER_KEY,
            client_secret=CONSUMER_SECRET,
            cache_session=False,
            sandbox=True,
        )

        self.bulk = SalesforceBulk(login['access_token'], login['endpoint'])
        self.jobs = []

    def tearDown(self):
        if hasattr(self, 'bulk'):
            for job_id in self.jobs:
                print "Closing job: %s" % job_id
                self.bulk.close_job(job_id)

    def test_raw_query(self):
        job_id = self.bulk.create_query_job("Contact")
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_id = self.bulk.query(job_id, "Select Id,Name,Email from Contact Limit 1000")
        self.assertIsNotNone(re.match("\w+", batch_id))

        while not self.bulk.is_batch_done(job_id, batch_id):
            print "Job not done yet..."
            print self.bulk.batch_status(job_id, batch_id)
            time.sleep(2)

        self.results = ""
        def save_results(tfile, **kwargs):
            print "in save results"
            self.results = tfile.read()

        flag = self.bulk.get_batch_results(job_id, batch_id, callback = save_results)
        self.assertTrue(flag)
        self.assertTrue(len(self.results) > 0)
        self.assertIn('"', self.results)


    def test_csv_query(self):
        job_id = self.bulk.create_query_job("Account")
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_id = self.bulk.query(job_id, "Select Id,Name,Description from Account Limit 10000")
        self.assertIsNotNone(re.match("\w+", batch_id))
        self.bulk.wait_for_batch(job_id, batch_id, timeout=120)

        self.results = None
        def save_results1(rows, **kwargs):
            self.results = rows

        flag = self.bulk.get_batch_results(job_id, batch_id, callback = save_results1, parse_csv=True)
        self.assertTrue(flag)
        results = self.results
        self.assertTrue(len(results) > 0)
        self.assertTrue(isinstance(results,list))
        self.assertEqual(results[0], ['Id','Name','Description'])
        self.assertTrue(len(results) > 3)

        self.results = None
        self.callback_count = 0
        def save_results2(rows, **kwargs):
            self.results = rows
            print rows
            self.callback_count += 1

        batch = len(results) / 3
        self.callback_count = 0
        flag = self.bulk.get_batch_results(job_id, batch_id, callback = save_results2, parse_csv=True, batch_size=batch)
        self.assertTrue(self.callback_count >= 3)


    def test_csv_upload(self):
        job_id = self.bulk.create_insert_job("Contact")
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_ids = []
        content = open("example.csv").read()
        for i in range(5):
            batch_id = self.bulk.query(job_id, content)
            self.assertIsNotNone(re.match("\w+", batch_id))
            batch_ids.append(batch_id)

        for batch_id in batch_ids:
            self.bulk.wait_for_batch(job_id, batch_id, timeout=120)

        self.results = None
        def save_results1(rows, failed, remaining):
            self.results = rows

        for batch_id in batch_ids:
            flag = self.bulk.get_upload_results(job_id, batch_id, callback = save_results1)
            self.assertTrue(flag)
            results = self.results
            self.assertTrue(len(results) > 0)
            self.assertTrue(isinstance(results,list))
            self.assertEqual(results[0], UploadResult('Id','Success','Created','Error'))
            self.assertEqual(len(results), 3)

        self.results = None
        self.callback_count = 0
        def save_results2(rows, failed, remaining):
            self.results = rows
            self.callback_count += 1

        batch = len(results) / 3
        self.callback_count = 0
        flag = self.bulk.get_upload_results(job_id, batch_id, callback = save_results2, batch_size=batch)
        self.assertTrue(self.callback_count >= 3)
Example #43
	with open(file_name, 'w+', encoding='utf-8') as csv_file:
		write_header = csv.writer(csv_file)
		write_header.writerow(config.csv_header)

	# Create the time_log file that will be used for the daily delta date comparison
	time_log = open("run_time.txt", "a")
	time_log.write(run_time + "\n")

	# Create the log file and write the time the program is run
	log = open("log.txt", "a")
	log.write("\n" + "|---------------------------------------|" + "\n") 
	log.write("PROGRAM STARTED: "),log.write(datetime.now().ctime())
	log.write("\n" + "|---------------------------------------|" + "\n")

	# Set the Salesforce username, password, and token
	sf = SalesforceBulk(username=config.salesforce["username"], password=config.salesforce["password"],
	sandbox=True, security_token=config.salesforce["token"])

	try:
		# Set the sftp hostkeys (if any)
		cnopts = pysftp.CnOpts()
		cnopts.hostkeys = None
	except Exception:
		cnopts = None

	# Set the sftp host, username, and password (optional parameter: port="22")
	sftp = pysftp.Connection(host=config.sftp["host"], username=config.sftp["username"],
	password=config.sftp["password"], cnopts=cnopts)

	# Build a dynamic User list, format the string, and create a variable that can be used in the SOQL filter
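The original example is truncated at this point; the sketch below is a hedged guess at what the comment describes -- building a quoted, comma-separated list of User Ids (the user_ids values and the Account query are placeholders) that can be dropped into a SOQL IN (...) filter:

	# Hypothetical User Ids -- in the real script these would come from a query or config
	user_ids = ["005000000000001AAA", "005000000000002AAA"]

	# Quote each Id and join them so the result can be used inside an IN (...) clause
	user_filter = ",".join("'%s'" % uid for uid in user_ids)
	soql = "SELECT Id, Name FROM Account WHERE OwnerId IN (%s)" % user_filter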
    def test_upload_with_mapping_file(self):
        if self.contentType != 'CSV':
            print('Mapping file can only be used with CSV content')
            return
        bulk = SalesforceBulk(self.sessionId, self.endpoint)
        self.bulk = bulk

        job_id = bulk.create_insert_job("Contact", contentType=self.contentType)
        self.jobs.append(job_id)
        self.assertIsNotNone(re.match("\w+", job_id))

        batch_ids = []
        data = [
            {
                'Not FirstName': 'BulkTestFirst%s' % i,
                'Arbitrary Field': 'BulkLastName',
                'Phone': '555-555-5555',
            } for i in range(50)
        ]

        mapping_data = [
            {
                "Salesforce Field": "FirstName",
                "Csv Header": "NotFirstName",
                "Value": "",
                "Hint": ""
            },
            {
                "Salesforce Field": "Phone",
                "Csv Header": "Phone",
                "Value": "",
                "Hint": ""
            },
            {
                "Salesforce Field": "LastName",
                "Csv Header": "Arbitrary Field",
                "Value": "",
                "Hint": ""
            }
        ]
        mapping_data = self.generate_content(mapping_data)

        bulk.post_mapping_file(job_id,mapping_data)
        for i in range(2):
            content = self.generate_content(data)
            batch_id = bulk.post_batch(job_id, content)
            self.assertIsNotNone(re.match("\w+", batch_id))
            batch_ids.append(batch_id)

        bulk.close_job(job_id)

        for batch_id in batch_ids:
            bulk.wait_for_batch(job_id, batch_id, timeout=120)

        for batch_id in batch_ids:
            results = bulk.get_batch_results(batch_id)

            print(results)
            self.assertTrue(len(results) > 0)
            self.assertTrue(isinstance(results, list))
            self.assertTrue(isinstance(results[0], UploadResult))
            self.assertEqual(len(results), 50)
Example #45
# ID Extraction from Salesforce and saving to local
import json
from salesforce_bulk import SalesforceBulk
from salesforce_bulk.util import IteratorBytesIO
from time import sleep
from salesforce_bulk import CsvDictsAdapter
import pandas as pd
import unicodecsv
import config as cfg
#Authentication

bulk = SalesforceBulk(username=cfg.USERNAME,
                      password=cfg.PASSWORD,
                      security_token=cfg.SECURITY_KEY,
                      sandbox=True)

#Source CSV File path for Account
input_file = "/home/baadmin/NCT_ETL/input_files/pg_extract_prd/InstallmentId_sf.csv"

#Target SFDC Object name
target_obj = "cpm__Installment__c"

# Salesforce fields corresponding to the input CSV columns

sf_fields = ['Contact_Key__c', 'cpm__Contact__c', 'Installment_Key__c', 'Id']

# Extract the data from salesforce and save it to csv

job = bulk.create_query_job(target_obj, contentType='CSV')
sql = "SELECT " + ",".join(sf_fields) + " FROM " + target_obj
batch = bulk.query(job, sql)
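
The example stops before the batch is actually read back; a minimal continuation sketch, assuming a salesforce-bulk release that provides get_all_results_for_query_batch (the output path is a placeholder):

bulk.close_job(job)

# Poll until the query batch finishes
while not bulk.is_batch_done(batch):
    sleep(10)

# Stream the result chunks and write them to a local CSV file
output_file = "installment_extract.csv"  # placeholder path
with open(output_file, "wb") as out:
    writer = None
    for result in bulk.get_all_results_for_query_batch(batch, job):
        reader = unicodecsv.DictReader(result, encoding="utf-8")
        for row in reader:
            if writer is None:
                writer = unicodecsv.DictWriter(out, fieldnames=reader.fieldnames, encoding="utf-8")
                writer.writeheader()
            writer.writerow(row)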
class SalesforceBulkTests(unittest.TestCase):

    def setUp(self):
        request_patcher = mock.patch('simple_salesforce.api.requests')
        self.mockrequest = request_patcher.start()
        self.addCleanup(request_patcher.stop)
        self.sessionId = '12345'
        self.host = 'https://example.com'
        self.bulk = SalesforceBulk(self.sessionId, self.host)

    def test_headers_default(self):
        self.assertEqual(
            self.bulk.headers(),
            {
                'X-SFDC-Session': self.sessionId,
                'Content-Type': 'application/xml; charset=UTF-8',
                'Accept-Encoding': 'gzip',
            }
        )

    def test_headers_json(self):
        self.assertEqual(
            self.bulk.headers(content_type='application/json'),
            {
                'X-SFDC-Session': self.sessionId,
                'Content-Type': 'application/json; charset=UTF-8',
                'Accept-Encoding': 'gzip',
            }
        )

    def test_create_job_doc(self):
        doc = self.bulk.create_job_doc(
            'Contact', 'insert'
        )
        tree = ET.fromstring(doc)

        operation = tree.findtext('{%s}operation' % self.bulk.jobNS)
        self.assertEqual(operation, 'insert')

        obj = tree.findtext('{%s}object' % self.bulk.jobNS)
        self.assertEqual(obj, 'Contact')

        contentType = tree.findtext('{%s}contentType' % self.bulk.jobNS)
        self.assertEqual(contentType, 'CSV')

        concurrencyMode = tree.findtext('{%s}concurrencyMode' % self.bulk.jobNS)
        self.assertIsNone(concurrencyMode)

        extIdField = tree.findtext('{%s}externalIdFieldName' % self.bulk.jobNS)
        self.assertIsNone(extIdField)

    def test_create_job_doc_concurrency(self):
        doc = self.bulk.create_job_doc(
            'Contact', 'insert', concurrency='Serial'
        )
        tree = ET.fromstring(doc)

        operation = tree.findtext('{%s}operation' % self.bulk.jobNS)
        self.assertEqual(operation, 'insert')

        obj = tree.findtext('{%s}object' % self.bulk.jobNS)
        self.assertEqual(obj, 'Contact')

        contentType = tree.findtext('{%s}contentType' % self.bulk.jobNS)
        self.assertEqual(contentType, 'CSV')

        concurrencyMode = tree.findtext('{%s}concurrencyMode' % self.bulk.jobNS)
        self.assertEqual(concurrencyMode, 'Serial')

        extIdField = tree.findtext('{%s}externalIdFieldName' % self.bulk.jobNS)
        self.assertIsNone(extIdField)

    def test_create_job_doc_external_id(self):
        doc = self.bulk.create_job_doc(
            'Contact', 'upsert', external_id_name='ext_id__c'
        )
        tree = ET.fromstring(doc)

        operation = tree.findtext('{%s}operation' % self.bulk.jobNS)
        self.assertEqual(operation, 'upsert')

        obj = tree.findtext('{%s}object' % self.bulk.jobNS)
        self.assertEqual(obj, 'Contact')

        contentType = tree.findtext('{%s}contentType' % self.bulk.jobNS)
        self.assertEqual(contentType, 'CSV')

        concurrencyMode = tree.findtext('{%s}concurrencyMode' % self.bulk.jobNS)
        self.assertIsNone(concurrencyMode)

        extIdField = tree.findtext('{%s}externalIdFieldName' % self.bulk.jobNS)
        self.assertEqual(extIdField, 'ext_id__c')

    def test_create_job_doc_json(self):
        doc = self.bulk.create_job_doc(
            'Contact', 'insert', contentType='JSON'
        )
        tree = ET.fromstring(doc)

        operation = tree.findtext('{%s}operation' % self.bulk.jobNS)
        self.assertEqual(operation, 'insert')

        obj = tree.findtext('{%s}object' % self.bulk.jobNS)
        self.assertEqual(obj, 'Contact')

        contentType = tree.findtext('{%s}contentType' % self.bulk.jobNS)
        self.assertEqual(contentType, 'JSON')

        concurrencyMode = tree.findtext('{%s}concurrencyMode' % self.bulk.jobNS)
        self.assertIsNone(concurrencyMode)

        extIdField = tree.findtext('{%s}externalIdFieldName' % self.bulk.jobNS)
        self.assertIsNone(extIdField)

    def test_create_close_job_doc(self):
        doc = self.bulk.create_close_job_doc()
        tree = ET.fromstring(doc)

        state = tree.findtext('{%s}state' % self.bulk.jobNS)
        self.assertEqual(state, 'Closed')

    def test_create_abort_job_doc(self):
        doc = self.bulk.create_abort_job_doc()
        tree = ET.fromstring(doc)

        state = tree.findtext('{%s}state' % self.bulk.jobNS)
        self.assertEqual(state, 'Aborted')

    def test_pickle_roundtrip_bulk_api_error_no_status(self):
        s = pickle.dumps(BulkApiError('message'))
        e = pickle.loads(s)
        assert e.__class__ is BulkApiError
        assert e.args[0] == 'message'
        assert e.status_code is None

    def test_pickle_roundtrip_bulk_api_error_with_status_code(self):
        s = pickle.dumps(BulkApiError('message', 400))
        e = pickle.loads(s)
        assert e.__class__ is BulkApiError
        assert e.args[0] == 'message'
        assert e.status_code == 400

    def test_pickle_roundtrip_bulk_job_aborted(self):
        orig = BulkJobAborted('sfid1234')
        s = pickle.dumps(orig)
        e = pickle.loads(s)
        assert e.__class__ is BulkJobAborted
        assert e.job_id == 'sfid1234'
        assert 'sfid1234' in e.args[0]
        assert e.args[0] == orig.args[0]

    def test_pickle_roundtrip_bulk_batch_failed(self):
        orig = BulkBatchFailed('sfid1234', 'sfid5678', 'some thing happened')
        s = pickle.dumps(orig)
        e = pickle.loads(s)
        assert e.__class__ is BulkBatchFailed
        assert e.job_id == 'sfid1234'
        assert e.batch_id == 'sfid5678'
        assert e.state_message == 'some thing happened'
        assert 'sfid1234' in e.args[0]
        assert 'sfid5678' in e.args[0]
        assert 'some thing happened' in e.args[0]
        assert orig.args[0] == e.args[0]