import signal
import unittest
from datetime import timedelta

from app.job import Job, signal_handler


class TestTypeImport(unittest.TestCase):
    """ Test TypeImport """

    def setUp(self):
        self.interval = 1
        self.periodic = ''
        self.execute = True
        self.job = Job(interval=timedelta(seconds=self.interval),
                       execute=self.periodically)

    def test_run(self):
        self.job.start()
        with self.assertRaises(Exception):
            # the module-level signal_handler raises ProgramKilled
            signal_handler(signal.SIGTERM, None)
        self.job.stop()

    def periodically(self):
        self.periodic = 'run test'
        return 1

    def periodically_error(self):
        raise TypeError('lets see if this works')

    def test_job_error(self):
        job = Job(interval=timedelta(seconds=self.interval),
                  execute=self.periodically_error)
        job.start()
        signal.signal(signal.SIGTERM, signal_handler)
        signal.signal(signal.SIGINT, signal_handler)
        with self.assertRaises(Exception):
            self.periodically_error()
        job.stop()
def get_job_by_id(self, _id: str) -> Job:
    """
    Returns the job matching the given id.

    :param _id: id as str
    :return: job as Job object
    """
    _job = list(filter(lambda x: x["id"] == _id, self.jobs))
    if len(_job) != 1:
        raise Exception(f"ERROR: expected exactly one job with id {_id}, found {len(_job)}")
    return Job(_job[0])
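A hedged usage sketch, assuming the method lives on a container whose self.jobs is a list of job dicts (the container name here is hypothetical):

board = JobBoard()                     # hypothetical owner of get_job_by_id
job = board.get_job_by_id("90038429")  # raises if the id is missing or duplicated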
def get_tweets():
    bq_service = BigQueryService()
    print("LIMIT:", LIMIT)
    job = Job()
    tweets = []

    job.start()
    for row in bq_service.fetch_labeled_tweets_in_batches(limit=LIMIT):
        tweets.append(dict(row))
        job.counter += 1
        if job.counter % BATCH_SIZE == 0:
            job.progress_report()
    job.end()

    print("FETCHED TWEETS:", fmt_n(len(tweets)))
    return DataFrame(tweets)
def download_data():
    job = Job()
    bq_service = BigQueryService()

    job.start()
    records = []
    for row in bq_service.fetch_user_details_vq(limit=LIMIT):
        #print(row)
        records.append(dict(row))
        job.counter += 1
        if job.counter % BATCH_SIZE == 0:
            job.progress_report()
    job.end()

    return DataFrame(records)
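The batch scripts here (get_tweets, download_data, and the graph/embedder mains below) share a second, simpler Job: a progress tracker with start(), a counter, progress_report(), and end(). Its internals are not shown, so the following is only a minimal sketch assuming that interface:

import time
from datetime import datetime


class Job:
    """Minimal progress-tracking Job (a sketch, not the project's actual class)."""

    def __init__(self):
        self.counter = 0
        self.started_at = None

    def start(self):
        self.counter = 0
        self.started_at = time.perf_counter()
        print("JOB STARTED AT", datetime.now())

    def progress_report(self):
        elapsed = time.perf_counter() - self.started_at
        rate = round(self.counter / elapsed) if elapsed else 0
        print(f"PROCESSED {self.counter} ROWS ({rate} PER SECOND)")

    def end(self):
        elapsed = time.perf_counter() - self.started_at
        print("JOB ENDED AT", datetime.now(), "| DURATION:", round(elapsed, 2), "SECONDS")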
import json
from unittest.mock import patch

from app.job import Job


class TestJob:
    test_data = {
        "id": "90038429",
        "title": "Python Developer",
        "company": "Durlston Partners London Limited",
        "contract": "Permanent",
        "age": "Expires in 1 day",
        "date": "27/04/2022",
        "location": "EC1, City of London",
        "link": "https://www.google.com",
        "agency": "CW Jobs",
        "summary": "They are... ",
        "interested": "N",
        "reviewed": "N",
        "email": "N"
    }

    def setup_method(self):
        self.job = Job(self.test_data)

    @patch("app.job.requests.put")
    def test_put_job(self, mock):
        self.job.put_job()
        assert json.loads(mock.call_args.kwargs['data']) == self.test_data

    def test_update_job(self):
        self.job.update_job(title="1")
        assert self.job.job['title'] == "1"

    @patch("app.email_job.Email.send_job_alert")
    def test_send_alert(self, mock):
        self.job.send_alert()
        assert self.job.job['email'] != "N"
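These tests pin down the Job wrapper's behaviour without showing it. A minimal sketch that would satisfy them, with API_URL and the Email call as assumptions:

import json
import requests

from app.email_job import Email  # import path assumed from the @patch target

API_URL = "http://localhost:5000/jobs"  # hypothetical endpoint


class Job:
    """Sketch of the Job record wrapper exercised by TestJob (assumed)."""

    def __init__(self, job: dict):
        self.job = job

    def put_job(self):
        # serialise the whole record, matching the test_put_job assertion
        requests.put(API_URL, data=json.dumps(self.job))

    def update_job(self, **kwargs):
        self.job.update(kwargs)

    def send_alert(self):
        Email().send_job_alert(self.job)
        self.update_job(email="Y")  # any value other than "N" passes test_send_alert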
print(" LIMIT:", LIMIT) print(" BATCH_SIZE:", BATCH_SIZE) print(" DESTRUCTIVE:", DESTRUCTIVE) #print(" GRAPH_LIMIT:", GRAPH_LIMIT) print(" GRAPH_BATCH_SIZE:", GRAPH_BATCH_SIZE) print(" GRAPH_DESTRUCTIVE:", GRAPH_DESTRUCTIVE) print("------------------------") storage = FileStorage( dirpath=f"daily_active_friend_graphs_v4/{DATE}/tweet_min/{TWEET_MIN}") tweets_csv_filepath = os.path.join(storage.local_dirpath, "tweets.csv") bq_service = BigQueryService() job = Job() # # LOAD TWEETS # tweet_id, text, screen_name, bot, created_at # TODO: de-dup RTs so the model will only train/test on a single RT status text (PREVENT OVERFITTING) if os.path.exists(tweets_csv_filepath) and not DESTRUCTIVE: print("LOADING TWEETS...") statuses_df = read_csv(tweets_csv_filepath) else: job.start() print("DOWNLOADING TWEETS...") statuses = [] for row in bq_service.fetch_daily_active_tweeter_statuses( date=DATE, tweet_min=TWEET_MIN, limit=LIMIT):
import signal
import time
from datetime import timedelta

from app.job import Job, ProgramKilled, signal_handler
from app.on_watch_file import OnWatchFile
from app.settings import get_logger

logger = get_logger('Main Watcher File')

if __name__ == "__main__":
    logger.info('Start Watcher File')
    watch = OnWatchFile()
    watch.connect_directory_monitoring()

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    job = Job(interval=timedelta(seconds=watch.wait_time_seconds_job),
              execute=watch.periodically)
    job.start()

    while True:
        try:
            time.sleep(1)
        except ProgramKilled:
            print("Program killed: running cleanup code")
            job.stop()
            break
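Neither app.job nor the interval/execute Job it exports is shown in these snippets. A minimal sketch of the threading-based recipe the imports imply (an assumption; the real module may differ):

import signal
import threading
from datetime import timedelta


class ProgramKilled(Exception):
    """Raised by signal_handler so the main loop can run cleanup code."""


def signal_handler(signum, frame):
    raise ProgramKilled


class Job(threading.Thread):
    """Re-runs execute() every interval until stop() is called."""

    def __init__(self, interval: timedelta, execute, *args, **kwargs):
        super().__init__()
        self.daemon = False
        self.stopped = threading.Event()
        self.interval = interval
        self.execute = execute
        self.args = args
        self.kwargs = kwargs

    def stop(self):
        self.stopped.set()
        self.join()

    def run(self):
        # Event.wait doubles as the sleep: returns False on timeout, True once set
        while not self.stopped.wait(self.interval.total_seconds()):
            self.execute(*self.args, **self.kwargs)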
    return len(batch)


if __name__ == "__main__":
    print("-------------------")
    print("BASILICA EMBEDDER...")
    print(" MIN PARTITION VAL:", MIN_VAL)
    print(" MAX PARTITION VAL:", MAX_VAL)
    print(" LIMIT:", LIMIT)
    print(" BATCH SIZE:", BATCH_SIZE)

    bq_service = BigQueryService()
    bas_service = BasilicaService()
    job = Job()

    job.start()
    records = list(bq_service.fetch_basilica_embedless_partitioned_statuses(
        min_val=MIN_VAL, max_val=MAX_VAL, limit=LIMIT))
    job.counter = len(records)
    batches = list(split_into_batches(records, BATCH_SIZE))
    print("BATCHES:", len(batches))
    job.end()

    del records

    job.start()
    with ThreadPoolExecutor(max_workers=MAX_THREADS,
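split_into_batches is called but not defined in the excerpt; a common implementation (assumed here) slices the list into fixed-size chunks:

def split_into_batches(records, batch_size):
    """Yield successive batch_size-sized slices of records."""
    for i in range(0, len(records), batch_size):
        yield records[i:i + batch_size]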