def generate_dataset(n_user, n_posts, n_comments, var):
    """Build an in-memory set of dummy users with posts and comments.

    For each user a name is generated, a ``User`` is created from it, and
    ``get_dummy_posts`` / ``get_dummy_comments`` are asked for roughly
    ``n_posts`` / ``n_comments`` items authored by that user. Afterwards
    every comment is attached to one randomly chosen post.

    Args:
        n_user: Number of users to generate.
        n_posts: Base number of posts per user.
        n_comments: Base number of comments per user.
        var: Maximum jitter; each per-user count is offset by
            ``randint(-var, var)`` and clamped so it never goes below zero.

    Returns:
        A ``(users, posts, comments)`` tuple of flat lists.
    """
    gen = DocumentGenerator()
    users = []
    for _ in range(n_user):
        nickname = gen.name()
        user = User(
            username=nickname.lower().replace(' ', '_'),
            nickname=nickname,
            password=rand_passwd(),
            email=get_email(nickname),
        )
        # The jitter can exceed the base count; a negative request is
        # meaningless, so clamp it at zero.
        user.posts = get_dummy_posts(
            n=max(0, n_posts + randint(-var, var)), gen=gen, author=user)
        user.comments = get_dummy_comments(
            n=max(0, n_comments + randint(-var, var)), gen=gen, author=user)
        users.append(user)

    posts = [p for user in users for p in user.posts]
    comments = [c for user in users for c in user.comments]

    # Attach comments to posts. With no posts at all there is nothing to
    # attach to; sample() would raise ValueError on an empty population.
    if posts:
        for comment in comments:
            post = sample(posts, 1)[0]
            post.comments.append(comment)
            comment.post = post

    return users, posts, comments
clean = [f.get() for f in futures] for c in clean: if c is not None: scraper_sessions[c.proxy] = c if len(scraper_sessions) > 0: print(len(scraper_sessions)) c = 0 while True: thread_pool = Pool(pool_size) futures = list() r = random.choice([gen.email(), gen.word()+gen.word(), gen.name()+gen.word()+gen.word()]) #r = get_random_string(random.randrange(120, 500)) request_data = {"doAuth": "1", "login": r.replace(" ", ""), "password": gen.word()+gen.word()+str(gen.small_int())} for i in range(pool_size): if proxy_index < len(proxies): proxy = proxies[proxy_index] proxy_index += 1 else: proxy = proxies[0] proxy_index = 1 if proxy in scraper_sessions: futures.append(thread_pool.apply_async(post_proxy, [url, proxy, request_data, proxies, (None, scraper_sessions[proxy])[cloudflare]])) clean = [f.get() for f in futures] for x in clean:
def handle(self, *args, **options):
    """Reload sites, images, patients and findings from the site databases.

    Deletes all ``Site`` rows and truncates ``MedicalImage``,
    ``XrayAnalysisFinding`` and ``PatientInfo``. Then, for each of 10 sites,
    it creates a ``Site`` for a randomly chosen hospital and connects to the
    ``medical_analysis`` database on host ``10.60.3.4`` at port ``3600 + i``.
    Every ``upload_item`` row is copied into a ``MedicalImage`` with a
    generated ``PatientInfo``, and its joined findings are copied into
    ``XrayAnalysisFinding``.

    Args:
        *args: Unused.
        **options: Must provide non-empty ``'user'`` and ``'password'``
            sequences; the first element of each is used as the database
            credentials.

    Returns:
        None. Progress and errors are reported with ``print``. The import
        stops at the first site whose database raises ``Error``.
    """
    # Guard BEFORE wiping anything: without credentials no site can be
    # reloaded, so the existing data must be left untouched.
    if not options.get('user') or not options.get('password'):
        print('Both user and password are required; nothing was changed.')
        return

    image_rec_count = 0
    finding_rec_count = 0

    Site.objects.all().delete()
    MedicalImage.truncate()
    XrayAnalysisFinding.truncate()
    PatientInfo.truncate()

    site_count = 10
    doc_gen = DocumentGenerator()
    hospitals = Hospital.objects.all()
    items_query = 'SELECT * FROM upload_item'
    findings_query = (
        'SELECT f.name, f.value FROM upload_item as i '
        'INNER JOIN xray_analysis as a ON i.id = a.upload_item_id '
        'INNER JOIN xray_analysis_finding AS f ON a.id = f.analysis_id '
        'WHERE i.id = %s'
    )

    for i in range(site_count):
        # Bound before the try so the except/finally clauses can always
        # refer to them, even when the failure happens before connecting.
        port = 3600 + i
        connection = None
        cursor = None
        try:
            hospital = random.choice(hospitals)
            site = Site.objects.create(
                code=str(random.randint(10000, 30000)), hospital=hospital)
            connection = mysql.connector.connect(
                host='10.60.3.4',
                port=port,
                database='medical_analysis',
                user=options['user'][0],
                password=options['password'][0])
            cursor = connection.cursor(dictionary=True)
            cursor.execute(items_query)
            upload_items = cursor.fetchall()
            for item in upload_items:
                medical_image = MedicalImage.objects.create(
                    image_date=item['image_date'],
                    image_path=item['image_path'],
                    image_name=item['name'],
                    image_size=item['size'],
                    image_type=item['type'],
                    site=site)
                image_rec_count += 1
                PatientInfo.objects.create(
                    date_acquired=item['image_date'],
                    name=doc_gen.name(),
                    identity_no=self.gen_ic(),
                    gender=random.choice([MALE, FEMALE]),
                    dob=random_timestamp(part='DATE'),
                    modality='CT',
                    medical_image=medical_image)
                cursor.execute(findings_query, (item['id'],))
                for finding in cursor.fetchall():
                    XrayAnalysisFinding.objects.create(
                        name=finding['name'],
                        value=finding['value'],
                        medical_image=medical_image)
                    finding_rec_count += 1
        except Error as e:
            print('Error reading data from Mariadb {}'.format(port), e)
            break
        finally:
            if connection is not None and connection.is_connected():
                # Close the cursor first, while its connection is still open.
                if cursor is not None:
                    cursor.close()
                connection.close()
                print('Mariadb connection is closed')

        if image_rec_count > 0 and finding_rec_count > 0:
            print('{} / {} finished. {} image(s) added. {} finding(s) added.'.format(
                i + 1, site_count, image_rec_count, finding_rec_count))

    if image_rec_count > 0 and finding_rec_count > 0:
        print('Done. Total {} image(s) added. Total {} finding(s) added.'.format(
            image_rec_count, finding_rec_count))
def handle(self, *args, **options):
    """Generate demo patients, images and findings for every hospital.

    ``options['data_date'][0]`` selects what happens:

    * ``'reset'`` -- truncate the demo tables, report it, and stop;
    * ``'today'`` -- generate records stamped with today's date;
    * anything else -- parse it as ``%Y-%m-%d`` and stamp records with the
      parsed value (``ValueError`` propagates if it does not parse).

    For each hospital its ``Site`` is looked up, or created with an unused
    random code. Between 9 and 30 patients are then created, each with one
    medical image and one finding per entry in ``DISEASES``. Progress is
    printed after each hospital and a total at the end.
    """
    requested = options['data_date'][0]

    if requested == 'reset':
        XrayAnalysisFinding.truncate()
        MedicalImage.truncate()
        PatientInfo.truncate()
        AnalysisSetting.truncate()
        Site.truncate()
        print('demo data truncated')
        # A reset generates nothing.
        return

    if requested == 'today':
        data_date = date.today()
    else:
        from datetime import datetime
        data_date = datetime.strptime(requested, '%Y-%m-%d')

    images_added = 0
    findings_added = 0
    sites_done = 0
    all_hospitals = Hospital.objects.all()
    total_sites = len(all_hospitals)
    name_source = DocumentGenerator()
    now = date.today()

    for hospital in all_hospitals:
        site = Site.objects.filter(hospital=hospital).first()
        if site is None:
            # Draw random codes until one is not already taken by a site.
            while True:
                code = str(random.randint(10000, 30000))
                if Site.objects.filter(code=code).first() is None:
                    break
            site = Site.objects.create(code=code, hospital=hospital)

        patient_total = random.randint(9, 30)
        for _ in range(patient_total):
            dob = random_timestamp(part='DATE')
            # One year less if this year's birthday has not happened yet.
            birthday_passed = (now.month, now.day) >= (dob.month, dob.day)
            age = now.year - dob.year - (0 if birthday_passed else 1)

            patient = PatientInfo.objects.create(
                date_acquired=data_date,
                name=name_source.name(),
                identity_no=self.gen_ic(),
                gender=random.choice([MALE, FEMALE]),
                dob=dob,
                age=age,
                modality='CT',
                site=site)
            image = MedicalImage.objects.create(
                image_date=data_date,
                image_path='/demo_path',
                image_name='demo',
                site=site,
                patient_info=patient)
            images_added += 1

            for disease in DISEASES:
                XrayAnalysisFinding.objects.create(
                    name=disease,
                    value=random.randint(0, 100),
                    medical_image=image)
                findings_added += 1

        sites_done += 1
        if sites_done >= 0 or (images_added > 0 and findings_added > 0):
            progress = '{} / {} site(s) finished. {} image(s) added. {} finding(s) added.'
            print(progress.format(
                sites_done, total_sites, images_added, findings_added))

    if images_added > 0 and findings_added > 0:
        summary = 'Done. Total {} image(s) added. Total {} finding(s) added.'
        print(summary.format(images_added, findings_added))