Example #1
import random
import sys

from essential_generators import DocumentGenerator, MarkovTextGenerator

SITE_COUNT = int(sys.argv[1])
MAX_LINKS_PER_PAGE = 1
parent_sites = [
    'site1.adsa-project.local', 'site2.adsa-project.local',
    'site3.adsa-project.local'
]

# %%
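# Build a document generator backed by a Markov-chain text model;
# 'markov_textgen.json' is assumed to be a pre-trained model file
# available to the script.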
g = DocumentGenerator(text_generator=MarkovTextGenerator(
    model='markov_textgen.json'))

# Generate page objects
pages = []
for _ in range(SITE_COUNT):
    page = Page(title=cleanup(g.gen_sentence(3, 6)).title(),
                body=cleanup('. '.join([
                    g.gen_sentence(5, 15)
                    for _ in range(random.randint(20, 30))
                ])),
                links=[
                    random.randint(0, SITE_COUNT - 1)
                    for _ in range(0, MAX_LINKS_PER_PAGE)
                ])
    pages.append(page)

# Map pages to parent sites
for page in pages:
    parent_site = random.choice(parent_sites)
    page.set_parent_site(parent_site)
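
The example assumes a Page class and a cleanup helper defined elsewhere in the project. A minimal sketch of what they might look like, assuming cleanup only normalizes whitespace:

import re
from dataclasses import dataclass, field


def cleanup(text):
    # Hypothetical helper: collapse runs of whitespace and strip stray
    # spaces before periods.
    return re.sub(r'\s+', ' ', text).replace(' .', '.').strip()


@dataclass
class Page:
    title: str
    body: str
    links: list = field(default_factory=list)
    parent_site: str = ''

    def set_parent_site(self, parent_site):
        self.parent_site = parent_site
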
Example #2
import pickle
import time
from datetime import datetime

from essential_generators import DocumentGenerator
from kafka import KafkaProducer
from kafka.admin import KafkaAdminClient
from kafka.errors import KafkaError

# `client` (a kafka.KafkaClient), `metadata` (its cluster metadata view),
# `kafka_servers`, `topic_name` and `topic_list` are assumed to be defined
# earlier in the original script.
broker_topics = metadata.topics()

# Delete the topic if it already exists. This is NOT recommended in
# production: deletion must complete before the topic can be recreated,
# hence the metadata refresh below.
admin_client = KafkaAdminClient(bootstrap_servers=kafka_servers)
if topic_name in broker_topics:
    admin_client.delete_topics([topic_name])
    try:
        # Force a cluster metadata refresh so the deletion is visible
        # before the topic is recreated.
        future = client.cluster.request_update()
        client.poll(future=future)
    except KafkaError as e:
        print(e)
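# `topic_list` is assumed to be a list of kafka.admin.NewTopic objects built
# earlier in the script, e.g.
# [NewTopic(name=topic_name, num_partitions=1, replication_factor=1)].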
admin_client.create_topics(new_topics=topic_list, validate_only=False)

# Setting up producer
print("Connecting producer to cluster...", flush=True)
producer = KafkaProducer(
    bootstrap_servers=kafka_servers,
    max_block_ms=10000,  # max time send() may block, e.g. waiting for metadata
    value_serializer=lambda x: pickle.dumps(x))

# Sending a random sentence every 3 seconds
gen = DocumentGenerator()
while True:
    payload = {"datetime": datetime.now(), "sentence": gen.gen_sentence()}
    print("Sending : ", payload, flush=True)
    producer.send(topic_name, payload)
    print("Sent.\n", flush=True)
    time.sleep(3)
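
The consuming side is not shown in the example. A minimal sketch of a matching consumer, assuming the same topic_name, kafka_servers and pickle serialization used by the producer above:

import pickle

from kafka import KafkaConsumer

# Hypothetical consumer mirroring the producer above; topic_name and
# kafka_servers are assumed to hold the same values used when producing.
consumer = KafkaConsumer(
    topic_name,
    bootstrap_servers=kafka_servers,
    value_deserializer=pickle.loads,
    auto_offset_reset='earliest')

for message in consumer:
    print("Received:", message.value, flush=True)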