filename="diagrams/tbd", direction="LR", node_attr ={"fontsize": "14"}, edge_attr ={"labelfontsize": "14"}): with Cluster("Google Cloud") as cloud: storage = GCS("Google Cloud Storage") with Cluster("Kubernetes cluster"): with Cluster("Monitoring") as monitoring: metrics = Prometheus("Prometheus") metrics_push = Prometheus("Prometheus Gateway") metrics_push << Edge(label="pull metrics") << metrics service_monitor = CRD("ServiceMonitor") service_monitor << Edge(label="watch") << metrics metrics << Edge(label="query metrics") << Grafana("Grafana") with Cluster("Apache Spark application") as spark: Spark() driver = Pod("driver") driver << service_monitor exec_1 = Pod("executor 1") exec_2 = Pod("executor 2") exec_3 = Pod("executor 3") driver << exec_1 driver << exec_2 driver << exec_3 driver >> Edge(label="push metrics") >> metrics_push driver << Edge(label="pull metrics") << metrics exec_1 >> Edge(label="read/write") >> storage exec_2 >> Edge(label="read/write") >> storage exec_3 >> Edge(label="read/write") >> storage operator = Pod("Spark Operator") crd = CRD("SparkApplication")
from diagrams import Cluster, Diagram
from diagrams.gcp.database import Firestore
from diagrams.gcp.devtools import Code
from diagrams.gcp.storage import Storage
from diagrams.onprem.analytics import Spark
from diagrams.onprem.gitops import Argocd
from diagrams.onprem.queue import Kafka
from diagrams.programming.language import Python

with Diagram("StreamState", show=False):
    kafka_input = Kafka("Kafka")
    kafka_output = Kafka("Kafka")

    with Cluster("StreamState cluster"):
        # svc_group = [ECS("web1"), ECS("web2"), ECS("web3")]
        with Cluster("Replay"):
            kafka_storage = Storage("Kafka sink")
            spark_reload = Spark("Replay")

        with Cluster("Realtime"):
            spark_persist = Spark("No transforms")
            spark_state = Spark("Analytical Stream")

        argo = Argocd("Gitops")
        argo >> spark_state
        argo >> spark_reload

    with Cluster("Dev"):
        code = Code("Dev App")
        code >> argo

    firestore = Firestore("Cache/upsert")
    spark_persist >> kafka_storage
from diagrams import Cluster, Diagram, Edge
from diagrams.onprem.analytics import Spark
from diagrams.onprem.compute import Server
from diagrams.onprem.database import PostgreSQL
from diagrams.onprem.inmemory import Redis
from diagrams.onprem.logging import Fluentd
from diagrams.onprem.monitoring import Grafana, Prometheus
from diagrams.onprem.network import Nginx
from diagrams.onprem.queue import Kafka

# Assumed: the snippet references a `direction` variable defined earlier in the original script.
direction = "LR"

with Diagram(direction=direction, show=False):
    ingress = Nginx("ingress")

    metrics = Prometheus("metric")
    metrics << Edge(color="firebrick", style="dashed") << Grafana("monitoring")

    with Cluster("Service Cluster"):
        grpcsvc = [Server("grpc1"), Server("grpc2"), Server("grpc3")]

    with Cluster("Sessions HA"):
        primary = Redis("session")
        primary - Edge(color="brown", style="dashed") - Redis("replica") << Edge(
            label="collect") << metrics
        grpcsvc >> Edge(color="brown") >> primary

    with Cluster("Database HA"):
        primary = PostgreSQL("users")
        primary - Edge(color="brown", style="dotted") - PostgreSQL("replica") << Edge(
            label="collect") << metrics
        grpcsvc >> Edge(color="black") >> primary

    aggregator = Fluentd("logging")
    aggregator >> Edge(label="parse") >> Kafka("stream") >> Edge(
        color="black", style="bold") >> Spark("analytics")

    ingress >> Edge(color="darkgreen") << grpcsvc >> Edge(
        color="darkorange") >> aggregator
from diagrams import Cluster, Diagram, Edge
from diagrams.aws.analytics import EMRCluster, Glue, GlueDataCatalog
from diagrams.aws.compute import ECS, Lambda
from diagrams.aws.database import Dynamodb
from diagrams.aws.ml import Sagemaker
from diagrams.aws.network import APIGateway
from diagrams.aws.storage import S3
from diagrams.onprem.analytics import Spark
from diagrams.onprem.client import User
from diagrams.onprem.compute import Server
from diagrams.onprem.network import Internet

# Assumed: the original Diagram(...) call is not shown in this snippet; a minimal one is used here.
with Diagram(show=False):
    source = Server("HTTP")

    with Cluster("Data load"):
        storage = S3("Data storage")
        download = ECS("ECS download task")
        unzip_trigger = Lambda("S3 event trigger")
        unzip = ECS("ECS unzip task")

    with Cluster("Data processing"):
        parse = Glue("Glue Spark XML\nparser")
        catalog = GlueDataCatalog("Data Catalog")

    with Cluster("Feature engineering"):
        train_features = Glue("Glue Spark job:\ntrain features")
        predict_features = Glue("Glue Spark job:\nprediction features")
        prediction_db = Dynamodb("Prediction database")

    with Cluster("ML model"):
        cluster = EMRCluster("EMR Cluster")
        model_fit = Spark("Spark + MLeap")
        model = Sagemaker("Sagemaker endpoint")

    with Cluster("Serving"):
        app = Internet("Web app")
        api = APIGateway("REST API")
        predict_trigger = Lambda("Request/response\nhandler")

    user = User("End user")

    source >> download >> storage >> unzip_trigger >> unzip >> storage
    storage >> parse >> catalog
    catalog >> [train_features, predict_features]
    predict_features >> prediction_db >> predict_trigger
    train_features >> cluster >> model_fit >> model
    predict_trigger >> model >> predict_trigger
    storage >> app
    user >> Edge() << app >> api >> predict_trigger >> api >> app
import sys

from diagrams import Cluster, Diagram
from diagrams.onprem.analytics import Spark
from diagrams.onprem.compute import Server
from diagrams.onprem.database import PostgreSQL
from diagrams.onprem.inmemory import Redis
from diagrams.onprem.logging import Fluentd
from diagrams.onprem.monitoring import Grafana, Prometheus
from diagrams.onprem.network import Nginx
from diagrams.onprem.queue import Kafka

if len(sys.argv) > 1:
    file = str(sys.argv[1])
else:
    file = "diagram"

with Diagram("Advanced Web Service with On-Premise", filename=file, show=False):
    ingress = Nginx("ingress")

    metrics = Prometheus("metric")
    metrics << Grafana("monitoring")

    with Cluster("Service Cluster"):
        grpcsvc = [Server("grpc1"), Server("grpc2"), Server("grpc3")]

    with Cluster("Sessions HA"):
        master = Redis("session")
        master - Redis("replica") << metrics
        grpcsvc >> master

    with Cluster("Database HA"):
        master = PostgreSQL("users")
        master - PostgreSQL("slave") << metrics
        grpcsvc >> master

    aggregator = Fluentd("logging")
    aggregator >> Kafka("stream") >> Spark("analytics")

    ingress >> grpcsvc >> aggregator
from diagrams import Cluster, Diagram
from diagrams.onprem.analytics import Flink, Hadoop, Hive, Spark
from diagrams.onprem.database import Cassandra
from diagrams.onprem.queue import Activemq, Kafka, Rabbitmq, Zeromq

# Assumed: `node_attr` is defined earlier in the original script but not shown in this snippet.
node_attr = {"fontsize": "28"}

graph_attr = {
    "fontsize": "28"
}

with Diagram("", show=False, node_attr=node_attr):
    with Cluster("Brokers", graph_attr=graph_attr):
        kafka = Kafka("\nKafka")
        activemq = Activemq("\nActiveMQ")
        rabbitmq = Rabbitmq("\nRabbitMQ")
        zeromq = Zeromq("\nZeroMQ")
        kafka - activemq
        rabbitmq - zeromq

    with Cluster("Speed Layer", graph_attr=graph_attr):
        kstream = Kafka("\nKafka\nStreams")
        sparks = Spark("\nSpark Streaming")
        flink = Flink("\nFlink")
        # stream_group = [kstream, sparks, flink]
        kstream - [sparks] - flink

    with Cluster("Batch Layer", graph_attr=graph_attr):
        hdfs = Hadoop("\nHDFS")

    with Cluster("Serving Layer", graph_attr=graph_attr):
        hive = Hive("\nHive")
        sparksql = Spark("\nSpark SQL")
        cassandra = Cassandra("\nCassandra")
        hive - [sparksql] - cassandra
        serve_group = [hive, sparksql, cassandra]

    activemq >> kstream
"""This module lets you re-create the dataflow diagram from the README.""" from diagrams import Cluster, Diagram from diagrams.aws.analytics import EMRCluster from diagrams.aws.storage import SimpleStorageServiceS3Bucket from diagrams.onprem.analytics import Spark from diagrams.programming.flowchart import Document, MultipleDocuments from diagrams.programming.language import Python with Diagram("Dataflow NLP-Weather-dataset", show=True): with Cluster('Local input'): local_input = [ MultipleDocuments('Yelp files'), MultipleDocuments('GHCN files') ] with Cluster('AWS'): s3_bucket_input = SimpleStorageServiceS3Bucket('<your_s3_bucket>') s3_bucket_output = SimpleStorageServiceS3Bucket('<your_s3_bucket>') emr = EMRCluster('EMR') spark = Spark('spark_app') with Cluster('Local output'): local_output = Document('nlp_weather_dataset') local_input >> Python('local_to_s3') >> s3_bucket_input s3_bucket_input >> emr >> spark >> s3_bucket_output s3_bucket_output >> Python('s3_to_local') >> local_output
from diagrams import Diagram, Cluster
from diagrams.custom import Custom
from diagrams.onprem.analytics import Spark
from diagrams.onprem.analytics import Hadoop

with Diagram("Moody Analytics API Architecture", show=False,
             filename="moody-architecture", direction="LR"):
    api = Custom("API", "./api-icon.png")
    users = Custom("Users", "./users-icon.png")
    firewall = Custom("Firewall", "./firewall-icon.png")
    website = Custom("Website", "./website-icon.png")
    data_service = Custom("Dataservice", "./dataservice-icon.png")
    spark = Spark("Apache Spark")
    hdfs = Hadoop("HDFS")

    api >> firewall
    users >> firewall
    firewall >> website
    website >> data_service
    data_service >> spark
    spark >> hdfs
from diagrams import Cluster, Diagram
from diagrams.generic.storage import Storage
from diagrams.onprem.analytics import Hive, Spark
from diagrams.onprem.client import Client
from diagrams.onprem.queue import Kafka

# Documentation: https://diagrams.mingrammer.com/docs/getting-started/installation#quick-start

with Diagram("Click Stream Architecture", show=False):
    with Cluster("Docker Compose"):
        producer = Client("Producer")
        kafdrop = Client("Kafdrop UI")

        with Cluster("Kafka"):
            click_stream_topics = [
                Kafka("Click Stream"),
                Kafka("Click Stream Metadata")
            ]

        consumer = Client("Consumer")

        with Cluster("Spark"):
            spark_master = Spark("master")
            spark_worker_1 = Spark("worker-1")
            spark_worker_2 = Spark("worker-2")

        parquet = Storage("Parquet File")

    click_stream_topics >> kafdrop
    producer >> click_stream_topics
    click_stream_topics >> consumer
    consumer >> spark_master >> parquet
from diagrams import Cluster, Diagram, Edge
from diagrams.onprem.analytics import Spark
from diagrams.onprem.compute import Server
from diagrams.onprem.database import PostgreSQL
from diagrams.onprem.inmemory import Redis
from diagrams.onprem.logging import Fluentd
from diagrams.onprem.monitoring import Grafana, Prometheus
from diagrams.onprem.network import Nginx
from diagrams.onprem.queue import Kafka

with Diagram(name="Advanced Web Service with On-Premise (colored)", show=False):
    ingress = Nginx("ingress")

    metrics = Prometheus("metric")
    metrics << Edge(color="firebrick", style="dashed") << Grafana("monitoring")

    with Cluster("Service Cluster"):
        grpcsvc = [
            Server("grpc1"),
            Server("grpc2"),
            Server("grpc3")]

    with Cluster("Sessions HA"):
        master = Redis("session")
        master - Edge(color="brown", style="dashed") - Redis("replica") << Edge(label="collect") << metrics
        grpcsvc >> Edge(color="brown") >> master

    with Cluster("Database HA"):
        master = PostgreSQL("users")
        master - Edge(color="brown", style="dotted") - PostgreSQL("slave") << Edge(label="collect") << metrics
        grpcsvc >> Edge(color="black") >> master

    aggregator = Fluentd("logging")
    aggregator >> Edge(label="parse") >> Kafka("stream") >> Edge(color="black", style="bold") >> Spark("analytics")

    ingress >> Edge(color="darkgreen") << grpcsvc >> Edge(color="darkorange") >> aggregator