# Performance Configuration for the AsyncCorrelator System
#
# This configuration file contains tuned settings for the AsyncCorrelator
# system, based on performance testing and analysis results.
#
# Layout:
#   1. Base settings under `:da_product_app, :async_correlator_performance`.
#   2. Per-environment overrides, merged on top of the base settings.
#   3. `:kernel` and `:logger` settings.

import Config

# AsyncCorrelator Performance Configuration
config :da_product_app, :async_correlator_performance, [
  # ETS Table Optimization Settings
  ets_config: [
    # Use sets with read/write concurrency for correlation tables
    correlation_table_type: :set,
    correlation_table_options: [
      :public,
      :named_table,
      {:read_concurrency, true},
      {:write_concurrency, true}
    ],

    # Cleanup settings for expired correlations
    cleanup_interval_ms: 30_000,   # Clean every 30 seconds
    correlation_ttl_ms: 300_000,   # 5 minute TTL for correlations
    max_table_size: 100_000,       # Max correlations per table

    # Memory optimization
    enable_compression: true,
    memory_threshold_mb: 100       # Alert if table memory > 100MB
  ],

  # Connection Pool Optimization
  connection_pool: [
    # Pool sizes chosen from testing
    default_pool_size: 15,         # Balanced performance/resource usage
    max_pool_size: 25,             # Maximum connections per network
    min_idle_connections: 3,       # Keep minimum idle connections

    # Connection timeouts
    connection_timeout_ms: 10_000,          # 10 second connection timeout
    idle_timeout_ms: 300_000,               # 5 minute idle timeout
    max_connection_lifetime_ms: 3_600_000,  # 1 hour max lifetime

    # Health check settings
    health_check_interval_ms: 30_000,       # Check every 30 seconds
    connection_retry_attempts: 3,
    retry_delay_ms: 1_000,

    # Performance settings
    enable_keepalive: true,
    keepalive_interval_ms: 60_000,  # Send keepalive every minute
    tcp_nodelay: true,              # Disable Nagle's algorithm
    tcp_buffer_size: 65_536         # 64KB buffer
  ],

  # Memory Management
  memory_optimization: [
    # Garbage collection tuning
    gc_strategy: :generational,
    minor_gc_threshold: 1_048_576,    # 1MB minor GC threshold
    major_gc_threshold: 10_485_760,   # 10MB major GC threshold

    # Process memory limits
    max_process_memory_mb: 50,        # 50MB per process limit
    memory_check_interval_ms: 60_000, # Check every minute

    # Binary optimization
    enable_binary_compression: true,
    compression_threshold_bytes: 1_024,  # Compress binaries > 1KB

    # ETS memory management
    ets_memory_limit_mb: 200,         # Total ETS memory limit
    enable_ets_compression: true
  ],

  # Telemetry Performance Settings
  telemetry: [
    # Base sampling configuration; every environment handled in the
    # overrides section below replaces `event_sampling_rate`.
    event_sampling_rate: 0.1,     # Sample 10% of events
    metrics_buffer_size: 5_000,   # Buffer 5K metrics
    flush_interval_ms: 10_000,    # Flush every 10 seconds

    # Event processing
    async_event_processing: true,
    event_queue_size: 10_000,
    event_processing_timeout_ms: 5_000,

    # Metric aggregation
    enable_metric_aggregation: true,
    aggregation_window_ms: 60_000,  # 1 minute aggregation windows
    metric_retention_hours: 24,     # Keep metrics for 24 hours

    # Performance monitoring
    enable_performance_monitoring: true,
    monitor_interval_ms: 5_000,     # Monitor every 5 seconds
    alert_thresholds: [
      response_time_ms: 1_000,      # Alert if avg response > 1s
      error_rate: 0.05,             # Alert if error rate > 5%
      memory_usage_mb: 500,         # Alert if memory > 500MB
      throughput_rps: 100           # Alert if RPS < 100
    ]
  ],

  # Load Balancing Optimization
  load_balancing: [
    # Strategy configuration
    default_strategy: :least_loaded,
    strategy_config: [
      round_robin: [
        # Simple round-robin, lowest overhead
        enable_health_check: true
      ],
      least_loaded: [
        # Best for uneven request processing times
        load_check_interval_ms: 1_000,
        load_calculation_method: :active_requests,
        enable_predictive_balancing: true
      ],
      correlation_hash: [
        # Best for stateful correlations
        hash_function: :phash2,
        consistent_hashing: true,
        virtual_nodes: 3
      ]
    ],

    # Failover settings
    enable_automatic_failover: true,
    failover_detection_timeout_ms: 5_000,
    max_failover_attempts: 3,
    circuit_breaker: [
      enabled: true,
      failure_threshold: 10,
      recovery_timeout_ms: 30_000,
      half_open_max_calls: 5
    ]
  ],

  # Instance Management
  instance_management: [
    # Instance counts per network
    default_instances_per_network: 3,
    max_instances_per_network: 5,
    min_instances_per_network: 1,

    # Auto-scaling configuration
    enable_auto_scaling: true,
    scale_up_threshold_rps: 800,      # Scale up if RPS > 800
    scale_down_threshold_rps: 200,    # Scale down if RPS < 200
    scale_up_cooldown_ms: 300_000,    # 5 minute cooldown
    scale_down_cooldown_ms: 600_000,  # 10 minute cooldown

    # Instance health management
    health_check_enabled: true,
    health_check_interval_ms: 30_000,
    unhealthy_threshold: 3,
    recovery_threshold: 2,

    # Drain configuration for graceful shutdown
    drain_timeout_ms: 30_000,         # 30 second drain timeout
    max_drain_attempts: 3,
    drain_check_interval_ms: 5_000
  ],

  # Performance Testing Configuration
  performance_testing: [
    # Default test parameters
    default_test_duration_ms: 60_000,  # 1 minute tests
    default_concurrent_requests: 50,
    default_request_rate_rps: 500,

    # Benchmark thresholds
    min_throughput_rps: 400,      # Minimum acceptable RPS
    max_response_time_ms: 500,    # Maximum acceptable response time
    min_success_rate: 0.95,       # 95% minimum success rate
    max_memory_usage_mb: 300,     # Maximum memory during test

    # Test scenarios
    test_scenarios: [
      light_load: [
        concurrent_requests: 10,
        request_rate_rps: 100,
        duration_ms: 30_000
      ],
      medium_load: [
        concurrent_requests: 50,
        request_rate_rps: 500,
        duration_ms: 60_000
      ],
      high_load: [
        concurrent_requests: 100,
        request_rate_rps: 1_000,
        duration_ms: 120_000
      ],
      burst_load: [
        concurrent_requests: 200,
        request_rate_rps: 2_000,
        duration_ms: 30_000
      ]
    ],

    # Stress test configuration
    enable_stress_testing: true,
    stress_test_multiplier: 2.0,       # 2x normal load for stress tests
    stress_test_duration_ms: 300_000,  # 5 minute stress tests

    # Performance reporting
    enable_detailed_reporting: true,
    report_format: :json,
    export_metrics: true,
    metrics_export_path: "/tmp/async_correlator_metrics"
  ],

  # Monitoring and Alerting
  monitoring: [
    # Dashboard configuration
    dashboard_enabled: true,
    dashboard_update_interval_ms: 5_000,
    metrics_retention_hours: 24,

    # Alert configuration
    alerting_enabled: true,
    alert_channels: [:log, :email],  # Add :slack, :pagerduty as needed
    alert_throttling_ms: 300_000,    # 5 minute alert throttling

    # Health check configuration
    health_checks: [
      system_memory: [
        enabled: true,
        threshold_mb: 500,
        check_interval_ms: 60_000
      ],
      response_time: [
        enabled: true,
        threshold_ms: 1_000,
        check_interval_ms: 30_000
      ],
      error_rate: [
        enabled: true,
        threshold: 0.05,
        check_interval_ms: 30_000
      ],
      throughput: [
        enabled: true,
        min_threshold_rps: 100,
        check_interval_ms: 60_000
      ]
    ],

    # Performance trending
    enable_trend_analysis: true,
    trend_analysis_window_hours: 4,
    trend_sensitivity: 0.1           # 10% change threshold
  ]
]

# Environment-specific overrides.
# Each `config` call below is merged into the base keyword list above, so
# only the keys that differ need to be listed.
case config_env() do
  :prod ->
    config :da_product_app, :async_correlator_performance,
      telemetry: [
        event_sampling_rate: 0.01,     # 1% sampling in production
        metrics_buffer_size: 10_000,   # Larger buffer for production
        metric_retention_hours: 168    # 1 week retention in production
      ],
      performance_testing: [
        enable_stress_testing: false   # Disable stress testing in production
      ]

  :dev ->
    config :da_product_app, :async_correlator_performance,
      telemetry: [
        event_sampling_rate: 1.0,      # 100% sampling in development
        enable_performance_monitoring: true
      ],
      performance_testing: [
        enable_stress_testing: true,      # Enable all testing in development
        default_test_duration_ms: 10_000  # Shorter tests in development
      ]

  :test ->
    config :da_product_app, :async_correlator_performance,
      telemetry: [
        event_sampling_rate: 1.0,      # 100% sampling in test
        metrics_buffer_size: 100,      # Small buffer for tests
        flush_interval_ms: 1_000       # Fast flush for tests
      ],
      connection_pool: [
        default_pool_size: 3,          # Smaller pools for tests
        max_pool_size: 5
      ],
      instance_management: [
        default_instances_per_network: 1,  # Single instance for tests
        enable_auto_scaling: false         # No auto-scaling in tests
      ]

  _other ->
    # Any other environment (e.g. a custom :staging env) keeps the base
    # settings. Without this clause, config evaluation would raise
    # CaseClauseError for such environments.
    :ok
end

# Additional performance tuning for BEAM VM
# NOTE(review): `ets_limit` does not appear to be a documented :kernel
# application parameter; the ETS table limit is normally raised with the
# `+e` emulator flag or the ERL_MAX_ETS_TABLES environment variable.
# Confirm whether this setting has any effect before relying on it.
config :kernel,
  # Increase ETS table limit
  ets_limit: 32_768

# Logger performance configuration
config :logger,
  # Reduce logging overhead in production
  level:
    case config_env() do
      :prod -> :info
      :dev -> :debug
      # `:warning` replaces the deprecated `:warn` level name.
      :test -> :warning
      # Unknown environments fall back to the production level instead of
      # raising CaseClauseError.
      _other -> :info
    end,
  # Send log output to the file backend configured below.
  # NOTE(review): setting `:backends` replaces the default backend list, so
  # console output is presumably dropped unless added back — confirm intent.
  backends: [{LoggerFileBackend, :performance_log}]

config :logger, :performance_log,
  path: "/tmp/async_correlator_performance.log",
  level: :info,
  format: "$time $metadata[$level] $message\n",
  metadata: [:request_id, :correlation_id, :network, :instance]