# Inference server configuration
HOST=0.0.0.0
PORT=8000

# VRAM management
VRAM_BUDGET_MB=10240
STRATEGY=sequential  # sequential | concurrent | auto

# Model defaults
YOLO_MODEL=yolov8n.pt
YOLO_CONFIDENCE=0.3

# Device
DEVICE=auto  # auto | cpu | cuda | cuda:0