Spaces:
Sleeping
Sleeping
| import subprocess | |
| import threading | |
| import os | |
| import time | |
| import spaces | |
def setup_mixinputs():
    """Run the ``mixinputs setup`` command and block until it finishes.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True`` is passed to ``subprocess.run``).
    """
    setup_command = ["mixinputs", "setup"]
    subprocess.run(setup_command, check=True)
def launch_vllm_server(beta: float = 1.0) -> subprocess.Popen:
    """Launch ``vllm serve`` for ``Qwen/Qwen3-4B`` as a child process.

    The child receives a copy of the current environment with
    ``MIXINPUTS_BETA`` set to ``str(beta)`` and ``VLLM_USE_V1`` set to ``"1"``.
    The server is asked to listen on ``0.0.0.0:8000`` with the API key
    ``"EMPTY"``. This function does not wait for the server to exit.

    Args:
        beta: Value exported to the child process as ``MIXINPUTS_BETA``.

    Returns:
        The ``subprocess.Popen`` handle of the launched server, so the caller
        can poll, wait on, or terminate it.
    """
    # Work on a copy so the parent process environment is left untouched.
    env = os.environ.copy()
    env["MIXINPUTS_BETA"] = str(beta)
    env["VLLM_USE_V1"] = "1"

    # NOTE(review): "--enable-reasoning" may be deprecated in newer vLLM
    # releases in favour of "--reasoning-parser" alone; confirm against the
    # installed vLLM version before changing the flag list.
    cmd = [
        "vllm", "serve",
        "Qwen/Qwen3-4B",
        "--tensor-parallel-size", "1",
        "--enforce-eager",
        "--max-model-len", "4096",
        "--max-seq-len-to-capture", "4096",
        "--max-num-seqs", "36",
        "--host", "0.0.0.0",
        "--port", "8000",
        "--enable-reasoning",
        "--reasoning-parser", "deepseek_r1",
        "--api-key", "EMPTY",
    ]

    # Return the handle instead of dropping it in an unused local.
    return subprocess.Popen(cmd, env=env)
# Run the mixinputs setup command before the server is started.
setup_mixinputs()

# Start the vLLM server from a daemon thread so this module keeps executing.
_vllm_launcher = threading.Thread(target=launch_vllm_server, daemon=True)
_vllm_launcher.start()