I am creating simple proxy for streaming, It works but I need to put sleep, without sleep it only sending last message, I tried to sleep 0, and other approaches but not working.
import logging
import yaml
import httpx
import json
from fastapi import FastAPI, Request, Response, HTTPException
from fastapi.responses import StreamingResponse
import asyncio
import time
logger = logging.getLogger("proxy")
handler = logging.StreamHandler()
logger.addHandler(handler)
class Upstream:
def __init__(self, url, cls):
self.url = url
self.cls = cls
class Config:
def __init__(self, log_level, upstreams: list = []):
self.log_level = log_level
self.upstream = []
if log_level == "development":
logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.INFO)
for upstream in upstreams:
self.upstream.append(Upstream(upstream.get("url"), upstream.get("cls")))
@staticmethod
def load_config(config_file):
with open(config_file, "r") as f:
yml = yaml.safe_load(f)
return Config(yml.get("log_level"), yml.get("upstreams"))
def pick_random_upstream(self) -> Upstream:
return self.upstream[0]
cfg = Config.load_config("config.yaml")
app = FastAPI()
@app.api_route("/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
async def proxy(request: Request, full_path: str):
body = await request.body()
try:
req_body = json.loads(body)
except json.JSONDecodeError:
raise HTTPException(status_code=400, detail="invalid request body")
model = req_body.get("model")
if not model:
raise HTTPException(status_code=400, detail="model is required")
logger.debug(f"model: {model} - full_path: {full_path} - body: {req_body}")
upstream = cfg.pick_random_upstream()
upstream_url = upstream.url
async with httpx.AsyncClient() as client:
# Use streaming=True to enable line-by-line reading
upstream_response = await client.request(
request.method,
f"{upstream_url}/{full_path}",
content=body,
headers=request.headers,
extensions={"trace_request": True, "trace_response": True}
)
# More direct streaming approach
async def direct_stream_generator():
try:
async for line in upstream_response.aiter_lines():
if line.strip():
yield f"data: {line}\n\n"
await asyncio.sleep(0.01)
except Exception as e:
print(f"Streaming error: {e}")
yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
finally:
# Explicitly close the response
await upstream_response.aclose()
# Check for streaming content type
content_type = upstream_response.headers.get("Content-Type", "")
if "text/event-stream" in content_type or "stream" in content_type.lower():
return StreamingResponse(
direct_stream_generator(),
media_type="text/event-stream",
headers=upstream_response.headers
)
else:
# Fallback to full response if not a stream
response_data = await upstream_response.aread()
return Response(
content=response_data,
status_code=upstream_response.status_code,
headers=upstream_response.headers
)
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
In above code I am basically calling the model with streaming method, receiving data and again sending back to client. if i remove sleep it only sends last message, but if i add small delay it works well. I think flushing issue.
I want to remove sleep, If anybody help, thanks.