I have a relatively simple FastAPI app that accepts a query and streams back the response from ChatGPT's API. ChatGPT is streaming back the result and I can see this being printed to console as it comes in.
What's not working is the StreamingResponse back via FastAPI. The response gets sent all together instead. I'm really at a loss as to why this isn't working.
Here is the FastAPI app code:
import os
import time
import openai
import fastapi
from fastapi import Depends, HTTPException, status, Request
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from fastapi.responses import StreamingResponse
auth_scheme = HTTPBearer()
app = fastapi.FastAPI()
openai.api_key = os.environ["OPENAI_API_KEY"]
def ask_statesman(query: str):
    #prompt = router(query)
    
    completion_reason = None
    response = ""
    while not completion_reason or completion_reason == "length":
        openai_stream = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": query}],
            temperature=0.0,
            stream=True,
        )
        for line in openai_stream:
            completion_reason = line["choices"][0]["finish_reason"]
            if "content" in line["choices"][0].delta:
                current_response = line["choices"][0].delta.content
                print(current_response)
                yield current_response
                time.sleep(0.25)
@app.post("/")
async def request_handler(auth_key: str, query: str):
    if auth_key != "123":
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authentication credentials",
            headers={"WWW-Authenticate": auth_scheme.scheme_name},
        )
    else:
        stream_response = ask_statesman(query)
        return StreamingResponse(stream_response, media_type="text/plain")
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000, debug=True, log_level="debug")
And here is the very simple test.py file to test this:
import requests
query = "How tall is the Eiffel tower?"
url = "http://localhost:8000"
params = {"auth_key": "123", "query": query}
response = requests.post(url, params=params, stream=True)
for chunk in response.iter_lines():
    if chunk:
        print(chunk.decode("utf-8"))
 
     
     
     
    