@@ -85,6 +85,10 @@ class Settings(BaseSettings):
     port: int = Field(
         default=8000, description="Listen port"
     )
+    interrupt_requests: bool = Field(
+        default=True,
+        description="Whether to interrupt requests when a new request is received.",
+    )


 router = APIRouter()
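Because `Settings` is a pydantic `BaseSettings` subclass, the new flag can be flipped from the environment without touching code. A minimal, self-contained sketch of that behavior (using the pydantic v2 `pydantic_settings` import path; on pydantic v1 `BaseSettings` comes from `pydantic` itself):

```python
from pydantic import Field
from pydantic_settings import BaseSettings  # pydantic v1: from pydantic import BaseSettings

class Settings(BaseSettings):
    interrupt_requests: bool = Field(
        default=True,
        description="Whether to interrupt requests when a new request is received.",
    )

# Field names are matched to environment variables case-insensitively, so
#   $ INTERRUPT_REQUESTS=false python sketch.py
# prints False, while a bare run prints the default, True.
print(Settings().interrupt_requests)
```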
@@ -379,7 +383,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):
             await inner_send_chan.send(dict(data=json.dumps(chunk)))
             if await request.is_disconnected():
                 raise anyio.get_cancelled_exc_class()()
-            if llama_outer_lock.locked():
+            if settings.interrupt_requests and llama_outer_lock.locked():
                 await inner_send_chan.send(dict(data="[DONE]"))
                 raise anyio.get_cancelled_exc_class()()
         await inner_send_chan.send(dict(data="[DONE]"))
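The check works because a newly arriving request holds the outer lock while it waits for the model: a streaming response polls `llama_outer_lock.locked()` between chunks, and if another request is queued it sends a final `[DONE]` and cancels itself. Gating that poll behind `settings.interrupt_requests` lets deployments opt out and let in-flight streams run to completion instead. A toy re-creation of the pattern, using an `anyio.Lock` purely for illustration (the server's own lock may be a different primitive):

```python
import anyio

async def main() -> None:
    lock = anyio.Lock()

    async def producer() -> None:
        # Stand-in for the streaming loop: stop as soon as a waiter shows up.
        for i in range(100):
            if lock.locked():        # the same check the diff gates behind the setting
                print(f"interrupted after {i} chunks")
                return
            await anyio.sleep(0.01)  # emit one "chunk"

    async with anyio.create_task_group() as tg:
        tg.start_soon(producer)
        await anyio.sleep(0.05)
        async with lock:             # a "new request" grabs the outer lock
            await anyio.sleep(0.05)

anyio.run(main)
```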
@@ -486,6 +490,7 @@ async def create_chat_completion(
     request: Request,
     body: CreateChatCompletionRequest,
     llama: llama_cpp.Llama = Depends(get_llama),
+    settings: Settings = Depends(get_settings),
 ) -> Union[llama_cpp.ChatCompletion, EventSourceResponse]:
     exclude = {
         "n",
@@ -511,7 +516,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):
             await inner_send_chan.send(dict(data=json.dumps(chat_chunk)))
             if await request.is_disconnected():
                 raise anyio.get_cancelled_exc_class()()
-            if llama_outer_lock.locked():
+            if settings.interrupt_requests and llama_outer_lock.locked():
                 await inner_send_chan.send(dict(data="[DONE]"))
                 raise anyio.get_cancelled_exc_class()()
         await inner_send_chan.send(dict(data="[DONE]"))
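From the client's point of view an interrupted stream is indistinguishable from a finished one: both end with the same `data: [DONE]` SSE sentinel, so OpenAI-style clients need no special handling. A hypothetical consumer loop showing this (the base URL and route are assumptions for a locally running server):

```python
import json
import httpx

with httpx.stream(
    "POST",
    "http://localhost:8000/v1/chat/completions",  # assumed local server URL
    json={"messages": [{"role": "user", "content": "Hi"}], "stream": True},
    timeout=None,
) as response:
    for line in response.iter_lines():
        if not line.startswith("data: "):
            continue
        payload = line[len("data: "):]
        if payload == "[DONE]":  # normal end *or* interruption
            break
        chunk = json.loads(payload)
        print(chunk["choices"][0]["delta"].get("content", ""), end="")
```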