From a47740c093dcc5291fb7e43788a9432b20093834 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 9 Jan 2026 09:47:16 -1000 Subject: [PATCH] Restore BodyPartReader.decode() as sync method, add decode_async() for non-blocking decompression (#11940) --- CHANGES/11898.bugfix.rst | 6 ++++ aiohttp/multipart.py | 60 +++++++++++++++++++++++++++++------- aiohttp/web_request.py | 2 +- docs/multipart_reference.rst | 30 +++++++++++++++++- tests/test_multipart.py | 39 ++++++++++++++++++++++- 5 files changed, 123 insertions(+), 14 deletions(-) create mode 100644 CHANGES/11898.bugfix.rst diff --git a/CHANGES/11898.bugfix.rst b/CHANGES/11898.bugfix.rst new file mode 100644 index 00000000000..f430bcce997 --- /dev/null +++ b/CHANGES/11898.bugfix.rst @@ -0,0 +1,6 @@ +Restored :py:meth:`~aiohttp.BodyPartReader.decode` as a synchronous method +for backward compatibility. The method was inadvertently changed to async +in 3.13.3 as part of the decompression bomb security fix. A new +:py:meth:`~aiohttp.BodyPartReader.decode_async` method is now available +for non-blocking decompression of large payloads. Internal aiohttp code +uses the async variant to maintain security protections -- by :user:`bdraco`. diff --git a/aiohttp/multipart.py b/aiohttp/multipart.py index af935232772..21697b8c175 100644 --- a/aiohttp/multipart.py +++ b/aiohttp/multipart.py @@ -314,7 +314,7 @@ async def read(self, *, decode: bool = False) -> bytes: data.extend(await self.read_chunk(self.chunk_size)) # https://github.com/python/mypy/issues/17537 if decode: # type: ignore[unreachable] - return await self.decode(data) + return await self.decode_async(data) return data async def read_chunk(self, size: int = chunk_size) -> bytes: @@ -492,20 +492,58 @@ def at_eof(self) -> bool: """Returns True if the boundary was reached or False otherwise.""" return self._at_eof - async def decode(self, data: bytes) -> bytes: - """Decodes data. 
+ def _apply_content_transfer_decoding(self, data: bytes) -> bytes: + """Apply Content-Transfer-Encoding decoding if header is present.""" + if CONTENT_TRANSFER_ENCODING in self.headers: + return self._decode_content_transfer(data) + return data + + def _needs_content_decoding(self) -> bool: + """Check if Content-Encoding decoding should be applied.""" + # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8 + return not self._is_form_data and CONTENT_ENCODING in self.headers + + def decode(self, data: bytes) -> bytes: + """Decodes data synchronously. - Decoding is done according the specified Content-Encoding + Decodes data according to the specified Content-Encoding or Content-Transfer-Encoding headers value. + + Note: For large payloads, consider using decode_async() instead + to avoid blocking the event loop during decompression. """ - if CONTENT_TRANSFER_ENCODING in self.headers: - data = self._decode_content_transfer(data) - # https://datatracker.ietf.org/doc/html/rfc7578#section-4.8 - if not self._is_form_data and CONTENT_ENCODING in self.headers: - return await self._decode_content(data) + data = self._apply_content_transfer_decoding(data) + if self._needs_content_decoding(): + return self._decode_content(data) return data - async def _decode_content(self, data: bytes) -> bytes: + async def decode_async(self, data: bytes) -> bytes: + """Decodes data asynchronously. + + Decodes data according to the specified Content-Encoding + or Content-Transfer-Encoding headers value. + + This method offloads decompression to an executor for large payloads + to avoid blocking the event loop. 
+ """ + data = self._apply_content_transfer_decoding(data) + if self._needs_content_decoding(): + return await self._decode_content_async(data) + return data + + def _decode_content(self, data: bytes) -> bytes: + encoding = self.headers.get(CONTENT_ENCODING, "").lower() + if encoding == "identity": + return data + if encoding in {"deflate", "gzip"}: + return ZLibDecompressor( + encoding=encoding, + suppress_deflate_header=True, + ).decompress_sync(data, max_length=self._max_decompress_size) + + raise RuntimeError(f"unknown content encoding: {encoding}") + + async def _decode_content_async(self, data: bytes) -> bytes: encoding = self.headers.get(CONTENT_ENCODING, "").lower() if encoding == "identity": return data @@ -588,7 +626,7 @@ async def write(self, writer: AbstractStreamWriter) -> None: field = self._value chunk = await field.read_chunk(size=2**16) while chunk: - await writer.write(await field.decode(chunk)) + await writer.write(await field.decode_async(chunk)) chunk = await field.read_chunk(size=2**16) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 4a2e6f0bf8e..09126b944cf 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -716,7 +716,7 @@ async def post(self) -> "MultiDictProxy[str | bytes | FileField]": ) chunk = await field.read_chunk(size=2**16) while chunk: - chunk = await field.decode(chunk) + chunk = await field.decode_async(chunk) await self._loop.run_in_executor(None, tmp.write, chunk) size += len(chunk) if 0 < max_size < size: diff --git a/docs/multipart_reference.rst b/docs/multipart_reference.rst index e0f6e4a0162..2c13c8cfec9 100644 --- a/docs/multipart_reference.rst +++ b/docs/multipart_reference.rst @@ -102,7 +102,7 @@ Multipart reference .. method:: decode(data) - Decodes data according the specified ``Content-Encoding`` + Decodes data synchronously according to the specified ``Content-Encoding`` or ``Content-Transfer-Encoding`` headers value. 
Supports ``gzip``, ``deflate`` and ``identity`` encodings for @@ -117,6 +117,34 @@ Multipart reference :rtype: bytes + .. note:: + + For large payloads, consider using :meth:`decode_async` instead + to avoid blocking the event loop during decompression. + + .. method:: decode_async(data) + :async: + + Decodes data asynchronously according to the specified ``Content-Encoding`` + or ``Content-Transfer-Encoding`` headers value. + + This method offloads decompression to an executor for large payloads + to avoid blocking the event loop. + + Supports ``gzip``, ``deflate`` and ``identity`` encodings for + ``Content-Encoding`` header. + + Supports ``base64``, ``quoted-printable``, ``binary`` encodings for + ``Content-Transfer-Encoding`` header. + + :param bytearray data: Data to decode. + + :raises: :exc:`RuntimeError` - if encoding is unknown. + + :rtype: bytes + + .. versionadded:: 3.13.4 + .. method:: get_charset(default=None) Returns charset parameter from ``Content-Type`` header or default. diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 395f620aed2..25672c9005a 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -394,9 +394,46 @@ async def test_decode_with_content_transfer_encoding_base64(self) -> None: result = b"" while not obj.at_eof(): chunk = await obj.read_chunk(size=6) - result += await obj.decode(chunk) + result += obj.decode(chunk) assert b"Time to Relax!" == result + async def test_decode_async_with_content_transfer_encoding_base64(self) -> None: + h = CIMultiDictProxy(CIMultiDict({CONTENT_TRANSFER_ENCODING: "base64"})) + with Stream(b"VG\r\r\nltZSB0byBSZ\r\nWxheCE=\r\n--:--") as stream: + obj = aiohttp.BodyPartReader(BOUNDARY, h, stream) + result = b"" + while not obj.at_eof(): + chunk = await obj.read_chunk(size=6) + result += await obj.decode_async(chunk) + assert b"Time to Relax!" 
== result + + async def test_decode_with_content_encoding_deflate(self) -> None: + h = CIMultiDictProxy(CIMultiDict({CONTENT_ENCODING: "deflate"})) + data = b"\x0b\xc9\xccMU(\xc9W\x08J\xcdI\xacP\x04\x00" + with Stream(data + b"\r\n--:--") as stream: + obj = aiohttp.BodyPartReader(BOUNDARY, h, stream) + chunk = await obj.read_chunk(size=len(data)) + result = obj.decode(chunk) + assert b"Time to Relax!" == result + + async def test_decode_with_content_encoding_identity(self) -> None: + h = CIMultiDictProxy(CIMultiDict({CONTENT_ENCODING: "identity"})) + data = b"Time to Relax!" + with Stream(data + b"\r\n--:--") as stream: + obj = aiohttp.BodyPartReader(BOUNDARY, h, stream) + chunk = await obj.read_chunk(size=len(data)) + result = obj.decode(chunk) + assert data == result + + async def test_decode_with_content_encoding_unknown(self) -> None: + h = CIMultiDictProxy(CIMultiDict({CONTENT_ENCODING: "snappy"})) + data = b"Time to Relax!" + with Stream(data + b"\r\n--:--") as stream: + obj = aiohttp.BodyPartReader(BOUNDARY, h, stream) + chunk = await obj.read_chunk(size=len(data)) + with pytest.raises(RuntimeError, match="unknown content encoding"): + obj.decode(chunk) + async def test_read_with_content_transfer_encoding_quoted_printable(self) -> None: h = CIMultiDictProxy( CIMultiDict({CONTENT_TRANSFER_ENCODING: "quoted-printable"})