6 changes: 6 additions & 0 deletions CHANGES/11898.bugfix.rst
@@ -0,0 +1,6 @@
Restored :py:meth:`~aiohttp.BodyPartReader.decode` as a synchronous method
for backward compatibility. The method was inadvertently changed to async
in 3.13.3 as part of the decompression bomb security fix. A new
:py:meth:`~aiohttp.BodyPartReader.decode_async` method is now available
for non-blocking decompression of large payloads. Internal aiohttp code
uses the async variant to maintain security protections -- by :user:`bdraco`.
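
A minimal usage sketch of the two variants (an editorial illustration, not part
of the changelog entry; the response handling and variable names are assumed)::

    import aiohttp


    async def read_multipart(resp: aiohttp.ClientResponse) -> None:
        # Sketch only: assumes ``resp`` is a multipart client response and that
        # every part is a BodyPartReader (nested multipart is not handled here).
        reader = aiohttp.MultipartReader.from_response(resp)
        while (part := await reader.next()) is not None:
            raw = await part.read()  # body of the part, still encoded
            # Synchronous decode, restored for backward compatibility:
            decoded_sync = part.decode(raw)
            # Async variant, preferred for large compressed payloads because
            # decompression is offloaded to an executor:
            decoded_async = await part.decode_async(raw)
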
60 changes: 49 additions & 11 deletions aiohttp/multipart.py
@@ -314,7 +314,7 @@ async def read(self, *, decode: bool = False) -> bytes:
data.extend(await self.read_chunk(self.chunk_size))
# https://github.com/python/mypy/issues/17537
if decode: # type: ignore[unreachable]
return await self.decode(data)
return await self.decode_async(data)
return data

async def read_chunk(self, size: int = chunk_size) -> bytes:
@@ -492,20 +492,58 @@ def at_eof(self) -> bool:
"""Returns True if the boundary was reached or False otherwise."""
return self._at_eof

async def decode(self, data: bytes) -> bytes:
"""Decodes data.
def _apply_content_transfer_decoding(self, data: bytes) -> bytes:
"""Apply Content-Transfer-Encoding decoding if header is present."""
if CONTENT_TRANSFER_ENCODING in self.headers:
return self._decode_content_transfer(data)
return data

def _needs_content_decoding(self) -> bool:
"""Check if Content-Encoding decoding should be applied."""
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
return not self._is_form_data and CONTENT_ENCODING in self.headers

def decode(self, data: bytes) -> bytes:
"""Decodes data synchronously.

Decoding is done according to the specified Content-Encoding
Decodes data according to the specified Content-Encoding
or Content-Transfer-Encoding headers value.

Note: For large payloads, consider using decode_async() instead
to avoid blocking the event loop during decompression.
"""
if CONTENT_TRANSFER_ENCODING in self.headers:
data = self._decode_content_transfer(data)
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
if not self._is_form_data and CONTENT_ENCODING in self.headers:
return await self._decode_content(data)
data = self._apply_content_transfer_decoding(data)
if self._needs_content_decoding():
return self._decode_content(data)
return data

async def _decode_content(self, data: bytes) -> bytes:
async def decode_async(self, data: bytes) -> bytes:
"""Decodes data asynchronously.

Decodes data according to the specified Content-Encoding
or Content-Transfer-Encoding headers value.

This method offloads decompression to an executor for large payloads
to avoid blocking the event loop.
"""
data = self._apply_content_transfer_decoding(data)
if self._needs_content_decoding():
return await self._decode_content_async(data)
return data

def _decode_content(self, data: bytes) -> bytes:
encoding = self.headers.get(CONTENT_ENCODING, "").lower()
if encoding == "identity":
return data
if encoding in {"deflate", "gzip"}:
return ZLibDecompressor(
encoding=encoding,
suppress_deflate_header=True,
).decompress_sync(data, max_length=self._max_decompress_size)

raise RuntimeError(f"unknown content encoding: {encoding}")

async def _decode_content_async(self, data: bytes) -> bytes:
encoding = self.headers.get(CONTENT_ENCODING, "").lower()
if encoding == "identity":
return data
@@ -588,7 +626,7 @@ async def write(self, writer: AbstractStreamWriter) -> None:
field = self._value
chunk = await field.read_chunk(size=2**16)
while chunk:
await writer.write(await field.decode(chunk))
await writer.write(await field.decode_async(chunk))
chunk = await field.read_chunk(size=2**16)


2 changes: 1 addition & 1 deletion aiohttp/web_request.py
@@ -716,7 +716,7 @@ async def post(self) -> "MultiDictProxy[str | bytes | FileField]":
)
chunk = await field.read_chunk(size=2**16)
while chunk:
chunk = await field.decode(chunk)
chunk = await field.decode_async(chunk)
await self._loop.run_in_executor(None, tmp.write, chunk)
size += len(chunk)
if 0 < max_size < size:
30 changes: 29 additions & 1 deletion docs/multipart_reference.rst
@@ -102,7 +102,7 @@ Multipart reference

.. method:: decode(data)

Decodes data according to the specified ``Content-Encoding``
Decodes data synchronously according to the specified ``Content-Encoding``
or ``Content-Transfer-Encoding`` headers value.

Supports ``gzip``, ``deflate`` and ``identity`` encodings for
@@ -117,6 +117,34 @@

:rtype: bytes

.. note::

For large payloads, consider using :meth:`decode_async` instead
to avoid blocking the event loop during decompression.

.. method:: decode_async(data)
:async:

Decodes data asynchronously according to the specified ``Content-Encoding``
or ``Content-Transfer-Encoding`` headers value.

This method offloads decompression to an executor for large payloads
to avoid blocking the event loop.

Supports ``gzip``, ``deflate`` and ``identity`` encodings for
``Content-Encoding`` header.

Supports ``base64``, ``quoted-printable``, ``binary`` encodings for
``Content-Transfer-Encoding`` header.

:param bytearray data: Data to decode.

:raises: :exc:`RuntimeError` - if encoding is unknown.

:rtype: bytes

.. versionadded:: 3.13.4
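
Usage sketch (``part`` is assumed to be an already-obtained
:class:`BodyPartReader` whose body may be compressed; names are
illustrative)::

   raw = await part.read()              # still-encoded body of the part
   data = await part.decode_async(raw)  # decompression is offloaded to an executor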

.. method:: get_charset(default=None)

Returns charset parameter from ``Content-Type`` header or default.
39 changes: 38 additions & 1 deletion tests/test_multipart.py
@@ -394,9 +394,46 @@ async def test_decode_with_content_transfer_encoding_base64(self) -> None:
result = b""
while not obj.at_eof():
chunk = await obj.read_chunk(size=6)
result += await obj.decode(chunk)
result += obj.decode(chunk)
assert b"Time to Relax!" == result

async def test_decode_async_with_content_transfer_encoding_base64(self) -> None:
h = CIMultiDictProxy(CIMultiDict({CONTENT_TRANSFER_ENCODING: "base64"}))
with Stream(b"VG\r\r\nltZSB0byBSZ\r\nWxheCE=\r\n--:--") as stream:
obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
result = b""
while not obj.at_eof():
chunk = await obj.read_chunk(size=6)
result += await obj.decode_async(chunk)
assert b"Time to Relax!" == result

async def test_decode_with_content_encoding_deflate(self) -> None:
h = CIMultiDictProxy(CIMultiDict({CONTENT_ENCODING: "deflate"}))
data = b"\x0b\xc9\xccMU(\xc9W\x08J\xcdI\xacP\x04\x00"
with Stream(data + b"\r\n--:--") as stream:
obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
chunk = await obj.read_chunk(size=len(data))
result = obj.decode(chunk)
assert b"Time to Relax!" == result

async def test_decode_with_content_encoding_identity(self) -> None:
h = CIMultiDictProxy(CIMultiDict({CONTENT_ENCODING: "identity"}))
data = b"Time to Relax!"
with Stream(data + b"\r\n--:--") as stream:
obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
chunk = await obj.read_chunk(size=len(data))
result = obj.decode(chunk)
assert data == result

async def test_decode_with_content_encoding_unknown(self) -> None:
h = CIMultiDictProxy(CIMultiDict({CONTENT_ENCODING: "snappy"}))
data = b"Time to Relax!"
with Stream(data + b"\r\n--:--") as stream:
obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
chunk = await obj.read_chunk(size=len(data))
with pytest.raises(RuntimeError, match="unknown content encoding"):
obj.decode(chunk)

async def test_read_with_content_transfer_encoding_quoted_printable(self) -> None:
h = CIMultiDictProxy(
CIMultiDict({CONTENT_TRANSFER_ENCODING: "quoted-printable"})