From 8f986fd5ef7919cb368245687a7069ad93b42d76 Mon Sep 17 00:00:00 2001 From: Charalampos Stratakis Date: Wed, 28 Feb 2024 15:41:29 +0100 Subject: [PATCH 1/8] Fix tests for XMLPullParser with Expat 2.6.0 See also: https://bugzilla.redhat.com/2264859 --- ...s-for-xmlpullparser-with-expat-2-6-0.patch | 107 ++++++++++++++++++ python3.6.spec | 12 +- 2 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 00422-gh-115133-fix-tests-for-xmlpullparser-with-expat-2-6-0.patch diff --git a/00422-gh-115133-fix-tests-for-xmlpullparser-with-expat-2-6-0.patch b/00422-gh-115133-fix-tests-for-xmlpullparser-with-expat-2-6-0.patch new file mode 100644 index 0000000..e053302 --- /dev/null +++ b/00422-gh-115133-fix-tests-for-xmlpullparser-with-expat-2-6-0.patch @@ -0,0 +1,107 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Sun, 11 Feb 2024 12:08:39 +0200 +Subject: [PATCH] 00422: gh-115133: Fix tests for XMLPullParser with Expat + 2.6.0 + +Feeding the parser by too small chunks defers parsing to prevent +CVE-2023-52425. Future versions of Expat may be more reactive. + +(cherry picked from commit 4a08e7b3431cd32a0daf22a33421cd3035343dc4) +--- + Lib/test/test_xml_etree.py | 58 ++++++++++++------- + ...-02-08-14-21-28.gh-issue-115133.ycl4ko.rst | 2 + + 2 files changed, 38 insertions(+), 22 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst + +diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py +index acaa519f42..2195eb9485 100644 +--- a/Lib/test/test_xml_etree.py ++++ b/Lib/test/test_xml_etree.py +@@ -10,6 +10,7 @@ import html + import io + import operator + import pickle ++import pyexpat + import sys + import types + import unittest +@@ -97,6 +98,10 @@ EXTERNAL_ENTITY_XML = """\ + &entity; + """ + ++fails_with_expat_2_6_0 = (unittest.expectedFailure ++ if pyexpat.version_info >= (2, 6, 0) else ++ lambda test: test) ++ + class ModuleTest(unittest.TestCase): + def test_sanity(self): + # Import sanity. +@@ -1044,28 +1049,37 @@ class XMLPullParserTest(unittest.TestCase): + self.assertEqual([(action, elem.tag) for action, elem in events], + expected) + +- def test_simple_xml(self): +- for chunk_size in (None, 1, 5): +- with self.subTest(chunk_size=chunk_size): +- parser = ET.XMLPullParser() +- self.assert_event_tags(parser, []) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, []) +- self._feed(parser, +- "\n text\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'element')]) +- self._feed(parser, "texttail\n", chunk_size) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [ +- ('end', 'element'), +- ('end', 'empty-element'), +- ]) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'root')]) +- self.assertIsNone(parser.close()) ++ def test_simple_xml(self, chunk_size=None): ++ parser = ET.XMLPullParser() ++ self.assert_event_tags(parser, []) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, []) ++ self._feed(parser, ++ "\n text\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'element')]) ++ self._feed(parser, "texttail\n", chunk_size) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [ ++ ('end', 'element'), ++ ('end', 'empty-element'), ++ ]) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'root')]) ++ self.assertIsNone(parser.close()) ++ ++ @fails_with_expat_2_6_0 ++ def test_simple_xml_chunk_1(self): ++ self.test_simple_xml(chunk_size=1) ++ ++ @fails_with_expat_2_6_0 ++ def test_simple_xml_chunk_5(self): ++ self.test_simple_xml(chunk_size=5) ++ ++ def test_simple_xml_chunk_22(self): ++ self.test_simple_xml(chunk_size=22) + + def test_feed_while_iterating(self): + parser = ET.XMLPullParser() +diff --git a/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst b/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst +new file mode 100644 +index 0000000000..6f1015235c +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst +@@ -0,0 +1,2 @@ ++Fix tests for :class:`~xml.etree.ElementTree.XMLPullParser` with Expat ++2.6.0. diff --git a/python3.6.spec b/python3.6.spec index 38e23b8..efbfe7d 100644 --- a/python3.6.spec +++ b/python3.6.spec @@ -17,7 +17,7 @@ URL: https://www.python.org/ #global prerel ... %global upstream_version %{general_version}%{?prerel} Version: %{general_version}%{?prerel:~%{prerel}} -Release: 22%{?dist} +Release: 23%{?dist} # Python is Python # pip MIT is and bundles: # appdirs: MIT @@ -655,6 +655,13 @@ Patch410: 00410-bpo-42598-fix-implicit-function-declarations-in-configure.patch # Thomas Dwyer. Patch415: 00415-cve-2023-27043-gh-102988-reject-malformed-addresses-in-email-parseaddr-111116.patch +# 00422 # fefea32e0c70109a5c88e3d22ec9ff554fcbc6ab +# gh-115133: Fix tests for XMLPullParser with Expat 2.6.0 +# +# Feeding the parser by too small chunks defers parsing to prevent +# CVE-2023-52425. Future versions of Expat may be more reactive. +Patch422: 00422-gh-115133-fix-tests-for-xmlpullparser-with-expat-2-6-0.patch + # (New patches go here ^^^) # # When adding new patches to "python" and "python3" in Fedora, EL, etc., @@ -1912,6 +1919,9 @@ CheckPython optimized # ====================================================== %changelog +* Wed Feb 28 2024 Charalampos Stratakis - 3.6.15-23 +- Fix tests for XMLPullParser with Expat 2.6.0 + * Mon Dec 18 2023 Lumír Balhar - 3.6.15-22 - Security fix for CVE-2023-27043 (rhbz#2196191) From 5d9b6ada5ce73abb0375623093180bebbf866d20 Mon Sep 17 00:00:00 2001 From: Charalampos Stratakis Date: Thu, 29 Feb 2024 02:20:46 +0100 Subject: [PATCH 2/8] Security fix for CVE-2007-4559 Fixes: rhzb#2141080 --- ...59-filter-api-for-tarfile-extractall.patch | 2465 +++++++++++++++++ pip-CVE-2007-4559.patch | 42 + python3.6.spec | 29 +- 3 files changed, 2535 insertions(+), 1 deletion(-) create mode 100644 00397-pep-706-cve-2007-4559-filter-api-for-tarfile-extractall.patch create mode 100644 pip-CVE-2007-4559.patch diff --git a/00397-pep-706-cve-2007-4559-filter-api-for-tarfile-extractall.patch b/00397-pep-706-cve-2007-4559-filter-api-for-tarfile-extractall.patch new file mode 100644 index 0000000..b158945 --- /dev/null +++ b/00397-pep-706-cve-2007-4559-filter-api-for-tarfile-extractall.patch @@ -0,0 +1,2465 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Petr Viktorin +Date: Wed, 7 Jun 2023 14:53:48 +0200 +Subject: [PATCH] 00397: PEP 706, CVE-2007-4559: Filter API for + tarfile.extractall + +Add API for allowing checks on the content of tar files, allowing callers to mitigate +directory traversal (CVE-2007-4559) and related issues. + +Python 3.12 will warn if this API is not used. +Python 3.14 will fail if it's not used. + +Backport from https://github.com/python/cpython/issues/102950 + +Change document: https://peps.python.org/pep-0706/ +--- + Doc/library/shutil.rst | 31 +- + Doc/library/tarfile.rst | 447 ++++++++- + Doc/whatsnew/3.6.rst | 17 +- + Lib/shutil.py | 18 +- + Lib/tarfile.py | 359 ++++++- + Lib/test/support/__init__.py | 24 + + Lib/test/test_shutil.py | 40 +- + Lib/test/test_tarfile.py | 926 +++++++++++++++++- + ...-03-23-15-24-38.gh-issue-102953.YR4KaK.rst | 4 + + 9 files changed, 1789 insertions(+), 77 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst + +diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst +index c3f7bd6640..29ef5259cb 100644 +--- a/Doc/library/shutil.rst ++++ b/Doc/library/shutil.rst +@@ -537,7 +537,7 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. + Remove the archive format *name* from the list of supported formats. + + +-.. function:: unpack_archive(filename[, extract_dir[, format]]) ++.. function:: unpack_archive(filename[, extract_dir[, format[, filter]]]) + + Unpack an archive. *filename* is the full path of the archive. + +@@ -551,6 +551,24 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. + registered for that extension. In case none is found, + a :exc:`ValueError` is raised. + ++ The keyword-only *filter* argument, which was added in Python 3.6.16, ++ is passed to the underlying unpacking function. ++ For zip files, *filter* is not accepted. ++ For tar files, it is recommended to set it to ``'data'``, ++ unless using features specific to tar and UNIX-like filesystems. ++ (See :ref:`tarfile-extraction-filter` for details.) ++ The ``'data'`` filter will become the default for tar files ++ in Python 3.14. ++ ++ .. warning:: ++ ++ Never extract archives from untrusted sources without prior inspection. ++ It is possible that files are created outside of the path specified in ++ the *extract_dir* argument, e.g. members that have absolute filenames ++ starting with "/" or filenames with two dots "..". ++ ++ .. versionchanged:: 3.6.16 ++ Added the *filter* argument. + + .. function:: register_unpack_format(name, extensions, function[, extra_args[, description]]) + +@@ -559,11 +577,14 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. + ``.zip`` for Zip files. + + *function* is the callable that will be used to unpack archives. The +- callable will receive the path of the archive, followed by the directory +- the archive must be extracted to. ++ callable will receive: + +- When provided, *extra_args* is a sequence of ``(name, value)`` tuples that +- will be passed as keywords arguments to the callable. ++ - the path of the archive, as a positional argument; ++ - the directory the archive must be extracted to, as a positional argument; ++ - possibly a *filter* keyword argument, if it was given to ++ :func:`unpack_archive`; ++ - additional keyword arguments, specified by *extra_args* as a sequence ++ of ``(name, value)`` tuples. + + *description* can be provided to describe the format, and will be returned + by the :func:`get_unpack_formats` function. +diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst +index 337c061107..f6dc9a7038 100644 +--- a/Doc/library/tarfile.rst ++++ b/Doc/library/tarfile.rst +@@ -199,6 +199,38 @@ The :mod:`tarfile` module defines the following exceptions: + Is raised by :meth:`TarInfo.frombuf` if the buffer it gets is invalid. + + ++.. exception:: FilterError ++ ++ Base class for members :ref:`refused ` by ++ filters. ++ ++ .. attribute:: tarinfo ++ ++ Information about the member that the filter refused to extract, ++ as :ref:`TarInfo `. ++ ++.. exception:: AbsolutePathError ++ ++ Raised to refuse extracting a member with an absolute path. ++ ++.. exception:: OutsideDestinationError ++ ++ Raised to refuse extracting a member outside the destination directory. ++ ++.. exception:: SpecialFileError ++ ++ Raised to refuse extracting a special file (e.g. a device or pipe). ++ ++.. exception:: AbsoluteLinkError ++ ++ Raised to refuse extracting a symbolic link with an absolute path. ++ ++.. exception:: LinkOutsideDestinationError ++ ++ Raised to refuse extracting a symbolic link pointing outside the destination ++ directory. ++ ++ + The following constants are available at the module level: + + .. data:: ENCODING +@@ -304,11 +336,8 @@ be finalized; only the internally used file object will be closed. See the + *debug* can be set from ``0`` (no debug messages) up to ``3`` (all debug + messages). The messages are written to ``sys.stderr``. + +- If *errorlevel* is ``0``, all errors are ignored when using :meth:`TarFile.extract`. +- Nevertheless, they appear as error messages in the debug output, when debugging +- is enabled. If ``1``, all *fatal* errors are raised as :exc:`OSError` +- exceptions. If ``2``, all *non-fatal* errors are raised as :exc:`TarError` +- exceptions as well. ++ *errorlevel* controls how extraction errors are handled, ++ see :attr:`the corresponding attribute <~TarFile.errorlevel>`. + + The *encoding* and *errors* arguments define the character encoding to be + used for reading or writing the archive and how conversion errors are going +@@ -375,7 +404,7 @@ be finalized; only the internally used file object will be closed. See the + available. + + +-.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False) ++.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False, filter=None) + + Extract all members from the archive to the current working directory or + directory *path*. If optional *members* is given, it must be a subset of the +@@ -389,6 +418,12 @@ be finalized; only the internally used file object will be closed. See the + are used to set the owner/group for the extracted files. Otherwise, the named + values from the tarfile are used. + ++ The *filter* argument, which was added in Python 3.6.16, specifies how ++ ``members`` are modified or rejected before extraction. ++ See :ref:`tarfile-extraction-filter` for details. ++ It is recommended to set this explicitly depending on which *tar* features ++ you need to support. ++ + .. warning:: + + Never extract archives from untrusted sources without prior inspection. +@@ -396,14 +431,20 @@ be finalized; only the internally used file object will be closed. See the + that have absolute filenames starting with ``"/"`` or filenames with two + dots ``".."``. + ++ Set ``filter='data'`` to prevent the most dangerous security issues, ++ and read the :ref:`tarfile-extraction-filter` section for details. ++ + .. versionchanged:: 3.5 + Added the *numeric_owner* parameter. + + .. versionchanged:: 3.6 + The *path* parameter accepts a :term:`path-like object`. + ++ .. versionchanged:: 3.6.16 ++ Added the *filter* parameter. + +-.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False) ++ ++.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False, filter=None) + + Extract a member from the archive to the current working directory, using its + full name. Its file information is extracted as accurately as possible. *member* +@@ -411,9 +452,8 @@ be finalized; only the internally used file object will be closed. See the + directory using *path*. *path* may be a :term:`path-like object`. + File attributes (owner, mtime, mode) are set unless *set_attrs* is false. + +- If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile +- are used to set the owner/group for the extracted files. Otherwise, the named +- values from the tarfile are used. ++ The *numeric_owner* and *filter* arguments are the same as ++ for :meth:`extractall`. + + .. note:: + +@@ -424,6 +464,9 @@ be finalized; only the internally used file object will be closed. See the + + See the warning for :meth:`extractall`. + ++ Set ``filter='data'`` to prevent the most dangerous security issues, ++ and read the :ref:`tarfile-extraction-filter` section for details. ++ + .. versionchanged:: 3.2 + Added the *set_attrs* parameter. + +@@ -433,6 +476,9 @@ be finalized; only the internally used file object will be closed. See the + .. versionchanged:: 3.6 + The *path* parameter accepts a :term:`path-like object`. + ++ .. versionchanged:: 3.6.16 ++ Added the *filter* parameter. ++ + + .. method:: TarFile.extractfile(member) + +@@ -444,6 +490,56 @@ be finalized; only the internally used file object will be closed. See the + .. versionchanged:: 3.3 + Return an :class:`io.BufferedReader` object. + ++.. attribute:: TarFile.errorlevel ++ ++ If *errorlevel* is ``0``, errors are ignored when using :meth:`TarFile.extract` ++ and :meth:`TarFile.extractall`. ++ Nevertheless, they appear as error messages in the debug output when ++ *debug* is greater than 0. ++ If ``1`` (the default), all *fatal* errors are raised as :exc:`OSError` or ++ :exc:`FilterError` exceptions. If ``2``, all *non-fatal* errors are raised ++ as :exc:`TarError` exceptions as well. ++ ++ Some exceptions, e.g. ones caused by wrong argument types or data ++ corruption, are always raised. ++ ++ Custom :ref:`extraction filters ` ++ should raise :exc:`FilterError` for *fatal* errors ++ and :exc:`ExtractError` for *non-fatal* ones. ++ ++ Note that when an exception is raised, the archive may be partially ++ extracted. It is the user’s responsibility to clean up. ++ ++.. attribute:: TarFile.extraction_filter ++ ++ .. versionadded:: 3.6.16 ++ ++ The :ref:`extraction filter ` used ++ as a default for the *filter* argument of :meth:`~TarFile.extract` ++ and :meth:`~TarFile.extractall`. ++ ++ The attribute may be ``None`` or a callable. ++ String names are not allowed for this attribute, unlike the *filter* ++ argument to :meth:`~TarFile.extract`. ++ ++ If ``extraction_filter`` is ``None`` (the default), ++ calling an extraction method without a *filter* argument will ++ use the :func:`fully_trusted ` filter for ++ compatibility with previous Python versions. ++ ++ In Python 3.12+, leaving ``extraction_filter=None`` will emit a ++ ``DeprecationWarning``. ++ ++ In Python 3.14+, leaving ``extraction_filter=None`` will cause ++ extraction methods to use the :func:`data ` filter by default. ++ ++ The attribute may be set on instances or overridden in subclasses. ++ It also is possible to set it on the ``TarFile`` class itself to set a ++ global default, although, since it affects all uses of *tarfile*, ++ it is best practice to only do so in top-level applications or ++ :mod:`site configuration `. ++ To set a global default this way, a filter function needs to be wrapped in ++ :func:`staticmethod()` to prevent injection of a ``self`` argument. + + .. method:: TarFile.add(name, arcname=None, recursive=True, exclude=None, *, filter=None) + +@@ -522,7 +618,27 @@ permissions, owner etc.), it provides some useful methods to determine its type. + It does *not* contain the file's data itself. + + :class:`TarInfo` objects are returned by :class:`TarFile`'s methods +-:meth:`getmember`, :meth:`getmembers` and :meth:`gettarinfo`. ++:meth:`~TarFile.getmember`, :meth:`~TarFile.getmembers` and ++:meth:`~TarFile.gettarinfo`. ++ ++Modifying the objects returned by :meth:`~!TarFile.getmember` or ++:meth:`~!TarFile.getmembers` will affect all subsequent ++operations on the archive. ++For cases where this is unwanted, you can use :mod:`copy.copy() ` or ++call the :meth:`~TarInfo.replace` method to create a modified copy in one step. ++ ++Several attributes can be set to ``None`` to indicate that a piece of metadata ++is unused or unknown. ++Different :class:`TarInfo` methods handle ``None`` differently: ++ ++- The :meth:`~TarFile.extract` or :meth:`~TarFile.extractall` methods will ++ ignore the corresponding metadata, leaving it set to a default. ++- :meth:`~TarFile.addfile` will fail. ++- :meth:`~TarFile.list` will print a placeholder string. ++ ++ ++.. versionchanged:: 3.6.16 ++ Added :meth:`~TarInfo.replace` and handling of ``None``. + + + .. class:: TarInfo(name="") +@@ -567,13 +683,24 @@ A ``TarInfo`` object has the following public data attributes: + + .. attribute:: TarInfo.mtime + +- Time of last modification. ++ Time of last modification in seconds since the :ref:`epoch `, ++ as in :attr:`os.stat_result.st_mtime`. + ++ .. versionchanged:: 3.6.16 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.mode + +- Permission bits. ++ Permission bits, as for :func:`os.chmod`. + ++ .. versionchanged:: 3.6.16 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.type + +@@ -594,26 +721,61 @@ A ``TarInfo`` object has the following public data attributes: + + User ID of the user who originally stored this member. + ++ .. versionchanged:: 3.6.16 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.gid + + Group ID of the user who originally stored this member. + ++ .. versionchanged:: 3.6.16 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.uname + + User name. + ++ .. versionchanged:: 3.6.16 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.gname + + Group name. + ++ .. versionchanged:: 3.6.16 ++ ++ Can be set to ``None`` for :meth:`~TarFile.extract` and ++ :meth:`~TarFile.extractall`, causing extraction to skip applying this ++ attribute. + + .. attribute:: TarInfo.pax_headers + + A dictionary containing key-value pairs of an associated pax extended header. + ++.. method:: TarInfo.replace(name=..., mtime=..., mode=..., linkname=..., ++ uid=..., gid=..., uname=..., gname=..., ++ deep=True) ++ ++ .. versionadded:: 3.6.16 ++ ++ Return a *new* copy of the :class:`!TarInfo` object with the given attributes ++ changed. For example, to return a ``TarInfo`` with the group name set to ++ ``'staff'``, use:: ++ ++ new_tarinfo = old_tarinfo.replace(gname='staff') ++ ++ By default, a deep copy is made. ++ If *deep* is false, the copy is shallow, i.e. ``pax_headers`` ++ and any custom attributes are shared with the original ``TarInfo`` object. + + A :class:`TarInfo` object also provides some convenient query methods: + +@@ -663,9 +825,259 @@ A :class:`TarInfo` object also provides some convenient query methods: + Return :const:`True` if it is one of character device, block device or FIFO. + + ++.. _tarfile-extraction-filter: ++ ++Extraction filters ++------------------ ++ ++.. versionadded:: 3.6.16 ++ ++The *tar* format is designed to capture all details of a UNIX-like filesystem, ++which makes it very powerful. ++Unfortunately, the features make it easy to create tar files that have ++unintended -- and possibly malicious -- effects when extracted. ++For example, extracting a tar file can overwrite arbitrary files in various ++ways (e.g. by using absolute paths, ``..`` path components, or symlinks that ++affect later members). ++ ++In most cases, the full functionality is not needed. ++Therefore, *tarfile* supports extraction filters: a mechanism to limit ++functionality, and thus mitigate some of the security issues. ++ ++.. seealso:: ++ ++ :pep:`706` ++ Contains further motivation and rationale behind the design. ++ ++The *filter* argument to :meth:`TarFile.extract` or :meth:`~TarFile.extractall` ++can be: ++ ++* the string ``'fully_trusted'``: Honor all metadata as specified in the ++ archive. ++ Should be used if the user trusts the archive completely, or implements ++ their own complex verification. ++ ++* the string ``'tar'``: Honor most *tar*-specific features (i.e. features of ++ UNIX-like filesystems), but block features that are very likely to be ++ surprising or malicious. See :func:`tar_filter` for details. ++ ++* the string ``'data'``: Ignore or block most features specific to UNIX-like ++ filesystems. Intended for extracting cross-platform data archives. ++ See :func:`data_filter` for details. ++ ++* ``None`` (default): Use :attr:`TarFile.extraction_filter`. ++ ++ If that is also ``None`` (the default), the ``'fully_trusted'`` ++ filter will be used (for compatibility with earlier versions of Python). ++ ++ In Python 3.12, the default will emit a ``DeprecationWarning``. ++ ++ In Python 3.14, the ``'data'`` filter will become the default instead. ++ It's possible to switch earlier; see :attr:`TarFile.extraction_filter`. ++ ++* A callable which will be called for each extracted member with a ++ :ref:`TarInfo ` describing the member and the destination ++ path to where the archive is extracted (i.e. the same path is used for all ++ members):: ++ ++ filter(/, member: TarInfo, path: str) -> TarInfo | None ++ ++ The callable is called just before each member is extracted, so it can ++ take the current state of the disk into account. ++ It can: ++ ++ - return a :class:`TarInfo` object which will be used instead of the metadata ++ in the archive, or ++ - return ``None``, in which case the member will be skipped, or ++ - raise an exception to abort the operation or skip the member, ++ depending on :attr:`~TarFile.errorlevel`. ++ Note that when extraction is aborted, :meth:`~TarFile.extractall` may leave ++ the archive partially extracted. It does not attempt to clean up. ++ ++Default named filters ++~~~~~~~~~~~~~~~~~~~~~ ++ ++The pre-defined, named filters are available as functions, so they can be ++reused in custom filters: ++ ++.. function:: fully_trusted_filter(/, member, path) ++ ++ Return *member* unchanged. ++ ++ This implements the ``'fully_trusted'`` filter. ++ ++.. function:: tar_filter(/, member, path) ++ ++ Implements the ``'tar'`` filter. ++ ++ - Strip leading slashes (``/`` and :attr:`os.sep`) from filenames. ++ - :ref:`Refuse ` to extract files with absolute ++ paths (in case the name is absolute ++ even after stripping slashes, e.g. ``C:/foo`` on Windows). ++ This raises :class:`~tarfile.AbsolutePathError`. ++ - :ref:`Refuse ` to extract files whose absolute ++ path (after following symlinks) would end up outside the destination. ++ This raises :class:`~tarfile.OutsideDestinationError`. ++ - Clear high mode bits (setuid, setgid, sticky) and group/other write bits ++ (:attr:`~stat.S_IWGRP`|:attr:`~stat.S_IWOTH`). ++ ++ Return the modified ``TarInfo`` member. ++ ++.. function:: data_filter(/, member, path) ++ ++ Implements the ``'data'`` filter. ++ In addition to what ``tar_filter`` does: ++ ++ - :ref:`Refuse ` to extract links (hard or soft) ++ that link to absolute paths, or ones that link outside the destination. ++ ++ This raises :class:`~tarfile.AbsoluteLinkError` or ++ :class:`~tarfile.LinkOutsideDestinationError`. ++ ++ Note that such files are refused even on platforms that do not support ++ symbolic links. ++ ++ - :ref:`Refuse ` to extract device files ++ (including pipes). ++ This raises :class:`~tarfile.SpecialFileError`. ++ ++ - For regular files, including hard links: ++ ++ - Set the owner read and write permissions ++ (:attr:`~stat.S_IRUSR`|:attr:`~stat.S_IWUSR`). ++ - Remove the group & other executable permission ++ (:attr:`~stat.S_IXGRP`|:attr:`~stat.S_IXOTH`) ++ if the owner doesn’t have it (:attr:`~stat.S_IXUSR`). ++ ++ - For other files (directories), set ``mode`` to ``None``, so ++ that extraction methods skip applying permission bits. ++ - Set user and group info (``uid``, ``gid``, ``uname``, ``gname``) ++ to ``None``, so that extraction methods skip setting it. ++ ++ Return the modified ``TarInfo`` member. ++ ++ ++.. _tarfile-extraction-refuse: ++ ++Filter errors ++~~~~~~~~~~~~~ ++ ++When a filter refuses to extract a file, it will raise an appropriate exception, ++a subclass of :class:`~tarfile.FilterError`. ++This will abort the extraction if :attr:`TarFile.errorlevel` is 1 or more. ++With ``errorlevel=0`` the error will be logged and the member will be skipped, ++but extraction will continue. ++ ++ ++Hints for further verification ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Even with ``filter='data'``, *tarfile* is not suited for extracting untrusted ++files without prior inspection. ++Among other issues, the pre-defined filters do not prevent denial-of-service ++attacks. Users should do additional checks. ++ ++Here is an incomplete list of things to consider: ++ ++* Extract to a :func:`new temporary directory ` ++ to prevent e.g. exploiting pre-existing links, and to make it easier to ++ clean up after a failed extraction. ++* When working with untrusted data, use external (e.g. OS-level) limits on ++ disk, memory and CPU usage. ++* Check filenames against an allow-list of characters ++ (to filter out control characters, confusables, foreign path separators, ++ etc.). ++* Check that filenames have expected extensions (discouraging files that ++ execute when you “click on them”, or extension-less files like Windows special device names). ++* Limit the number of extracted files, total size of extracted data, ++ filename length (including symlink length), and size of individual files. ++* Check for files that would be shadowed on case-insensitive filesystems. ++ ++Also note that: ++ ++* Tar files may contain multiple versions of the same file. ++ Later ones are expected to overwrite any earlier ones. ++ This feature is crucial to allow updating tape archives, but can be abused ++ maliciously. ++* *tarfile* does not protect against issues with “live” data, ++ e.g. an attacker tinkering with the destination (or source) directory while ++ extraction (or archiving) is in progress. ++ ++ ++Supporting older Python versions ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++Extraction filters were added to Python 3.12, and are backported to older ++versions as security updates. ++To check whether the feature is available, use e.g. ++``hasattr(tarfile, 'data_filter')`` rather than checking the Python version. ++ ++The following examples show how to support Python versions with and without ++the feature. ++Note that setting ``extraction_filter`` will affect any subsequent operations. ++ ++* Fully trusted archive:: ++ ++ my_tarfile.extraction_filter = (lambda member, path: member) ++ my_tarfile.extractall() ++ ++* Use the ``'data'`` filter if available, but revert to Python 3.11 behavior ++ (``'fully_trusted'``) if this feature is not available:: ++ ++ my_tarfile.extraction_filter = getattr(tarfile, 'data_filter', ++ (lambda member, path: member)) ++ my_tarfile.extractall() ++ ++* Use the ``'data'`` filter; *fail* if it is not available:: ++ ++ my_tarfile.extractall(filter=tarfile.data_filter) ++ ++ or:: ++ ++ my_tarfile.extraction_filter = tarfile.data_filter ++ my_tarfile.extractall() ++ ++* Use the ``'data'`` filter; *warn* if it is not available:: ++ ++ if hasattr(tarfile, 'data_filter'): ++ my_tarfile.extractall(filter='data') ++ else: ++ # remove this when no longer needed ++ warn_the_user('Extracting may be unsafe; consider updating Python') ++ my_tarfile.extractall() ++ ++ ++Stateful extraction filter example ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++While *tarfile*'s extraction methods take a simple *filter* callable, ++custom filters may be more complex objects with an internal state. ++It may be useful to write these as context managers, to be used like this:: ++ ++ with StatefulFilter() as filter_func: ++ tar.extractall(path, filter=filter_func) ++ ++Such a filter can be written as, for example:: ++ ++ class StatefulFilter: ++ def __init__(self): ++ self.file_count = 0 ++ ++ def __enter__(self): ++ return self ++ ++ def __call__(self, member, path): ++ self.file_count += 1 ++ return member ++ ++ def __exit__(self, *exc_info): ++ print(f'{self.file_count} files extracted') ++ ++ + .. _tarfile-commandline: + .. program:: tarfile + ++ + Command-Line Interface + ---------------------- + +@@ -735,6 +1147,15 @@ Command-line options + + Verbose output. + ++.. cmdoption:: --filter ++ ++ Specifies the *filter* for ``--extract``. ++ See :ref:`tarfile-extraction-filter` for details. ++ Only string names are accepted (that is, ``fully_trusted``, ``tar``, ++ and ``data``). ++ ++ .. versionadded:: 3.6.16 ++ + .. _tar-examples: + + Examples +diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst +index 19811f4a81..7b018dfed7 100644 +--- a/Doc/whatsnew/3.6.rst ++++ b/Doc/whatsnew/3.6.rst +@@ -2489,7 +2489,6 @@ URL by the parser :func:`urllib.parse` preventing such attacks. The removal + characters are controlled by a new module level variable + ``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :issue:`43882`) + +- + Notable security feature in 3.6.15-13 + ===================================== + +@@ -2503,3 +2502,19 @@ This limit can be configured or disabled by environment variable, command + line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion + length limitation ` documentation. The default limit + is 4300 digits in string form. ++ ++Notable Changes in a backport ++============================= ++ ++tarfile ++------- ++ ++* The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`, ++ have a new a *filter* argument that allows limiting tar features than may be ++ surprising or dangerous, such as creating files outside the destination ++ directory. ++ See :ref:`tarfile-extraction-filter` for details. ++ In Python 3.12, use without the *filter* argument will show a ++ :exc:`DeprecationWarning`. ++ In Python 3.14, the default will switch to ``'data'``. ++ (Contributed by Petr Viktorin in :pep:`706`.) +diff --git a/Lib/shutil.py b/Lib/shutil.py +index dd12448454..d687de1bbf 100644 +--- a/Lib/shutil.py ++++ b/Lib/shutil.py +@@ -908,7 +908,7 @@ def _unpack_zipfile(filename, extract_dir): + finally: + zip.close() + +-def _unpack_tarfile(filename, extract_dir): ++def _unpack_tarfile(filename, extract_dir, *, filter=None): + """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir` + """ + import tarfile # late import for breaking circular dependency +@@ -918,7 +918,7 @@ def _unpack_tarfile(filename, extract_dir): + raise ReadError( + "%s is not a compressed or uncompressed tar file" % filename) + try: +- tarobj.extractall(extract_dir) ++ tarobj.extractall(extract_dir, filter=filter) + finally: + tarobj.close() + +@@ -946,7 +946,7 @@ def _find_unpack_format(filename): + return name + return None + +-def unpack_archive(filename, extract_dir=None, format=None): ++def unpack_archive(filename, extract_dir=None, format=None, *, filter=None): + """Unpack an archive. + + `filename` is the name of the archive. +@@ -960,10 +960,19 @@ def unpack_archive(filename, extract_dir=None, format=None): + was registered for that extension. + + In case none is found, a ValueError is raised. ++ ++ If `filter` is given, it is passed to the underlying ++ extraction function. + """ + if extract_dir is None: + extract_dir = os.getcwd() + ++ ++ if filter is None: ++ filter_kwargs = {} ++ else: ++ filter_kwargs = {'filter': filter} ++ + if format is not None: + try: + format_info = _UNPACK_FORMATS[format] +@@ -971,7 +980,7 @@ def unpack_archive(filename, extract_dir=None, format=None): + raise ValueError("Unknown unpack format '{0}'".format(format)) + + func = format_info[1] +- func(filename, extract_dir, **dict(format_info[2])) ++ func(filename, extract_dir, **dict(format_info[2]), **filter_kwargs) + else: + # we need to look at the registered unpackers supported extensions + format = _find_unpack_format(filename) +@@ -980,6 +989,7 @@ def unpack_archive(filename, extract_dir=None, format=None): + + func = _UNPACK_FORMATS[format][1] + kwargs = dict(_UNPACK_FORMATS[format][2]) ++ kwargs.update(filter_kwargs) + func(filename, extract_dir, **kwargs) + + +diff --git a/Lib/tarfile.py b/Lib/tarfile.py +index 2ea47978ff..c18590325a 100755 +--- a/Lib/tarfile.py ++++ b/Lib/tarfile.py +@@ -48,6 +48,7 @@ import time + import struct + import copy + import re ++import warnings + + try: + import pwd +@@ -73,6 +74,7 @@ __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError", + "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT", + "DEFAULT_FORMAT", "open"] + ++ + #--------------------------------------------------------- + # tar constants + #--------------------------------------------------------- +@@ -160,6 +162,8 @@ else: + def stn(s, length, encoding, errors): + """Convert a string to a null-terminated bytes object. + """ ++ if s is None: ++ raise ValueError("metadata cannot contain None") + s = s.encode(encoding, errors) + return s[:length] + (length - len(s)) * NUL + +@@ -721,9 +725,127 @@ class ExFileObject(io.BufferedReader): + super().__init__(fileobj) + #class ExFileObject + ++ ++#----------------------------- ++# extraction filters (PEP 706) ++#----------------------------- ++ ++class FilterError(TarError): ++ pass ++ ++class AbsolutePathError(FilterError): ++ def __init__(self, tarinfo): ++ self.tarinfo = tarinfo ++ super().__init__(f'member {tarinfo.name!r} has an absolute path') ++ ++class OutsideDestinationError(FilterError): ++ def __init__(self, tarinfo, path): ++ self.tarinfo = tarinfo ++ self._path = path ++ super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, ' ++ + 'which is outside the destination') ++ ++class SpecialFileError(FilterError): ++ def __init__(self, tarinfo): ++ self.tarinfo = tarinfo ++ super().__init__(f'{tarinfo.name!r} is a special file') ++ ++class AbsoluteLinkError(FilterError): ++ def __init__(self, tarinfo): ++ self.tarinfo = tarinfo ++ super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path') ++ ++class LinkOutsideDestinationError(FilterError): ++ def __init__(self, tarinfo, path): ++ self.tarinfo = tarinfo ++ self._path = path ++ super().__init__(f'{tarinfo.name!r} would link to {path!r}, ' ++ + 'which is outside the destination') ++ ++def _get_filtered_attrs(member, dest_path, for_data=True): ++ new_attrs = {} ++ name = member.name ++ dest_path = os.path.realpath(dest_path) ++ # Strip leading / (tar's directory separator) from filenames. ++ # Include os.sep (target OS directory separator) as well. ++ if name.startswith(('/', os.sep)): ++ name = new_attrs['name'] = member.path.lstrip('/' + os.sep) ++ if os.path.isabs(name): ++ # Path is absolute even after stripping. ++ # For example, 'C:/foo' on Windows. ++ raise AbsolutePathError(member) ++ # Ensure we stay in the destination ++ target_path = os.path.realpath(os.path.join(dest_path, name)) ++ if os.path.commonpath([target_path, dest_path]) != dest_path: ++ raise OutsideDestinationError(member, target_path) ++ # Limit permissions (no high bits, and go-w) ++ mode = member.mode ++ if mode is not None: ++ # Strip high bits & group/other write bits ++ mode = mode & 0o755 ++ if for_data: ++ # For data, handle permissions & file types ++ if member.isreg() or member.islnk(): ++ if not mode & 0o100: ++ # Clear executable bits if not executable by user ++ mode &= ~0o111 ++ # Ensure owner can read & write ++ mode |= 0o600 ++ elif member.isdir() or member.issym(): ++ # Ignore mode for directories & symlinks ++ mode = None ++ else: ++ # Reject special files ++ raise SpecialFileError(member) ++ if mode != member.mode: ++ new_attrs['mode'] = mode ++ if for_data: ++ # Ignore ownership for 'data' ++ if member.uid is not None: ++ new_attrs['uid'] = None ++ if member.gid is not None: ++ new_attrs['gid'] = None ++ if member.uname is not None: ++ new_attrs['uname'] = None ++ if member.gname is not None: ++ new_attrs['gname'] = None ++ # Check link destination for 'data' ++ if member.islnk() or member.issym(): ++ if os.path.isabs(member.linkname): ++ raise AbsoluteLinkError(member) ++ target_path = os.path.realpath(os.path.join(dest_path, member.linkname)) ++ if os.path.commonpath([target_path, dest_path]) != dest_path: ++ raise LinkOutsideDestinationError(member, target_path) ++ return new_attrs ++ ++def fully_trusted_filter(member, dest_path): ++ return member ++ ++def tar_filter(member, dest_path): ++ new_attrs = _get_filtered_attrs(member, dest_path, False) ++ if new_attrs: ++ return member.replace(**new_attrs, deep=False) ++ return member ++ ++def data_filter(member, dest_path): ++ new_attrs = _get_filtered_attrs(member, dest_path, True) ++ if new_attrs: ++ return member.replace(**new_attrs, deep=False) ++ return member ++ ++_NAMED_FILTERS = { ++ "fully_trusted": fully_trusted_filter, ++ "tar": tar_filter, ++ "data": data_filter, ++} ++ + #------------------ + # Exported Classes + #------------------ ++ ++# Sentinel for replace() defaults, meaning "don't change the attribute" ++_KEEP = object() ++ + class TarInfo(object): + """Informational class which holds the details about an + archive member given by a tar header block. +@@ -779,12 +901,44 @@ class TarInfo(object): + def __repr__(self): + return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) + ++ def replace(self, *, ++ name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP, ++ uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP, ++ deep=True, _KEEP=_KEEP): ++ """Return a deep copy of self with the given attributes replaced. ++ """ ++ if deep: ++ result = copy.deepcopy(self) ++ else: ++ result = copy.copy(self) ++ if name is not _KEEP: ++ result.name = name ++ if mtime is not _KEEP: ++ result.mtime = mtime ++ if mode is not _KEEP: ++ result.mode = mode ++ if linkname is not _KEEP: ++ result.linkname = linkname ++ if uid is not _KEEP: ++ result.uid = uid ++ if gid is not _KEEP: ++ result.gid = gid ++ if uname is not _KEEP: ++ result.uname = uname ++ if gname is not _KEEP: ++ result.gname = gname ++ return result ++ + def get_info(self): + """Return the TarInfo's attributes as a dictionary. + """ ++ if self.mode is None: ++ mode = None ++ else: ++ mode = self.mode & 0o7777 + info = { + "name": self.name, +- "mode": self.mode & 0o7777, ++ "mode": mode, + "uid": self.uid, + "gid": self.gid, + "size": self.size, +@@ -807,6 +961,9 @@ class TarInfo(object): + """Return a tar header as a string of 512 byte blocks. + """ + info = self.get_info() ++ for name, value in info.items(): ++ if value is None: ++ raise ValueError("%s may not be None" % name) + + if format == USTAR_FORMAT: + return self.create_ustar_header(info, encoding, errors) +@@ -920,6 +1077,20 @@ class TarInfo(object): + """Return a header block. info is a dictionary with file + information, format must be one of the *_FORMAT constants. + """ ++ has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE) ++ if has_device_fields: ++ devmajor = itn(info.get("devmajor", 0), 8, format) ++ devminor = itn(info.get("devminor", 0), 8, format) ++ else: ++ devmajor = stn("", 8, encoding, errors) ++ devminor = stn("", 8, encoding, errors) ++ ++ # None values in metadata should cause ValueError. ++ # itn()/stn() do this for all fields except type. ++ filetype = info.get("type", REGTYPE) ++ if filetype is None: ++ raise ValueError("TarInfo.type must not be None") ++ + parts = [ + stn(info.get("name", ""), 100, encoding, errors), + itn(info.get("mode", 0) & 0o7777, 8, format), +@@ -928,7 +1099,7 @@ class TarInfo(object): + itn(info.get("size", 0), 12, format), + itn(info.get("mtime", 0), 12, format), + b" ", # checksum field +- info.get("type", REGTYPE), ++ filetype, + stn(info.get("linkname", ""), 100, encoding, errors), + info.get("magic", POSIX_MAGIC), + stn(info.get("uname", ""), 32, encoding, errors), +@@ -1410,6 +1581,8 @@ class TarFile(object): + + fileobject = ExFileObject # The file-object for extractfile(). + ++ extraction_filter = None # The default filter for extraction. ++ + def __init__(self, name=None, mode="r", fileobj=None, format=None, + tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, + errors="surrogateescape", pax_headers=None, debug=None, +@@ -1882,7 +2055,10 @@ class TarFile(object): + members = self + for tarinfo in members: + if verbose: +- _safe_print(stat.filemode(tarinfo.mode)) ++ if tarinfo.mode is None: ++ _safe_print("??????????") ++ else: ++ _safe_print(stat.filemode(tarinfo.mode)) + _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid, + tarinfo.gname or tarinfo.gid)) + if tarinfo.ischr() or tarinfo.isblk(): +@@ -1890,8 +2066,11 @@ class TarFile(object): + ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor))) + else: + _safe_print("%10d" % tarinfo.size) +- _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ +- % time.localtime(tarinfo.mtime)[:6]) ++ if tarinfo.mtime is None: ++ _safe_print("????-??-?? ??:??:??") ++ else: ++ _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ ++ % time.localtime(tarinfo.mtime)[:6]) + + _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else "")) + +@@ -1988,32 +2167,58 @@ class TarFile(object): + + self.members.append(tarinfo) + +- def extractall(self, path=".", members=None, *, numeric_owner=False): ++ def _get_filter_function(self, filter): ++ if filter is None: ++ filter = self.extraction_filter ++ if filter is None: ++ return fully_trusted_filter ++ if isinstance(filter, str): ++ raise TypeError( ++ 'String names are not supported for ' ++ + 'TarFile.extraction_filter. Use a function such as ' ++ + 'tarfile.data_filter directly.') ++ return filter ++ if callable(filter): ++ return filter ++ try: ++ return _NAMED_FILTERS[filter] ++ except KeyError: ++ raise ValueError(f"filter {filter!r} not found") from None ++ ++ def extractall(self, path=".", members=None, *, numeric_owner=False, ++ filter=None): + """Extract all members from the archive to the current working + directory and set owner, modification time and permissions on + directories afterwards. `path' specifies a different directory + to extract to. `members' is optional and must be a subset of the + list returned by getmembers(). If `numeric_owner` is True, only + the numbers for user/group names are used and not the names. ++ ++ The `filter` function will be called on each member just ++ before extraction. ++ It can return a changed TarInfo or None to skip the member. ++ String names of common filters are accepted. + """ + directories = [] + ++ filter_function = self._get_filter_function(filter) + if members is None: + members = self + +- for tarinfo in members: ++ for member in members: ++ tarinfo = self._get_extract_tarinfo(member, filter_function, path) ++ if tarinfo is None: ++ continue + if tarinfo.isdir(): +- # Extract directories with a safe mode. ++ # For directories, delay setting attributes until later, ++ # since permissions can interfere with extraction and ++ # extracting contents can reset mtime. + directories.append(tarinfo) +- tarinfo = copy.copy(tarinfo) +- tarinfo.mode = 0o700 +- # Do not set_attrs directories, as we will do that further down +- self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), +- numeric_owner=numeric_owner) ++ self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), ++ numeric_owner=numeric_owner) + + # Reverse sort directories. +- directories.sort(key=lambda a: a.name) +- directories.reverse() ++ directories.sort(key=lambda a: a.name, reverse=True) + + # Set correct owner, mtime and filemode on directories. + for tarinfo in directories: +@@ -2023,12 +2228,10 @@ class TarFile(object): + self.utime(tarinfo, dirpath) + self.chmod(tarinfo, dirpath) + except ExtractError as e: +- if self.errorlevel > 1: +- raise +- else: +- self._dbg(1, "tarfile: %s" % e) ++ self._handle_nonfatal_error(e) + +- def extract(self, member, path="", set_attrs=True, *, numeric_owner=False): ++ def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, ++ filter=None): + """Extract a member from the archive to the current working directory, + using its full name. Its file information is extracted as accurately + as possible. `member' may be a filename or a TarInfo object. You can +@@ -2036,35 +2239,70 @@ class TarFile(object): + mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` + is True, only the numbers for user/group names are used and not + the names. ++ ++ The `filter` function will be called before extraction. ++ It can return a changed TarInfo or None to skip the member. ++ String names of common filters are accepted. + """ +- self._check("r") ++ filter_function = self._get_filter_function(filter) ++ tarinfo = self._get_extract_tarinfo(member, filter_function, path) ++ if tarinfo is not None: ++ self._extract_one(tarinfo, path, set_attrs, numeric_owner) + ++ def _get_extract_tarinfo(self, member, filter_function, path): ++ """Get filtered TarInfo (or None) from member, which might be a str""" + if isinstance(member, str): + tarinfo = self.getmember(member) + else: + tarinfo = member + ++ unfiltered = tarinfo ++ try: ++ tarinfo = filter_function(tarinfo, path) ++ except (OSError, FilterError) as e: ++ self._handle_fatal_error(e) ++ except ExtractError as e: ++ self._handle_nonfatal_error(e) ++ if tarinfo is None: ++ self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) ++ return None + # Prepare the link target for makelink(). + if tarinfo.islnk(): ++ tarinfo = copy.copy(tarinfo) + tarinfo._link_target = os.path.join(path, tarinfo.linkname) ++ return tarinfo ++ ++ def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): ++ """Extract from filtered tarinfo to disk""" ++ self._check("r") + + try: + self._extract_member(tarinfo, os.path.join(path, tarinfo.name), + set_attrs=set_attrs, + numeric_owner=numeric_owner) + except OSError as e: +- if self.errorlevel > 0: +- raise +- else: +- if e.filename is None: +- self._dbg(1, "tarfile: %s" % e.strerror) +- else: +- self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) ++ self._handle_fatal_error(e) + except ExtractError as e: +- if self.errorlevel > 1: +- raise ++ self._handle_nonfatal_error(e) ++ ++ def _handle_nonfatal_error(self, e): ++ """Handle non-fatal error (ExtractError) according to errorlevel""" ++ if self.errorlevel > 1: ++ raise ++ else: ++ self._dbg(1, "tarfile: %s" % e) ++ ++ def _handle_fatal_error(self, e): ++ """Handle "fatal" error according to self.errorlevel""" ++ if self.errorlevel > 0: ++ raise ++ elif isinstance(e, OSError): ++ if e.filename is None: ++ self._dbg(1, "tarfile: %s" % e.strerror) + else: +- self._dbg(1, "tarfile: %s" % e) ++ self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) ++ else: ++ self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e)) + + def extractfile(self, member): + """Extract a member from the archive as a file object. `member' may be +@@ -2150,9 +2388,13 @@ class TarFile(object): + """Make a directory called targetpath. + """ + try: +- # Use a safe mode for the directory, the real mode is set +- # later in _extract_member(). +- os.mkdir(targetpath, 0o700) ++ if tarinfo.mode is None: ++ # Use the system's default mode ++ os.mkdir(targetpath) ++ else: ++ # Use a safe mode for the directory, the real mode is set ++ # later in _extract_member(). ++ os.mkdir(targetpath, 0o700) + except FileExistsError: + pass + +@@ -2195,6 +2437,9 @@ class TarFile(object): + raise ExtractError("special devices not supported by system") + + mode = tarinfo.mode ++ if mode is None: ++ # Use mknod's default ++ mode = 0o600 + if tarinfo.isblk(): + mode |= stat.S_IFBLK + else: +@@ -2213,7 +2458,6 @@ class TarFile(object): + if tarinfo.issym(): + os.symlink(tarinfo.linkname, targetpath) + else: +- # See extract(). + if os.path.exists(tarinfo._link_target): + os.link(tarinfo._link_target, targetpath) + else: +@@ -2238,15 +2482,19 @@ class TarFile(object): + u = tarinfo.uid + if not numeric_owner: + try: +- if grp: ++ if grp and tarinfo.gname: + g = grp.getgrnam(tarinfo.gname)[2] + except KeyError: + pass + try: +- if pwd: ++ if pwd and tarinfo.uname: + u = pwd.getpwnam(tarinfo.uname)[2] + except KeyError: + pass ++ if g is None: ++ g = -1 ++ if u is None: ++ u = -1 + try: + if tarinfo.issym() and hasattr(os, "lchown"): + os.lchown(targetpath, u, g) +@@ -2258,6 +2506,8 @@ class TarFile(object): + def chmod(self, tarinfo, targetpath): + """Set file permissions of targetpath according to tarinfo. + """ ++ if tarinfo.mode is None: ++ return + if hasattr(os, 'chmod'): + try: + os.chmod(targetpath, tarinfo.mode) +@@ -2267,10 +2517,13 @@ class TarFile(object): + def utime(self, tarinfo, targetpath): + """Set modification time of targetpath according to tarinfo. + """ ++ mtime = tarinfo.mtime ++ if mtime is None: ++ return + if not hasattr(os, 'utime'): + return + try: +- os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) ++ os.utime(targetpath, (mtime, mtime)) + except OSError: + raise ExtractError("could not change modification time") + +@@ -2337,13 +2590,26 @@ class TarFile(object): + members = self.getmembers() + + # Limit the member search list up to tarinfo. ++ skipping = False + if tarinfo is not None: +- members = members[:members.index(tarinfo)] ++ try: ++ index = members.index(tarinfo) ++ except ValueError: ++ # The given starting point might be a (modified) copy. ++ # We'll later skip members until we find an equivalent. ++ skipping = True ++ else: ++ # Happy fast path ++ members = members[:index] + + if normalize: + name = os.path.normpath(name) + + for member in reversed(members): ++ if skipping: ++ if tarinfo.offset == member.offset: ++ skipping = False ++ continue + if normalize: + member_name = os.path.normpath(member.name) + else: +@@ -2352,6 +2618,10 @@ class TarFile(object): + if name == member_name: + return member + ++ if skipping: ++ # Starting point was not found ++ raise ValueError(tarinfo) ++ + def _load(self): + """Read through the entire archive file and look for readable + members. +@@ -2444,6 +2714,7 @@ class TarFile(object): + #-------------------- + # exported functions + #-------------------- ++ + def is_tarfile(name): + """Return True if name points to a tar archive that we + are able to handle, else return False. +@@ -2465,6 +2736,10 @@ def main(): + parser = argparse.ArgumentParser(description=description) + parser.add_argument('-v', '--verbose', action='store_true', default=False, + help='Verbose output') ++ parser.add_argument('--filter', metavar='', ++ choices=_NAMED_FILTERS, ++ help='Filter for extraction') ++ + group = parser.add_mutually_exclusive_group() + group.add_argument('-l', '--list', metavar='', + help='Show listing of a tarfile') +@@ -2476,8 +2751,12 @@ def main(): + help='Create tarfile from sources') + group.add_argument('-t', '--test', metavar='', + help='Test if a tarfile is valid') ++ + args = parser.parse_args() + ++ if args.filter and not args.extract: ++ parser.exit(1, '--filter is only valid for extraction\n') ++ + if args.test: + src = args.test + if is_tarfile(src): +@@ -2508,7 +2787,7 @@ def main(): + + if is_tarfile(src): + with TarFile.open(src, 'r:*') as tf: +- tf.extractall(path=curdir) ++ tf.extractall(path=curdir, filter=args.filter) + if args.verbose: + if curdir == '.': + msg = '{!r} file is extracted.'.format(src) +diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py +index 9ca9ae79e6..da1f2ee719 100644 +--- a/Lib/test/support/__init__.py ++++ b/Lib/test/support/__init__.py +@@ -1244,6 +1244,30 @@ def check_warnings(*filters, **kwargs): + return _filterwarnings(filters, quiet) + + ++@contextlib.contextmanager ++def check_no_warnings(testcase, message='', category=Warning, force_gc=False): ++ """Context manager to check that no warnings are emitted. ++ ++ This context manager enables a given warning within its scope ++ and checks that no warnings are emitted even with that warning ++ enabled. ++ ++ If force_gc is True, a garbage collection is attempted before checking ++ for warnings. This may help to catch warnings emitted when objects ++ are deleted, such as ResourceWarning. ++ ++ Other keyword arguments are passed to warnings.filterwarnings(). ++ """ ++ with warnings.catch_warnings(record=True) as warns: ++ warnings.filterwarnings('always', ++ message=message, ++ category=category) ++ yield ++ if force_gc: ++ gc_collect() ++ testcase.assertEqual(warns, []) ++ ++ + @contextlib.contextmanager + def check_no_resource_warning(testcase): + """Context manager to check that no ResourceWarning is emitted. +diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py +index 81457c8f09..77101251ac 100644 +--- a/Lib/test/test_shutil.py ++++ b/Lib/test/test_shutil.py +@@ -1234,12 +1234,16 @@ class TestShutil(unittest.TestCase): + formats = [name for name, params in get_archive_formats()] + self.assertNotIn('xxx', formats) + +- def check_unpack_archive(self, format): +- self.check_unpack_archive_with_converter(format, lambda path: path) +- self.check_unpack_archive_with_converter(format, pathlib.Path) +- self.check_unpack_archive_with_converter(format, FakePath) ++ ### shutil.unpack_archive + +- def check_unpack_archive_with_converter(self, format, converter): ++ def check_unpack_archive(self, format, **kwargs): ++ self.check_unpack_archive_with_converter( ++ format, lambda path: path, **kwargs) ++ self.check_unpack_archive_with_converter( ++ format, pathlib.Path, **kwargs) ++ self.check_unpack_archive_with_converter(format, FakePath, **kwargs) ++ ++ def check_unpack_archive_with_converter(self, format, converter, **kwargs): + root_dir, base_dir = self._create_files() + expected = rlistdir(root_dir) + expected.remove('outer') +@@ -1249,35 +1253,45 @@ class TestShutil(unittest.TestCase): + + # let's try to unpack it now + tmpdir2 = self.mkdtemp() +- unpack_archive(filename, tmpdir2) ++ unpack_archive(filename, tmpdir2, **kwargs) + self.assertEqual(rlistdir(tmpdir2), expected) + + # and again, this time with the format specified + tmpdir3 = self.mkdtemp() +- unpack_archive(filename, tmpdir3, format=format) ++ unpack_archive(filename, tmpdir3, format=format, **kwargs) + self.assertEqual(rlistdir(tmpdir3), expected) + +- self.assertRaises(shutil.ReadError, unpack_archive, TESTFN) +- self.assertRaises(ValueError, unpack_archive, TESTFN, format='xxx') ++ with self.assertRaises(shutil.ReadError): ++ unpack_archive(TESTFN, **kwargs) ++ with self.assertRaises(ValueError): ++ unpack_archive(TESTFN, format='xxx', **kwargs) ++ ++ def check_unpack_tarball(self, format): ++ self.check_unpack_archive(format, filter='fully_trusted') ++ self.check_unpack_archive(format, filter='data') ++ with support.check_no_warnings(self): ++ self.check_unpack_archive(format) + + def test_unpack_archive_tar(self): +- self.check_unpack_archive('tar') ++ self.check_unpack_tarball('tar') + + @support.requires_zlib + def test_unpack_archive_gztar(self): +- self.check_unpack_archive('gztar') ++ self.check_unpack_tarball('gztar') + + @support.requires_bz2 + def test_unpack_archive_bztar(self): +- self.check_unpack_archive('bztar') ++ self.check_unpack_tarball('bztar') + + @support.requires_lzma + def test_unpack_archive_xztar(self): +- self.check_unpack_archive('xztar') ++ self.check_unpack_tarball('xztar') + + @support.requires_zlib + def test_unpack_archive_zip(self): + self.check_unpack_archive('zip') ++ with self.assertRaises(TypeError): ++ self.check_unpack_archive('zip', filter='data') + + def test_unpack_registry(self): + +diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py +index 8e0b275972..f261048615 100644 +--- a/Lib/test/test_tarfile.py ++++ b/Lib/test/test_tarfile.py +@@ -5,6 +5,10 @@ from hashlib import md5 + from contextlib import contextmanager + from random import Random + import pathlib ++import shutil ++import re ++import warnings ++import stat + + import unittest + import unittest.mock +@@ -2191,7 +2195,12 @@ class MiscTest(unittest.TestCase): + 'EmptyHeaderError', 'TruncatedHeaderError', + 'EOFHeaderError', 'InvalidHeaderError', + 'SubsequentHeaderError', 'ExFileObject', +- 'main'} ++ 'main', ++ 'fully_trusted_filter', 'data_filter', ++ 'tar_filter', 'FilterError', 'AbsoluteLinkError', ++ 'OutsideDestinationError', 'SpecialFileError', ++ 'AbsolutePathError', 'LinkOutsideDestinationError', ++ } + support.check__all__(self, tarfile, blacklist=blacklist) + + +@@ -2214,6 +2223,15 @@ class CommandLineTest(unittest.TestCase): + for tardata in files: + tf.add(tardata, arcname=os.path.basename(tardata)) + ++ def make_evil_tarfile(self, tar_name): ++ files = [support.findfile('tokenize_tests.txt')] ++ self.addCleanup(support.unlink, tar_name) ++ with tarfile.open(tar_name, 'w') as tf: ++ benign = tarfile.TarInfo('benign') ++ tf.addfile(benign, fileobj=io.BytesIO(b'')) ++ evil = tarfile.TarInfo('../evil') ++ tf.addfile(evil, fileobj=io.BytesIO(b'')) ++ + def test_test_command(self): + for tar_name in testtarnames: + for opt in '-t', '--test': +@@ -2357,6 +2375,25 @@ class CommandLineTest(unittest.TestCase): + finally: + support.rmtree(tarextdir) + ++ def test_extract_command_filter(self): ++ self.make_evil_tarfile(tmpname) ++ # Make an inner directory, so the member named '../evil' ++ # is still extracted into `tarextdir` ++ destdir = os.path.join(tarextdir, 'dest') ++ os.mkdir(tarextdir) ++ try: ++ with support.temp_cwd(destdir): ++ self.tarfilecmd_failure('-e', tmpname, ++ '-v', ++ '--filter', 'data') ++ out = self.tarfilecmd('-e', tmpname, ++ '-v', ++ '--filter', 'fully_trusted', ++ PYTHONIOENCODING='utf-8') ++ self.assertIn(b' file is extracted.', out) ++ finally: ++ support.rmtree(tarextdir) ++ + def test_extract_command_different_directory(self): + self.make_simple_tarfile(tmpname) + try: +@@ -2630,6 +2667,893 @@ class NumericOwnerTest(unittest.TestCase): + tarfl.extract, filename_1, TEMPDIR, False, True) + + ++class ReplaceTests(ReadTest, unittest.TestCase): ++ def test_replace_name(self): ++ member = self.tar.getmember('ustar/regtype') ++ replaced = member.replace(name='misc/other') ++ self.assertEqual(replaced.name, 'misc/other') ++ self.assertEqual(member.name, 'ustar/regtype') ++ self.assertEqual(self.tar.getmember('ustar/regtype').name, ++ 'ustar/regtype') ++ ++ def test_replace_deep(self): ++ member = self.tar.getmember('pax/regtype1') ++ replaced = member.replace() ++ replaced.pax_headers['gname'] = 'not-bar' ++ self.assertEqual(member.pax_headers['gname'], 'bar') ++ self.assertEqual( ++ self.tar.getmember('pax/regtype1').pax_headers['gname'], 'bar') ++ ++ def test_replace_shallow(self): ++ member = self.tar.getmember('pax/regtype1') ++ replaced = member.replace(deep=False) ++ replaced.pax_headers['gname'] = 'not-bar' ++ self.assertEqual(member.pax_headers['gname'], 'not-bar') ++ self.assertEqual( ++ self.tar.getmember('pax/regtype1').pax_headers['gname'], 'not-bar') ++ ++ def test_replace_all(self): ++ member = self.tar.getmember('ustar/regtype') ++ for attr_name in ('name', 'mtime', 'mode', 'linkname', ++ 'uid', 'gid', 'uname', 'gname'): ++ with self.subTest(attr_name=attr_name): ++ replaced = member.replace(**{attr_name: None}) ++ self.assertEqual(getattr(replaced, attr_name), None) ++ self.assertNotEqual(getattr(member, attr_name), None) ++ ++ def test_replace_internal(self): ++ member = self.tar.getmember('ustar/regtype') ++ with self.assertRaises(TypeError): ++ member.replace(offset=123456789) ++ ++ ++class NoneInfoExtractTests(ReadTest): ++ # These mainly check that all kinds of members are extracted successfully ++ # if some metadata is None. ++ # Some of the methods do additional spot checks. ++ ++ # We also test that the default filters can deal with None. ++ ++ extraction_filter = None ++ ++ @classmethod ++ def setUpClass(cls): ++ tar = tarfile.open(tarname, mode='r', encoding="iso8859-1") ++ cls.control_dir = pathlib.Path(TEMPDIR) / "extractall_ctrl" ++ tar.errorlevel = 0 ++ tar.extractall(cls.control_dir, filter=cls.extraction_filter) ++ tar.close() ++ cls.control_paths = set( ++ p.relative_to(cls.control_dir) ++ for p in pathlib.Path(cls.control_dir).glob('**/*')) ++ ++ @classmethod ++ def tearDownClass(cls): ++ shutil.rmtree(cls.control_dir) ++ ++ def check_files_present(self, directory): ++ got_paths = set( ++ p.relative_to(directory) ++ for p in pathlib.Path(directory).glob('**/*')) ++ self.assertEqual(self.control_paths, got_paths) ++ ++ @contextmanager ++ def extract_with_none(self, *attr_names): ++ DIR = pathlib.Path(TEMPDIR) / "extractall_none" ++ self.tar.errorlevel = 0 ++ for member in self.tar.getmembers(): ++ for attr_name in attr_names: ++ setattr(member, attr_name, None) ++ with support.temp_dir(DIR): ++ self.tar.extractall(DIR, filter='fully_trusted') ++ self.check_files_present(DIR) ++ yield DIR ++ ++ def test_extractall_none_mtime(self): ++ # mtimes of extracted files should be later than 'now' -- the mtime ++ # of a previously created directory. ++ now = pathlib.Path(TEMPDIR).stat().st_mtime ++ with self.extract_with_none('mtime') as DIR: ++ for path in pathlib.Path(DIR).glob('**/*'): ++ with self.subTest(path=path): ++ try: ++ mtime = path.stat().st_mtime ++ except OSError: ++ # Some systems can't stat symlinks, ignore those ++ if not path.is_symlink(): ++ raise ++ else: ++ self.assertGreaterEqual(path.stat().st_mtime, now) ++ ++ def test_extractall_none_mode(self): ++ # modes of directories and regular files should match the mode ++ # of a "normally" created directory or regular file ++ dir_mode = pathlib.Path(TEMPDIR).stat().st_mode ++ regular_file = pathlib.Path(TEMPDIR) / 'regular_file' ++ regular_file.write_text('') ++ regular_file_mode = regular_file.stat().st_mode ++ with self.extract_with_none('mode') as DIR: ++ for path in pathlib.Path(DIR).glob('**/*'): ++ with self.subTest(path=path): ++ if path.is_dir(): ++ self.assertEqual(path.stat().st_mode, dir_mode) ++ elif path.is_file(): ++ self.assertEqual(path.stat().st_mode, ++ regular_file_mode) ++ ++ def test_extractall_none_uid(self): ++ with self.extract_with_none('uid'): ++ pass ++ ++ def test_extractall_none_gid(self): ++ with self.extract_with_none('gid'): ++ pass ++ ++ def test_extractall_none_uname(self): ++ with self.extract_with_none('uname'): ++ pass ++ ++ def test_extractall_none_gname(self): ++ with self.extract_with_none('gname'): ++ pass ++ ++ def test_extractall_none_ownership(self): ++ with self.extract_with_none('uid', 'gid', 'uname', 'gname'): ++ pass ++ ++class NoneInfoExtractTests_Data(NoneInfoExtractTests, unittest.TestCase): ++ extraction_filter = 'data' ++ ++class NoneInfoExtractTests_FullyTrusted(NoneInfoExtractTests, ++ unittest.TestCase): ++ extraction_filter = 'fully_trusted' ++ ++class NoneInfoExtractTests_Tar(NoneInfoExtractTests, unittest.TestCase): ++ extraction_filter = 'tar' ++ ++class NoneInfoExtractTests_Default(NoneInfoExtractTests, ++ unittest.TestCase): ++ extraction_filter = None ++ ++class NoneInfoTests_Misc(unittest.TestCase): ++ def test_add(self): ++ # When addfile() encounters None metadata, it raises a ValueError ++ bio = io.BytesIO() ++ for tarformat in (tarfile.USTAR_FORMAT, tarfile.GNU_FORMAT, ++ tarfile.PAX_FORMAT): ++ with self.subTest(tarformat=tarformat): ++ tar = tarfile.open(fileobj=bio, mode='w', format=tarformat) ++ tarinfo = tar.gettarinfo(tarname) ++ try: ++ tar.addfile(tarinfo) ++ except Exception: ++ if tarformat == tarfile.USTAR_FORMAT: ++ # In the old, limited format, adding might fail for ++ # reasons like the UID being too large ++ pass ++ else: ++ raise ++ else: ++ for attr_name in ('mtime', 'mode', 'uid', 'gid', ++ 'uname', 'gname'): ++ with self.subTest(attr_name=attr_name): ++ replaced = tarinfo.replace(**{attr_name: None}) ++ with self.assertRaisesRegex(ValueError, ++ f"{attr_name}"): ++ tar.addfile(replaced) ++ ++ def test_list(self): ++ # Change some metadata to None, then compare list() output ++ # word-for-word. We want list() to not raise, and to only change ++ # printout for the affected piece of metadata. ++ # (n.b.: some contents of the test archive are hardcoded.) ++ for attr_names in ({'mtime'}, {'mode'}, {'uid'}, {'gid'}, ++ {'uname'}, {'gname'}, ++ {'uid', 'uname'}, {'gid', 'gname'}): ++ with self.subTest(attr_names=attr_names), \ ++ tarfile.open(tarname, encoding="iso8859-1") as tar: ++ tio_prev = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') ++ with support.swap_attr(sys, 'stdout', tio_prev): ++ tar.list() ++ for member in tar.getmembers(): ++ for attr_name in attr_names: ++ setattr(member, attr_name, None) ++ tio_new = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') ++ with support.swap_attr(sys, 'stdout', tio_new): ++ tar.list() ++ for expected, got in zip(tio_prev.detach().getvalue().split(), ++ tio_new.detach().getvalue().split()): ++ if attr_names == {'mtime'} and re.match(rb'2003-01-\d\d', expected): ++ self.assertEqual(got, b'????-??-??') ++ elif attr_names == {'mtime'} and re.match(rb'\d\d:\d\d:\d\d', expected): ++ self.assertEqual(got, b'??:??:??') ++ elif attr_names == {'mode'} and re.match( ++ rb'.([r-][w-][x-]){3}', expected): ++ self.assertEqual(got, b'??????????') ++ elif attr_names == {'uname'} and expected.startswith( ++ (b'tarfile/', b'lars/', b'foo/')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_group, exp_group) ++ self.assertRegex(got_user, b'[0-9]+') ++ elif attr_names == {'gname'} and expected.endswith( ++ (b'/tarfile', b'/users', b'/bar')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_user, exp_user) ++ self.assertRegex(got_group, b'[0-9]+') ++ elif attr_names == {'uid'} and expected.startswith( ++ (b'1000/')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_group, exp_group) ++ self.assertEqual(got_user, b'None') ++ elif attr_names == {'gid'} and expected.endswith((b'/100')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_user, exp_user) ++ self.assertEqual(got_group, b'None') ++ elif attr_names == {'uid', 'uname'} and expected.startswith( ++ (b'tarfile/', b'lars/', b'foo/', b'1000/')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_group, exp_group) ++ self.assertEqual(got_user, b'None') ++ elif attr_names == {'gname', 'gid'} and expected.endswith( ++ (b'/tarfile', b'/users', b'/bar', b'/100')): ++ exp_user, exp_group = expected.split(b'/') ++ got_user, got_group = got.split(b'/') ++ self.assertEqual(got_user, exp_user) ++ self.assertEqual(got_group, b'None') ++ else: ++ # In other cases the output should be the same ++ self.assertEqual(expected, got) ++ ++def _filemode_to_int(mode): ++ """Inverse of `stat.filemode` (for permission bits) ++ ++ Using mode strings rather than numbers makes the later tests more readable. ++ """ ++ str_mode = mode[1:] ++ result = ( ++ {'r': stat.S_IRUSR, '-': 0}[str_mode[0]] ++ | {'w': stat.S_IWUSR, '-': 0}[str_mode[1]] ++ | {'x': stat.S_IXUSR, '-': 0, ++ 's': stat.S_IXUSR | stat.S_ISUID, ++ 'S': stat.S_ISUID}[str_mode[2]] ++ | {'r': stat.S_IRGRP, '-': 0}[str_mode[3]] ++ | {'w': stat.S_IWGRP, '-': 0}[str_mode[4]] ++ | {'x': stat.S_IXGRP, '-': 0, ++ 's': stat.S_IXGRP | stat.S_ISGID, ++ 'S': stat.S_ISGID}[str_mode[5]] ++ | {'r': stat.S_IROTH, '-': 0}[str_mode[6]] ++ | {'w': stat.S_IWOTH, '-': 0}[str_mode[7]] ++ | {'x': stat.S_IXOTH, '-': 0, ++ 't': stat.S_IXOTH | stat.S_ISVTX, ++ 'T': stat.S_ISVTX}[str_mode[8]] ++ ) ++ # check we did this right ++ assert stat.filemode(result)[1:] == mode[1:] ++ ++ return result ++ ++class ArchiveMaker: ++ """Helper to create a tar file with specific contents ++ ++ Usage: ++ ++ with ArchiveMaker() as t: ++ t.add('filename', ...) ++ ++ with t.open() as tar: ++ ... # `tar` is now a TarFile with 'filename' in it! ++ """ ++ def __init__(self): ++ self.bio = io.BytesIO() ++ ++ def __enter__(self): ++ self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio) ++ return self ++ ++ def __exit__(self, *exc): ++ self.tar_w.close() ++ self.contents = self.bio.getvalue() ++ self.bio = None ++ ++ def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, ++ mode=None, **kwargs): ++ """Add a member to the test archive. Call within `with`.""" ++ name = str(name) ++ tarinfo = tarfile.TarInfo(name).replace(**kwargs) ++ if mode: ++ tarinfo.mode = _filemode_to_int(mode) ++ if symlink_to is not None: ++ type = tarfile.SYMTYPE ++ tarinfo.linkname = str(symlink_to) ++ if hardlink_to is not None: ++ type = tarfile.LNKTYPE ++ tarinfo.linkname = str(hardlink_to) ++ if name.endswith('/') and type is None: ++ type = tarfile.DIRTYPE ++ if type is not None: ++ tarinfo.type = type ++ if tarinfo.isreg(): ++ fileobj = io.BytesIO(bytes(tarinfo.size)) ++ else: ++ fileobj = None ++ self.tar_w.addfile(tarinfo, fileobj) ++ ++ def open(self, **kwargs): ++ """Open the resulting archive as TarFile. Call after `with`.""" ++ bio = io.BytesIO(self.contents) ++ return tarfile.open(fileobj=bio, **kwargs) ++ ++ ++class TestExtractionFilters(unittest.TestCase): ++ ++ # A temporary directory for the extraction results. ++ # All files that "escape" the destination path should still end ++ # up in this directory. ++ outerdir = pathlib.Path(TEMPDIR) / 'outerdir' ++ ++ # The destination for the extraction, within `outerdir` ++ destdir = outerdir / 'dest' ++ ++ @contextmanager ++ def check_context(self, tar, filter): ++ """Extracts `tar` to `self.destdir` and allows checking the result ++ ++ If an error occurs, it must be checked using `expect_exception` ++ ++ Otherwise, all resulting files must be checked using `expect_file`, ++ except the destination directory itself and parent directories of ++ other files. ++ When checking directories, do so before their contents. ++ """ ++ with support.temp_dir(self.outerdir): ++ try: ++ tar.extractall(self.destdir, filter=filter) ++ except Exception as exc: ++ self.raised_exception = exc ++ self.expected_paths = set() ++ else: ++ self.raised_exception = None ++ self.expected_paths = set(self.outerdir.glob('**/*')) ++ self.expected_paths.discard(self.destdir) ++ try: ++ yield ++ finally: ++ tar.close() ++ if self.raised_exception: ++ raise self.raised_exception ++ self.assertEqual(self.expected_paths, set()) ++ ++ def expect_file(self, name, type=None, symlink_to=None, mode=None): ++ """Check a single file. See check_context.""" ++ if self.raised_exception: ++ raise self.raised_exception ++ # use normpath() rather than resolve() so we don't follow symlinks ++ path = pathlib.Path(os.path.normpath(self.destdir / name)) ++ self.assertIn(path, self.expected_paths) ++ self.expected_paths.remove(path) ++ ++ # When checking mode, ignore Windows (which can only set user read and ++ # user write bits). Newer versions of Python use `os_helper.can_chmod()` ++ # instead of hardcoding Windows. ++ if mode is not None and sys.platform != 'win32': ++ got = stat.filemode(stat.S_IMODE(path.stat().st_mode)) ++ self.assertEqual(got, mode) ++ ++ if type is None and isinstance(name, str) and name.endswith('/'): ++ type = tarfile.DIRTYPE ++ if symlink_to is not None: ++ got = pathlib.Path(os.readlink(str(self.destdir / name))) ++ expected = pathlib.Path(symlink_to) ++ # The symlink might be the same (textually) as what we expect, ++ # but some systems change the link to an equivalent path, so ++ # we fall back to samefile(). ++ if expected != got: ++ self.assertTrue(got.samefile(expected)) ++ elif type == tarfile.REGTYPE or type is None: ++ self.assertTrue(path.is_file()) ++ elif type == tarfile.DIRTYPE: ++ self.assertTrue(path.is_dir()) ++ elif type == tarfile.FIFOTYPE: ++ self.assertTrue(path.is_fifo()) ++ else: ++ raise NotImplementedError(type) ++ for parent in path.parents: ++ self.expected_paths.discard(parent) ++ ++ def expect_exception(self, exc_type, message_re='.'): ++ with self.assertRaisesRegex(exc_type, message_re): ++ if self.raised_exception is not None: ++ raise self.raised_exception ++ self.raised_exception = None ++ ++ def test_benign_file(self): ++ with ArchiveMaker() as arc: ++ arc.add('benign.txt') ++ for filter in 'fully_trusted', 'tar', 'data': ++ with self.check_context(arc.open(), filter): ++ self.expect_file('benign.txt') ++ ++ def test_absolute(self): ++ # Test handling a member with an absolute path ++ # Inspired by 'absolute1' in https://github.com/jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add(self.outerdir / 'escaped.evil') ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ self.expect_file('../escaped.evil') ++ ++ for filter in 'tar', 'data': ++ with self.check_context(arc.open(), filter): ++ if str(self.outerdir).startswith('/'): ++ # We strip leading slashes, as e.g. GNU tar does ++ # (without --absolute-filenames). ++ outerdir_stripped = str(self.outerdir).lstrip('/') ++ self.expect_file(f'{outerdir_stripped}/escaped.evil') ++ else: ++ # On this system, absolute paths don't have leading ++ # slashes. ++ # So, there's nothing to strip. We refuse to unpack ++ # to an absolute path, nonetheless. ++ self.expect_exception( ++ tarfile.AbsolutePathError, ++ """['"].*escaped.evil['"] has an absolute path""") ++ ++ def test_parent_symlink(self): ++ # Test interplaying symlinks ++ # Inspired by 'dirsymlink2a' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('current', symlink_to='.') ++ arc.add('parent', symlink_to='current/..') ++ arc.add('parent/evil') ++ ++ if support.can_symlink(): ++ with self.check_context(arc.open(), 'fully_trusted'): ++ if self.raised_exception is not None: ++ # Windows will refuse to create a file that's a symlink to itself ++ # (and tarfile doesn't swallow that exception) ++ self.expect_exception(FileExistsError) ++ # The other cases will fail with this error too. ++ # Skip the rest of this test. ++ return ++ else: ++ self.expect_file('current', symlink_to='.') ++ self.expect_file('parent', symlink_to='current/..') ++ self.expect_file('../evil') ++ ++ with self.check_context(arc.open(), 'tar'): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, ++ """'parent/evil' would be extracted to ['"].*evil['"], """ ++ + "which is outside the destination") ++ ++ with self.check_context(arc.open(), 'data'): ++ self.expect_exception( ++ tarfile.LinkOutsideDestinationError, ++ """'parent' would link to ['"].*outerdir['"], """ ++ + "which is outside the destination") ++ ++ else: ++ # No symlink support. The symlinks are ignored. ++ with self.check_context(arc.open(), 'fully_trusted'): ++ self.expect_file('parent/evil') ++ with self.check_context(arc.open(), 'tar'): ++ self.expect_file('parent/evil') ++ with self.check_context(arc.open(), 'data'): ++ self.expect_file('parent/evil') ++ ++ def test_parent_symlink2(self): ++ # Test interplaying symlinks ++ # Inspired by 'dirsymlink2b' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('current', symlink_to='.') ++ arc.add('current/parent', symlink_to='..') ++ arc.add('parent/evil') ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ if support.can_symlink(): ++ self.expect_file('current', symlink_to='.') ++ self.expect_file('parent', symlink_to='..') ++ self.expect_file('../evil') ++ else: ++ self.expect_file('current/') ++ self.expect_file('parent/evil') ++ ++ with self.check_context(arc.open(), 'tar'): ++ if support.can_symlink(): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, ++ "'parent/evil' would be extracted to " ++ + """['"].*evil['"], which is outside """ ++ + "the destination") ++ else: ++ self.expect_file('current/') ++ self.expect_file('parent/evil') ++ ++ with self.check_context(arc.open(), 'data'): ++ self.expect_exception( ++ tarfile.LinkOutsideDestinationError, ++ """'current/parent' would link to ['"].*['"], """ ++ + "which is outside the destination") ++ ++ def test_absolute_symlink(self): ++ # Test symlink to an absolute path ++ # Inspired by 'dirsymlink' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('parent', symlink_to=self.outerdir) ++ arc.add('parent/evil') ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ if support.can_symlink(): ++ self.expect_file('parent', symlink_to=self.outerdir) ++ self.expect_file('../evil') ++ else: ++ self.expect_file('parent/evil') ++ ++ with self.check_context(arc.open(), 'tar'): ++ if support.can_symlink(): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, ++ "'parent/evil' would be extracted to " ++ + """['"].*evil['"], which is outside """ ++ + "the destination") ++ else: ++ self.expect_file('parent/evil') ++ ++ with self.check_context(arc.open(), 'data'): ++ self.expect_exception( ++ tarfile.AbsoluteLinkError, ++ "'parent' is a symlink to an absolute path") ++ ++ def test_sly_relative0(self): ++ # Inspired by 'relative0' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('../moo', symlink_to='..//tmp/moo') ++ ++ try: ++ with self.check_context(arc.open(), filter='fully_trusted'): ++ if support.can_symlink(): ++ if isinstance(self.raised_exception, FileExistsError): ++ # XXX TarFile happens to fail creating a parent ++ # directory. ++ # This might be a bug, but fixing it would hurt ++ # security. ++ # Note that e.g. GNU `tar` rejects '..' components, ++ # so you could argue this is an invalid archive and we ++ # just raise an bad type of exception. ++ self.expect_exception(FileExistsError) ++ else: ++ self.expect_file('../moo', symlink_to='..//tmp/moo') ++ else: ++ # The symlink can't be extracted and is ignored ++ pass ++ except FileExistsError: ++ pass ++ ++ for filter in 'tar', 'data': ++ with self.check_context(arc.open(), filter): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, ++ "'../moo' would be extracted to " ++ + "'.*moo', which is outside " ++ + "the destination") ++ ++ def test_sly_relative2(self): ++ # Inspired by 'relative2' in jwilk/traversal-archives ++ with ArchiveMaker() as arc: ++ arc.add('tmp/') ++ arc.add('tmp/../../moo', symlink_to='tmp/../..//tmp/moo') ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ self.expect_file('tmp', type=tarfile.DIRTYPE) ++ if support.can_symlink(): ++ self.expect_file('../moo', symlink_to='tmp/../../tmp/moo') ++ ++ for filter in 'tar', 'data': ++ with self.check_context(arc.open(), filter): ++ self.expect_exception( ++ tarfile.OutsideDestinationError, ++ "'tmp/../../moo' would be extracted to " ++ + """['"].*moo['"], which is outside the """ ++ + "destination") ++ ++ def test_modes(self): ++ # Test how file modes are extracted ++ # (Note that the modes are ignored on platforms without working chmod) ++ with ArchiveMaker() as arc: ++ arc.add('all_bits', mode='?rwsrwsrwt') ++ arc.add('perm_bits', mode='?rwxrwxrwx') ++ arc.add('exec_group_other', mode='?rw-rwxrwx') ++ arc.add('read_group_only', mode='?---r-----') ++ arc.add('no_bits', mode='?---------') ++ arc.add('dir/', mode='?---rwsrwt') ++ ++ # On some systems, setting the sticky bit is a no-op. ++ # Check if that's the case. ++ tmp_filename = os.path.join(TEMPDIR, "tmp.file") ++ with open(tmp_filename, 'w'): ++ pass ++ os.chmod(tmp_filename, os.stat(tmp_filename).st_mode | stat.S_ISVTX) ++ have_sticky_files = (os.stat(tmp_filename).st_mode & stat.S_ISVTX) ++ os.unlink(tmp_filename) ++ ++ os.mkdir(tmp_filename) ++ os.chmod(tmp_filename, os.stat(tmp_filename).st_mode | stat.S_ISVTX) ++ have_sticky_dirs = (os.stat(tmp_filename).st_mode & stat.S_ISVTX) ++ os.rmdir(tmp_filename) ++ ++ with self.check_context(arc.open(), 'fully_trusted'): ++ if have_sticky_files: ++ self.expect_file('all_bits', mode='?rwsrwsrwt') ++ else: ++ self.expect_file('all_bits', mode='?rwsrwsrwx') ++ self.expect_file('perm_bits', mode='?rwxrwxrwx') ++ self.expect_file('exec_group_other', mode='?rw-rwxrwx') ++ self.expect_file('read_group_only', mode='?---r-----') ++ self.expect_file('no_bits', mode='?---------') ++ if have_sticky_dirs: ++ self.expect_file('dir/', mode='?---rwsrwt') ++ else: ++ self.expect_file('dir/', mode='?---rwsrwx') ++ ++ with self.check_context(arc.open(), 'tar'): ++ self.expect_file('all_bits', mode='?rwxr-xr-x') ++ self.expect_file('perm_bits', mode='?rwxr-xr-x') ++ self.expect_file('exec_group_other', mode='?rw-r-xr-x') ++ self.expect_file('read_group_only', mode='?---r-----') ++ self.expect_file('no_bits', mode='?---------') ++ self.expect_file('dir/', mode='?---r-xr-x') ++ ++ with self.check_context(arc.open(), 'data'): ++ normal_dir_mode = stat.filemode(stat.S_IMODE( ++ self.outerdir.stat().st_mode)) ++ self.expect_file('all_bits', mode='?rwxr-xr-x') ++ self.expect_file('perm_bits', mode='?rwxr-xr-x') ++ self.expect_file('exec_group_other', mode='?rw-r--r--') ++ self.expect_file('read_group_only', mode='?rw-r-----') ++ self.expect_file('no_bits', mode='?rw-------') ++ self.expect_file('dir/', mode=normal_dir_mode) ++ ++ def test_pipe(self): ++ # Test handling of a special file ++ with ArchiveMaker() as arc: ++ arc.add('foo', type=tarfile.FIFOTYPE) ++ ++ for filter in 'fully_trusted', 'tar': ++ with self.check_context(arc.open(), filter): ++ if hasattr(os, 'mkfifo'): ++ self.expect_file('foo', type=tarfile.FIFOTYPE) ++ else: ++ # The pipe can't be extracted and is skipped. ++ pass ++ ++ with self.check_context(arc.open(), 'data'): ++ self.expect_exception( ++ tarfile.SpecialFileError, ++ "'foo' is a special file") ++ ++ def test_special_files(self): ++ # Creating device files is tricky. Instead of attempting that let's ++ # only check the filter result. ++ for special_type in tarfile.FIFOTYPE, tarfile.CHRTYPE, tarfile.BLKTYPE: ++ tarinfo = tarfile.TarInfo('foo') ++ tarinfo.type = special_type ++ trusted = tarfile.fully_trusted_filter(tarinfo, '') ++ self.assertIs(trusted, tarinfo) ++ tar = tarfile.tar_filter(tarinfo, '') ++ self.assertEqual(tar.type, special_type) ++ with self.assertRaises(tarfile.SpecialFileError) as cm: ++ tarfile.data_filter(tarinfo, '') ++ self.assertIsInstance(cm.exception.tarinfo, tarfile.TarInfo) ++ self.assertEqual(cm.exception.tarinfo.name, 'foo') ++ ++ def test_fully_trusted_filter(self): ++ # The 'fully_trusted' filter returns the original TarInfo objects. ++ with tarfile.TarFile.open(tarname) as tar: ++ for tarinfo in tar.getmembers(): ++ filtered = tarfile.fully_trusted_filter(tarinfo, '') ++ self.assertIs(filtered, tarinfo) ++ ++ def test_tar_filter(self): ++ # The 'tar' filter returns TarInfo objects with the same name/type. ++ # (It can also fail for particularly "evil" input, but we don't have ++ # that in the test archive.) ++ with tarfile.TarFile.open(tarname) as tar: ++ for tarinfo in tar.getmembers(): ++ filtered = tarfile.tar_filter(tarinfo, '') ++ self.assertIs(filtered.name, tarinfo.name) ++ self.assertIs(filtered.type, tarinfo.type) ++ ++ def test_data_filter(self): ++ # The 'data' filter either raises, or returns TarInfo with the same ++ # name/type. ++ with tarfile.TarFile.open(tarname) as tar: ++ for tarinfo in tar.getmembers(): ++ try: ++ filtered = tarfile.data_filter(tarinfo, '') ++ except tarfile.FilterError: ++ continue ++ self.assertIs(filtered.name, tarinfo.name) ++ self.assertIs(filtered.type, tarinfo.type) ++ ++ def test_default_filter_warns_not(self): ++ """Ensure the default filter does not warn (like in 3.12)""" ++ with ArchiveMaker() as arc: ++ arc.add('foo') ++ with support.check_no_warnings(self): ++ with self.check_context(arc.open(), None): ++ self.expect_file('foo') ++ ++ def test_change_default_filter_on_instance(self): ++ tar = tarfile.TarFile(tarname, 'r') ++ def strict_filter(tarinfo, path): ++ if tarinfo.name == 'ustar/regtype': ++ return tarinfo ++ else: ++ return None ++ tar.extraction_filter = strict_filter ++ with self.check_context(tar, None): ++ self.expect_file('ustar/regtype') ++ ++ def test_change_default_filter_on_class(self): ++ def strict_filter(tarinfo, path): ++ if tarinfo.name == 'ustar/regtype': ++ return tarinfo ++ else: ++ return None ++ tar = tarfile.TarFile(tarname, 'r') ++ with support.swap_attr(tarfile.TarFile, 'extraction_filter', ++ staticmethod(strict_filter)): ++ with self.check_context(tar, None): ++ self.expect_file('ustar/regtype') ++ ++ def test_change_default_filter_on_subclass(self): ++ class TarSubclass(tarfile.TarFile): ++ def extraction_filter(self, tarinfo, path): ++ if tarinfo.name == 'ustar/regtype': ++ return tarinfo ++ else: ++ return None ++ ++ tar = TarSubclass(tarname, 'r') ++ with self.check_context(tar, None): ++ self.expect_file('ustar/regtype') ++ ++ def test_change_default_filter_to_string(self): ++ tar = tarfile.TarFile(tarname, 'r') ++ tar.extraction_filter = 'data' ++ with self.check_context(tar, None): ++ self.expect_exception(TypeError) ++ ++ def test_custom_filter(self): ++ def custom_filter(tarinfo, path): ++ self.assertIs(path, self.destdir) ++ if tarinfo.name == 'move_this': ++ return tarinfo.replace(name='moved') ++ if tarinfo.name == 'ignore_this': ++ return None ++ return tarinfo ++ ++ with ArchiveMaker() as arc: ++ arc.add('move_this') ++ arc.add('ignore_this') ++ arc.add('keep') ++ with self.check_context(arc.open(), custom_filter): ++ self.expect_file('moved') ++ self.expect_file('keep') ++ ++ def test_bad_filter_name(self): ++ with ArchiveMaker() as arc: ++ arc.add('foo') ++ with self.check_context(arc.open(), 'bad filter name'): ++ self.expect_exception(ValueError) ++ ++ def test_stateful_filter(self): ++ # Stateful filters should be possible. ++ # (This doesn't really test tarfile. Rather, it demonstrates ++ # that third parties can implement a stateful filter.) ++ class StatefulFilter: ++ def __enter__(self): ++ self.num_files_processed = 0 ++ return self ++ ++ def __call__(self, tarinfo, path): ++ try: ++ tarinfo = tarfile.data_filter(tarinfo, path) ++ except tarfile.FilterError: ++ return None ++ self.num_files_processed += 1 ++ return tarinfo ++ ++ def __exit__(self, *exc_info): ++ self.done = True ++ ++ with ArchiveMaker() as arc: ++ arc.add('good') ++ arc.add('bad', symlink_to='/') ++ arc.add('good') ++ with StatefulFilter() as custom_filter: ++ with self.check_context(arc.open(), custom_filter): ++ self.expect_file('good') ++ self.assertEqual(custom_filter.num_files_processed, 2) ++ self.assertEqual(custom_filter.done, True) ++ ++ def test_errorlevel(self): ++ def extracterror_filter(tarinfo, path): ++ raise tarfile.ExtractError('failed with ExtractError') ++ def filtererror_filter(tarinfo, path): ++ raise tarfile.FilterError('failed with FilterError') ++ def oserror_filter(tarinfo, path): ++ raise OSError('failed with OSError') ++ def tarerror_filter(tarinfo, path): ++ raise tarfile.TarError('failed with base TarError') ++ def valueerror_filter(tarinfo, path): ++ raise ValueError('failed with ValueError') ++ ++ with ArchiveMaker() as arc: ++ arc.add('file') ++ ++ # If errorlevel is 0, errors affected by errorlevel are ignored ++ ++ with self.check_context(arc.open(errorlevel=0), extracterror_filter): ++ self.expect_file('file') ++ ++ with self.check_context(arc.open(errorlevel=0), filtererror_filter): ++ self.expect_file('file') ++ ++ with self.check_context(arc.open(errorlevel=0), oserror_filter): ++ self.expect_file('file') ++ ++ with self.check_context(arc.open(errorlevel=0), tarerror_filter): ++ self.expect_exception(tarfile.TarError) ++ ++ with self.check_context(arc.open(errorlevel=0), valueerror_filter): ++ self.expect_exception(ValueError) ++ ++ # If 1, all fatal errors are raised ++ ++ with self.check_context(arc.open(errorlevel=1), extracterror_filter): ++ self.expect_file('file') ++ ++ with self.check_context(arc.open(errorlevel=1), filtererror_filter): ++ self.expect_exception(tarfile.FilterError) ++ ++ with self.check_context(arc.open(errorlevel=1), oserror_filter): ++ self.expect_exception(OSError) ++ ++ with self.check_context(arc.open(errorlevel=1), tarerror_filter): ++ self.expect_exception(tarfile.TarError) ++ ++ with self.check_context(arc.open(errorlevel=1), valueerror_filter): ++ self.expect_exception(ValueError) ++ ++ # If 2, all non-fatal errors are raised as well. ++ ++ with self.check_context(arc.open(errorlevel=2), extracterror_filter): ++ self.expect_exception(tarfile.ExtractError) ++ ++ with self.check_context(arc.open(errorlevel=2), filtererror_filter): ++ self.expect_exception(tarfile.FilterError) ++ ++ with self.check_context(arc.open(errorlevel=2), oserror_filter): ++ self.expect_exception(OSError) ++ ++ with self.check_context(arc.open(errorlevel=2), tarerror_filter): ++ self.expect_exception(tarfile.TarError) ++ ++ with self.check_context(arc.open(errorlevel=2), valueerror_filter): ++ self.expect_exception(ValueError) ++ ++ # We only handle ExtractionError, FilterError & OSError specially. ++ ++ with self.check_context(arc.open(errorlevel='boo!'), filtererror_filter): ++ self.expect_exception(TypeError) # errorlevel is not int ++ ++ + def setUpModule(): + support.unlink(TEMPDIR) + os.makedirs(TEMPDIR) +diff --git a/Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst b/Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst +new file mode 100644 +index 0000000000..48a105a4a1 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-03-23-15-24-38.gh-issue-102953.YR4KaK.rst +@@ -0,0 +1,4 @@ ++The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`, ++have a new a *filter* argument that allows limiting tar features than may be ++surprising or dangerous, such as creating files outside the destination ++directory. See :ref:`tarfile-extraction-filter` for details. diff --git a/pip-CVE-2007-4559.patch b/pip-CVE-2007-4559.patch new file mode 100644 index 0000000..635f90c --- /dev/null +++ b/pip-CVE-2007-4559.patch @@ -0,0 +1,42 @@ +diff --git a/pip/_internal/utils/misc.py b/pip/_internal/utils/misc.py +index 84a421f..fbdb654 100644 +--- a/pip/_internal/utils/misc.py ++++ b/pip/_internal/utils/misc.py +@@ -532,6 +532,13 @@ def untar_file(filename, location): + if leading: + fn = split_leading_dir(fn)[1] + path = os.path.join(location, fn) ++ ++ # Call the `data` filter for its side effect (raising exception) ++ try: ++ tarfile.data_filter(member.replace(name=fn), location) ++ except tarfile.LinkOutsideDestinationError: ++ pass ++ + if member.isdir(): + ensure_dir(path) + elif member.issym(): +diff --git a/pip/_vendor/distlib/util.py b/pip/_vendor/distlib/util.py +index 0b14a93..8f3f12e 100644 +--- a/pip/_vendor/distlib/util.py ++++ b/pip/_vendor/distlib/util.py +@@ -1238,6 +1238,19 @@ def unarchive(archive_filename, dest_dir, format=None, check=True): + for tarinfo in archive.getmembers(): + if not isinstance(tarinfo.name, text_type): + tarinfo.name = tarinfo.name.decode('utf-8') ++ ++ # Limit extraction of dangerous items, if this Python ++ # allows it easily. If not, just trust the input. ++ # See: https://docs.python.org/3/library/tarfile.html#extraction-filters ++ def extraction_filter(member, path): ++ """Run tarfile.tar_fillter, but raise the expected ValueError""" ++ # This is only called if the current Python has tarfile filters ++ try: ++ return tarfile.tar_filter(member, path) ++ except tarfile.FilterError as exc: ++ raise ValueError(str(exc)) ++ archive.extraction_filter = extraction_filter ++ + archive.extractall(dest_dir) + + finally: diff --git a/python3.6.spec b/python3.6.spec index efbfe7d..bd35776 100644 --- a/python3.6.spec +++ b/python3.6.spec @@ -17,7 +17,7 @@ URL: https://www.python.org/ #global prerel ... %global upstream_version %{general_version}%{?prerel} Version: %{general_version}%{?prerel:~%{prerel}} -Release: 23%{?dist} +Release: 27%{?dist} # Python is Python # pip MIT is and bundles: # appdirs: MIT @@ -319,6 +319,9 @@ Source10: idle3.desktop # AppData file for idle3 Source11: idle3.appdata.xml +# Patch for the bundled pip wheel for CVE-2007-4559 +Source12: pip-CVE-2007-4559.patch + # (Patches taken from github.com/fedora-python/cpython) # 00001 # d06a8853cf4bae9e115f45e1d531d2dc152c5cc8 @@ -614,6 +617,20 @@ Patch392: 00392-cve-2022-37454-fix-buffer-overflows-in-_sha3-module.patch # the behavior to linear. Patch394: 00394-cve-2022-45061-cpu-denial-of-service-via-inefficient-idna-decoder.patch +# 00397 # e867e27272cd259b76133784ef3f2811e671f3db +# PEP 706, CVE-2007-4559: Filter API for tarfile.extractall +# +# Add API for allowing checks on the content of tar files, allowing callers to mitigate +# directory traversal (CVE-2007-4559) and related issues. +# +# Python 3.12 will warn if this API is not used. +# Python 3.14 will fail if it's not used. +# +# Backport from https://github.com/python/cpython/issues/102950 +# +# Change document: https://peps.python.org/pep-0706/ +Patch397: 00397-pep-706-cve-2007-4559-filter-api-for-tarfile-extractall.patch + # 00399 # dc0a803eea47d3b4f0657816b112b5a33491500f # CVE-2023-24329 # @@ -1010,6 +1027,12 @@ rm Lib/ensurepip/_bundled/*.whl # Apply the remaining patches %autopatch -m 190 +# Patch the bundled pip wheel for CVE-2007-4559 +unzip -qq Lib/ensurepip/_bundled/pip-%{pip_version}-py2.py3-none-any.whl +patch -p1 < %{SOURCE12} +zip -rq Lib/ensurepip/_bundled/pip-%{pip_version}-py2.py3-none-any.whl pip pip-%{pip_version}.dist-info +rm -rf pip/ pip-%{pip_version}.dist-info/ + # Remove bundled libraries to ensure that we're using the system copy. rm -r Modules/expat rm -r Modules/zlib @@ -1919,6 +1942,10 @@ CheckPython optimized # ====================================================== %changelog +* Thu Feb 29 2024 Charalampos Stratakis - 3.6.15-27 +- Security fix for CVE-2007-4559 +- Fixes: rhbz#2141080 + * Wed Feb 28 2024 Charalampos Stratakis - 3.6.15-23 - Fix tests for XMLPullParser with Expat 2.6.0 From 4a51f6d585cc3468f473d045d63690a82890ad05 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Wed, 24 Apr 2024 22:11:14 +0200 Subject: [PATCH 3/8] Security fix for CVE-2024-0450 and CVE-2023-6597 --- 00426-cve-2023-6597.patch | 295 ++++++++++++++++ ...pextfile-tell-and-seek-cve-2024-0450.patch | 314 ++++++++++++++++++ python3.6.spec | 24 +- 3 files changed, 632 insertions(+), 1 deletion(-) create mode 100644 00426-cve-2023-6597.patch create mode 100644 00427-zipextfile-tell-and-seek-cve-2024-0450.patch diff --git a/00426-cve-2023-6597.patch b/00426-cve-2023-6597.patch new file mode 100644 index 0000000..4900012 --- /dev/null +++ b/00426-cve-2023-6597.patch @@ -0,0 +1,295 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Lumir Balhar +Date: Wed, 24 Apr 2024 00:19:23 +0200 +Subject: [PATCH] 00426: CVE-2023-6597 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Combines Two fixes for tempfile.TemporaryDirectory: +https://github.com/python/cpython/commit/e9b51c0ad81da1da11ae65840ac8b50a8521373c +https://github.com/python/cpython/commit/02a9259c717738dfe6b463c44d7e17f2b6d2cb3a + +Co-authored-by: Søren Løvborg +Co-authored-by: Serhiy Storchaka +--- + Lib/tempfile.py | 44 +++++++++- + Lib/test/test_tempfile.py | 166 +++++++++++++++++++++++++++++++++++--- + 2 files changed, 199 insertions(+), 11 deletions(-) + +diff --git a/Lib/tempfile.py b/Lib/tempfile.py +index 2cb5434ba7..8e401f548a 100644 +--- a/Lib/tempfile.py ++++ b/Lib/tempfile.py +@@ -276,6 +276,23 @@ def _mkstemp_inner(dir, pre, suf, flags, output_type): + "No usable temporary file name found") + + ++def _dont_follow_symlinks(func, path, *args): ++ # Pass follow_symlinks=False, unless not supported on this platform. ++ if func in _os.supports_follow_symlinks: ++ func(path, *args, follow_symlinks=False) ++ elif _os.name == 'nt' or not _os.path.islink(path): ++ func(path, *args) ++ ++ ++def _resetperms(path): ++ try: ++ chflags = _os.chflags ++ except AttributeError: ++ pass ++ else: ++ _dont_follow_symlinks(chflags, path, 0) ++ _dont_follow_symlinks(_os.chmod, path, 0o700) ++ + # User visible interfaces. + + def gettempprefix(): +@@ -794,9 +811,32 @@ class TemporaryDirectory(object): + self, self._cleanup, self.name, + warn_message="Implicitly cleaning up {!r}".format(self)) + ++ @classmethod ++ def _rmtree(cls, name): ++ def onerror(func, path, exc_info): ++ if issubclass(exc_info[0], PermissionError): ++ try: ++ if path != name: ++ _resetperms(_os.path.dirname(path)) ++ _resetperms(path) ++ ++ try: ++ _os.unlink(path) ++ # PermissionError is raised on FreeBSD for directories ++ except (IsADirectoryError, PermissionError): ++ cls._rmtree(path) ++ except FileNotFoundError: ++ pass ++ elif issubclass(exc_info[0], FileNotFoundError): ++ pass ++ else: ++ raise ++ ++ _shutil.rmtree(name, onerror=onerror) ++ + @classmethod + def _cleanup(cls, name, warn_message): +- _shutil.rmtree(name) ++ cls._rmtree(name) + _warnings.warn(warn_message, ResourceWarning) + + def __repr__(self): +@@ -810,4 +850,4 @@ class TemporaryDirectory(object): + + def cleanup(self): + if self._finalizer.detach(): +- _shutil.rmtree(self.name) ++ self._rmtree(self.name) +diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py +index 710756bde6..c5560e12e7 100644 +--- a/Lib/test/test_tempfile.py ++++ b/Lib/test/test_tempfile.py +@@ -1298,19 +1298,25 @@ class NulledModules: + class TestTemporaryDirectory(BaseTestCase): + """Test TemporaryDirectory().""" + +- def do_create(self, dir=None, pre="", suf="", recurse=1): ++ def do_create(self, dir=None, pre="", suf="", recurse=1, dirs=1, files=1): + if dir is None: + dir = tempfile.gettempdir() + tmp = tempfile.TemporaryDirectory(dir=dir, prefix=pre, suffix=suf) + self.nameCheck(tmp.name, dir, pre, suf) +- # Create a subdirectory and some files +- if recurse: +- d1 = self.do_create(tmp.name, pre, suf, recurse-1) +- d1.name = None +- with open(os.path.join(tmp.name, "test.txt"), "wb") as f: +- f.write(b"Hello world!") ++ self.do_create2(tmp.name, recurse, dirs, files) + return tmp + ++ def do_create2(self, path, recurse=1, dirs=1, files=1): ++ # Create subdirectories and some files ++ if recurse: ++ for i in range(dirs): ++ name = os.path.join(path, "dir%d" % i) ++ os.mkdir(name) ++ self.do_create2(name, recurse-1, dirs, files) ++ for i in range(files): ++ with open(os.path.join(path, "test%d.txt" % i), "wb") as f: ++ f.write(b"Hello world!") ++ + def test_mkdtemp_failure(self): + # Check no additional exception if mkdtemp fails + # Previously would raise AttributeError instead +@@ -1350,11 +1356,108 @@ class TestTemporaryDirectory(BaseTestCase): + "TemporaryDirectory %s exists after cleanup" % d1.name) + self.assertTrue(os.path.exists(d2.name), + "Directory pointed to by a symlink was deleted") +- self.assertEqual(os.listdir(d2.name), ['test.txt'], ++ self.assertEqual(os.listdir(d2.name), ['test0.txt'], + "Contents of the directory pointed to by a symlink " + "were deleted") + d2.cleanup() + ++ @support.skip_unless_symlink ++ def test_cleanup_with_symlink_modes(self): ++ # cleanup() should not follow symlinks when fixing mode bits (#91133) ++ with self.do_create(recurse=0) as d2: ++ file1 = os.path.join(d2, 'file1') ++ open(file1, 'wb').close() ++ dir1 = os.path.join(d2, 'dir1') ++ os.mkdir(dir1) ++ for mode in range(8): ++ mode <<= 6 ++ with self.subTest(mode=format(mode, '03o')): ++ def test(target, target_is_directory): ++ d1 = self.do_create(recurse=0) ++ symlink = os.path.join(d1.name, 'symlink') ++ os.symlink(target, symlink, ++ target_is_directory=target_is_directory) ++ try: ++ os.chmod(symlink, mode, follow_symlinks=False) ++ except NotImplementedError: ++ pass ++ try: ++ os.chmod(symlink, mode) ++ except FileNotFoundError: ++ pass ++ os.chmod(d1.name, mode) ++ d1.cleanup() ++ self.assertFalse(os.path.exists(d1.name)) ++ ++ with self.subTest('nonexisting file'): ++ test('nonexisting', target_is_directory=False) ++ with self.subTest('nonexisting dir'): ++ test('nonexisting', target_is_directory=True) ++ ++ with self.subTest('existing file'): ++ os.chmod(file1, mode) ++ old_mode = os.stat(file1).st_mode ++ test(file1, target_is_directory=False) ++ new_mode = os.stat(file1).st_mode ++ self.assertEqual(new_mode, old_mode, ++ '%03o != %03o' % (new_mode, old_mode)) ++ ++ with self.subTest('existing dir'): ++ os.chmod(dir1, mode) ++ old_mode = os.stat(dir1).st_mode ++ test(dir1, target_is_directory=True) ++ new_mode = os.stat(dir1).st_mode ++ self.assertEqual(new_mode, old_mode, ++ '%03o != %03o' % (new_mode, old_mode)) ++ ++ @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.chflags') ++ @support.skip_unless_symlink ++ def test_cleanup_with_symlink_flags(self): ++ # cleanup() should not follow symlinks when fixing flags (#91133) ++ flags = stat.UF_IMMUTABLE | stat.UF_NOUNLINK ++ self.check_flags(flags) ++ ++ with self.do_create(recurse=0) as d2: ++ file1 = os.path.join(d2, 'file1') ++ open(file1, 'wb').close() ++ dir1 = os.path.join(d2, 'dir1') ++ os.mkdir(dir1) ++ def test(target, target_is_directory): ++ d1 = self.do_create(recurse=0) ++ symlink = os.path.join(d1.name, 'symlink') ++ os.symlink(target, symlink, ++ target_is_directory=target_is_directory) ++ try: ++ os.chflags(symlink, flags, follow_symlinks=False) ++ except NotImplementedError: ++ pass ++ try: ++ os.chflags(symlink, flags) ++ except FileNotFoundError: ++ pass ++ os.chflags(d1.name, flags) ++ d1.cleanup() ++ self.assertFalse(os.path.exists(d1.name)) ++ ++ with self.subTest('nonexisting file'): ++ test('nonexisting', target_is_directory=False) ++ with self.subTest('nonexisting dir'): ++ test('nonexisting', target_is_directory=True) ++ ++ with self.subTest('existing file'): ++ os.chflags(file1, flags) ++ old_flags = os.stat(file1).st_flags ++ test(file1, target_is_directory=False) ++ new_flags = os.stat(file1).st_flags ++ self.assertEqual(new_flags, old_flags) ++ ++ with self.subTest('existing dir'): ++ os.chflags(dir1, flags) ++ old_flags = os.stat(dir1).st_flags ++ test(dir1, target_is_directory=True) ++ new_flags = os.stat(dir1).st_flags ++ self.assertEqual(new_flags, old_flags) ++ + @support.cpython_only + def test_del_on_collection(self): + # A TemporaryDirectory is deleted when garbage collected +@@ -1385,7 +1488,7 @@ class TestTemporaryDirectory(BaseTestCase): + + tmp2 = os.path.join(tmp.name, 'test_dir') + os.mkdir(tmp2) +- with open(os.path.join(tmp2, "test.txt"), "w") as f: ++ with open(os.path.join(tmp2, "test0.txt"), "w") as f: + f.write("Hello world!") + + {mod}.tmp = tmp +@@ -1453,6 +1556,51 @@ class TestTemporaryDirectory(BaseTestCase): + self.assertEqual(name, d.name) + self.assertFalse(os.path.exists(name)) + ++ def test_modes(self): ++ for mode in range(8): ++ mode <<= 6 ++ with self.subTest(mode=format(mode, '03o')): ++ d = self.do_create(recurse=3, dirs=2, files=2) ++ with d: ++ # Change files and directories mode recursively. ++ for root, dirs, files in os.walk(d.name, topdown=False): ++ for name in files: ++ os.chmod(os.path.join(root, name), mode) ++ os.chmod(root, mode) ++ d.cleanup() ++ self.assertFalse(os.path.exists(d.name)) ++ ++ def check_flags(self, flags): ++ # skip the test if these flags are not supported (ex: FreeBSD 13) ++ filename = support.TESTFN ++ try: ++ open(filename, "w").close() ++ try: ++ os.chflags(filename, flags) ++ except OSError as exc: ++ # "OSError: [Errno 45] Operation not supported" ++ self.skipTest(f"chflags() doesn't support flags " ++ f"{flags:#b}: {exc}") ++ else: ++ os.chflags(filename, 0) ++ finally: ++ support.unlink(filename) ++ ++ @unittest.skipUnless(hasattr(os, 'chflags'), 'requires os.lchflags') ++ def test_flags(self): ++ flags = stat.UF_IMMUTABLE | stat.UF_NOUNLINK ++ self.check_flags(flags) ++ ++ d = self.do_create(recurse=3, dirs=2, files=2) ++ with d: ++ # Change files and directories flags recursively. ++ for root, dirs, files in os.walk(d.name, topdown=False): ++ for name in files: ++ os.chflags(os.path.join(root, name), flags) ++ os.chflags(root, flags) ++ d.cleanup() ++ self.assertFalse(os.path.exists(d.name)) ++ + + if __name__ == "__main__": + unittest.main() diff --git a/00427-zipextfile-tell-and-seek-cve-2024-0450.patch b/00427-zipextfile-tell-and-seek-cve-2024-0450.patch new file mode 100644 index 0000000..2771a4a --- /dev/null +++ b/00427-zipextfile-tell-and-seek-cve-2024-0450.patch @@ -0,0 +1,314 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: John Jolly +Date: Tue, 30 Jan 2018 01:51:35 -0700 +Subject: [PATCH] 00427: ZipExtFile tell and seek, CVE-2024-0450 + +Backport of seek and tell methods for ZipExtFile makes it +possible to backport the fix for CVE-2024-0450. + +Combines: +https://github.com/python/cpython/commit/066df4fd454d6ff9be66e80b2a65995b10af174f +https://github.com/python/cpython/commit/66363b9a7b9fe7c99eba3a185b74c5fdbf842eba +--- + Doc/library/zipfile.rst | 6 +- + Lib/test/test_zipfile.py | 94 +++++++++++++++++++ + Lib/zipfile.py | 94 +++++++++++++++++++ + .../2017-12-21-22-00-11.bpo-22908.cVm89I.rst | 2 + + ...-09-28-13-15-51.gh-issue-109858.43e2dg.rst | 3 + + 5 files changed, 196 insertions(+), 3 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2017-12-21-22-00-11.bpo-22908.cVm89I.rst + create mode 100644 Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst + +diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst +index b65b61d8da..5c28ce52c2 100644 +--- a/Doc/library/zipfile.rst ++++ b/Doc/library/zipfile.rst +@@ -230,9 +230,9 @@ ZipFile Objects + With *mode* ``'r'`` the file-like object + (``ZipExtFile``) is read-only and provides the following methods: + :meth:`~io.BufferedIOBase.read`, :meth:`~io.IOBase.readline`, +- :meth:`~io.IOBase.readlines`, :meth:`__iter__`, +- :meth:`~iterator.__next__`. These objects can operate independently of +- the ZipFile. ++ :meth:`~io.IOBase.readlines`, :meth:`~io.IOBase.seek`, ++ :meth:`~io.IOBase.tell`, :meth:`__iter__`, :meth:`~iterator.__next__`. ++ These objects can operate independently of the ZipFile. + + With ``mode='w'``, a writable file handle is returned, which supports the + :meth:`~io.BufferedIOBase.write` method. While a writable file handle is open, +diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py +index e62b82e1d3..03799090b9 100644 +--- a/Lib/test/test_zipfile.py ++++ b/Lib/test/test_zipfile.py +@@ -1610,6 +1610,100 @@ class OtherTests(unittest.TestCase): + self.assertEqual(zipf.read('baz'), msg3) + self.assertEqual(zipf.namelist(), ['foo', 'bar', 'baz']) + ++ def test_seek_tell(self): ++ # Test seek functionality ++ txt = b"Where's Bruce?" ++ bloc = txt.find(b"Bruce") ++ # Check seek on a file ++ with zipfile.ZipFile(TESTFN, "w") as zipf: ++ zipf.writestr("foo.txt", txt) ++ with zipfile.ZipFile(TESTFN, "r") as zipf: ++ with zipf.open("foo.txt", "r") as fp: ++ fp.seek(bloc, os.SEEK_SET) ++ self.assertEqual(fp.tell(), bloc) ++ fp.seek(-bloc, os.SEEK_CUR) ++ self.assertEqual(fp.tell(), 0) ++ fp.seek(bloc, os.SEEK_CUR) ++ self.assertEqual(fp.tell(), bloc) ++ self.assertEqual(fp.read(5), txt[bloc:bloc+5]) ++ fp.seek(0, os.SEEK_END) ++ self.assertEqual(fp.tell(), len(txt)) ++ # Check seek on memory file ++ data = io.BytesIO() ++ with zipfile.ZipFile(data, mode="w") as zipf: ++ zipf.writestr("foo.txt", txt) ++ with zipfile.ZipFile(data, mode="r") as zipf: ++ with zipf.open("foo.txt", "r") as fp: ++ fp.seek(bloc, os.SEEK_SET) ++ self.assertEqual(fp.tell(), bloc) ++ fp.seek(-bloc, os.SEEK_CUR) ++ self.assertEqual(fp.tell(), 0) ++ fp.seek(bloc, os.SEEK_CUR) ++ self.assertEqual(fp.tell(), bloc) ++ self.assertEqual(fp.read(5), txt[bloc:bloc+5]) ++ fp.seek(0, os.SEEK_END) ++ self.assertEqual(fp.tell(), len(txt)) ++ ++ @requires_zlib ++ def test_full_overlap(self): ++ data = ( ++ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' ++ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' ++ b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' ++ b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' ++ b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' ++ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK' ++ b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' ++ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00' ++ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05' ++ b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00' ++ b'\x00\x00\x00' ++ ) ++ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: ++ self.assertEqual(zipf.namelist(), ['a', 'b']) ++ zi = zipf.getinfo('a') ++ self.assertEqual(zi.header_offset, 0) ++ self.assertEqual(zi.compress_size, 16) ++ self.assertEqual(zi.file_size, 1033) ++ zi = zipf.getinfo('b') ++ self.assertEqual(zi.header_offset, 0) ++ self.assertEqual(zi.compress_size, 16) ++ self.assertEqual(zi.file_size, 1033) ++ self.assertEqual(len(zipf.read('a')), 1033) ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): ++ zipf.read('b') ++ ++ @requires_zlib ++ def test_quoted_overlap(self): ++ data = ( ++ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc' ++ b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00' ++ b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l' ++ b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' ++ b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\' ++ b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0' ++ b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01' ++ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' ++ b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l' ++ b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' ++ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00' ++ b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00' ++ b'\x00S\x00\x00\x00\x00\x00' ++ ) ++ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: ++ self.assertEqual(zipf.namelist(), ['a', 'b']) ++ zi = zipf.getinfo('a') ++ self.assertEqual(zi.header_offset, 0) ++ self.assertEqual(zi.compress_size, 52) ++ self.assertEqual(zi.file_size, 1064) ++ zi = zipf.getinfo('b') ++ self.assertEqual(zi.header_offset, 36) ++ self.assertEqual(zi.compress_size, 16) ++ self.assertEqual(zi.file_size, 1033) ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'): ++ zipf.read('a') ++ self.assertEqual(len(zipf.read('b')), 1033) ++ + def tearDown(self): + unlink(TESTFN) + unlink(TESTFN2) +diff --git a/Lib/zipfile.py b/Lib/zipfile.py +index edde0c5fd4..e6d7676079 100644 +--- a/Lib/zipfile.py ++++ b/Lib/zipfile.py +@@ -338,6 +338,7 @@ class ZipInfo (object): + 'compress_size', + 'file_size', + '_raw_time', ++ '_end_offset', + ) + + def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): +@@ -376,6 +377,7 @@ class ZipInfo (object): + self.volume = 0 # Volume number of file header + self.internal_attr = 0 # Internal attributes + self.external_attr = 0 # External file attributes ++ self._end_offset = None # Start of the next local header or central directory + # Other attributes are set by class ZipFile: + # header_offset Byte offset to the file header + # CRC CRC-32 of the uncompressed file +@@ -718,6 +720,18 @@ class _SharedFile: + self._close = close + self._lock = lock + self._writing = writing ++ self.seekable = file.seekable ++ self.tell = file.tell ++ ++ def seek(self, offset, whence=0): ++ with self._lock: ++ if self.writing(): ++ raise ValueError("Can't reposition in the ZIP file while " ++ "there is an open writing handle on it. " ++ "Close the writing handle before trying to read.") ++ self._file.seek(self._pos) ++ self._pos = self._file.tell() ++ return self._pos + + def read(self, n=-1): + with self._lock: +@@ -768,6 +782,9 @@ class ZipExtFile(io.BufferedIOBase): + # Read from compressed files in 4k blocks. + MIN_READ_SIZE = 4096 + ++ # Chunk size to read during seek ++ MAX_SEEK_READ = 1 << 24 ++ + def __init__(self, fileobj, mode, zipinfo, decrypter=None, + close_fileobj=False): + self._fileobj = fileobj +@@ -800,6 +817,17 @@ class ZipExtFile(io.BufferedIOBase): + else: + self._expected_crc = None + ++ self._seekable = False ++ try: ++ if fileobj.seekable(): ++ self._orig_compress_start = fileobj.tell() ++ self._orig_compress_size = zipinfo.compress_size ++ self._orig_file_size = zipinfo.file_size ++ self._orig_start_crc = self._running_crc ++ self._seekable = True ++ except AttributeError: ++ pass ++ + def __repr__(self): + result = ['<%s.%s' % (self.__class__.__module__, + self.__class__.__qualname__)] +@@ -985,6 +1013,62 @@ class ZipExtFile(io.BufferedIOBase): + finally: + super().close() + ++ def seekable(self): ++ return self._seekable ++ ++ def seek(self, offset, whence=0): ++ if not self._seekable: ++ raise io.UnsupportedOperation("underlying stream is not seekable") ++ curr_pos = self.tell() ++ if whence == 0: # Seek from start of file ++ new_pos = offset ++ elif whence == 1: # Seek from current position ++ new_pos = curr_pos + offset ++ elif whence == 2: # Seek from EOF ++ new_pos = self._orig_file_size + offset ++ else: ++ raise ValueError("whence must be os.SEEK_SET (0), " ++ "os.SEEK_CUR (1), or os.SEEK_END (2)") ++ ++ if new_pos > self._orig_file_size: ++ new_pos = self._orig_file_size ++ ++ if new_pos < 0: ++ new_pos = 0 ++ ++ read_offset = new_pos - curr_pos ++ buff_offset = read_offset + self._offset ++ ++ if buff_offset >= 0 and buff_offset < len(self._readbuffer): ++ # Just move the _offset index if the new position is in the _readbuffer ++ self._offset = buff_offset ++ read_offset = 0 ++ elif read_offset < 0: ++ # Position is before the current position. Reset the ZipExtFile ++ ++ self._fileobj.seek(self._orig_compress_start) ++ self._running_crc = self._orig_start_crc ++ self._compress_left = self._orig_compress_size ++ self._left = self._orig_file_size ++ self._readbuffer = b'' ++ self._offset = 0 ++ self._decompressor = zipfile._get_decompressor(self._compress_type) ++ self._eof = False ++ read_offset = new_pos ++ ++ while read_offset > 0: ++ read_len = min(self.MAX_SEEK_READ, read_offset) ++ self.read(read_len) ++ read_offset -= read_len ++ ++ return self.tell() ++ ++ def tell(self): ++ if not self._seekable: ++ raise io.UnsupportedOperation("underlying stream is not seekable") ++ filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset ++ return filepos ++ + + class _ZipWriteFile(io.BufferedIOBase): + def __init__(self, zf, zinfo, zip64): +@@ -1264,6 +1348,12 @@ class ZipFile: + if self.debug > 2: + print("total", total) + ++ end_offset = self.start_dir ++ for zinfo in sorted(self.filelist, ++ key=lambda zinfo: zinfo.header_offset, ++ reverse=True): ++ zinfo._end_offset = end_offset ++ end_offset = zinfo.header_offset + + def namelist(self): + """Return a list of file names in the archive.""" +@@ -1418,6 +1508,10 @@ class ZipFile: + 'File name in directory %r and header %r differ.' + % (zinfo.orig_filename, fname)) + ++ if (zinfo._end_offset is not None and ++ zef_file.tell() + zinfo.compress_size > zinfo._end_offset): ++ raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)") ++ + # check for encrypted flag & handle password + is_encrypted = zinfo.flag_bits & 0x1 + zd = None +diff --git a/Misc/NEWS.d/next/Library/2017-12-21-22-00-11.bpo-22908.cVm89I.rst b/Misc/NEWS.d/next/Library/2017-12-21-22-00-11.bpo-22908.cVm89I.rst +new file mode 100644 +index 0000000000..4f3cc01660 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2017-12-21-22-00-11.bpo-22908.cVm89I.rst +@@ -0,0 +1,2 @@ ++Added seek and tell to the ZipExtFile class. This only works if the file ++object used to open the zipfile is seekable. +diff --git a/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst +new file mode 100644 +index 0000000000..be279caffc +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst +@@ -0,0 +1,3 @@ ++Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises ++BadZipFile when try to read an entry that overlaps with other entry or ++central directory. diff --git a/python3.6.spec b/python3.6.spec index bd35776..127c479 100644 --- a/python3.6.spec +++ b/python3.6.spec @@ -17,7 +17,7 @@ URL: https://www.python.org/ #global prerel ... %global upstream_version %{general_version}%{?prerel} Version: %{general_version}%{?prerel:~%{prerel}} -Release: 27%{?dist} +Release: 28%{?dist} # Python is Python # pip MIT is and bundles: # appdirs: MIT @@ -679,6 +679,25 @@ Patch415: 00415-cve-2023-27043-gh-102988-reject-malformed-addresses-in-email-par # CVE-2023-52425. Future versions of Expat may be more reactive. Patch422: 00422-gh-115133-fix-tests-for-xmlpullparser-with-expat-2-6-0.patch +# 00426 # 05ddec93394a09199c3bbb2d71a4a2566fd50332 +# CVE-2023-6597 +# +# Combines Two fixes for tempfile.TemporaryDirectory: +# https://github.com/python/cpython/commit/e9b51c0ad81da1da11ae65840ac8b50a8521373c +# https://github.com/python/cpython/commit/02a9259c717738dfe6b463c44d7e17f2b6d2cb3a +Patch426: 00426-cve-2023-6597.patch + +# 00427 # 37c3b42b8931ed4eca0272bf53086eb28ca8544e +# ZipExtFile tell and seek, CVE-2024-0450 +# +# Backport of seek and tell methods for ZipExtFile makes it +# possible to backport the fix for CVE-2024-0450. +# +# Combines: +# https://github.com/python/cpython/commit/066df4fd454d6ff9be66e80b2a65995b10af174f +# https://github.com/python/cpython/commit/66363b9a7b9fe7c99eba3a185b74c5fdbf842eba +Patch427: 00427-zipextfile-tell-and-seek-cve-2024-0450.patch + # (New patches go here ^^^) # # When adding new patches to "python" and "python3" in Fedora, EL, etc., @@ -1942,6 +1961,9 @@ CheckPython optimized # ====================================================== %changelog +* Wed Apr 24 2024 Lumír Balhar - 3.6.15-28 +- Security fix for CVE-2024-0450 and CVE-2023-6597 + * Thu Feb 29 2024 Charalampos Stratakis - 3.6.15-27 - Security fix for CVE-2007-4559 - Fixes: rhbz#2141080 From be26dc77a7deee98543e2adf268d8edff60188bb Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Tue, 2 Jul 2024 19:46:28 +0200 Subject: [PATCH 4/8] Security fix for CVE-2024-4032 (rhbz#2293394) --- 00431-cve-2024-4032.patch | 356 ++++++++++++++++++++++++++++++++++++++ python3.6.spec | 13 +- 2 files changed, 368 insertions(+), 1 deletion(-) create mode 100644 00431-cve-2024-4032.patch diff --git a/00431-cve-2024-4032.patch b/00431-cve-2024-4032.patch new file mode 100644 index 0000000..52cf4ec --- /dev/null +++ b/00431-cve-2024-4032.patch @@ -0,0 +1,356 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Petr Viktorin +Date: Tue, 7 May 2024 11:58:20 +0200 +Subject: [PATCH] 00431: CVE-2024-4032: incorrect IPv4 and IPv6 private ranges + +Upstream issue: https://github.com/python/cpython/issues/113171 + +Backported from 3.8. +--- + Doc/library/ipaddress.rst | 43 ++++++++- + Doc/tools/susp-ignored.csv | 8 ++ + Lib/ipaddress.py | 95 +++++++++++++++---- + Lib/test/test_ipaddress.py | 52 ++++++++++ + ...-03-14-01-38-44.gh-issue-113171.VFnObz.rst | 9 ++ + 5 files changed, 186 insertions(+), 21 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst + +diff --git a/Doc/library/ipaddress.rst b/Doc/library/ipaddress.rst +index 4ce1ed1ced..18613babc9 100644 +--- a/Doc/library/ipaddress.rst ++++ b/Doc/library/ipaddress.rst +@@ -166,18 +166,53 @@ write code that handles both IP versions correctly. Address objects are + + .. attribute:: is_private + +- ``True`` if the address is allocated for private networks. See ++ ``True`` if the address is defined as not globally reachable by + iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ +- (for IPv6). ++ (for IPv6) with the following exceptions: ++ ++ * ``is_private`` is ``False`` for the shared address space (``100.64.0.0/10``) ++ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_private == address.ipv4_mapped.is_private ++ ++ ``is_private`` has value opposite to :attr:`is_global`, except for the shared address space ++ (``100.64.0.0/10`` range) where they are both ``False``. ++ ++ .. versionchanged:: 3.8.20 ++ ++ Fixed some false positives and false negatives. ++ ++ * ``192.0.0.0/24`` is considered private with the exception of ``192.0.0.9/32`` and ++ ``192.0.0.10/32`` (previously: only the ``192.0.0.0/29`` sub-range was considered private). ++ * ``64:ff9b:1::/48`` is considered private. ++ * ``2002::/16`` is considered private. ++ * There are exceptions within ``2001::/23`` (otherwise considered private): ``2001:1::1/128``, ++ ``2001:1::2/128``, ``2001:3::/32``, ``2001:4:112::/48``, ``2001:20::/28``, ``2001:30::/28``. ++ The exceptions are not considered private. + + .. attribute:: is_global + +- ``True`` if the address is allocated for public networks. See ++ ``True`` if the address is defined as globally reachable by + iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ +- (for IPv6). ++ (for IPv6) with the following exception: ++ ++ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_global == address.ipv4_mapped.is_global ++ ++ ``is_global`` has value opposite to :attr:`is_private`, except for the shared address space ++ (``100.64.0.0/10`` range) where they are both ``False``. + + .. versionadded:: 3.4 + ++ .. versionchanged:: 3.8.20 ++ ++ Fixed some false positives and false negatives, see :attr:`is_private` for details. ++ + .. attribute:: is_unspecified + + ``True`` if the address is unspecified. See :RFC:`5735` (for IPv4) +diff --git a/Doc/tools/susp-ignored.csv b/Doc/tools/susp-ignored.csv +index ed434ce77d..6bc0741b12 100644 +--- a/Doc/tools/susp-ignored.csv ++++ b/Doc/tools/susp-ignored.csv +@@ -160,6 +160,14 @@ library/ipaddress,,:db00,2001:db00::0/24 + library/ipaddress,,::,2001:db00::0/24 + library/ipaddress,,:db00,2001:db00::0/ffff:ff00:: + library/ipaddress,,::,2001:db00::0/ffff:ff00:: ++library/ipaddress,,:ff9b,64:ff9b:1::/48 ++library/ipaddress,,::,64:ff9b:1::/48 ++library/ipaddress,,::,2001:: ++library/ipaddress,,::,2001:1:: ++library/ipaddress,,::,2001:3:: ++library/ipaddress,,::,2001:4:112:: ++library/ipaddress,,::,2001:20:: ++library/ipaddress,,::,2001:30:: + library/itertools,,:step,elements from seq[start:stop:step] + library/itertools,,:stop,elements from seq[start:stop:step] + library/logging.handlers,,:port,host:port +diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py +index 98492136ca..55d4d62d70 100644 +--- a/Lib/ipaddress.py ++++ b/Lib/ipaddress.py +@@ -1302,18 +1302,41 @@ class IPv4Address(_BaseV4, _BaseAddress): + @property + @functools.lru_cache() + def is_private(self): +- """Test if this address is allocated for private networks. ++ """``True`` if the address is defined as not globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exceptions: + +- Returns: +- A boolean, True if the address is reserved per +- iana-ipv4-special-registry. ++ * ``is_private`` is ``False`` for ``100.64.0.0/10`` ++ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: + ++ address.is_private == address.ipv4_mapped.is_private ++ ++ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. + """ +- return any(self in net for net in self._constants._private_networks) ++ return ( ++ any(self in net for net in self._constants._private_networks) ++ and all(self not in net for net in self._constants._private_networks_exceptions) ++ ) + + @property + @functools.lru_cache() + def is_global(self): ++ """``True`` if the address is defined as globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exception: ++ ++ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_global == address.ipv4_mapped.is_global ++ ++ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. ++ """ + return self not in self._constants._public_network and not self.is_private + + @property +@@ -1548,13 +1571,15 @@ class _IPv4Constants: + + _public_network = IPv4Network('100.64.0.0/10') + ++ # Not globally reachable address blocks listed on ++ # https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml + _private_networks = [ + IPv4Network('0.0.0.0/8'), + IPv4Network('10.0.0.0/8'), + IPv4Network('127.0.0.0/8'), + IPv4Network('169.254.0.0/16'), + IPv4Network('172.16.0.0/12'), +- IPv4Network('192.0.0.0/29'), ++ IPv4Network('192.0.0.0/24'), + IPv4Network('192.0.0.170/31'), + IPv4Network('192.0.2.0/24'), + IPv4Network('192.168.0.0/16'), +@@ -1565,6 +1590,11 @@ class _IPv4Constants: + IPv4Network('255.255.255.255/32'), + ] + ++ _private_networks_exceptions = [ ++ IPv4Network('192.0.0.9/32'), ++ IPv4Network('192.0.0.10/32'), ++ ] ++ + _reserved_network = IPv4Network('240.0.0.0/4') + + _unspecified_address = IPv4Address('0.0.0.0') +@@ -1953,23 +1983,42 @@ class IPv6Address(_BaseV6, _BaseAddress): + @property + @functools.lru_cache() + def is_private(self): +- """Test if this address is allocated for private networks. ++ """``True`` if the address is defined as not globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exceptions: + +- Returns: +- A boolean, True if the address is reserved per +- iana-ipv6-special-registry. ++ * ``is_private`` is ``False`` for ``100.64.0.0/10`` ++ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: + ++ address.is_private == address.ipv4_mapped.is_private ++ ++ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. + """ +- return any(self in net for net in self._constants._private_networks) ++ ipv4_mapped = self.ipv4_mapped ++ if ipv4_mapped is not None: ++ return ipv4_mapped.is_private ++ return ( ++ any(self in net for net in self._constants._private_networks) ++ and all(self not in net for net in self._constants._private_networks_exceptions) ++ ) + + @property + def is_global(self): +- """Test if this address is allocated for public networks. ++ """``True`` if the address is defined as globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exception: + +- Returns: +- A boolean, true if the address is not reserved per +- iana-ipv6-special-registry. ++ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: + ++ address.is_global == address.ipv4_mapped.is_global ++ ++ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. + """ + return not self.is_private + +@@ -2236,19 +2285,31 @@ class _IPv6Constants: + + _multicast_network = IPv6Network('ff00::/8') + ++ # Not globally reachable address blocks listed on ++ # https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml + _private_networks = [ + IPv6Network('::1/128'), + IPv6Network('::/128'), + IPv6Network('::ffff:0:0/96'), ++ IPv6Network('64:ff9b:1::/48'), + IPv6Network('100::/64'), + IPv6Network('2001::/23'), +- IPv6Network('2001:2::/48'), + IPv6Network('2001:db8::/32'), +- IPv6Network('2001:10::/28'), ++ # IANA says N/A, let's consider it not globally reachable to be safe ++ IPv6Network('2002::/16'), + IPv6Network('fc00::/7'), + IPv6Network('fe80::/10'), + ] + ++ _private_networks_exceptions = [ ++ IPv6Network('2001:1::1/128'), ++ IPv6Network('2001:1::2/128'), ++ IPv6Network('2001:3::/32'), ++ IPv6Network('2001:4:112::/48'), ++ IPv6Network('2001:20::/28'), ++ IPv6Network('2001:30::/28'), ++ ] ++ + _reserved_networks = [ + IPv6Network('::/8'), IPv6Network('100::/8'), + IPv6Network('200::/7'), IPv6Network('400::/6'), +diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py +index 7de444af4a..716846b2ae 100644 +--- a/Lib/test/test_ipaddress.py ++++ b/Lib/test/test_ipaddress.py +@@ -1665,6 +1665,10 @@ class IpaddrUnitTest(unittest.TestCase): + self.assertEqual(True, ipaddress.ip_address( + '172.31.255.255').is_private) + self.assertEqual(False, ipaddress.ip_address('172.32.0.0').is_private) ++ self.assertFalse(ipaddress.ip_address('192.0.0.0').is_global) ++ self.assertTrue(ipaddress.ip_address('192.0.0.9').is_global) ++ self.assertTrue(ipaddress.ip_address('192.0.0.10').is_global) ++ self.assertFalse(ipaddress.ip_address('192.0.0.255').is_global) + + self.assertEqual(True, + ipaddress.ip_address('169.254.100.200').is_link_local) +@@ -1680,6 +1684,40 @@ class IpaddrUnitTest(unittest.TestCase): + self.assertEqual(False, ipaddress.ip_address('128.0.0.0').is_loopback) + self.assertEqual(True, ipaddress.ip_network('0.0.0.0').is_unspecified) + ++ def testPrivateNetworks(self): ++ self.assertEqual(True, ipaddress.ip_network("0.0.0.0/0").is_private) ++ self.assertEqual(False, ipaddress.ip_network("1.0.0.0/8").is_private) ++ ++ self.assertEqual(True, ipaddress.ip_network("0.0.0.0/8").is_private) ++ self.assertEqual(True, ipaddress.ip_network("10.0.0.0/8").is_private) ++ self.assertEqual(True, ipaddress.ip_network("127.0.0.0/8").is_private) ++ self.assertEqual(True, ipaddress.ip_network("169.254.0.0/16").is_private) ++ self.assertEqual(True, ipaddress.ip_network("172.16.0.0/12").is_private) ++ self.assertEqual(True, ipaddress.ip_network("192.0.0.0/29").is_private) ++ self.assertEqual(False, ipaddress.ip_network("192.0.0.9/32").is_private) ++ self.assertEqual(True, ipaddress.ip_network("192.0.0.170/31").is_private) ++ self.assertEqual(True, ipaddress.ip_network("192.0.2.0/24").is_private) ++ self.assertEqual(True, ipaddress.ip_network("192.168.0.0/16").is_private) ++ self.assertEqual(True, ipaddress.ip_network("198.18.0.0/15").is_private) ++ self.assertEqual(True, ipaddress.ip_network("198.51.100.0/24").is_private) ++ self.assertEqual(True, ipaddress.ip_network("203.0.113.0/24").is_private) ++ self.assertEqual(True, ipaddress.ip_network("240.0.0.0/4").is_private) ++ self.assertEqual(True, ipaddress.ip_network("255.255.255.255/32").is_private) ++ ++ self.assertEqual(False, ipaddress.ip_network("::/0").is_private) ++ self.assertEqual(False, ipaddress.ip_network("::ff/128").is_private) ++ ++ self.assertEqual(True, ipaddress.ip_network("::1/128").is_private) ++ self.assertEqual(True, ipaddress.ip_network("::/128").is_private) ++ self.assertEqual(True, ipaddress.ip_network("::ffff:0:0/96").is_private) ++ self.assertEqual(True, ipaddress.ip_network("100::/64").is_private) ++ self.assertEqual(True, ipaddress.ip_network("2001:2::/48").is_private) ++ self.assertEqual(False, ipaddress.ip_network("2001:3::/48").is_private) ++ self.assertEqual(True, ipaddress.ip_network("2001:db8::/32").is_private) ++ self.assertEqual(True, ipaddress.ip_network("2001:10::/28").is_private) ++ self.assertEqual(True, ipaddress.ip_network("fc00::/7").is_private) ++ self.assertEqual(True, ipaddress.ip_network("fe80::/10").is_private) ++ + def testReservedIpv6(self): + + self.assertEqual(True, ipaddress.ip_network('ffff::').is_multicast) +@@ -1753,6 +1791,20 @@ class IpaddrUnitTest(unittest.TestCase): + self.assertEqual(True, ipaddress.ip_address('0::0').is_unspecified) + self.assertEqual(False, ipaddress.ip_address('::1').is_unspecified) + ++ self.assertFalse(ipaddress.ip_address('64:ff9b:1::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:1::1').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:1::2').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:2::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:3::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:4::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:4:112::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:10::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:20::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:30::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:40::').is_global) ++ self.assertFalse(ipaddress.ip_address('2002::').is_global) ++ + # some generic IETF reserved addresses + self.assertEqual(True, ipaddress.ip_address('100::').is_reserved) + self.assertEqual(True, ipaddress.ip_network('4000::1/128').is_reserved) +diff --git a/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst b/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst +new file mode 100644 +index 0000000000..f9a72473be +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst +@@ -0,0 +1,9 @@ ++Fixed various false positives and false negatives in ++ ++* :attr:`ipaddress.IPv4Address.is_private` (see these docs for details) ++* :attr:`ipaddress.IPv4Address.is_global` ++* :attr:`ipaddress.IPv6Address.is_private` ++* :attr:`ipaddress.IPv6Address.is_global` ++ ++Also in the corresponding :class:`ipaddress.IPv4Network` and :class:`ipaddress.IPv6Network` ++attributes. diff --git a/python3.6.spec b/python3.6.spec index 127c479..61b1a66 100644 --- a/python3.6.spec +++ b/python3.6.spec @@ -17,7 +17,7 @@ URL: https://www.python.org/ #global prerel ... %global upstream_version %{general_version}%{?prerel} Version: %{general_version}%{?prerel:~%{prerel}} -Release: 28%{?dist} +Release: 29%{?dist} # Python is Python # pip MIT is and bundles: # appdirs: MIT @@ -698,6 +698,14 @@ Patch426: 00426-cve-2023-6597.patch # https://github.com/python/cpython/commit/66363b9a7b9fe7c99eba3a185b74c5fdbf842eba Patch427: 00427-zipextfile-tell-and-seek-cve-2024-0450.patch +# 00431 # ee1b513c52ab7663f7d58b07a1df123ea551e7c4 +# CVE-2024-4032: incorrect IPv4 and IPv6 private ranges +# +# Upstream issue: https://github.com/python/cpython/issues/113171 +# +# Backported from 3.8. +Patch431: 00431-cve-2024-4032.patch + # (New patches go here ^^^) # # When adding new patches to "python" and "python3" in Fedora, EL, etc., @@ -1961,6 +1969,9 @@ CheckPython optimized # ====================================================== %changelog +* Tue Jul 02 2024 Lumír Balhar - 3.6.15-29 +- Security fix for CVE-2024-4032 (rhbz#2293394) + * Wed Apr 24 2024 Lumír Balhar - 3.6.15-28 - Security fix for CVE-2024-0450 and CVE-2023-6597 From 64480d7cccc00831e36f4cbf7142e11402e353f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= Date: Thu, 1 Aug 2024 18:49:51 +0200 Subject: [PATCH 5/8] Separate patches for wheel from other sources, apply them only --without rpmwheels --- python3.6.spec | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/python3.6.spec b/python3.6.spec index 61b1a66..346dc7c 100644 --- a/python3.6.spec +++ b/python3.6.spec @@ -319,8 +319,10 @@ Source10: idle3.desktop # AppData file for idle3 Source11: idle3.appdata.xml +# Patches for bundled wheels + # Patch for the bundled pip wheel for CVE-2007-4559 -Source12: pip-CVE-2007-4559.patch +Source101: pip-CVE-2007-4559.patch # (Patches taken from github.com/fedora-python/cpython) @@ -1049,17 +1051,17 @@ or older Fedora releases. %if %{with rpmwheels} %autopatch 189 rm Lib/ensurepip/_bundled/*.whl +%else +# Patch the bundled pip wheel for CVE-2007-4559 +unzip -qq Lib/ensurepip/_bundled/pip-%{pip_version}-py2.py3-none-any.whl +patch -p1 < %{SOURCE101} +zip -rq Lib/ensurepip/_bundled/pip-%{pip_version}-py2.py3-none-any.whl pip pip-%{pip_version}.dist-info +rm -rf pip/ pip-%{pip_version}.dist-info/ %endif # Apply the remaining patches %autopatch -m 190 -# Patch the bundled pip wheel for CVE-2007-4559 -unzip -qq Lib/ensurepip/_bundled/pip-%{pip_version}-py2.py3-none-any.whl -patch -p1 < %{SOURCE12} -zip -rq Lib/ensurepip/_bundled/pip-%{pip_version}-py2.py3-none-any.whl pip pip-%{pip_version}.dist-info -rm -rf pip/ pip-%{pip_version}.dist-info/ - # Remove bundled libraries to ensure that we're using the system copy. rm -r Modules/expat rm -r Modules/zlib From 1bb7fee2bbb30a6f62c9652dc692248fddfff865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= Date: Thu, 1 Aug 2024 18:59:48 +0200 Subject: [PATCH 6/8] Security fix for CVE-2024-6345 (in bundled setuptools wheel) --- python3.6.spec | 18 +++++++- setuptools-CVE-2024-6345.patch | 83 ++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 setuptools-CVE-2024-6345.patch diff --git a/python3.6.spec b/python3.6.spec index 346dc7c..99e92c6 100644 --- a/python3.6.spec +++ b/python3.6.spec @@ -17,7 +17,7 @@ URL: https://www.python.org/ #global prerel ... %global upstream_version %{general_version}%{?prerel} Version: %{general_version}%{?prerel:~%{prerel}} -Release: 29%{?dist} +Release: 34%{?dist} # Python is Python # pip MIT is and bundles: # appdirs: MIT @@ -324,6 +324,13 @@ Source11: idle3.appdata.xml # Patch for the bundled pip wheel for CVE-2007-4559 Source101: pip-CVE-2007-4559.patch +# Patch for the bundled setuptools wheel for CVE-2024-6345 +# Remote code execution via download functions in the package_index module +# Tracking bug: https://bugzilla.redhat.com/show_bug.cgi?id=2297771 +# Upstream solution: https://github.com/pypa/setuptools/pull/4332 +# Patch simplified because upstream doesn't support SVN anymore. +Source102: setuptools-CVE-2024-6345.patch + # (Patches taken from github.com/fedora-python/cpython) # 00001 # d06a8853cf4bae9e115f45e1d531d2dc152c5cc8 @@ -1057,6 +1064,12 @@ unzip -qq Lib/ensurepip/_bundled/pip-%{pip_version}-py2.py3-none-any.whl patch -p1 < %{SOURCE101} zip -rq Lib/ensurepip/_bundled/pip-%{pip_version}-py2.py3-none-any.whl pip pip-%{pip_version}.dist-info rm -rf pip/ pip-%{pip_version}.dist-info/ + +# Patch the bundled setuptools wheel for CVE-2024-6345 +unzip -qq Lib/ensurepip/_bundled/setuptools-%{setuptools_version}-py2.py3-none-any.whl +patch -p1 < %{SOURCE102} +zip -rq Lib/ensurepip/_bundled/setuptools-%{setuptools_version}-py2.py3-none-any.whl easy_install.py pkg_resources setuptools setuptools-%{setuptools_version}.dist-info +rm -rf easy_install.py pkg_resources/ setuptools/ setuptools-%{setuptools_version}.dist-info/ %endif # Apply the remaining patches @@ -1971,6 +1984,9 @@ CheckPython optimized # ====================================================== %changelog +* Thu Aug 01 2024 Miro Hrončok - 3.6.15-34 +- Security fix for CVE-2024-6345 (in bundled setuptools wheel) + * Tue Jul 02 2024 Lumír Balhar - 3.6.15-29 - Security fix for CVE-2024-4032 (rhbz#2293394) diff --git a/setuptools-CVE-2024-6345.patch b/setuptools-CVE-2024-6345.patch new file mode 100644 index 0000000..54a856e --- /dev/null +++ b/setuptools-CVE-2024-6345.patch @@ -0,0 +1,83 @@ +From 8af1b3e03edc8a38565558aff3bf1689c1ca3545 Mon Sep 17 00:00:00 2001 +From: Lumir Balhar +Date: Fri, 26 Jul 2024 13:49:11 +0200 +Subject: [PATCH] CVE-2024-6345 + +--- + setuptools/package_index.py | 23 +++++++++-------------- + 1 file changed, 9 insertions(+), 14 deletions(-) + +diff --git a/setuptools/package_index.py b/setuptools/package_index.py +index bdcf4a6..1d3e5b4 100755 +--- a/setuptools/package_index.py ++++ b/setuptools/package_index.py +@@ -1,4 +1,5 @@ + """PyPI and direct package downloading""" ++import subprocess + import sys + import os + import re +@@ -848,7 +849,7 @@ class PackageIndex(Environment): + + def _download_svn(self, url, filename): + url = url.split('#', 1)[0] # remove any fragment for svn's sake +- creds = '' ++ creds = [] + if url.lower().startswith('svn:') and '@' in url: + scheme, netloc, path, p, q, f = urllib.parse.urlparse(url) + if not netloc and path.startswith('//') and '/' in path[2:]: +@@ -857,14 +858,14 @@ class PackageIndex(Environment): + if auth: + if ':' in auth: + user, pw = auth.split(':', 1) +- creds = " --username=%s --password=%s" % (user, pw) ++ creds = ["--username=" + user, "--password=" + pw] + else: +- creds = " --username=" + auth ++ creds = ["--username=" + auth] + netloc = host + parts = scheme, netloc, url, p, q, f + url = urllib.parse.urlunparse(parts) + self.info("Doing subversion checkout from %s to %s", url, filename) +- os.system("svn checkout%s -q %s %s" % (creds, url, filename)) ++ subprocess.check_call(["svn", "checkout"] + creds + ["-q", url, filename]) + return filename + + @staticmethod +@@ -890,14 +891,11 @@ class PackageIndex(Environment): + url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) + + self.info("Doing git clone from %s to %s", url, filename) +- os.system("git clone --quiet %s %s" % (url, filename)) ++ subprocess.check_call(["git", "clone", "--quiet", url, filename]) + + if rev is not None: + self.info("Checking out %s", rev) +- os.system("(cd %s && git checkout --quiet %s)" % ( +- filename, +- rev, +- )) ++ subprocess.check_call(["git", "-C", filename, "checkout", "--quiet", rev]) + + return filename + +@@ -906,14 +904,11 @@ class PackageIndex(Environment): + url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) + + self.info("Doing hg clone from %s to %s", url, filename) +- os.system("hg clone --quiet %s %s" % (url, filename)) ++ subprocess.check_call(["hg", "clone", "--quiet", url, filename]) + + if rev is not None: + self.info("Updating to %s", rev) +- os.system("(cd %s && hg up -C -r %s -q)" % ( +- filename, +- rev, +- )) ++ subprocess.check_call(["hg", "--cwd", filename, "up", "-C", "-r", rev, "-q"]) + + return filename + +-- +2.45.2 + From 2cfab79d1177e41512a07bb7ac55db54cafeec8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hrn=C4=8Diar?= Date: Fri, 16 Aug 2024 15:57:21 +0200 Subject: [PATCH 7/8] Security fix for CVE-2024-6923 (rhbz#2303161) --- ...ncode-newlines-in-headers-and-verify.patch | 381 ++++++++++++++++++ python3.6.spec | 34 +- 2 files changed, 414 insertions(+), 1 deletion(-) create mode 100644 00435-gh-121650-encode-newlines-in-headers-and-verify.patch diff --git a/00435-gh-121650-encode-newlines-in-headers-and-verify.patch b/00435-gh-121650-encode-newlines-in-headers-and-verify.patch new file mode 100644 index 0000000..32af189 --- /dev/null +++ b/00435-gh-121650-encode-newlines-in-headers-and-verify.patch @@ -0,0 +1,381 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hrn=C4=8Diar?= +Date: Fri, 16 Aug 2024 14:12:58 +0200 +Subject: [PATCH] 00435: gh-121650: Encode newlines in headers, and verify + headers are sound (GH-122233) + +Per RFC 2047: + +> [...] these encoding schemes allow the +> encoding of arbitrary octet values, mail readers that implement this +> decoding should also ensure that display of the decoded data on the +> recipient's terminal will not cause unwanted side-effects + +It seems that the "quoted-word" scheme is a valid way to include +a newline character in a header value, just like we already allow +undecodable bytes or control characters. +They do need to be properly quoted when serialized to text, though. + +This should fail for custom fold() implementations that aren't careful +about newlines. + +(cherry picked from commit 097633981879b3c9de9a1dd120d3aa585ecc2384) + +This patch also contains modified commit cherry picked from +c5bba853d5e7836f6d4340e18721d3fb3a6ee0f7. + +This commit was backported to simplify the backport of the other commit +fixing CVE. The only modification is a removal of one test case which +tests multiple changes in Python 3.7 and it wasn't working properly +with Python 3.6 where we backported only one change. + +Co-authored-by: Petr Viktorin +Co-authored-by: Bas Bloemsaat +Co-authored-by: Serhiy Storchaka +Co-authored-by: bsiem <52461103+bsiem@users.noreply.github.com> +--- + Doc/library/email.errors.rst | 6 ++ + Doc/library/email.policy.rst | 18 ++++++ + Lib/email/_header_value_parser.py | 9 +++ + Lib/email/_policybase.py | 8 +++ + Lib/email/errors.py | 4 ++ + Lib/email/generator.py | 16 ++++- + Lib/test/test_email/test_generator.py | 62 +++++++++++++++++++ + Lib/test/test_email/test_headerregistry.py | 16 +++++ + Lib/test/test_email/test_policy.py | 26 ++++++++ + .../2019-07-09-11-20-21.bpo-37482.auzvev.rst | 1 + + ...-07-27-16-10-41.gh-issue-121650.nf6oc9.rst | 5 ++ + 11 files changed, 170 insertions(+), 1 deletion(-) + create mode 100644 Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst + create mode 100644 Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst + +diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst +index 511ad16358..7e51f74467 100644 +--- a/Doc/library/email.errors.rst ++++ b/Doc/library/email.errors.rst +@@ -59,6 +59,12 @@ The following exception classes are defined in the :mod:`email.errors` module: + :class:`~email.mime.image.MIMEImage`). + + ++.. exception:: HeaderWriteError() ++ ++ Raised when an error occurs when the :mod:`~email.generator` outputs ++ headers. ++ ++ + Here is the list of the defects that the :class:`~email.parser.FeedParser` + can find while parsing messages. Note that the defects are added to the message + where the problem was found, so for example, if a message nested inside a +diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst +index 8e70762598..8617b2ed5b 100644 +--- a/Doc/library/email.policy.rst ++++ b/Doc/library/email.policy.rst +@@ -229,6 +229,24 @@ added matters. To illustrate:: + + .. versionadded:: 3.6 + ++ ++ .. attribute:: verify_generated_headers ++ ++ If ``True`` (the default), the generator will raise ++ :exc:`~email.errors.HeaderWriteError` instead of writing a header ++ that is improperly folded or delimited, such that it would ++ be parsed as multiple headers or joined with adjacent data. ++ Such headers can be generated by custom header classes or bugs ++ in the ``email`` module. ++ ++ As it's a security feature, this defaults to ``True`` even in the ++ :class:`~email.policy.Compat32` policy. ++ For backwards compatible, but unsafe, behavior, it must be set to ++ ``False`` explicitly. ++ ++ .. versionadded:: 3.8.20 ++ ++ + The following :class:`Policy` method is intended to be called by code using + the email library to create policy instances with custom settings: + +diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py +index bc9c9b6241..04035b2612 100644 +--- a/Lib/email/_header_value_parser.py ++++ b/Lib/email/_header_value_parser.py +@@ -92,6 +92,8 @@ TOKEN_ENDS = TSPECIALS | WSP + ASPECIALS = TSPECIALS | set("*'%") + ATTRIBUTE_ENDS = ASPECIALS | WSP + EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') ++NLSET = {'\n', '\r'} ++SPECIALSNL = SPECIALS | NLSET + + def quote_string(value): + return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' +@@ -2611,6 +2613,13 @@ def _refold_parse_tree(parse_tree, *, policy): + wrap_as_ew_blocked -= 1 + continue + tstr = str(part) ++ if not want_encoding: ++ if part.token_type == 'ptext': ++ # Encode if tstr contains special characters. ++ want_encoding = not SPECIALSNL.isdisjoint(tstr) ++ else: ++ # Encode if tstr contains newlines. ++ want_encoding = not NLSET.isdisjoint(tstr) + try: + tstr.encode(encoding) + charset = encoding +diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py +index c9cbadd2a8..d1f48211f9 100644 +--- a/Lib/email/_policybase.py ++++ b/Lib/email/_policybase.py +@@ -157,6 +157,13 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): + message_factory -- the class to use to create new message objects. + If the value is None, the default is Message. + ++ verify_generated_headers ++ -- if true, the generator verifies that each header ++ they are properly folded, so that a parser won't ++ treat it as multiple headers, start-of-body, or ++ part of another header. ++ This is a check against custom Header & fold() ++ implementations. + """ + + raise_on_defect = False +@@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): + max_line_length = 78 + mangle_from_ = False + message_factory = None ++ verify_generated_headers = True + + def handle_defect(self, obj, defect): + """Based on policy, either raise defect or call register_defect. +diff --git a/Lib/email/errors.py b/Lib/email/errors.py +index d28a680010..1a0d5c63e6 100644 +--- a/Lib/email/errors.py ++++ b/Lib/email/errors.py +@@ -29,6 +29,10 @@ class CharsetError(MessageError): + """An illegal charset was given.""" + + ++class HeaderWriteError(MessageError): ++ """Error while writing headers.""" ++ ++ + # These are parsing defects which the parser was able to work around. + class MessageDefect(ValueError): + """Base class for a message defect.""" +diff --git a/Lib/email/generator.py b/Lib/email/generator.py +index ae670c2353..6deb95ba8a 100644 +--- a/Lib/email/generator.py ++++ b/Lib/email/generator.py +@@ -14,12 +14,14 @@ import random + from copy import deepcopy + from io import StringIO, BytesIO + from email.utils import _has_surrogates ++from email.errors import HeaderWriteError + + UNDERSCORE = '_' + NL = '\n' # XXX: no longer used by the code below. + + NLCRE = re.compile(r'\r\n|\r|\n') + fcre = re.compile(r'^From ', re.MULTILINE) ++NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') + + + +@@ -219,7 +221,19 @@ class Generator: + + def _write_headers(self, msg): + for h, v in msg.raw_items(): +- self.write(self.policy.fold(h, v)) ++ folded = self.policy.fold(h, v) ++ if self.policy.verify_generated_headers: ++ linesep = self.policy.linesep ++ if not folded.endswith(self.policy.linesep): ++ raise HeaderWriteError( ++ f'folded header does not end with {linesep!r}: {folded!r}') ++ folded_no_linesep = folded ++ if folded.endswith(linesep): ++ folded_no_linesep = folded[:-len(linesep)] ++ if NEWLINE_WITHOUT_FWSP.search(folded_no_linesep): ++ raise HeaderWriteError( ++ f'folded header contains newline: {folded!r}') ++ self.write(folded) + # A blank line always separates headers from body + self.write(self._NL) + +diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py +index c1aeaefab7..cdf1075bab 100644 +--- a/Lib/test/test_email/test_generator.py ++++ b/Lib/test/test_email/test_generator.py +@@ -5,6 +5,7 @@ from email import message_from_string, message_from_bytes + from email.message import EmailMessage + from email.generator import Generator, BytesGenerator + from email import policy ++import email.errors + from test.test_email import TestEmailBase, parameterize + + +@@ -215,6 +216,44 @@ class TestGeneratorBase: + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + ++ def test_keep_encoded_newlines(self): ++ msg = self.msgmaker(self.typ(textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com ++ ++ None ++ """))) ++ expected = textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com ++ ++ None ++ """) ++ s = self.ioclass() ++ g = self.genclass(s, policy=self.policy.clone(max_line_length=80)) ++ g.flatten(msg) ++ self.assertEqual(s.getvalue(), self.typ(expected)) ++ ++ def test_keep_long_encoded_newlines(self): ++ msg = self.msgmaker(self.typ(textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject =?UTF-8?Q?=0A?=Bcc: injection@example.com ++ ++ None ++ """))) ++ expected = textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject \n\ ++ =?utf-8?q?=0A?=Bcc: ++ injection@example.com ++ ++ None ++ """) ++ s = self.ioclass() ++ g = self.genclass(s, policy=self.policy.clone(max_line_length=30)) ++ g.flatten(msg) ++ self.assertEqual(s.getvalue(), self.typ(expected)) ++ + + class TestGenerator(TestGeneratorBase, TestEmailBase): + +@@ -223,6 +262,29 @@ class TestGenerator(TestGeneratorBase, TestEmailBase): + ioclass = io.StringIO + typ = str + ++ def test_verify_generated_headers(self): ++ """gh-121650: by default the generator prevents header injection""" ++ class LiteralHeader(str): ++ name = 'Header' ++ def fold(self, **kwargs): ++ return self ++ ++ for text in ( ++ 'Value\r\nBad Injection\r\n', ++ 'NoNewLine' ++ ): ++ with self.subTest(text=text): ++ message = message_from_string( ++ "Header: Value\r\n\r\nBody", ++ policy=self.policy, ++ ) ++ ++ del message['Header'] ++ message['Header'] = LiteralHeader(text) ++ ++ with self.assertRaises(email.errors.HeaderWriteError): ++ message.as_string() ++ + + class TestBytesGenerator(TestGeneratorBase, TestEmailBase): + +diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py +index 08634daa7f..d7c1878940 100644 +--- a/Lib/test/test_email/test_headerregistry.py ++++ b/Lib/test/test_email/test_headerregistry.py +@@ -1546,6 +1546,22 @@ class TestAddressAndGroup(TestEmailBase): + + class TestFolding(TestHeaderBase): + ++ def test_address_display_names(self): ++ """Test the folding and encoding of address headers.""" ++ for name, result in ( ++ ('Foo Bar, France', '"Foo Bar, France"'), ++ ('Foo Bar (France)', '"Foo Bar (France)"'), ++ ('Foo Bar, España', 'Foo =?utf-8?q?Bar=2C_Espa=C3=B1a?='), ++ ('Foo Bar (España)', 'Foo Bar =?utf-8?b?KEVzcGHDsWEp?='), ++ ('Foo, Bar España', '=?utf-8?q?Foo=2C_Bar_Espa=C3=B1a?='), ++ ('Foo, Bar [España]', '=?utf-8?q?Foo=2C_Bar_=5BEspa=C3=B1a=5D?='), ++ ('Foo Bär, France', 'Foo =?utf-8?q?B=C3=A4r=2C?= France'), ++ ('Foo Bär ', 'Foo =?utf-8?q?B=C3=A4r_=3CFrance=3E?='), ++ ): ++ h = self.make_header('To', Address(name, addr_spec='a@b.com')) ++ self.assertEqual(h.fold(policy=policy.default), ++ 'To: %s \n' % result) ++ + def test_short_unstructured(self): + h = self.make_header('subject', 'this is a test') + self.assertEqual(h.fold(policy=policy.default), +diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py +index 6999e4af10..76198392e5 100644 +--- a/Lib/test/test_email/test_policy.py ++++ b/Lib/test/test_email/test_policy.py +@@ -26,6 +26,7 @@ class PolicyAPITests(unittest.TestCase): + 'raise_on_defect': False, + 'mangle_from_': True, + 'message_factory': None, ++ 'verify_generated_headers': True, + } + # These default values are the ones set on email.policy.default. + # If any of these defaults change, the docs must be updated. +@@ -265,6 +266,31 @@ class PolicyAPITests(unittest.TestCase): + with self.assertRaises(email.errors.HeaderParseError): + policy.fold("Subject", subject) + ++ def test_verify_generated_headers(self): ++ """Turning protection off allows header injection""" ++ policy = email.policy.default.clone(verify_generated_headers=False) ++ for text in ( ++ 'Header: Value\r\nBad: Injection\r\n', ++ 'Header: NoNewLine' ++ ): ++ with self.subTest(text=text): ++ message = email.message_from_string( ++ "Header: Value\r\n\r\nBody", ++ policy=policy, ++ ) ++ class LiteralHeader(str): ++ name = 'Header' ++ def fold(self, **kwargs): ++ return self ++ ++ del message['Header'] ++ message['Header'] = LiteralHeader(text) ++ ++ self.assertEqual( ++ message.as_string(), ++ f"{text}\nBody", ++ ) ++ + # XXX: Need subclassing tests. + # For adding subclassed objects, make sure the usual rules apply (subclass + # wins), but that the order still works (right overrides left). +diff --git a/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst b/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst +new file mode 100644 +index 0000000000..e09ff63eed +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst +@@ -0,0 +1 @@ ++Fix serialization of display name in originator or destination address fields with both encoded words and special chars. +diff --git a/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst +new file mode 100644 +index 0000000000..83dd28d4ac +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst +@@ -0,0 +1,5 @@ ++:mod:`email` headers with embedded newlines are now quoted on output. The ++:mod:`~email.generator` will now refuse to serialize (write) headers that ++are unsafely folded or delimited; see ++:attr:`~email.policy.Policy.verify_generated_headers`. (Contributed by Bas ++Bloemsaat and Petr Viktorin in :gh:`121650`.) diff --git a/python3.6.spec b/python3.6.spec index 99e92c6..b4b2969 100644 --- a/python3.6.spec +++ b/python3.6.spec @@ -17,7 +17,7 @@ URL: https://www.python.org/ #global prerel ... %global upstream_version %{general_version}%{?prerel} Version: %{general_version}%{?prerel:~%{prerel}} -Release: 34%{?dist} +Release: 35%{?dist} # Python is Python # pip MIT is and bundles: # appdirs: MIT @@ -715,6 +715,35 @@ Patch427: 00427-zipextfile-tell-and-seek-cve-2024-0450.patch # Backported from 3.8. Patch431: 00431-cve-2024-4032.patch +# 00435 # f80b87e6a67eebe0693b895261bad2e9a58a4825 +# gh-121650: Encode newlines in headers, and verify +# headers are sound (GH-122233) +# +# Per RFC 2047: +# +# > [...] these encoding schemes allow the +# > encoding of arbitrary octet values, mail readers that implement this +# > decoding should also ensure that display of the decoded data on the +# > recipient's terminal will not cause unwanted side-effects +# +# It seems that the "quoted-word" scheme is a valid way to include +# a newline character in a header value, just like we already allow +# undecodable bytes or control characters. +# They do need to be properly quoted when serialized to text, though. +# +# This should fail for custom fold() implementations that aren't careful +# about newlines. +# +# +# This patch also contains modified commit cherry picked from +# c5bba853d5e7836f6d4340e18721d3fb3a6ee0f7. +# +# This commit was backported to simplify the backport of the other commit +# fixing CVE. The only modification is a removal of one test case which +# tests multiple changes in Python 3.7 and it wasn't working properly +# with Python 3.6 where we backported only one change. +Patch435: 00435-gh-121650-encode-newlines-in-headers-and-verify.patch + # (New patches go here ^^^) # # When adding new patches to "python" and "python3" in Fedora, EL, etc., @@ -1984,6 +2013,9 @@ CheckPython optimized # ====================================================== %changelog +* Fri Aug 16 2024 Tomáš Hrnčiar - 3.6.15-35 +- Security fix for CVE-2024-6923 (rhbz#2303161) + * Thu Aug 01 2024 Miro Hrončok - 3.6.15-34 - Security fix for CVE-2024-6345 (in bundled setuptools wheel) From 0bbcef3a9297763fa6aaf1e8274ae82d75138073 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Thu, 5 Sep 2024 12:19:31 +0200 Subject: [PATCH 8/8] Security fix for CVE-2024-6232 (rhbz#2310092) --- ...racking-when-parsing-tarfile-headers.patch | 249 ++++++++++++++++++ python3.6.spec | 13 +- 2 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 00437-cve-2024-6232-remove-backtracking-when-parsing-tarfile-headers.patch diff --git a/00437-cve-2024-6232-remove-backtracking-when-parsing-tarfile-headers.patch b/00437-cve-2024-6232-remove-backtracking-when-parsing-tarfile-headers.patch new file mode 100644 index 0000000..a5f704e --- /dev/null +++ b/00437-cve-2024-6232-remove-backtracking-when-parsing-tarfile-headers.patch @@ -0,0 +1,249 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Seth Michael Larson +Date: Wed, 4 Sep 2024 10:41:42 -0500 +Subject: [PATCH] 00437: CVE-2024-6232 Remove backtracking when parsing tarfile + headers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +* Remove backtracking when parsing tarfile headers +* Rewrite PAX header parsing to be stricter +* Optimize parsing of GNU extended sparse headers v0.0 + +(cherry picked from commit 34ddb64d088dd7ccc321f6103d23153256caa5d4) + +Co-authored-by: Seth Michael Larson +Co-authored-by: Kirill Podoprigora +Co-authored-by: Gregory P. Smith +Co-authored-by: Lumír Balhar +--- + Lib/tarfile.py | 104 +++++++++++------- + Lib/test/test_tarfile.py | 42 +++++++ + ...-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | 2 + + 3 files changed, 111 insertions(+), 37 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst + +diff --git a/Lib/tarfile.py b/Lib/tarfile.py +index c18590325a..ee1bf37bfd 100755 +--- a/Lib/tarfile.py ++++ b/Lib/tarfile.py +@@ -846,6 +846,9 @@ _NAMED_FILTERS = { + # Sentinel for replace() defaults, meaning "don't change the attribute" + _KEEP = object() + ++# Header length is digits followed by a space. ++_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ") ++ + class TarInfo(object): + """Informational class which holds the details about an + archive member given by a tar header block. +@@ -1371,41 +1374,60 @@ class TarInfo(object): + else: + pax_headers = tarfile.pax_headers.copy() + +- # Check if the pax header contains a hdrcharset field. This tells us +- # the encoding of the path, linkpath, uname and gname fields. Normally, +- # these fields are UTF-8 encoded but since POSIX.1-2008 tar +- # implementations are allowed to store them as raw binary strings if +- # the translation to UTF-8 fails. +- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) +- if match is not None: +- pax_headers["hdrcharset"] = match.group(1).decode("utf-8") +- +- # For the time being, we don't care about anything other than "BINARY". +- # The only other value that is currently allowed by the standard is +- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. +- hdrcharset = pax_headers.get("hdrcharset") +- if hdrcharset == "BINARY": +- encoding = tarfile.encoding +- else: +- encoding = "utf-8" +- + # Parse pax header information. A record looks like that: + # "%d %s=%s\n" % (length, keyword, value). length is the size + # of the complete record including the length field itself and +- # the newline. keyword and value are both UTF-8 encoded strings. +- regex = re.compile(br"(\d+) ([^=]+)=") ++ # the newline. + pos = 0 +- while True: +- match = regex.match(buf, pos) ++ encoding = None ++ raw_headers = [] ++ while len(buf) > pos and buf[pos] != 0x00: ++ match = _header_length_prefix_re.match(buf, pos) + if not match: +- break ++ raise InvalidHeaderError("invalid header") ++ try: ++ length = int(match.group(1)) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ # Headers must be at least 5 bytes, shortest being '5 x=\n'. ++ # Value is allowed to be empty. ++ if length < 5: ++ raise InvalidHeaderError("invalid header") ++ if pos + length > len(buf): ++ raise InvalidHeaderError("invalid header") + +- length, keyword = match.groups() +- length = int(length) +- if length == 0: ++ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header ++ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset] ++ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=") ++ ++ # Check the framing of the header. The last character must be '\n' (0x0A) ++ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A: + raise InvalidHeaderError("invalid header") +- value = buf[match.end(2) + 1:match.start(1) + length - 1] ++ raw_headers.append((length, raw_keyword, raw_value)) ++ ++ # Check if the pax header contains a hdrcharset field. This tells us ++ # the encoding of the path, linkpath, uname and gname fields. Normally, ++ # these fields are UTF-8 encoded but since POSIX.1-2008 tar ++ # implementations are allowed to store them as raw binary strings if ++ # the translation to UTF-8 fails. For the time being, we don't care about ++ # anything other than "BINARY". The only other value that is currently ++ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8. ++ # Note that we only follow the initial 'hdrcharset' setting to preserve ++ # the initial behavior of the 'tarfile' module. ++ if raw_keyword == b"hdrcharset" and encoding is None: ++ if raw_value == b"BINARY": ++ encoding = tarfile.encoding ++ else: # This branch ensures only the first 'hdrcharset' header is used. ++ encoding = "utf-8" ++ ++ pos += length ++ ++ # If no explicit hdrcharset is set, we use UTF-8 as a default. ++ if encoding is None: ++ encoding = "utf-8" + ++ # After parsing the raw headers we can decode them to text. ++ for length, raw_keyword, raw_value in raw_headers: + # Normally, we could just use "utf-8" as the encoding and "strict" + # as the error handler, but we better not take the risk. For + # example, GNU tar <= 1.23 is known to store filenames it cannot +@@ -1413,17 +1435,16 @@ class TarInfo(object): + # hdrcharset=BINARY header). + # We first try the strict standard encoding, and if that fails we + # fall back on the user's encoding and error handler. +- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", ++ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8", + tarfile.errors) + if keyword in PAX_NAME_FIELDS: +- value = self._decode_pax_field(value, encoding, tarfile.encoding, ++ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding, + tarfile.errors) + else: +- value = self._decode_pax_field(value, "utf-8", "utf-8", ++ value = self._decode_pax_field(raw_value, "utf-8", "utf-8", + tarfile.errors) + + pax_headers[keyword] = value +- pos += length + + # Fetch the next header. + try: +@@ -1438,7 +1459,7 @@ class TarInfo(object): + + elif "GNU.sparse.size" in pax_headers: + # GNU extended sparse format version 0.0. +- self._proc_gnusparse_00(next, pax_headers, buf) ++ self._proc_gnusparse_00(next, raw_headers) + + elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": + # GNU extended sparse format version 1.0. +@@ -1460,15 +1481,24 @@ class TarInfo(object): + + return next + +- def _proc_gnusparse_00(self, next, pax_headers, buf): ++ def _proc_gnusparse_00(self, next, raw_headers): + """Process a GNU tar extended sparse header, version 0.0. + """ + offsets = [] +- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): +- offsets.append(int(match.group(1))) + numbytes = [] +- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): +- numbytes.append(int(match.group(1))) ++ for _, keyword, value in raw_headers: ++ if keyword == b"GNU.sparse.offset": ++ try: ++ offsets.append(int(value.decode())) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ ++ elif keyword == b"GNU.sparse.numbytes": ++ try: ++ numbytes.append(int(value.decode())) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ + next.sparse = list(zip(offsets, numbytes)) + + def _proc_gnusparse_01(self, next, pax_headers): +diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py +index f261048615..04ef000e71 100644 +--- a/Lib/test/test_tarfile.py ++++ b/Lib/test/test_tarfile.py +@@ -1046,6 +1046,48 @@ class PaxReadTest(LongnameTest, ReadTest, unittest.TestCase): + finally: + tar.close() + ++ def test_pax_header_bad_formats(self): ++ # The fields from the pax header have priority over the ++ # TarInfo. ++ pax_header_replacements = ( ++ b" foo=bar\n", ++ b"0 \n", ++ b"1 \n", ++ b"2 \n", ++ b"3 =\n", ++ b"4 =a\n", ++ b"1000000 foo=bar\n", ++ b"0 foo=bar\n", ++ b"-12 foo=bar\n", ++ b"000000000000000000000000036 foo=bar\n", ++ ) ++ pax_headers = {"foo": "bar"} ++ ++ for replacement in pax_header_replacements: ++ with self.subTest(header=replacement): ++ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, ++ encoding="iso8859-1") ++ try: ++ t = tarfile.TarInfo() ++ t.name = "pax" # non-ASCII ++ t.uid = 1 ++ t.pax_headers = pax_headers ++ tar.addfile(t) ++ finally: ++ tar.close() ++ ++ with open(tmpname, "rb") as f: ++ data = f.read() ++ self.assertIn(b"11 foo=bar\n", data) ++ data = data.replace(b"11 foo=bar\n", replacement) ++ ++ with open(tmpname, "wb") as f: ++ f.truncate() ++ f.write(data) ++ ++ with self.assertRaisesRegex(tarfile.ReadError, r"file could not be opened successfully"): ++ tarfile.open(tmpname, encoding="iso8859-1") ++ + + class WriteTestBase(TarTest): + # Put all write tests in here that are supposed to be tested +diff --git a/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst +new file mode 100644 +index 0000000000..81f918bfe2 +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst +@@ -0,0 +1,2 @@ ++Remove backtracking from tarfile header parsing for ``hdrcharset``, PAX, and ++GNU sparse headers. diff --git a/python3.6.spec b/python3.6.spec index b4b2969..bd5e7e1 100644 --- a/python3.6.spec +++ b/python3.6.spec @@ -17,7 +17,7 @@ URL: https://www.python.org/ #global prerel ... %global upstream_version %{general_version}%{?prerel} Version: %{general_version}%{?prerel:~%{prerel}} -Release: 35%{?dist} +Release: 36%{?dist} # Python is Python # pip MIT is and bundles: # appdirs: MIT @@ -744,6 +744,14 @@ Patch431: 00431-cve-2024-4032.patch # with Python 3.6 where we backported only one change. Patch435: 00435-gh-121650-encode-newlines-in-headers-and-verify.patch +# 00437 # c1618bd3b415d9df1a2d050332220300d394ac5f +# CVE-2024-6232 Remove backtracking when parsing tarfile headers +# +# * Remove backtracking when parsing tarfile headers +# * Rewrite PAX header parsing to be stricter +# * Optimize parsing of GNU extended sparse headers v0.0 +Patch437: 00437-cve-2024-6232-remove-backtracking-when-parsing-tarfile-headers.patch + # (New patches go here ^^^) # # When adding new patches to "python" and "python3" in Fedora, EL, etc., @@ -2013,6 +2021,9 @@ CheckPython optimized # ====================================================== %changelog +* Thu Sep 05 2024 Lumír Balhar - 3.6.15-36 +- Security fix for CVE-2024-6232 (rhbz#2310092) + * Fri Aug 16 2024 Tomáš Hrnčiar - 3.6.15-35 - Security fix for CVE-2024-6923 (rhbz#2303161)