diff --git a/coreutils-9.6-cp-improve-nfsv4-acl-support.patch b/coreutils-9.6-cp-improve-nfsv4-acl-support.patch new file mode 100644 index 0000000..1b36b1e --- /dev/null +++ b/coreutils-9.6-cp-improve-nfsv4-acl-support.patch @@ -0,0 +1,512 @@ +From 6ad28e2b6627caf7b83bf893027c087b8cea1a97 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Fri, 9 May 2025 18:02:29 -0700 +Subject: [PATCH 1/2] qcopy-acl: port better to NFSv4 on GNU/Linux + +Problem reported by Ian Dall in +and by Thomas Clark in . +* lib/file-has-acl.c (smack_new_label_from_file) [!HAVE_SMACK]: +New dummy function. +(has_xattr, get_aclinfo): New arg FD. All callers changed. +Remove some unnecessary MAYBE_UNUSEDs. +(acl_get_fd_np): Fall back on acl_get_fd if this function is +needed but not available. +(acl_get_fdfile): New function, if needed. +(file_has_aclinfo): Reimplement in terms of ... +(fdfile_has_aclinfo): ... this new function, +which also has an FD argument. +* lib/qcopy-acl.c [USE_XATTR]: Include dirent.h, for DT_DIR etc. +(qcopy_acl): If attr_copy_file or attr_copy_fd fail with EOPNOTSUPP, +don’t fail if the source has a trivial ACL (this is the part +that fixes the bug; the rest is optimization). + +(cherry picked from commit 8a356b77717a2e4f735ec06e326880ca1f61aadb) +--- + lib/acl.h | 2 + + lib/copy-acl.c | 1 + + lib/file-has-acl.c | 172 ++++++++++++++++++++++++++++++++------------- + lib/qcopy-acl.c | 29 ++++++-- + 4 files changed, 152 insertions(+), 52 deletions(-) + +diff --git a/lib/acl.h b/lib/acl.h +index 90fd24e..e3c134f 100644 +--- a/lib/acl.h ++++ b/lib/acl.h +@@ -79,6 +79,8 @@ struct aclinfo + bool acl_errno_valid (int) _GL_ATTRIBUTE_CONST; + int file_has_acl (char const *, struct stat const *); + int file_has_aclinfo (char const *restrict, struct aclinfo *restrict, int); ++int fdfile_has_aclinfo (int, char const *restrict, ++ struct aclinfo *restrict, int); + + #if HAVE_LINUX_XATTR_H && HAVE_LISTXATTR + bool aclinfo_has_xattr (struct aclinfo const *, char const *) +diff --git a/lib/copy-acl.c b/lib/copy-acl.c +index c36f64e..2fce6c7 100644 +--- a/lib/copy-acl.c ++++ b/lib/copy-acl.c +@@ -33,6 +33,7 @@ + a valid file descriptor, use file descriptor operations, else use + filename based operations on SRC_NAME. Likewise for DEST_DESC and + DST_NAME. ++ MODE should be the source file's st_mode. + If access control lists are not available, fchmod the target file to + MODE. Also sets the non-permission bits of the destination file + (S_ISUID, S_ISGID, S_ISVTX) to those from MODE if any are set. +diff --git a/lib/file-has-acl.c b/lib/file-has-acl.c +index 66b920c..a356ee0 100644 +--- a/lib/file-has-acl.c ++++ b/lib/file-has-acl.c +@@ -85,6 +85,13 @@ smack_new_label_from_path (MAYBE_UNUSED const char *path, + { + return -1; + } ++static ssize_t ++smack_new_label_from_file (MAYBE_UNUSED int fd, ++ MAYBE_UNUSED const char *xattr, ++ MAYBE_UNUSED char **label) ++{ ++ return -1; ++} + # endif + static bool + is_smack_enabled (void) +@@ -115,14 +122,16 @@ aclinfo_may_indicate_xattr (struct aclinfo const *ai) + + static bool + has_xattr (char const *xattr, struct aclinfo const *ai, +- MAYBE_UNUSED char const *restrict name, MAYBE_UNUSED int flags) ++ int fd, char const *restrict name, int flags) + { + if (ai && aclinfo_has_xattr (ai, xattr)) + return true; + else if (!ai || aclinfo_may_indicate_xattr (ai)) + { +- int ret = ((flags & ACL_SYMLINK_FOLLOW ? getxattr : lgetxattr) +- (name, xattr, NULL, 0)); ++ int ret = (fd < 0 ++ ? ((flags & ACL_SYMLINK_FOLLOW ? getxattr : lgetxattr) ++ (name, xattr, NULL, 0)) ++ : fgetxattr (fd, xattr, NULL, 0)); + if (0 <= ret || (errno == ERANGE || errno == E2BIG)) + return true; + } +@@ -145,11 +154,12 @@ aclinfo_has_xattr (struct aclinfo const *ai, char const *xattr) + return false; + } + +-/* Get attributes of the file NAME into AI, if USE_ACL. ++/* Get attributes of the file FD aka NAME into AI, if USE_ACL. ++ Ignore FD if it is negative. + If FLAGS & ACL_GET_SCONTEXT, also get security context. + If FLAGS & ACL_SYMLINK_FOLLOW, follow symbolic links. */ + static void +-get_aclinfo (char const *name, struct aclinfo *ai, int flags) ++get_aclinfo (int fd, char const *name, struct aclinfo *ai, int flags) + { + int scontext_err = ENOTSUP; + ai->buf = ai->u.__gl_acl_ch; +@@ -163,7 +173,9 @@ get_aclinfo (char const *name, struct aclinfo *ai, int flags) + = (flags & ACL_SYMLINK_FOLLOW ? listxattr : llistxattr); + while (true) + { +- ai->size = lsxattr (name, ai->buf, acl_alloc); ++ ai->size = (fd < 0 ++ ? lsxattr (name, ai->buf, acl_alloc) ++ : flistxattr (fd, ai->buf, acl_alloc)); + if (0 < ai->size) + break; + ai->u.err = ai->size < 0 ? errno : 0; +@@ -171,7 +183,9 @@ get_aclinfo (char const *name, struct aclinfo *ai, int flags) + break; + + /* The buffer was too small. Find how large it should have been. */ +- ssize_t size = lsxattr (name, NULL, 0); ++ ssize_t size = (fd < 0 ++ ? lsxattr (name, NULL, 0) ++ : flistxattr (fd, NULL, 0)); + if (size <= 0) + { + ai->size = size; +@@ -214,9 +228,13 @@ get_aclinfo (char const *name, struct aclinfo *ai, int flags) + { + if (ai->size < 0 || aclinfo_has_xattr (ai, XATTR_NAME_SMACK)) + { +- ssize_t r = smack_new_label_from_path (name, "security.SMACK64", +- flags & ACL_SYMLINK_FOLLOW, +- &ai->scontext); ++ static char const SMACK64[] = "security.SMACK64"; ++ ssize_t r = ++ (fd < 0 ++ ? smack_new_label_from_path (name, SMACK64, ++ flags & ACL_SYMLINK_FOLLOW, ++ &ai->scontext) ++ : smack_new_label_from_file (fd, SMACK64, &ai->scontext)); + scontext_err = r < 0 ? errno : 0; + } + } +@@ -226,8 +244,10 @@ get_aclinfo (char const *name, struct aclinfo *ai, int flags) + if (ai->size < 0 || aclinfo_has_xattr (ai, XATTR_NAME_SELINUX)) + { + ssize_t r = +- ((flags & ACL_SYMLINK_FOLLOW ? getfilecon : lgetfilecon) +- (name, &ai->scontext)); ++ (fd < 0 ++ ? ((flags & ACL_SYMLINK_FOLLOW ? getfilecon : lgetfilecon) ++ (name, &ai->scontext)) ++ : fgetfilecon (fd, &ai->scontext)); + scontext_err = r < 0 ? errno : 0; + # ifndef SE_SELINUX_INLINE + /* Gnulib's selinux-h module is not in use, so getfilecon and +@@ -362,11 +382,13 @@ acl_nfs4_nontrivial (uint32_t *xattr, ssize_t nbytes) + } + #endif + +-#if (!USE_LINUX_XATTR && USE_ACL && HAVE_ACL_GET_FD \ +- && !HAVE_ACL_EXTENDED_FILE && !HAVE_ACL_TYPE_EXTENDED \ +- && !HAVE_ACL_GET_LINK_NP) +-# include +-# ifdef O_PATH ++#if (!USE_LINUX_XATTR && USE_ACL && !HAVE_ACL_EXTENDED_FILE \ ++ && !HAVE_ACL_TYPE_EXTENDED) ++ ++# if HAVE_ACL_GET_FD && !HAVE_ACL_GET_LINK_NP ++# include ++# ifdef O_PATH ++# define acl_get_fd_np(fd, type) acl_get_fd (fd) + + /* Like acl_get_file, but do not follow symbolic links. */ + static acl_t +@@ -381,8 +403,24 @@ acl_get_link_np (char const *name, acl_type_t type) + errno = err; + return r; + } +-# define HAVE_ACL_GET_LINK_NP 1 ++# define HAVE_ACL_GET_LINK_NP 1 ++# endif + # endif ++ ++static acl_t ++acl_get_fdfile (int fd, char const *name, acl_type_t type, int flags) ++{ ++ acl_t (*get) (char const *, acl_type_t) = acl_get_file; ++# if HAVE_ACL_GET_LINK_NP /* FreeBSD, NetBSD >= 10, Cygwin >= 2.5 */ ++ if (0 <= fd) ++ return acl_get_fd_np (fd, type); ++ if (! (flags & ACL_SYMLINK_FOLLOW)) ++ get = acl_get_link_np; ++# else ++ /* Ignore FD and FLAGS, unfortunately. */ ++# endif ++ return get (name, type); ++} + #endif + + /* Return 1 if NAME has a nontrivial access control list, +@@ -398,14 +436,35 @@ acl_get_link_np (char const *name, acl_type_t type) + If the d_type value is not known, use DT_UNKNOWN though this may be less + efficient. */ + int +-file_has_aclinfo (MAYBE_UNUSED char const *restrict name, ++file_has_aclinfo (char const *restrict name, + struct aclinfo *restrict ai, int flags) ++{ ++ return fdfile_has_aclinfo (-1, name, ai, flags); ++} ++ ++/* Return 1 if FD aka NAME has a nontrivial access control list, ++ 0 if ACLs are not supported, or if NAME has no or only a base ACL, ++ and -1 (setting errno) on error. Note callers can determine ++ if ACLs are not supported as errno is set in that case also. ++ Ignore FD if it is negative. ++ Set *AI to ACL info regardless of return value. ++ FLAGS should be a d_type value, optionally ORed with ++ - _GL_DT_NOTDIR if it is known that NAME is not a directory, ++ - ACL_GET_SCONTEXT to retrieve security context and return 1 if present, ++ - ACL_SYMLINK_FOLLOW to follow the link if NAME is a symbolic link; ++ otherwise do not follow them if possible. ++ If the d_type value is not known, use DT_UNKNOWN though this may be less ++ efficient. */ ++int ++fdfile_has_aclinfo (MAYBE_UNUSED int fd, ++ MAYBE_UNUSED char const *restrict name, ++ struct aclinfo *restrict ai, int flags) + { + MAYBE_UNUSED unsigned char d_type = flags & UCHAR_MAX; + + #if USE_LINUX_XATTR + int initial_errno = errno; +- get_aclinfo (name, ai, flags); ++ get_aclinfo (fd, name, ai, flags); + + if (!aclinfo_may_indicate_xattr (ai) && ai->size <= 0) + { +@@ -418,11 +477,11 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + In earlier Fedora the two types of ACLs were mutually exclusive. + Attempt to work correctly on both kinds of systems. */ + +- if (!has_xattr (XATTR_NAME_NFSV4_ACL, ai, name, flags)) ++ if (!has_xattr (XATTR_NAME_NFSV4_ACL, ai, fd, name, flags)) + return +- (has_xattr (XATTR_NAME_POSIX_ACL_ACCESS, ai, name, flags) ++ (has_xattr (XATTR_NAME_POSIX_ACL_ACCESS, ai, fd, name, flags) + || ((d_type == DT_DIR || d_type == DT_UNKNOWN) +- && has_xattr (XATTR_NAME_POSIX_ACL_DEFAULT, ai, name, flags))); ++ && has_xattr (XATTR_NAME_POSIX_ACL_DEFAULT, ai, fd, name, flags))); + + /* A buffer large enough to hold any trivial NFSv4 ACL. + The max length of a trivial NFSv4 ACL is 6 words for owner, +@@ -432,8 +491,10 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + everyone is another word to hold "EVERYONE@". */ + uint32_t buf[2 * (6 + 6 + 7)]; + +- int ret = ((flags & ACL_SYMLINK_FOLLOW ? getxattr : lgetxattr) +- (name, XATTR_NAME_NFSV4_ACL, buf, sizeof buf)); ++ int ret = (fd < 0 ++ ? ((flags & ACL_SYMLINK_FOLLOW ? getxattr : lgetxattr) ++ (name, XATTR_NAME_NFSV4_ACL, buf, sizeof buf)) ++ : fgetxattr (fd, XATTR_NAME_NFSV4_ACL, buf, sizeof buf)); + if (ret < 0) + switch (errno) + { +@@ -467,20 +528,23 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + /* On Linux, acl_extended_file is an optimized function: It only + makes two calls to getxattr(), one for ACL_TYPE_ACCESS, one for + ACL_TYPE_DEFAULT. */ +- ret = ((flags & ACL_SYMLINK_FOLLOW +- ? acl_extended_file +- : acl_extended_file_nofollow) +- (name)); ++ ret = (fd < 0 ++ ? ((flags & ACL_SYMLINK_FOLLOW ++ ? acl_extended_file ++ : acl_extended_file_nofollow) ++ (name)) ++ : acl_extended_fd (fd)); + # elif HAVE_ACL_TYPE_EXTENDED /* Mac OS X */ + /* On Mac OS X, acl_get_file (name, ACL_TYPE_ACCESS) + and acl_get_file (name, ACL_TYPE_DEFAULT) + always return NULL / EINVAL. There is no point in making + these two useless calls. The real ACL is retrieved through +- acl_get_file (name, ACL_TYPE_EXTENDED). */ +- acl_t acl = ((flags & ACL_SYMLINK_FOLLOW +- ? acl_get_file +- : acl_get_link_np) +- (name, ACL_TYPE_EXTENDED)); ++ ACL_TYPE_EXTENDED. */ ++ acl_t acl = ++ (fd < 0 ++ ? ((flags & ACL_SYMLINK_FOLLOW ? acl_get_file : acl_get_link_np) ++ (name, ACL_TYPE_EXTENDED)) ++ : acl_get_fd_np (fd, ACL_TYPE_EXTENDED)); + if (acl) + { + ret = acl_extended_nontrivial (acl); +@@ -489,13 +553,8 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + else + ret = -1; + # else /* FreeBSD, NetBSD >= 10, IRIX, Tru64, Cygwin >= 2.5 */ +- acl_t (*acl_get_file_or_link) (char const *, acl_type_t) = acl_get_file; +-# if HAVE_ACL_GET_LINK_NP /* FreeBSD, NetBSD >= 10, Cygwin >= 2.5 */ +- if (! (flags & ACL_SYMLINK_FOLLOW)) +- acl_get_file_or_link = acl_get_link_np; +-# endif + +- acl_t acl = acl_get_file_or_link (name, ACL_TYPE_ACCESS); ++ acl_t acl = acl_get_fdfile (fd, name, ACL_TYPE_ACCESS, flags); + if (acl) + { + ret = acl_access_nontrivial (acl); +@@ -517,7 +576,7 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + && (d_type == DT_DIR + || (d_type == DT_UNKNOWN && !(flags & _GL_DT_NOTDIR)))) + { +- acl = acl_get_file_or_link (name, ACL_TYPE_DEFAULT); ++ acl = acl_get_fdfile (fd, name, ACL_TYPE_DEFAULT, flags); + if (acl) + { + # ifdef __CYGWIN__ /* Cygwin >= 2.5 */ +@@ -562,7 +621,10 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + + /* Solaris 10 (newer version), which has additional API declared in + (acl_t) and implemented in libsec (acl_set, acl_trivial, +- acl_fromtext, ...). */ ++ acl_fromtext, ...). ++ ++ Ignore FD, unfortunately. That is better than mishandling ++ ZFS-style ACLs, as the general case code does. */ + return acl_trivial (name); + + # else /* Solaris, Cygwin, general case */ +@@ -586,7 +648,9 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + + for (;;) + { +- count = acl (name, GETACL, alloc, entries); ++ count = (fd < 0 ++ ? acl (name, GETACL, alloc, entries) ++ : facl (fd, GETACL, alloc, entries)); + if (count < 0 && errno == ENOSPC) + { + /* Increase the size of the buffer. */ +@@ -657,7 +721,9 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + + for (;;) + { +- count = acl (name, ACE_GETACL, alloc, entries); ++ count = (fd < 0 ++ ? acl (name, ACE_GETACL, alloc, entries) ++ : facl (fd, ACE_GETACL, alloc, entries)); + if (count < 0 && errno == ENOSPC) + { + /* Increase the size of the buffer. */ +@@ -722,7 +788,9 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + struct acl_entry entries[NACLENTRIES]; + int count; + +- count = getacl (name, NACLENTRIES, entries); ++ count = (fd < 0 ++ ? getacl (name, NACLENTRIES, entries) ++ : fgetacl (fd, NACLENTRIES, entries)); + + if (count < 0) + { +@@ -751,7 +819,8 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + { + struct stat statbuf; + +- if (stat (name, &statbuf) == -1 && errno != EOVERFLOW) ++ if ((fd < 0 ? stat (name, &statbuf) : fstat (fd, &statbuf)) < 0 ++ && errno != EOVERFLOW) + return -1; + + return acl_nontrivial (count, entries); +@@ -765,6 +834,7 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + struct acl entries[NACLVENTRIES]; + int count; + ++ /* Ignore FD, unfortunately. */ + count = acl ((char *) name, ACL_GET, NACLVENTRIES, entries); + + if (count < 0) +@@ -809,7 +879,9 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + /* The docs say that type being 0 is equivalent to ACL_ANY, but it + is not true, in AIX 5.3. */ + type.u64 = ACL_ANY; +- if (aclx_get (name, 0, &type, aclbuf, &aclsize, &mode) >= 0) ++ if (0 <= (fd < 0 ++ ? aclx_get (name, 0, &type, aclbuf, &aclsize, &mode) ++ : aclx_fget (fd, 0, &type, aclbuf, &aclsize, &mode))) + break; + if (errno == ENOSYS) + return 0; +@@ -855,7 +927,10 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + + union { struct acl a; char room[4096]; } u; + +- if (statacl ((char *) name, STX_NORMAL, &u.a, sizeof (u)) < 0) ++ if ((fd < 0 ++ ? statacl ((char *) name, STX_NORMAL, &u.a, sizeof u) ++ : fstatacl (fd, STX_NORMAL, &u.a, sizeof u)) ++ < 0) + return -1; + + return acl_nontrivial (&u.a); +@@ -866,6 +941,7 @@ file_has_aclinfo (MAYBE_UNUSED char const *restrict name, + struct acl entries[NACLENTRIES]; + int count; + ++ /* Ignore FD, unfortunately. */ + count = acl ((char *) name, ACL_GET, NACLENTRIES, entries); + + if (count < 0) +diff --git a/lib/qcopy-acl.c b/lib/qcopy-acl.c +index ad79661..282f4b2 100644 +--- a/lib/qcopy-acl.c ++++ b/lib/qcopy-acl.c +@@ -26,6 +26,7 @@ + #if USE_XATTR + + # include ++# include + # include + + # if HAVE_LINUX_XATTR_H +@@ -61,6 +62,7 @@ is_attr_permissions (const char *name, struct error_context *ctx) + a valid file descriptor, use file descriptor operations, else use + filename based operations on SRC_NAME. Likewise for DEST_DESC and + DST_NAME. ++ MODE should be the source file's st_mode. + If access control lists are not available, fchmod the target file to + MODE. Also sets the non-permission bits of the destination file + (S_ISUID, S_ISGID, S_ISVTX) to those from MODE if any are set. +@@ -86,10 +88,29 @@ qcopy_acl (const char *src_name, int source_desc, const char *dst_name, + Functions attr_copy_* return 0 in case we copied something OR nothing + to copy */ + if (ret == 0) +- ret = source_desc <= 0 || dest_desc <= 0 +- ? attr_copy_file (src_name, dst_name, is_attr_permissions, NULL) +- : attr_copy_fd (src_name, source_desc, dst_name, dest_desc, +- is_attr_permissions, NULL); ++ { ++ ret = source_desc <= 0 || dest_desc <= 0 ++ ? attr_copy_file (src_name, dst_name, is_attr_permissions, NULL) ++ : attr_copy_fd (src_name, source_desc, dst_name, dest_desc, ++ is_attr_permissions, NULL); ++ ++ /* Copying can fail with EOPNOTSUPP even when the source ++ permissions are trivial (Bug#78328). Don't report an error ++ in this case, as the chmod_or_fchmod suffices. */ ++ if (ret < 0 && errno == EOPNOTSUPP) ++ { ++ /* fdfile_has_aclinfo cares only about DT_DIR, _GL_DT_NOTDIR, ++ and DT_LNK (but DT_LNK is not possible here), ++ so use _GL_DT_NOTDIR | DT_UNKNOWN for other file types. */ ++ int flags = S_ISDIR (mode) ? DT_DIR : _GL_DT_NOTDIR | DT_UNKNOWN; ++ ++ struct aclinfo ai; ++ if (!fdfile_has_aclinfo (source_desc, src_name, &ai, flags)) ++ ret = 0; ++ aclinfo_free (&ai); ++ errno = EOPNOTSUPP; ++ } ++ } + #else + /* no XATTR, so we proceed the old dusty way */ + struct permission_context ctx; +-- +2.49.0 + + +From ed2bda5888829f4ebacd6dc9c86b7494dbf2a3b7 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Fri, 9 May 2025 18:48:03 -0700 +Subject: [PATCH 2/2] acl-tests: link with $(FILE_HAS_ACL_LIB) + +* modules/acl-tests (test_copy_acl_LDADD): Add +$(FILE_HAS_ACL_LIB), since qcopy-acl depends on file-has-acl. +Although this suggests that QCOPY_ACL_LIB should contain +FILE_HAS_ACL_LIB, I’m not sure whether that’s the right course of +action and anyway this is good enough for coreutils. + +(cherry picked from commit 955360a66c99bdd9ac3688519a8b521b06958fd3) +--- + gnulib-tests/gnulib.mk | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gnulib-tests/gnulib.mk b/gnulib-tests/gnulib.mk +index e222c63..4b78de4 100644 +--- a/gnulib-tests/gnulib.mk ++++ b/gnulib-tests/gnulib.mk +@@ -99,7 +99,7 @@ TESTS += \ + TESTS_ENVIRONMENT += USE_ACL=$(USE_ACL) + check_PROGRAMS += test-set-mode-acl test-copy-acl test-sameacls + test_set_mode_acl_LDADD = $(LDADD) $(LIB_ACL) $(LIBUNISTRING) @LIBINTL@ $(MBRTOWC_LIB) $(LIBC32CONV) +-test_copy_acl_LDADD = $(LDADD) $(LIB_ACL) $(QCOPY_ACL_LIB) $(LIBUNISTRING) @LIBINTL@ $(MBRTOWC_LIB) $(LIBC32CONV) ++test_copy_acl_LDADD = $(LDADD) $(LIB_ACL) $(QCOPY_ACL_LIB) $(FILE_HAS_ACL_LIB) $(LIBUNISTRING) @LIBINTL@ $(MBRTOWC_LIB) $(LIBC32CONV) + test_sameacls_LDADD = $(LDADD) $(LIB_ACL) @LIBINTL@ $(MBRTOWC_LIB) + EXTRA_DIST += test-set-mode-acl.sh test-set-mode-acl-1.sh test-set-mode-acl-2.sh test-copy-acl.sh test-copy-acl-1.sh test-copy-acl-2.sh test-set-mode-acl.c test-copy-acl.c test-sameacls.c macros.h + +-- +2.49.0 + diff --git a/coreutils-9.7-stty-arbitrary-baud-rates.patch b/coreutils-9.7-stty-arbitrary-baud-rates.patch new file mode 100644 index 0000000..20d078b --- /dev/null +++ b/coreutils-9.7-stty-arbitrary-baud-rates.patch @@ -0,0 +1,669 @@ +From 8e5ee22042931bdac6488d61c5d59bcd1b0dba5f Mon Sep 17 00:00:00 2001 +From: "H. Peter Anvin" +Date: Mon, 16 Jun 2025 14:58:01 -0700 +Subject: [PATCH 1/5] stty: arbitrary or non-a priori known speed_t support + +Support the case where speed_t is simply a number, and in that case +assume that arbitrary values can be passed. This is assumed to be the +case when all known speed_t macros equal their own value. + +Try to probe for a variety of speed_t constants by trying to coax +$(CC) into emitting macro definitions (-E -dM). If this is not +supported, use a fairly extensive list of constants as a +fallback. This both improves the test for arbitrary speed support, as +well as allowing proper operation in the case where the constants are +not plain numbers and allows for handing enumerated speed constants +that were not known a priori when the source code was written. + +A simple shell script (mostly using sed) is used to turn the list of +constants (probed and predefined) into a pair of conversion functions, +baud_to_value() and value_to_baud(); string_to_baud() is then +reimplemented as a wrapper around the latter. + +* src/local.mk: Generate speedlist.h. +* src/speedgen: Portable shell script to generate speedlist.h. +* src/stty.c: Adjust string_to_baud to +convert from arbitrary numeric values. +* src/termios.c: A helper used when generating speedlist.h + +@lzaoral: This patch was amended to fix build failure in the +"single-binary" mode. + +(cherry picked from commit 357fda90d15fd3f7dba61e1ab322b183a48d0081) +--- + src/local.mk | 15 +++++- + src/speedgen | 85 ++++++++++++++++++++++++++++++ + src/stty.c | 143 +++++++++++++++++++------------------------------- + src/termios.c | 34 ++++++++++++ + 4 files changed, 188 insertions(+), 89 deletions(-) + create mode 100755 src/speedgen + create mode 100644 src/termios.c + +diff --git a/src/local.mk b/src/local.mk +index fd9dc81..3b8a34e 100644 +--- a/src/local.mk ++++ b/src/local.mk +@@ -71,7 +71,8 @@ EXTRA_DIST += \ + src/dircolors.hin \ + src/primes.h \ + src/tac-pipe.c \ +- src/extract-magic ++ src/extract-magic \ ++ src/speedgen + + CLEANFILES += $(SCRIPTS) + +@@ -672,6 +673,18 @@ src/version.h: Makefile + $(AM_V_at)chmod a-w $@t + $(AM_V_at)mv $@t $@ + ++# Target-specific termios baud rate file. This is opportunistic; ++# if cc -E doesn't support -dM, the speedgen script still includes ++# an extensive fallback list of common constants. ++BUILT_SOURCES += src/speedlist.h ++src/speedlist.h: src/termios.c lib/config.h src/speedgen ++ $(AM_V_GEN)rm -f $@ ++ $(AM_V_at)${MKDIR_P} src ++ $(AM_V_at)$(COMPILE) -E -dM $< 2>/dev/null | \ ++ $(SHELL) $(srcdir)/src/speedgen $@t ++ $(AM_V_at)chmod a-w $@t ++ $(AM_V_at)mv $@t $@ ++ + # Generates a list of macro invocations like: + # SINGLE_BINARY_PROGRAM(program_name_str, main_name) + # once for each program list on $(single_binary_progs). Note that +diff --git a/src/speedgen b/src/speedgen +new file mode 100755 +index 0000000..f1647d9 +--- /dev/null ++++ b/src/speedgen +@@ -0,0 +1,85 @@ ++#!/bin/sh -e ++ ++out="$1" ++tmp="$out.tmp" ++ ++if [ -z "$out" ]; then ++ echo "Usage: $0 outfile" 2>&1 ++ exit 1 ++fi ++ ++s='[[:space:]]' # For brevity's sake ++ ++trap "rm -f '$tmp'" EXIT ++trap "rm -f '$tmp' '$out'" ERR HUP INT QUIT TERM ++ ++# Fallback list of speeds that are always tested for ++defspeeds="0 50 75 110 134 150 200 300 600 1200 1800 2400 4800 7200 9600 \ ++14400 19200 28800 33600 38400 57600 76800 115200 153600 230400 307200 \ ++460800 500000 576000 614400 921600 1000000 1152000 1500000 \ ++2000000 2500000 3000000 3500000 4000000 5000000 10000000" ++( ++ sed -n -e "s/^$s*\#$s*define$s$s*B\\([1-9][0-9]*\\)$s.*\$/\\1/p" ++ for s in $defspeeds; do echo "$s"; done ++) | sort -n | uniq > "$tmp" ++ ++cat > "$out" <<'EOF' ++#ifndef SPEEDLIST_H ++# define SPEEDLIST_H 1 ++ ++# if 1 \ ++EOF ++ ++sed -e 's/^.*$/ \&\& (!defined(B&) || B& == &) \\/' < "$tmp" >> "$out" ++ ++cat >> "$out" <<'EOF' ++ ++# define TERMIOS_SPEED_T_SANE 1 ++ ++# endif ++ ++ATTRIBUTE_CONST ++static unsigned long int ++baud_to_value (speed_t speed) ++{ ++# ifdef TERMIOS_SPEED_T_SANE ++ return speed; ++# else ++ switch (speed) ++ { ++EOF ++ ++sed -e 's/^.*$/# ifdef B&\n case B&: return &;\n# endif/' \ ++ < "$tmp" >> "$out" ++ ++cat >> "$out" <<'EOF' ++ default: return -1; ++ } ++# endif ++} ++ ++ATTRIBUTE_CONST ++static speed_t ++value_to_baud (unsigned long int value) ++{ ++# ifdef TERMIOS_SPEED_T_SANE ++ speed_t speed = value; ++ if (speed != value) ++ speed = (speed_t) -1; /* Unrepresentable (overflow?) */ ++ return speed; ++# else ++ switch (value) ++ { ++EOF ++ ++sed -e 's/^.*$/# ifdef B&\n case &: return B&;\n# endif/' \ ++ < "$tmp" >> "$out" ++ ++cat >> "$out" <<'EOF' ++ default: return (speed_t) -1; ++ } ++# endif ++} ++ ++#endif ++EOF +diff --git a/src/stty.c b/src/stty.c +index 133b33c..561de1c 100644 +--- a/src/stty.c ++++ b/src/stty.c +@@ -55,6 +55,7 @@ + + #include "system.h" + #include "assure.h" ++#include "c-ctype.h" + #include "fd-reopen.h" + #include "quote.h" + #include "xdectoint.h" +@@ -2172,100 +2173,66 @@ recover_mode (char const *arg, struct termios *mode) + return true; + } + +-struct speed_map +-{ +- char const *string; /* ASCII representation. */ +- speed_t speed; /* Internal form. */ +- unsigned long int value; /* Numeric value. */ +-}; +- +-static struct speed_map const speeds[] = +-{ +- {"0", B0, 0}, +- {"50", B50, 50}, +- {"75", B75, 75}, +- {"110", B110, 110}, +- {"134", B134, 134}, +- {"134.5", B134, 134}, +- {"150", B150, 150}, +- {"200", B200, 200}, +- {"300", B300, 300}, +- {"600", B600, 600}, +- {"1200", B1200, 1200}, +- {"1800", B1800, 1800}, +- {"2400", B2400, 2400}, +- {"4800", B4800, 4800}, +- {"9600", B9600, 9600}, +- {"19200", B19200, 19200}, +- {"38400", B38400, 38400}, +- {"exta", B19200, 19200}, +- {"extb", B38400, 38400}, +-#ifdef B57600 +- {"57600", B57600, 57600}, +-#endif +-#ifdef B115200 +- {"115200", B115200, 115200}, +-#endif +-#ifdef B230400 +- {"230400", B230400, 230400}, +-#endif +-#ifdef B460800 +- {"460800", B460800, 460800}, +-#endif +-#ifdef B500000 +- {"500000", B500000, 500000}, +-#endif +-#ifdef B576000 +- {"576000", B576000, 576000}, +-#endif +-#ifdef B921600 +- {"921600", B921600, 921600}, +-#endif +-#ifdef B1000000 +- {"1000000", B1000000, 1000000}, +-#endif +-#ifdef B1152000 +- {"1152000", B1152000, 1152000}, +-#endif +-#ifdef B1500000 +- {"1500000", B1500000, 1500000}, +-#endif +-#ifdef B2000000 +- {"2000000", B2000000, 2000000}, +-#endif +-#ifdef B2500000 +- {"2500000", B2500000, 2500000}, +-#endif +-#ifdef B3000000 +- {"3000000", B3000000, 3000000}, +-#endif +-#ifdef B3500000 +- {"3500000", B3500000, 3500000}, +-#endif +-#ifdef B4000000 +- {"4000000", B4000000, 4000000}, +-#endif +- {nullptr, 0, 0} +-}; ++/* Autogenerated conversion functions to/from speed_t */ ++#include "speedlist.h" + + ATTRIBUTE_PURE + static speed_t + string_to_baud (char const *arg) + { +- for (int i = 0; speeds[i].string != nullptr; ++i) +- if (STREQ (arg, speeds[i].string)) +- return speeds[i].speed; +- return (speed_t) -1; +-} ++ char *ep; ++ unsigned long value; ++ unsigned char c; + +-ATTRIBUTE_PURE +-static unsigned long int +-baud_to_value (speed_t speed) +-{ +- for (int i = 0; speeds[i].string != nullptr; ++i) +- if (speed == speeds[i].speed) +- return speeds[i].value; +- return 0; ++ /* Explicitly disallow negative numbers. */ ++ while (c_isspace (*arg)) ++ arg++; ++ if (*arg == '-') ++ return (speed_t) -1; ++ ++ value = strtoul (arg, &ep, 10); ++ ++ c = *ep++; ++ if (c == '.') ++ { ++ /* Number includes a fraction. Round it to nearest-even. ++ Note in particular that 134.5 must round to 134! */ ++ c = *ep++; ++ if (c) ++ { ++ c -= '0'; ++ if (c > 9) ++ { ++ return (speed_t) -1; /* Garbage after otherwise valid number */ ++ } ++ else if (c > 5) ++ { ++ value++; ++ } ++ else if (c == 5) ++ { ++ while ((c = *ep++) == '0') ++ ; /* Skip zeroes after .5 */ ++ ++ if (c >= '1' && c <= '9') ++ value++; /* Nonzero digit, round up */ ++ else ++ value += (value & 1); /* Exactly in the middle, round even */ ++ } ++ } ++ } ++ else if (c) ++ { ++ /* Not a valid number; check for legacy aliases "exta" and "extb" */ ++ if (STREQ (arg, "exta")) ++ return B19200; ++ else if (STREQ (arg, "extb")) ++ return B38400; ++ else ++ return (speed_t) -1; ++ } ++ ++ return value_to_baud (value); + } + + static void +diff --git a/src/termios.c b/src/termios.c +new file mode 100644 +index 0000000..f17e12e +--- /dev/null ++++ b/src/termios.c +@@ -0,0 +1,34 @@ ++/* termios.c -- coax out Bxxx macros from termios.h ++ ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation, either version 3 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ ++ ++/* This simply #includes headers which may or may not provide Bxxx ++ constant macros. This is run through the C preprocessor and defined ++ macros are extracted. ++ ++ In the case where the C preprocessor isn't capable of doing so, ++ the script this is fed through contains a pre-defined set of common ++ constants. */ ++ ++#include ++ ++#ifdef TERMIOS_NEEDS_XOPEN_SOURCE ++# define _XOPEN_SOURCE ++#endif ++ ++#include ++#include ++#include +-- +2.50.0 + + +From 60c9206391e2fac32639d3a143435d1dd9ec6421 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?P=C3=A1draig=20Brady?= +Date: Tue, 17 Jun 2025 23:32:05 +0100 +Subject: [PATCH 2/5] tests: stty: adjust tests for arbitary speeds + +* tests/stty/stty-invalid.sh: Adjust to what is now invalid. +* tests/stty/stty.sh: Add checks for valid speed variants. +* tests/stty/bad-speed.sh: New test to ensure unsupported speeds +are diagnosed. + +(cherry picked from commit efaec8078142996d958b6720b85a13b12497c3d0) +--- + tests/local.mk | 1 + + tests/stty/bad-speed.sh | 50 ++++++++++++++++++++++++++++++++++++++ + tests/stty/stty-invalid.sh | 10 ++++++-- + tests/stty/stty.sh | 10 ++++++++ + 4 files changed, 69 insertions(+), 2 deletions(-) + create mode 100755 tests/stty/bad-speed.sh + +diff --git a/tests/local.mk b/tests/local.mk +index 642d225..b68df41 100644 +--- a/tests/local.mk ++++ b/tests/local.mk +@@ -425,6 +425,7 @@ all_tests = \ + tests/stat/stat-printf.pl \ + tests/stat/stat-slash.sh \ + tests/misc/stdbuf.sh \ ++ tests/stty/bad-speed.sh \ + tests/stty/stty.sh \ + tests/stty/stty-invalid.sh \ + tests/stty/stty-pairs.sh \ +diff --git a/tests/stty/bad-speed.sh b/tests/stty/bad-speed.sh +new file mode 100755 +index 0000000..d80d2e7 +--- /dev/null ++++ b/tests/stty/bad-speed.sh +@@ -0,0 +1,50 @@ ++#!/bin/sh ++# Ensure we handle cfsetispeed failing ++# which we did not before coreutils v9.1 ++ ++# Copyright (C) 2025 Free Software Foundation, Inc. ++ ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation, either version 3 of the License, or ++# (at your option) any later version. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++ ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src ++print_ver_ stty ++require_gcc_shared_ ++ ++# Replace each cfsetispeed call with a call to these stubs. ++cat > k.c <<'EOF' || framework_failure_ ++#include ++#include ++#include ++#include ++ ++int cfsetispeed(struct termios *termios_p, speed_t speed) ++{ ++ /* Leave a marker so we can identify if the function was intercepted. */ ++ fclose(fopen("preloaded", "w")); ++ ++ errno=EINVAL; ++ return -1; ++} ++EOF ++ ++# Then compile/link it: ++gcc_shared_ k.c k.so \ ++ || skip_ 'failed to build shared library' ++ ++( export LD_PRELOAD=$LD_PRELOAD:./k.so ++ returns_ 1 stty ispeed 9600 ) || fail=1 ++ ++test -e preloaded || skip_ 'LD_PRELOAD interception failed' ++ ++Exit $fail +diff --git a/tests/stty/stty-invalid.sh b/tests/stty/stty-invalid.sh +index 4b87e2a..a1442a8 100755 +--- a/tests/stty/stty-invalid.sh ++++ b/tests/stty/stty-invalid.sh +@@ -20,6 +20,7 @@ + print_ver_ stty + require_controlling_input_terminal_ + require_trap_signame_ ++getlimits_ + + trap '' TTOU # Ignore SIGTTOU + +@@ -50,8 +51,13 @@ if tty -s = 9.8 supports arbitrary speeds on some systems ++# so restrict tests here to invalid numbers ++# We simulate unsupported numbers in a separate "LD_PRELOAD" test. ++WRAP_9600="$(expr $ULONG_OFLOW - 9600)" ++for speed in 9600.. ++9600 -$WRAP_9600 --$WRAP_9600 0x2580 96E2; do ++ returns_ 1 stty ispeed "$speed" || fail=1 ++done + + # Just in case either of the above mistakenly succeeds (and changes + # the state of our tty), try to restore the initial state. +diff --git a/tests/stty/stty.sh b/tests/stty/stty.sh +index dab4cd0..c0f7494 100755 +--- a/tests/stty/stty.sh ++++ b/tests/stty/stty.sh +@@ -95,4 +95,14 @@ for W in $(seq 80 90); do + test "$output_width" -le "$W" || fail=1 + done + ++# Ensure we support varied numeric forms ++# with appropriate rounding ++if stty ispeed '9600'; then ++ stty ispeed ' +9600' || fail=1 ++ stty ispeed '9600.49' || fail=1 ++ stty ispeed '9600.50' || fail=1 ++ stty ispeed '9599.51' || fail=1 ++ stty ispeed ' 9600.' || fail=1 ++fi ++ + Exit $fail +-- +2.50.0 + + +From a47c15eea3ffe08662415ae13873b40c7ffcdb43 Mon Sep 17 00:00:00 2001 +From: Collin Funk +Date: Sat, 21 Jun 2025 22:05:19 -0700 +Subject: [PATCH 3/5] build: add src/termios.c to the tarball + +* src/local.mk (EXTRA_DIST): Add src/termios.c. + +(cherry picked from commit b7db7757831e93ca44ae59e1921bc4ebbc87974f) +--- + src/local.mk | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/local.mk b/src/local.mk +index 3b8a34e..188dda1 100644 +--- a/src/local.mk ++++ b/src/local.mk +@@ -72,7 +72,8 @@ EXTRA_DIST += \ + src/primes.h \ + src/tac-pipe.c \ + src/extract-magic \ +- src/speedgen ++ src/speedgen \ ++ src/termios.c + + CLEANFILES += $(SCRIPTS) + +-- +2.50.0 + + +From caa439bf750193bcbed215a6676053f0b3c96e21 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?P=C3=A1draig=20Brady?= +Date: Sun, 22 Jun 2025 15:01:21 +0100 +Subject: [PATCH 4/5] doc: stty: adjust description of supported speeds + +* doc/coreutils.texi (stty invocation): Remove now imprecise +list of speeds given we may now support higher or arbitrary speeds. +Mention that we may support higher or arbitrary speeds. + +(cherry picked from commit 8b05eca972f70858749a946ac24f08d0718c1be6) +--- + doc/coreutils.texi | 21 ++------------------- + 1 file changed, 2 insertions(+), 19 deletions(-) + +diff --git a/doc/coreutils.texi b/doc/coreutils.texi +index 6d1ee11..c04af2b 100644 +--- a/doc/coreutils.texi ++++ b/doc/coreutils.texi +@@ -15932,25 +15932,8 @@ Print the terminal speed. + Set the input and output speeds to @var{n}. @var{n} can be one of: 0 + 50 75 110 134 134.5 150 200 300 600 1200 1800 2400 4800 9600 19200 + 38400 @code{exta} @code{extb}. @code{exta} is the same as 19200; +-@code{extb} is the same as 38400. Many systems, including GNU/Linux, +-support higher speeds. The @command{stty} command includes support +-for speeds of +-57600, +-115200, +-230400, +-460800, +-500000, +-576000, +-921600, +-1000000, +-1152000, +-1500000, +-2000000, +-2500000, +-3000000, +-3500000, +-or +-4000000 where the system supports these. ++@code{extb} is the same as 38400. Many systems, support arbitrary ++or higher speeds. + 0 hangs up the line if @option{-clocal} is set. + @end table + +-- +2.50.0 + + +From 8e48d56c2aa10f9875ffe1ec051a17f0eab6d2f9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?P=C3=A1draig=20Brady?= +Date: Sun, 22 Jun 2025 16:40:04 +0100 +Subject: [PATCH 5/5] stty: stricter floating point parsing + +* src/stty.c (string_to_baud): Disallow extraneous characters +after floating point numbers. +* tests/stty/stty-invalid.sh: Add test cases. + +(cherry picked from commit 3d35b3c0e56bd556c90dc98c3e5e2e7289b0eb0d) +--- + src/stty.c | 27 +++++++++++++-------------- + tests/stty/stty-invalid.sh | 3 ++- + 2 files changed, 15 insertions(+), 15 deletions(-) + +diff --git a/src/stty.c b/src/stty.c +index 561de1c..0163ea4 100644 +--- a/src/stty.c ++++ b/src/stty.c +@@ -2200,25 +2200,24 @@ string_to_baud (char const *arg) + c = *ep++; + if (c) + { +- c -= '0'; +- if (c > 9) ++ unsigned char d = c - '0'; ++ if (d > 5) ++ value++; ++ else if (d == 5) + { +- return (speed_t) -1; /* Garbage after otherwise valid number */ +- } +- else if (c > 5) +- { +- value++; +- } +- else if (c == 5) +- { +- while ((c = *ep++) == '0') +- ; /* Skip zeroes after .5 */ ++ while ((c = *ep++) == '0'); /* Skip zeroes after .5 */ + +- if (c >= '1' && c <= '9') +- value++; /* Nonzero digit, round up */ ++ if (c) ++ value++; /* Nonzero, round up */ + else + value += (value & 1); /* Exactly in the middle, round even */ + } ++ ++ while (c_isdigit (c)) /* Skip remaining digits. */ ++ c = *ep++; ++ ++ if (c) ++ return (speed_t) -1; /* Garbage after otherwise valid number */ + } + } + else if (c) +diff --git a/tests/stty/stty-invalid.sh b/tests/stty/stty-invalid.sh +index a1442a8..868ed1d 100755 +--- a/tests/stty/stty-invalid.sh ++++ b/tests/stty/stty-invalid.sh +@@ -55,7 +55,8 @@ fi + # so restrict tests here to invalid numbers + # We simulate unsupported numbers in a separate "LD_PRELOAD" test. + WRAP_9600="$(expr $ULONG_OFLOW - 9600)" +-for speed in 9600.. ++9600 -$WRAP_9600 --$WRAP_9600 0x2580 96E2; do ++for speed in 9599.. 9600.. 9600.5. 9600.50. 9600.0. ++9600 \ ++ -$WRAP_9600 --$WRAP_9600 0x2580 96E2 9600,0 '9600.0 '; do + returns_ 1 stty ispeed "$speed" || fail=1 + done + +-- +2.50.0 + diff --git a/coreutils-9.9-fix-cut-test-aarch64.patch b/coreutils-9.9-fix-cut-test-aarch64.patch deleted file mode 100644 index 600f87b..0000000 --- a/coreutils-9.9-fix-cut-test-aarch64.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 95044cb5eaea83d02f768feb5ab79fcf5e6ad782 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?P=C3=A1draig=20Brady?= -Date: Mon, 22 Dec 2025 17:12:48 +0000 -Subject: [PATCH] tests: avoid false failure due to ulimit on aarch64 - -* tests/cut/cut-huge-range.sh: Add an extra 1MiB headroom, -which was seen with aarch64. -Reported at https://bugzilla.redhat.com/2424302 - -Cherry-picked-by: Lukáš Zaoral -Upstream-commit: 95044cb5eaea83d02f768feb5ab79fcf5e6ad782 ---- - tests/cut/cut-huge-range.sh | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tests/cut/cut-huge-range.sh b/tests/cut/cut-huge-range.sh -index 4bd1b129d8..98d7e8f0b9 100755 ---- a/tests/cut/cut-huge-range.sh -+++ b/tests/cut/cut-huge-range.sh -@@ -22,6 +22,7 @@ getlimits_ - - vm=$(get_min_ulimit_v_ returns_ 0 cut -b1 /dev/null) \ - || skip_ 'shell lacks ulimit, or ASAN enabled' -+vm=$(($vm+1000)) # https://bugzilla.redhat.com/2424302 - - # Ensure we can cut up to our sentinel value. - # Don't use expr to subtract one, - diff --git a/coreutils-9.9-gnulib-c23.patch b/coreutils-9.9-gnulib-c23.patch deleted file mode 100644 index 82e3899..0000000 --- a/coreutils-9.9-gnulib-c23.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 891761bca1aa78336e5b18c121075b6e4696c5d4 Mon Sep 17 00:00:00 2001 -From: Paul Eggert -Date: Sun, 23 Nov 2025 00:50:40 -0800 -Subject: [PATCH] Port to C23 qualifier-generic fns like strchr -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This ports Gnulib to strict C23 platforms that reject code -like ‘char *q = strchr (P, 'x');’ when P is a pointer to const, -because in C23 strchr is a qualifier-generic function so -strchr (P, 'x') returns char const *. -This patch does not attempt to do the following two things, -which might be useful in the future: -1. When compiling on non-C23 platforms, check user code for -portability to platforms that define qualifier-generic functions. -2. Port Gnulib to platforms that have qualifier-generic functions -not listed in the C23 standard, e.g., strchrnul. I don’t know -of any such platforms. -* lib/mbschr.c (mbschr): -* lib/memchr2.c (memchr2): -Port to C23, where functions like strchr are qualifier-generic. -* lib/c++defs.h (_GL_FUNCDECL_SYS_NAME): New macro. -* lib/c++defs.h (_GL_FUNCDECL_SYS): -* lib/stdlib.in.h (bsearch): -Use it, to prevent C23 names like strchr from acting like macros. -* lib/string.in.h (memchr, strchr, strpbrk, strrchr): -Do not #undef when GNULIB_POSIXCHECK is defined, as this could -cause conforming C23 code to fail to conform. It’s not clear why -_GL_WARN_ON_USE_CXX; perhaps it was needed but isn’t any more? -But for now, limit the removal of #undef to these four functions -where #undeffing is clearly undesirable in C23. -* lib/wchar.in.h (wmemchr): Parenthesize function name in decl, -to prevent it from acting like a macro. - -Cherry-picked-by: Lukáš Zaoral -Upstream-commit: df17f4f37ed3ca373d23ad42eae51122bdb96626 ---- - lib/c++defs.h | 12 +++++++++++- - lib/mbschr.c | 2 +- - lib/memchr2.c | 2 +- - lib/stdlib.in.h | 6 +++--- - lib/string.in.h | 4 ---- - lib/wchar.in.h | 2 +- - 6 files changed, 17 insertions(+), 11 deletions(-) - -diff --git a/lib/c++defs.h b/lib/c++defs.h -index b77979a..7384457 100644 ---- a/lib/c++defs.h -+++ b/lib/c++defs.h -@@ -127,6 +127,16 @@ - #define _GL_FUNCDECL_RPL_1(rpl_func,rettype,parameters,...) \ - _GL_EXTERN_C_FUNC __VA_ARGS__ rettype rpl_func parameters - -+/* _GL_FUNCDECL_SYS_NAME (func) expands to plain func if C++, and to -+ parenthsized func otherwise. Parenthesization is needed in C23 if -+ the function is like strchr and so is a qualifier-generic macro -+ that expands to something more complicated. */ -+#ifdef __cplusplus -+# define _GL_FUNCDECL_SYS_NAME(func) func -+#else -+# define _GL_FUNCDECL_SYS_NAME(func) (func) -+#endif -+ - /* _GL_FUNCDECL_SYS (func, rettype, parameters, [attributes]); - declares the system function, named func, with the given prototype, - consisting of return type, parameters, and attributes. -@@ -139,7 +149,7 @@ - _GL_FUNCDECL_SYS (posix_openpt, int, (int flags), _GL_ATTRIBUTE_NODISCARD); - */ - #define _GL_FUNCDECL_SYS(func,rettype,parameters,...) \ -- _GL_EXTERN_C_FUNC __VA_ARGS__ rettype func parameters -+ _GL_EXTERN_C_FUNC __VA_ARGS__ rettype _GL_FUNCDECL_SYS_NAME (func) parameters - - /* _GL_CXXALIAS_RPL (func, rettype, parameters); - declares a C++ alias called GNULIB_NAMESPACE::func -diff --git a/lib/mbschr.c b/lib/mbschr.c -index c9e14b5..6582134 100644 ---- a/lib/mbschr.c -+++ b/lib/mbschr.c -@@ -65,5 +65,5 @@ mbschr (const char *string, int c) - return NULL; - } - else -- return strchr (string, c); -+ return (char *) strchr (string, c); - } -diff --git a/lib/memchr2.c b/lib/memchr2.c -index 7493823..d7724ae 100644 ---- a/lib/memchr2.c -+++ b/lib/memchr2.c -@@ -55,7 +55,7 @@ memchr2 (void const *s, int c1_in, int c2_in, size_t n) - c2 = (unsigned char) c2_in; - - if (c1 == c2) -- return memchr (s, c1, n); -+ return (void *) memchr (s, c1, n); - - /* Handle the first few bytes by reading one byte at a time. - Do this until VOID_PTR is aligned on a longword boundary. */ -diff --git a/lib/stdlib.in.h b/lib/stdlib.in.h -index bef0aaa..fd0e1e0 100644 ---- a/lib/stdlib.in.h -+++ b/lib/stdlib.in.h -@@ -224,9 +224,9 @@ _GL_INLINE_HEADER_BEGIN - - /* Declarations for ISO C N3322. */ - #if defined __GNUC__ && __GNUC__ >= 15 && !defined __clang__ --_GL_EXTERN_C void *bsearch (const void *__key, -- const void *__base, size_t __nmemb, size_t __size, -- int (*__compare) (const void *, const void *)) -+_GL_EXTERN_C void *_GL_FUNCDECL_SYS_NAME (bsearch) -+ (const void *__key, const void *__base, size_t __nmemb, size_t __size, -+ int (*__compare) (const void *, const void *)) - _GL_ATTRIBUTE_NONNULL_IF_NONZERO (2, 3) _GL_ARG_NONNULL ((5)); - _GL_EXTERN_C void qsort (void *__base, size_t __nmemb, size_t __size, - int (*__compare) (const void *, const void *)) -diff --git a/lib/string.in.h b/lib/string.in.h -index fdcdd21..8b56acf 100644 ---- a/lib/string.in.h -+++ b/lib/string.in.h -@@ -409,7 +409,6 @@ _GL_CXXALIASWARN1 (memchr, void const *, - _GL_CXXALIASWARN (memchr); - # endif - #elif defined GNULIB_POSIXCHECK --# undef memchr - /* Assume memchr is always declared. */ - _GL_WARN_ON_USE (memchr, "memchr has platform-specific bugs - " - "use gnulib module memchr for portability" ); -@@ -674,7 +673,6 @@ _GL_WARN_ON_USE (stpncpy, "stpncpy is unportable - " - #if defined GNULIB_POSIXCHECK - /* strchr() does not work with multibyte strings if the locale encoding is - GB18030 and the character to be searched is a digit. */ --# undef strchr - /* Assume strchr is always declared. */ - _GL_WARN_ON_USE_CXX (strchr, - const char *, char *, (const char *, int), -@@ -981,7 +979,6 @@ _GL_CXXALIASWARN (strpbrk); - Even in this simple case, it does not work with multibyte strings if the - locale encoding is GB18030 and one of the characters to be searched is a - digit. */ --# undef strpbrk - _GL_WARN_ON_USE_CXX (strpbrk, - const char *, char *, (const char *, const char *), - "strpbrk cannot work correctly on character strings " -@@ -1011,7 +1008,6 @@ _GL_WARN_ON_USE (strspn, "strspn cannot work correctly on character strings " - #if defined GNULIB_POSIXCHECK - /* strrchr() does not work with multibyte strings if the locale encoding is - GB18030 and the character to be searched is a digit. */ --# undef strrchr - /* Assume strrchr is always declared. */ - _GL_WARN_ON_USE_CXX (strrchr, - const char *, char *, (const char *, int), -diff --git a/lib/wchar.in.h b/lib/wchar.in.h -index ab602a2..6be4515 100644 ---- a/lib/wchar.in.h -+++ b/lib/wchar.in.h -@@ -301,7 +301,7 @@ _GL_EXTERN_C int wcsncmp (const wchar_t *__s1, const wchar_t *__s2, size_t __n) - _GL_ATTRIBUTE_NONNULL_IF_NONZERO (1, 3) - _GL_ATTRIBUTE_NONNULL_IF_NONZERO (2, 3); - # ifndef __cplusplus --_GL_EXTERN_C wchar_t *wmemchr (const wchar_t *__s, wchar_t __wc, size_t __n) -+_GL_EXTERN_C wchar_t *(wmemchr) (const wchar_t *__s, wchar_t __wc, size_t __n) - _GL_ATTRIBUTE_NONNULL_IF_NONZERO (1, 3); - # endif - _GL_EXTERN_C wchar_t *wmemset (wchar_t *__s, wchar_t __wc, size_t __n) --- -2.52.0 - diff --git a/coreutils-CVE-2025-5278.patch b/coreutils-CVE-2025-5278.patch new file mode 100644 index 0000000..af81286 --- /dev/null +++ b/coreutils-CVE-2025-5278.patch @@ -0,0 +1,107 @@ +From 701a9bdbf78f869e0fb778ed5aede00e42517add Mon Sep 17 00:00:00 2001 +From: Pádraig Brady +Date: Tue, 20 May 2025 16:03:44 +0100 +Subject: [PATCH] sort: fix buffer under-read (CWE-127) + +* src/sort.c (begfield): Check pointer adjustment +to avoid Out-of-range pointer offset (CWE-823). +(limfield): Likewise. +* tests/sort/sort-field-limit.sh: Add a new test, +which triggers with ASAN or Valgrind. +* tests/local.mk: Reference the new test. +Fixes https://bugs.gnu.org/78507 + +(cherry picked from commit 8c9602e3a145e9596dc1a63c6ed67865814b6633) +--- + src/sort.c | 12 ++++++++++-- + tests/local.mk | 1 + + tests/sort/sort-field-limit.sh | 35 ++++++++++++++++++++++++++++++++++ + 3 files changed, 46 insertions(+), 2 deletions(-) + create mode 100755 tests/sort/sort-field-limit.sh + +diff --git a/src/sort.c b/src/sort.c +index b10183b..7af1a25 100644 +--- a/src/sort.c ++++ b/src/sort.c +@@ -1644,7 +1644,11 @@ begfield (struct line const *line, struct keyfield const *key) + ++ptr; + + /* Advance PTR by SCHAR (if possible), but no further than LIM. */ +- ptr = MIN (lim, ptr + schar); ++ size_t remaining_bytes = lim - ptr; ++ if (schar < remaining_bytes) ++ ptr += schar; ++ else ++ ptr = lim; + + return ptr; + } +@@ -1746,7 +1750,11 @@ limfield (struct line const *line, struct keyfield const *key) + ++ptr; + + /* Advance PTR by ECHAR (if possible), but no further than LIM. */ +- ptr = MIN (lim, ptr + echar); ++ size_t remaining_bytes = lim - ptr; ++ if (echar < remaining_bytes) ++ ptr += echar; ++ else ++ ptr = lim; + } + + return ptr; +diff --git a/tests/local.mk b/tests/local.mk +index 4da6756..642d225 100644 +--- a/tests/local.mk ++++ b/tests/local.mk +@@ -388,6 +388,7 @@ all_tests = \ + tests/sort/sort-debug-keys.sh \ + tests/sort/sort-debug-warn.sh \ + tests/sort/sort-discrim.sh \ ++ tests/sort/sort-field-limit.sh \ + tests/sort/sort-files0-from.pl \ + tests/sort/sort-float.sh \ + tests/sort/sort-h-thousands-sep.sh \ +diff --git a/tests/sort/sort-field-limit.sh b/tests/sort/sort-field-limit.sh +new file mode 100755 +index 0000000..52d8e1d +--- /dev/null ++++ b/tests/sort/sort-field-limit.sh +@@ -0,0 +1,35 @@ ++#!/bin/sh ++# From 7.2-9.7, this would trigger an out of bounds mem read ++ ++# Copyright (C) 2025 Free Software Foundation, Inc. ++ ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation, either version 3 of the License, or ++# (at your option) any later version. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++ ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src ++print_ver_ sort ++getlimits_ ++ ++# This issue triggers with valgrind or ASAN ++valgrind --error-exitcode=1 sort --version 2>/dev/null && ++ VALGRIND='valgrind --error-exitcode=1' ++ ++{ printf '%s\n' aa bb; } > in || framework_failure_ ++ ++_POSIX2_VERSION=200809 $VALGRIND sort +0.${SIZE_MAX}R in > out || fail=1 ++compare in out || fail=1 ++ ++_POSIX2_VERSION=200809 $VALGRIND sort +1 -1.${SIZE_MAX}R in > out || fail=1 ++compare in out || fail=1 ++ ++Exit $fail +-- +2.49.0 + diff --git a/coreutils-df-direct.patch b/coreutils-df-direct.patch index 341ee2c..f5e3d73 100644 --- a/coreutils-df-direct.patch +++ b/coreutils-df-direct.patch @@ -1,4 +1,4 @@ -From 91be1a584108a6a3d96f64382bbf206c4213b3db Mon Sep 17 00:00:00 2001 +From d3117ae1bb422f771f1c19af54f81d5151f55065 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Mon, 29 Mar 2010 17:20:34 +0000 Subject: [PATCH] coreutils-df-direct.patch @@ -11,10 +11,10 @@ Subject: [PATCH] coreutils-df-direct.patch create mode 100755 tests/df/direct.sh diff --git a/doc/coreutils.texi b/doc/coreutils.texi -index b420606..0ccb368 100644 +index d1c282f..6d1ee11 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi -@@ -12597,6 +12597,13 @@ some systems (notably Solaris), doing this yields more up to date results, +@@ -12467,6 +12467,13 @@ some systems (notably Solaris), doing this yields more up to date results, but in general this option makes @command{df} much slower, especially when there are many or very busy file systems. @@ -29,10 +29,10 @@ index b420606..0ccb368 100644 @opindex --total @cindex grand total of file system size, usage and available space diff --git a/src/df.c b/src/df.c -index 75e638c..ef9f0a7 100644 +index a969c5c..c465a3f 100644 --- a/src/df.c +++ b/src/df.c -@@ -121,6 +121,9 @@ static bool print_type; +@@ -122,6 +122,9 @@ static bool print_type; /* If true, print a grand total at the end. */ static bool print_grand_total; @@ -42,7 +42,7 @@ index 75e638c..ef9f0a7 100644 /* Grand total data. */ static struct fs_usage grand_fsu; -@@ -248,13 +251,15 @@ enum +@@ -249,13 +252,15 @@ enum NO_SYNC_OPTION = CHAR_MAX + 1, SYNC_OPTION, TOTAL_OPTION, @@ -59,7 +59,7 @@ index 75e638c..ef9f0a7 100644 {"inodes", no_argument, nullptr, 'i'}, {"human-readable", no_argument, nullptr, 'h'}, {"si", no_argument, nullptr, 'H'}, -@@ -571,7 +576,10 @@ get_header (void) +@@ -572,7 +577,10 @@ get_header (void) for (idx_t col = 0; col < ncolumns; col++) { char *cell; @@ -71,7 +71,7 @@ index 75e638c..ef9f0a7 100644 if (columns[col]->field == SIZE_FIELD && (header_mode == DEFAULT_MODE -@@ -1446,6 +1454,17 @@ get_point (char const *point, const struct stat *statp) +@@ -1454,6 +1462,17 @@ get_point (char const *point, const struct stat *statp) static void get_entry (char const *name, struct stat const *statp) { @@ -89,7 +89,7 @@ index 75e638c..ef9f0a7 100644 if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode)) && get_device (name)) return; -@@ -1516,6 +1535,7 @@ or all file systems by default.\n\ +@@ -1524,6 +1543,7 @@ or all file systems by default.\n\ -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\ '-BM' prints sizes in units of 1,048,576 bytes;\n\ see SIZE format below\n\ @@ -97,7 +97,7 @@ index 75e638c..ef9f0a7 100644 -h, --human-readable print sizes in powers of 1024 (e.g., 1023M)\n\ -H, --si print sizes in powers of 1000 (e.g., 1.1G)\n\ "), stdout); -@@ -1610,6 +1630,9 @@ main (int argc, char **argv) +@@ -1618,6 +1638,9 @@ main (int argc, char **argv) xstrtol_fatal (e, oi, c, long_options, optarg); } break; @@ -107,7 +107,7 @@ index 75e638c..ef9f0a7 100644 case 'i': if (header_mode == OUTPUT_MODE) { -@@ -1706,6 +1729,13 @@ main (int argc, char **argv) +@@ -1714,6 +1737,13 @@ main (int argc, char **argv) } } @@ -183,5 +183,5 @@ index 0000000..8e4cfb8 + +Exit $fail -- -2.52.0 +2.49.0 diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch index 83579e9..2917075 100644 --- a/coreutils-i18n.patch +++ b/coreutils-i18n.patch @@ -1,4 +1,4 @@ -From a81b096084524e9aeef5e8b81fc829eb9efec581 Mon Sep 17 00:00:00 2001 +From c0db8de625ca1ae0e0f4784c4eb2c779eae0047f Mon Sep 17 00:00:00 2001 From: rpm-build Date: Wed, 30 Aug 2023 17:19:58 +0200 Subject: [PATCH] coreutils-i18n.patch @@ -7,31 +7,39 @@ Subject: [PATCH] coreutils-i18n.patch bootstrap.conf | 2 + configure.ac | 6 + lib/linebuffer.h | 8 + + lib/mbchar.c | 23 ++ + lib/mbchar.h | 383 +++++++++++++++++ lib/mbfile.c | 20 + lib/mbfile.h | 283 +++++++++++++ + m4/mbchar.m4 | 15 + m4/mbfile.m4 | 16 + src/cut.c | 508 +++++++++++++++++++++-- src/expand-common.c | 114 ++++++ src/expand-common.h | 12 + src/expand.c | 90 +++- + src/fold.c | 311 ++++++++++++-- src/local.mk | 4 +- src/pr.c | 443 ++++++++++++++++++-- - src/sort.c | 791 +++++++++++++++++++++++++++++++++--- + src/sort.c | 790 +++++++++++++++++++++++++++++++++--- src/unexpand.c | 101 ++++- tests/Coreutils.pm | 3 + tests/expand/mb.sh | 183 +++++++++ tests/i18n/sort.sh | 29 ++ tests/local.mk | 4 + tests/misc/expand.pl | 42 ++ + tests/misc/fold.pl | 50 ++- tests/misc/sort-mb-tests.sh | 45 ++ tests/misc/unexpand.pl | 39 ++ tests/pr/pr-tests.pl | 49 +++ tests/sort/sort-merge.pl | 42 ++ tests/sort/sort.pl | 40 +- tests/unexpand/mb.sh | 172 ++++++++ - 25 files changed, 2879 insertions(+), 167 deletions(-) + 30 files changed, 3632 insertions(+), 195 deletions(-) + create mode 100644 lib/mbchar.c + create mode 100644 lib/mbchar.h create mode 100644 lib/mbfile.c create mode 100644 lib/mbfile.h + create mode 100644 m4/mbchar.m4 create mode 100644 m4/mbfile.m4 create mode 100644 tests/expand/mb.sh create mode 100644 tests/i18n/sort.sh @@ -39,23 +47,23 @@ Subject: [PATCH] coreutils-i18n.patch create mode 100644 tests/unexpand/mb.sh diff --git a/bootstrap.conf b/bootstrap.conf -index ec68ac8..ec2fbbe 100644 +index 94c164e..cecbf26 100644 --- a/bootstrap.conf +++ b/bootstrap.conf -@@ -171,6 +171,8 @@ gnulib_modules=" +@@ -166,6 +166,8 @@ gnulib_modules=" + maintainer-makefile malloc-gnu manywarnings - mbbuf + mbchar + mbfile mbrlen mbrtoc32 mbrtowc diff --git a/configure.ac b/configure.ac -index 5e99ef3..ac07577 100644 +index 775c4cc..e6b5c9c 100644 --- a/configure.ac +++ b/configure.ac -@@ -465,6 +465,12 @@ fi +@@ -504,6 +504,12 @@ fi # I'm leaving it here for now. This whole thing needs to be modernized... gl_WINSIZE_IN_PTEM @@ -94,6 +102,424 @@ index ca56f80..509b7e6 100644 }; /* Initialize linebuffer LINEBUFFER for use. */ +diff --git a/lib/mbchar.c b/lib/mbchar.c +new file mode 100644 +index 0000000..713c2f7 +--- /dev/null ++++ b/lib/mbchar.c +@@ -0,0 +1,23 @@ ++/* Copyright (C) 2001, 2006, 2009-2025 Free Software Foundation, Inc. ++ ++ This file is free software: you can redistribute it and/or modify ++ it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ This file is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public License ++ along with this program. If not, see . */ ++ ++ ++#include ++ ++#define MBCHAR_INLINE _GL_EXTERN_INLINE ++ ++#include ++ ++#include "mbchar.h" +diff --git a/lib/mbchar.h b/lib/mbchar.h +new file mode 100644 +index 0000000..d77168e +--- /dev/null ++++ b/lib/mbchar.h +@@ -0,0 +1,383 @@ ++/* Multibyte character data type. ++ Copyright (C) 2001, 2005-2007, 2009-2025 Free Software Foundation, Inc. ++ ++ This file is free software: you can redistribute it and/or modify ++ it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ This file is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public License ++ along with this program. If not, see . */ ++ ++/* Written by Bruno Haible . */ ++ ++/* A multibyte character is a short subsequence of a char* string, ++ representing a single 32-bit wide character. ++ ++ We use multibyte characters instead of 32-bit wide characters because ++ of the following goals: ++ 1) correct multibyte handling, i.e. operate according to the LC_CTYPE ++ locale, ++ 2) ease of maintenance, i.e. the maintainer needs not know all details ++ of the ISO C 99 standard, ++ 3) don't fail grossly if the input is not in the encoding set by the ++ locale, because often different encodings are in use in the same ++ countries (ISO-8859-1/UTF-8, EUC-JP/Shift_JIS, ...), ++ 4) fast in the case of ASCII characters. ++ ++ Multibyte characters are only accessed through the mb* macros. ++ ++ mb_ptr (mbc) ++ return a pointer to the beginning of the multibyte sequence. ++ ++ mb_len (mbc) ++ returns the number of bytes occupied by the multibyte sequence. ++ Always > 0. ++ ++ mb_iseq (mbc, sc) ++ returns true if mbc is the standard ASCII character sc. ++ ++ mb_isnul (mbc) ++ returns true if mbc is the nul character. ++ ++ mb_cmp (mbc1, mbc2) ++ returns a positive, zero, or negative value depending on whether mbc1 ++ sorts after, same or before mbc2. ++ ++ mb_casecmp (mbc1, mbc2) ++ returns a positive, zero, or negative value depending on whether mbc1 ++ sorts after, same or before mbc2, modulo upper/lowercase conversion. ++ ++ mb_equal (mbc1, mbc2) ++ returns true if mbc1 and mbc2 are equal. ++ ++ mb_caseequal (mbc1, mbc2) ++ returns true if mbc1 and mbc2 are equal modulo upper/lowercase conversion. ++ ++ mb_isalnum (mbc) ++ returns true if mbc is alphanumeric. ++ ++ mb_isalpha (mbc) ++ returns true if mbc is alphabetic. ++ ++ mb_isascii(mbc) ++ returns true if mbc is plain ASCII. ++ ++ mb_isblank (mbc) ++ returns true if mbc is a blank. ++ ++ mb_iscntrl (mbc) ++ returns true if mbc is a control character. ++ ++ mb_isdigit (mbc) ++ returns true if mbc is a decimal digit. ++ ++ mb_isgraph (mbc) ++ returns true if mbc is a graphic character. ++ ++ mb_islower (mbc) ++ returns true if mbc is lowercase. ++ ++ mb_isprint (mbc) ++ returns true if mbc is a printable character. ++ ++ mb_ispunct (mbc) ++ returns true if mbc is a punctuation character. ++ ++ mb_isspace (mbc) ++ returns true if mbc is a space character. ++ ++ mb_isupper (mbc) ++ returns true if mbc is uppercase. ++ ++ mb_isxdigit (mbc) ++ returns true if mbc is a hexadecimal digit. ++ ++ mb_width (mbc) ++ returns the number of columns on the output device occupied by mbc. ++ Always >= 0. ++ ++ mb_putc (mbc, stream) ++ outputs mbc on stream, a byte oriented FILE stream opened for output. ++ ++ mb_setascii (&mbc, sc) ++ assigns the standard ASCII character sc to mbc. ++ (Only available if the 'mbfile' module is in use.) ++ ++ mb_copy (&destmbc, &srcmbc) ++ copies srcmbc to destmbc. ++ ++ Here are the function prototypes of the macros. ++ ++ extern const char * mb_ptr (const mbchar_t mbc); ++ extern size_t mb_len (const mbchar_t mbc); ++ extern bool mb_iseq (const mbchar_t mbc, char sc); ++ extern bool mb_isnul (const mbchar_t mbc); ++ extern int mb_cmp (const mbchar_t mbc1, const mbchar_t mbc2); ++ extern int mb_casecmp (const mbchar_t mbc1, const mbchar_t mbc2); ++ extern bool mb_equal (const mbchar_t mbc1, const mbchar_t mbc2); ++ extern bool mb_caseequal (const mbchar_t mbc1, const mbchar_t mbc2); ++ extern bool mb_isalnum (const mbchar_t mbc); ++ extern bool mb_isalpha (const mbchar_t mbc); ++ extern bool mb_isascii (const mbchar_t mbc); ++ extern bool mb_isblank (const mbchar_t mbc); ++ extern bool mb_iscntrl (const mbchar_t mbc); ++ extern bool mb_isdigit (const mbchar_t mbc); ++ extern bool mb_isgraph (const mbchar_t mbc); ++ extern bool mb_islower (const mbchar_t mbc); ++ extern bool mb_isprint (const mbchar_t mbc); ++ extern bool mb_ispunct (const mbchar_t mbc); ++ extern bool mb_isspace (const mbchar_t mbc); ++ extern bool mb_isupper (const mbchar_t mbc); ++ extern bool mb_isxdigit (const mbchar_t mbc); ++ extern int mb_width (const mbchar_t mbc); ++ extern void mb_putc (const mbchar_t mbc, FILE *stream); ++ extern void mb_setascii (mbchar_t *new, char sc); ++ extern void mb_copy (mbchar_t *new, const mbchar_t *old); ++ */ ++ ++#ifndef _MBCHAR_H ++#define _MBCHAR_H 1 ++ ++/* This file uses _GL_INLINE_HEADER_BEGIN, _GL_INLINE. */ ++#if !_GL_CONFIG_H_INCLUDED ++ #error "Please include config.h first." ++#endif ++ ++#include ++#include ++ ++_GL_INLINE_HEADER_BEGIN ++#ifndef MBCHAR_INLINE ++# define MBCHAR_INLINE _GL_INLINE ++#endif ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++ ++/* The longest multibyte characters, nowadays, are 4 bytes long. ++ Regardless of the values of MB_CUR_MAX and MB_LEN_MAX. */ ++#define MBCHAR_BUF_SIZE 4 ++ ++struct mbchar ++{ ++ const char *ptr; /* pointer to current character */ ++ size_t bytes; /* number of bytes of current character, > 0 */ ++ bool wc_valid; /* true if wc is a valid 32-bit wide character */ ++ char32_t wc; /* if wc_valid: the current character */ ++#if defined GNULIB_MBFILE ++ char buf[MBCHAR_BUF_SIZE]; /* room for the bytes, used for file input only */ ++#endif ++}; ++ ++/* EOF (not a real character) is represented with bytes = 0 and ++ wc_valid = false. */ ++ ++typedef struct mbchar mbchar_t; ++ ++/* Access the current character. */ ++#define mb_ptr(mbc) ((mbc).ptr) ++#define mb_len(mbc) ((mbc).bytes) ++ ++/* Comparison of characters. */ ++#define mb_iseq(mbc, sc) ((mbc).wc_valid && (mbc).wc == (sc)) ++#define mb_isnul(mbc) ((mbc).wc_valid && (mbc).wc == 0) ++#define mb_cmp(mbc1, mbc2) \ ++ ((mbc1).wc_valid \ ++ ? ((mbc2).wc_valid \ ++ ? _GL_CMP ((mbc1).wc, (mbc2).wc) \ ++ : -1) \ ++ : ((mbc2).wc_valid \ ++ ? 1 \ ++ : (mbc1).bytes == (mbc2).bytes \ ++ ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) \ ++ : (mbc1).bytes < (mbc2).bytes \ ++ ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \ ++ : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1))) ++#define mb_casecmp(mbc1, mbc2) \ ++ ((mbc1).wc_valid \ ++ ? ((mbc2).wc_valid \ ++ ? _GL_CMP (c32tolower ((mbc1).wc), c32tolower ((mbc2).wc)) \ ++ : -1) \ ++ : ((mbc2).wc_valid \ ++ ? 1 \ ++ : (mbc1).bytes == (mbc2).bytes \ ++ ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) \ ++ : (mbc1).bytes < (mbc2).bytes \ ++ ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \ ++ : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1))) ++#define mb_equal(mbc1, mbc2) \ ++ ((mbc1).wc_valid && (mbc2).wc_valid \ ++ ? (mbc1).wc == (mbc2).wc \ ++ : (mbc1).bytes == (mbc2).bytes \ ++ && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0) ++#define mb_caseequal(mbc1, mbc2) \ ++ ((mbc1).wc_valid && (mbc2).wc_valid \ ++ ? c32tolower ((mbc1).wc) == c32tolower ((mbc2).wc) \ ++ : (mbc1).bytes == (mbc2).bytes \ ++ && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0) ++ ++/* , classification. */ ++#define mb_isascii(mbc) \ ++ ((mbc).wc_valid && (mbc).wc >= 0 && (mbc).wc <= 127) ++#define mb_isalnum(mbc) ((mbc).wc_valid && c32isalnum ((mbc).wc)) ++#define mb_isalpha(mbc) ((mbc).wc_valid && c32isalpha ((mbc).wc)) ++#define mb_isblank(mbc) ((mbc).wc_valid && c32isblank ((mbc).wc)) ++#define mb_iscntrl(mbc) ((mbc).wc_valid && c32iscntrl ((mbc).wc)) ++#define mb_isdigit(mbc) ((mbc).wc_valid && c32isdigit ((mbc).wc)) ++#define mb_isgraph(mbc) ((mbc).wc_valid && c32isgraph ((mbc).wc)) ++#define mb_islower(mbc) ((mbc).wc_valid && c32islower ((mbc).wc)) ++#define mb_isprint(mbc) ((mbc).wc_valid && c32isprint ((mbc).wc)) ++#define mb_ispunct(mbc) ((mbc).wc_valid && c32ispunct ((mbc).wc)) ++#define mb_isspace(mbc) ((mbc).wc_valid && c32isspace ((mbc).wc)) ++#define mb_isupper(mbc) ((mbc).wc_valid && c32isupper ((mbc).wc)) ++#define mb_isxdigit(mbc) ((mbc).wc_valid && c32isxdigit ((mbc).wc)) ++ ++/* Extra function. */ ++ ++/* Unprintable characters appear as a small box of width 1. */ ++#define MB_UNPRINTABLE_WIDTH 1 ++ ++MBCHAR_INLINE int ++mb_width_aux (char32_t wc) ++{ ++ int w = c32width (wc); ++ /* For unprintable characters, arbitrarily return 0 for control characters ++ and MB_UNPRINTABLE_WIDTH otherwise. */ ++ return (w >= 0 ? w : c32iscntrl (wc) ? 0 : MB_UNPRINTABLE_WIDTH); ++} ++ ++#define mb_width(mbc) \ ++ ((mbc).wc_valid ? mb_width_aux ((mbc).wc) : MB_UNPRINTABLE_WIDTH) ++ ++/* Output. */ ++#define mb_putc(mbc, stream) fwrite ((mbc).ptr, 1, (mbc).bytes, (stream)) ++ ++#if defined GNULIB_MBFILE ++/* Assignment. */ ++# define mb_setascii(mbc, sc) \ ++ ((mbc)->ptr = (mbc)->buf, (mbc)->bytes = 1, (mbc)->wc_valid = 1, \ ++ (mbc)->wc = (mbc)->buf[0] = (sc)) ++#endif ++ ++/* Copying a character. */ ++MBCHAR_INLINE void ++mb_copy (mbchar_t *new_mbc, const mbchar_t *old_mbc) ++{ ++#if defined GNULIB_MBFILE ++ if (old_mbc->ptr == &old_mbc->buf[0]) ++ { ++ memcpy (&new_mbc->buf[0], &old_mbc->buf[0], old_mbc->bytes); ++ new_mbc->ptr = &new_mbc->buf[0]; ++ } ++ else ++#endif ++ new_mbc->ptr = old_mbc->ptr; ++ new_mbc->bytes = old_mbc->bytes; ++ if ((new_mbc->wc_valid = old_mbc->wc_valid)) ++ new_mbc->wc = old_mbc->wc; ++} ++ ++ ++/* is_basic(c) tests whether the single-byte character c is ++ - in the ISO C "basic character set" or is one of '@', '$', and '`' ++ which ISO C 23 § 5.2.1.1.(1) guarantees to be single-byte and in ++ practice are safe to treat as basic in the execution character set, ++ or ++ - in the POSIX "portable character set", which ++ ++ equally guarantees to be single-byte. ++ This is a convenience function, and is in this file only to share code ++ between mbiter.h, mbuiter.h, and mbfile.h. */ ++#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ ++ && ('$' == 36) && ('%' == 37) && ('&' == 38) && ('\'' == 39) \ ++ && ('(' == 40) && (')' == 41) && ('*' == 42) && ('+' == 43) \ ++ && (',' == 44) && ('-' == 45) && ('.' == 46) && ('/' == 47) \ ++ && ('0' == 48) && ('1' == 49) && ('2' == 50) && ('3' == 51) \ ++ && ('4' == 52) && ('5' == 53) && ('6' == 54) && ('7' == 55) \ ++ && ('8' == 56) && ('9' == 57) && (':' == 58) && (';' == 59) \ ++ && ('<' == 60) && ('=' == 61) && ('>' == 62) && ('?' == 63) \ ++ && ('@' == 64) && ('A' == 65) && ('B' == 66) && ('C' == 67) \ ++ && ('D' == 68) && ('E' == 69) && ('F' == 70) && ('G' == 71) \ ++ && ('H' == 72) && ('I' == 73) && ('J' == 74) && ('K' == 75) \ ++ && ('L' == 76) && ('M' == 77) && ('N' == 78) && ('O' == 79) \ ++ && ('P' == 80) && ('Q' == 81) && ('R' == 82) && ('S' == 83) \ ++ && ('T' == 84) && ('U' == 85) && ('V' == 86) && ('W' == 87) \ ++ && ('X' == 88) && ('Y' == 89) && ('Z' == 90) && ('[' == 91) \ ++ && ('\\' == 92) && (']' == 93) && ('^' == 94) && ('_' == 95) \ ++ && ('`' == 96) && ('a' == 97) && ('b' == 98) && ('c' == 99) \ ++ && ('d' == 100) && ('e' == 101) && ('f' == 102) && ('g' == 103) \ ++ && ('h' == 104) && ('i' == 105) && ('j' == 106) && ('k' == 107) \ ++ && ('l' == 108) && ('m' == 109) && ('n' == 110) && ('o' == 111) \ ++ && ('p' == 112) && ('q' == 113) && ('r' == 114) && ('s' == 115) \ ++ && ('t' == 116) && ('u' == 117) && ('v' == 118) && ('w' == 119) \ ++ && ('x' == 120) && ('y' == 121) && ('z' == 122) && ('{' == 123) \ ++ && ('|' == 124) && ('}' == 125) && ('~' == 126) ++/* The character set is ISO-646, not EBCDIC. */ ++# define IS_BASIC_ASCII 1 ++ ++/* All locale encodings (see localcharset.h) map the characters 0x00..0x7F ++ to U+0000..U+007F, like ASCII, except for ++ CP864 different mapping of '%' ++ SHIFT_JIS different mappings of 0x5C, 0x7E ++ JOHAB different mapping of 0x5C ++ However, these characters in the range 0x20..0x7E are in the ISO C ++ "basic character set" and in the POSIX "portable character set", which ++ ISO C and POSIX guarantee to be single-byte. Thus, locales with these ++ encodings are not POSIX compliant. And they are most likely not in use ++ any more (as of 2023). */ ++# define is_basic(c) ((unsigned char) (c) < 0x80) ++ ++#else ++ ++MBCHAR_INLINE bool ++is_basic (char c) ++{ ++ switch (c) ++ { ++ case '\0': ++ case '\007': case '\010': ++ case '\t': case '\n': case '\v': case '\f': case '\r': ++ case ' ': case '!': case '"': case '#': case '$': case '%': ++ case '&': case '\'': case '(': case ')': case '*': ++ case '+': case ',': case '-': case '.': case '/': ++ case '0': case '1': case '2': case '3': case '4': ++ case '5': case '6': case '7': case '8': case '9': ++ case ':': case ';': case '<': case '=': case '>': ++ case '?': case '@': ++ case 'A': case 'B': case 'C': case 'D': case 'E': ++ case 'F': case 'G': case 'H': case 'I': case 'J': ++ case 'K': case 'L': case 'M': case 'N': case 'O': ++ case 'P': case 'Q': case 'R': case 'S': case 'T': ++ case 'U': case 'V': case 'W': case 'X': case 'Y': ++ case 'Z': ++ case '[': case '\\': case ']': case '^': case '_': case '`': ++ case 'a': case 'b': case 'c': case 'd': case 'e': ++ case 'f': case 'g': case 'h': case 'i': case 'j': ++ case 'k': case 'l': case 'm': case 'n': case 'o': ++ case 'p': case 'q': case 'r': case 's': case 't': ++ case 'u': case 'v': case 'w': case 'x': case 'y': ++ case 'z': case '{': case '|': case '}': case '~': ++ return 1; ++ default: ++ return 0; ++ } ++} ++ ++#endif ++ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++_GL_INLINE_HEADER_END ++ ++#endif /* _MBCHAR_H */ diff --git a/lib/mbfile.c b/lib/mbfile.c new file mode 100644 index 0000000..f4e3e77 @@ -409,6 +835,27 @@ index 0000000..c852f31 +_GL_INLINE_HEADER_END + +#endif /* _MBFILE_H */ +diff --git a/m4/mbchar.m4 b/m4/mbchar.m4 +new file mode 100644 +index 0000000..b76f1d7 +--- /dev/null ++++ b/m4/mbchar.m4 +@@ -0,0 +1,15 @@ ++# mbchar.m4 ++# serial 9 ++dnl Copyright (C) 2005-2007, 2009-2025 Free Software Foundation, Inc. ++dnl This file is free software; the Free Software Foundation ++dnl gives unlimited permission to copy and/or distribute it, ++dnl with or without modifications, as long as this notice is preserved. ++dnl This file is offered as-is, without any warranty. ++ ++dnl autoconf tests required for use of mbchar.m4 ++dnl From Bruno Haible. ++ ++AC_DEFUN([gl_MBCHAR], ++[ ++ AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS]) ++]) diff --git a/m4/mbfile.m4 b/m4/mbfile.m4 new file mode 100644 index 0000000..1d126e0 @@ -432,7 +879,7 @@ index 0000000..1d126e0 + : +]) diff --git a/src/cut.c b/src/cut.c -index f0effb9..36479d6 100644 +index b424997..c9f181c 100644 --- a/src/cut.c +++ b/src/cut.c @@ -27,6 +27,11 @@ @@ -1092,7 +1539,7 @@ index f0effb9..36479d6 100644 if (have_read_stdin && fclose (stdin) == EOF) diff --git a/src/expand-common.c b/src/expand-common.c -index 14dd804..0d8eaaa 100644 +index 732123f..fdbef3f 100644 --- a/src/expand-common.c +++ b/src/expand-common.c @@ -19,6 +19,7 @@ @@ -1103,7 +1550,7 @@ index 14dd804..0d8eaaa 100644 #include "system.h" #include "c-ctype.h" #include "fadvise.h" -@@ -132,6 +133,119 @@ set_increment_size (colno tabval) +@@ -123,6 +124,119 @@ set_increment_size (colno tabval) return ok; } @@ -1224,7 +1671,7 @@ index 14dd804..0d8eaaa 100644 to the list of tab stops. */ extern void diff --git a/src/expand-common.h b/src/expand-common.h -index 46ef4e3..e19469b 100644 +index fe6c8ed..80a1280 100644 --- a/src/expand-common.h +++ b/src/expand-common.h @@ -29,6 +29,18 @@ extern idx_t max_column_width; @@ -1401,11 +1848,414 @@ index 5ec7ce9..65ac315 100644 } } +diff --git a/src/fold.c b/src/fold.c +index b64aad4..a156337 100644 +--- a/src/fold.c ++++ b/src/fold.c +@@ -23,10 +23,32 @@ + #include + #include + ++/* Get mbstate_t, mbrtowc(), wcwidth(). */ ++#if HAVE_WCHAR_H ++# include ++#endif ++ ++/* Get iswprint(), iswblank(), wcwidth(). */ ++#if HAVE_WCTYPE_H ++# include ++#endif ++ + #include "system.h" + #include "fadvise.h" + #include "xdectoint.h" + ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC ++ installation; work around this configuration error. */ ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 ++# undef MB_LEN_MAX ++# define MB_LEN_MAX 16 ++#endif ++ ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ ++#if HAVE_MBRTOWC && defined mbstate_t ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) ++#endif ++ + #define TAB_WIDTH 8 + + /* The official name of this program (e.g., no 'g' prefix). */ +@@ -34,20 +56,41 @@ + + #define AUTHORS proper_name ("David MacKenzie") + ++#define FATAL_ERROR(Message) \ ++ do \ ++ { \ ++ error (0, 0, (Message)); \ ++ usage (2); \ ++ } \ ++ while (0) ++ ++enum operating_mode ++{ ++ /* Fold texts by columns that are at the given positions. */ ++ column_mode, ++ ++ /* Fold texts by bytes that are at the given positions. */ ++ byte_mode, ++ ++ /* Fold texts by characters that are at the given positions. */ ++ character_mode, ++}; ++ ++/* The argument shows current mode. (Default: column_mode) */ ++static enum operating_mode operating_mode; ++ + /* If nonzero, try to break on whitespace. */ + static bool break_spaces; + +-/* If nonzero, count bytes, not column positions. */ +-static bool count_bytes; +- + /* If nonzero, at least one of the files we read was standard input. */ + static bool have_read_stdin; + +-static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::"; ++static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; + + static struct option const longopts[] = + { + {"bytes", no_argument, nullptr, 'b'}, ++ {"characters", no_argument, nullptr, 'c'}, + {"spaces", no_argument, nullptr, 's'}, + {"width", required_argument, nullptr, 'w'}, + {GETOPT_HELP_OPTION_DECL}, +@@ -75,6 +118,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ + + fputs (_("\ + -b, --bytes count bytes rather than columns\n\ ++ -c, --characters count characters rather than columns\n\ + -s, --spaces break at spaces\n\ + -w, --width=WIDTH use WIDTH columns instead of 80\n\ + "), stdout); +@@ -92,7 +136,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ + static size_t + adjust_column (size_t column, char c) + { +- if (!count_bytes) ++ if (operating_mode != byte_mode) + { + if (c == '\b') + { +@@ -115,30 +159,14 @@ adjust_column (size_t column, char c) + to stdout, with maximum line length WIDTH. + Return true if successful. */ + +-static bool +-fold_file (char const *filename, size_t width) ++static void ++fold_text (FILE *istream, size_t width, int *saved_errno) + { +- FILE *istream; + int c; + size_t column = 0; /* Screen column where next char will go. */ + idx_t offset_out = 0; /* Index in 'line_out' for next char. */ + static char *line_out = nullptr; + static idx_t allocated_out = 0; +- int saved_errno; +- +- if (STREQ (filename, "-")) +- { +- istream = stdin; +- have_read_stdin = true; +- } +- else +- istream = fopen (filename, "r"); +- +- if (istream == nullptr) +- { +- error (0, errno, "%s", quotef (filename)); +- return false; +- } + + fadvise (istream, FADVISE_SEQUENTIAL); + +@@ -168,6 +196,15 @@ fold_file (char const *filename, size_t width) + bool found_blank = false; + idx_t logical_end = offset_out; + ++ /* If LINE_OUT has no wide character, ++ put a new wide character in LINE_OUT ++ if column is bigger than width. */ ++ if (offset_out == 0) ++ { ++ line_out[offset_out++] = c; ++ continue; ++ } ++ + /* Look for the last blank. */ + while (logical_end) + { +@@ -212,13 +249,224 @@ fold_file (char const *filename, size_t width) + line_out[offset_out++] = c; + } + +- saved_errno = errno; ++ *saved_errno = errno; + if (!ferror (istream)) +- saved_errno = 0; ++ *saved_errno = 0; + + if (offset_out) + fwrite (line_out, sizeof (char), offset_out, stdout); + ++} ++ ++#if HAVE_MBRTOWC ++static void ++fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) ++{ ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ ++ size_t buflen = 0; /* The length of the byte sequence in buf. */ ++ char *bufpos = buf; /* Next read position of BUF. */ ++ wint_t wc; /* A gotten wide character. */ ++ size_t mblength; /* The byte size of a multibyte character which shows ++ as same character as WC. */ ++ mbstate_t state, state_bak; /* State of the stream. */ ++ int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */ ++ ++ static char *line_out = NULL; ++ idx_t offset_out = 0; /* Index in `line_out' for next char. */ ++ static idx_t allocated_out = 0; ++ ++ int increment; ++ size_t column = 0; ++ ++ size_t last_blank_pos; ++ size_t last_blank_column; ++ int is_blank_seen; ++ int last_blank_increment = 0; ++ int is_bs_following_last_blank; ++ size_t bs_following_last_blank_num; ++ int is_cr_after_last_blank; ++ ++#define CLEAR_FLAGS \ ++ do \ ++ { \ ++ last_blank_pos = 0; \ ++ last_blank_column = 0; \ ++ is_blank_seen = 0; \ ++ is_bs_following_last_blank = 0; \ ++ bs_following_last_blank_num = 0; \ ++ is_cr_after_last_blank = 0; \ ++ } \ ++ while (0) ++ ++#define START_NEW_LINE \ ++ do \ ++ { \ ++ putchar ('\n'); \ ++ column = 0; \ ++ offset_out = 0; \ ++ CLEAR_FLAGS; \ ++ } \ ++ while (0) ++ ++ CLEAR_FLAGS; ++ memset (&state, '\0', sizeof(mbstate_t)); ++ ++ for (;; bufpos += mblength, buflen -= mblength) ++ { ++ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) ++ { ++ memmove (buf, bufpos, buflen); ++ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); ++ bufpos = buf; ++ } ++ ++ if (buflen < 1) ++ break; ++ ++ /* Get a wide character. */ ++ state_bak = state; ++ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); ++ ++ switch (mblength) ++ { ++ case (size_t)-1: ++ case (size_t)-2: ++ convfail++; ++ state = state_bak; ++ /* Fall through. */ ++ ++ case 0: ++ mblength = 1; ++ break; ++ } ++ ++rescan: ++ if (convfail) ++ increment = 1; ++ else if (wc == L'\n') ++ { ++ /* preserve newline */ ++ fwrite (line_out, sizeof(char), offset_out, stdout); ++ START_NEW_LINE; ++ continue; ++ } ++ else if (operating_mode == byte_mode) /* byte mode */ ++ increment = mblength; ++ else if (operating_mode == character_mode) /* character mode */ ++ increment = 1; ++ else /* column mode */ ++ { ++ switch (wc) ++ { ++ case L'\b': ++ increment = (column > 0) ? -1 : 0; ++ break; ++ ++ case L'\r': ++ increment = -1 * column; ++ break; ++ ++ case L'\t': ++ increment = 8 - column % 8; ++ break; ++ ++ default: ++ increment = wcwidth (wc); ++ increment = (increment < 0) ? 0 : increment; ++ } ++ } ++ ++ if (column + increment > width && break_spaces && last_blank_pos) ++ { ++ fwrite (line_out, sizeof(char), last_blank_pos, stdout); ++ putchar ('\n'); ++ ++ offset_out = offset_out - last_blank_pos; ++ column = column - last_blank_column + ((is_cr_after_last_blank) ++ ? last_blank_increment : bs_following_last_blank_num); ++ memmove (line_out, line_out + last_blank_pos, offset_out); ++ CLEAR_FLAGS; ++ goto rescan; ++ } ++ ++ if (column + increment > width && column != 0) ++ { ++ fwrite (line_out, sizeof(char), offset_out, stdout); ++ START_NEW_LINE; ++ goto rescan; ++ } ++ ++ if (allocated_out - offset_out <= mblength) ++ { ++ line_out = xpalloc (line_out, &allocated_out, 1, -1, sizeof *line_out); ++ } ++ ++ memcpy (line_out + offset_out, bufpos, mblength); ++ offset_out += mblength; ++ column += increment; ++ ++ if (is_blank_seen && !convfail && wc == L'\r') ++ is_cr_after_last_blank = 1; ++ ++ if (is_bs_following_last_blank && !convfail && wc == L'\b') ++ ++bs_following_last_blank_num; ++ else ++ is_bs_following_last_blank = 0; ++ ++ if (break_spaces && !convfail && iswblank (wc)) ++ { ++ last_blank_pos = offset_out; ++ last_blank_column = column; ++ is_blank_seen = 1; ++ last_blank_increment = increment; ++ is_bs_following_last_blank = 1; ++ bs_following_last_blank_num = 0; ++ is_cr_after_last_blank = 0; ++ } ++ } ++ ++ *saved_errno = errno; ++ if (!ferror (istream)) ++ *saved_errno = 0; ++ ++ if (offset_out) ++ fwrite (line_out, sizeof (char), offset_out, stdout); ++ ++} ++#endif ++ ++/* Fold file FILENAME, or standard input if FILENAME is "-", ++ to stdout, with maximum line length WIDTH. ++ Return true if successful. */ ++ ++static bool ++fold_file (char const *filename, size_t width) ++{ ++ FILE *istream; ++ int saved_errno; ++ ++ if (STREQ (filename, "-")) ++ { ++ istream = stdin; ++ have_read_stdin = true; ++ } ++ else ++ istream = fopen (filename, "r"); ++ ++ if (istream == nullptr) ++ { ++ error (0, errno, "%s", filename); ++ return false; ++ } ++ ++ /* Define how ISTREAM is being folded. */ ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ fold_multibyte_text (istream, width, &saved_errno); ++ else ++#endif ++ fold_text (istream, width, &saved_errno); ++ + if (STREQ (filename, "-")) + clearerr (istream); + else if (fclose (istream) != 0 && !saved_errno) +@@ -249,7 +497,8 @@ main (int argc, char **argv) + + atexit (close_stdout); + +- break_spaces = count_bytes = have_read_stdin = false; ++ operating_mode = column_mode; ++ break_spaces = have_read_stdin = false; + + while ((optc = getopt_long (argc, argv, shortopts, longopts, nullptr)) != -1) + { +@@ -258,7 +507,15 @@ main (int argc, char **argv) + switch (optc) + { + case 'b': /* Count bytes rather than columns. */ +- count_bytes = true; ++ if (operating_mode != column_mode) ++ FATAL_ERROR (_("only one way of folding may be specified")); ++ operating_mode = byte_mode; ++ break; ++ ++ case 'c': ++ if (operating_mode != column_mode) ++ FATAL_ERROR (_("only one way of folding may be specified")); ++ operating_mode = character_mode; + break; + + case 's': /* Break at word boundaries. */ diff --git a/src/local.mk b/src/local.mk -index a8ad6b4..b0e61ec 100644 +index 188dda1..7db5753 100644 --- a/src/local.mk +++ b/src/local.mk -@@ -490,8 +490,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) +@@ -478,8 +478,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) src_basenc_SOURCES = src/basenc.c src_basenc_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS) @@ -1415,9 +2265,9 @@ index a8ad6b4..b0e61ec 100644 +src_unexpand_SOURCES = src/unexpand.c src/expand-common.c lib/mbfile.c lib/mbchar.c src_wc_SOURCES = src/wc.c - if USE_AVX512_WC_LINECOUNT + if USE_AVX2_WC_LINECOUNT diff --git a/src/pr.c b/src/pr.c -index 10b8c52..079c86c 100644 +index e7081a0..19e0268 100644 --- a/src/pr.c +++ b/src/pr.c @@ -312,6 +312,24 @@ @@ -1764,7 +2614,7 @@ index 10b8c52..079c86c 100644 h_next = h + chars_per_column; } } -@@ -1751,9 +1875,9 @@ static void +@@ -1748,9 +1872,9 @@ static void align_column (COLUMN *p) { padding_not_printed = p->start_position; @@ -1776,7 +2626,7 @@ index 10b8c52..079c86c 100644 padding_not_printed = ANYWHERE; } -@@ -2030,13 +2154,13 @@ store_char (char c) +@@ -2024,13 +2148,13 @@ store_char (char c) /* May be too generous. */ buff = xpalloc (buff, &buff_allocated, 1, -1, sizeof *buff); } @@ -1792,7 +2642,7 @@ index 10b8c52..079c86c 100644 char *s; int num_width; -@@ -2053,22 +2177,24 @@ add_line_number (COLUMN *p) +@@ -2047,22 +2171,24 @@ add_line_number (COLUMN *p) /* Tabification is assumed for multiple columns, also for n-separators, but 'default n-separator = TAB' hasn't been given priority over equal column_width also specified by POSIX. */ @@ -1821,7 +2671,7 @@ index 10b8c52..079c86c 100644 output_position = POS_AFTER_TAB (chars_per_output_tab, output_position); } -@@ -2227,7 +2353,7 @@ print_white_space (void) +@@ -2221,7 +2347,7 @@ print_white_space (void) while (goal - h_old > 1 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) { @@ -1830,7 +2680,7 @@ index 10b8c52..079c86c 100644 h_old = h_new; } while (++h_old <= goal) -@@ -2247,6 +2373,7 @@ print_sep_string (void) +@@ -2241,6 +2367,7 @@ print_sep_string (void) { char const *s = col_sep_string; int l = col_sep_length; @@ -1838,7 +2688,7 @@ index 10b8c52..079c86c 100644 if (separators_not_printed <= 0) { -@@ -2258,6 +2385,7 @@ print_sep_string (void) +@@ -2252,6 +2379,7 @@ print_sep_string (void) { for (; separators_not_printed > 0; --separators_not_printed) { @@ -1846,7 +2696,7 @@ index 10b8c52..079c86c 100644 while (l-- > 0) { /* 3 types of sep_strings: spaces only, spaces and chars, -@@ -2271,12 +2399,15 @@ print_sep_string (void) +@@ -2265,12 +2393,15 @@ print_sep_string (void) } else { @@ -1863,7 +2713,7 @@ index 10b8c52..079c86c 100644 /* sep_string ends with some spaces */ if (spaces_not_printed > 0) print_white_space (); -@@ -2307,7 +2438,7 @@ print_clump (COLUMN *p, int n, char *clump) +@@ -2298,7 +2429,7 @@ print_clump (COLUMN *p, int n, char *clump) required number of tabs and spaces. */ static void @@ -1872,7 +2722,7 @@ index 10b8c52..079c86c 100644 { if (tabify_output) { -@@ -2331,6 +2462,74 @@ print_char (char c) +@@ -2322,6 +2453,74 @@ print_char (char c) putchar (c); } @@ -1947,7 +2797,7 @@ index 10b8c52..079c86c 100644 /* Skip to page PAGE before printing. PAGE may be larger than total number of pages. */ -@@ -2507,9 +2706,9 @@ read_line (COLUMN *p) +@@ -2498,9 +2697,9 @@ read_line (COLUMN *p) align_empty_cols = false; } @@ -1959,7 +2809,7 @@ index 10b8c52..079c86c 100644 padding_not_printed = ANYWHERE; } -@@ -2578,7 +2777,7 @@ print_stored (COLUMN *p) +@@ -2569,7 +2768,7 @@ print_stored (COLUMN *p) COLUMN *q; int line = p->current_line++; @@ -1968,7 +2818,7 @@ index 10b8c52..079c86c 100644 /* FIXME UMR: Uninitialized memory read: * This is occurring while in: -@@ -2590,7 +2789,7 @@ print_stored (COLUMN *p) +@@ -2581,7 +2780,7 @@ print_stored (COLUMN *p) xmalloc [xmalloc.c:94] init_store_cols [pr.c:1648] */ @@ -1977,7 +2827,7 @@ index 10b8c52..079c86c 100644 pad_vertically = true; -@@ -2610,9 +2809,9 @@ print_stored (COLUMN *p) +@@ -2601,9 +2800,9 @@ print_stored (COLUMN *p) } } @@ -1989,7 +2839,7 @@ index 10b8c52..079c86c 100644 padding_not_printed = ANYWHERE; } -@@ -2625,8 +2824,8 @@ print_stored (COLUMN *p) +@@ -2616,8 +2815,8 @@ print_stored (COLUMN *p) if (spaces_not_printed == 0) { output_position = p->start_position + end_vector[line]; @@ -2000,7 +2850,7 @@ index 10b8c52..079c86c 100644 } return true; -@@ -2645,7 +2844,7 @@ print_stored (COLUMN *p) +@@ -2636,7 +2835,7 @@ print_stored (COLUMN *p) number of characters is 1.) */ static int @@ -2009,7 +2859,7 @@ index 10b8c52..079c86c 100644 { unsigned char uc = c; char *s = clump_buff; -@@ -2655,10 +2854,10 @@ char_to_clump (char c) +@@ -2646,10 +2845,10 @@ char_to_clump (char c) int chars; int chars_per_c = 8; @@ -2022,7 +2872,7 @@ index 10b8c52..079c86c 100644 { width = TAB_WIDTH (chars_per_c, input_position); -@@ -2739,6 +2938,164 @@ char_to_clump (char c) +@@ -2730,6 +2929,164 @@ char_to_clump (char c) return chars; } @@ -2188,14 +3038,13 @@ index 10b8c52..079c86c 100644 looking for more options and printing the next batch of files. diff --git a/src/sort.c b/src/sort.c -index 05d00cc..eb51f20 100644 +index 7af1a25..d3dc684 100644 --- a/src/sort.c +++ b/src/sort.c -@@ -30,6 +30,15 @@ +@@ -29,6 +29,14 @@ + #include #include #include - #include -+ +#if HAVE_WCHAR_H +# include +#endif @@ -2207,7 +3056,7 @@ index 05d00cc..eb51f20 100644 #include "system.h" #include "argmatch.h" #include "assure.h" -@@ -160,14 +169,39 @@ static int thousands_sep; +@@ -158,14 +166,39 @@ static int thousands_sep; /* We currently ignore multi-byte grouping chars. */ static bool thousands_sep_ignored; @@ -2248,7 +3097,7 @@ index 05d00cc..eb51f20 100644 /* The kind of blanks for '-b' to skip in various options. */ enum blanktype { bl_start, bl_end, bl_both }; -@@ -344,13 +378,11 @@ static bool stable; +@@ -342,13 +375,11 @@ static bool stable; /* An int value outside char range. */ enum { NON_CHAR = CHAR_MAX + 1 }; @@ -2265,9 +3114,9 @@ index 05d00cc..eb51f20 100644 /* Flag to remove consecutive duplicate lines from the output. Only the last of a sequence of equal lines will be output. */ -@@ -386,6 +418,46 @@ struct tempnode - static struct tempnode *volatile temphead; - static struct tempnode *volatile *temptail = &temphead; +@@ -806,6 +837,46 @@ reap_all (void) + reap (-1); + } +/* Function pointers. */ +static void @@ -2312,7 +3161,7 @@ index 05d00cc..eb51f20 100644 /* Clean up any remaining temporary files. */ static void -@@ -1343,7 +1415,7 @@ zaptemp (char const *name) +@@ -1273,7 +1344,7 @@ zaptemp (char const *name) free (node); } @@ -2321,7 +3170,7 @@ index 05d00cc..eb51f20 100644 static int struct_month_cmp (void const *m1, void const *m2) -@@ -1358,7 +1430,7 @@ struct_month_cmp (void const *m1, void const *m2) +@@ -1288,7 +1359,7 @@ struct_month_cmp (void const *m1, void const *m2) /* Initialize the character class tables. */ static void @@ -2330,7 +3179,7 @@ index 05d00cc..eb51f20 100644 { size_t i; -@@ -1370,7 +1442,7 @@ inittables (void) +@@ -1300,7 +1371,7 @@ inittables (void) fold_toupper[i] = toupper (i); } @@ -2339,7 +3188,7 @@ index 05d00cc..eb51f20 100644 /* If we're not in the "C" locale, read different names for months. */ if (hard_LC_TIME) { -@@ -1450,6 +1522,84 @@ specify_nmerge (int oi, char c, char const *s) +@@ -1380,6 +1451,84 @@ specify_nmerge (int oi, char c, char const *s) xstrtol_fatal (e, oi, c, long_options, s); } @@ -2424,7 +3273,7 @@ index 05d00cc..eb51f20 100644 /* Specify the amount of main memory to use when sorting. */ static void specify_sort_size (int oi, char c, char const *s) -@@ -1676,7 +1826,7 @@ buffer_linelim (struct buffer const *buf) +@@ -1611,7 +1760,7 @@ buffer_linelim (struct buffer const *buf) by KEY in LINE. */ static char * @@ -2433,7 +3282,7 @@ index 05d00cc..eb51f20 100644 { char *ptr = line->text, *lim = ptr + line->length - 1; size_t sword = key->sword; -@@ -1685,10 +1835,10 @@ begfield (struct line const *line, struct keyfield const *key) +@@ -1620,10 +1769,10 @@ begfield (struct line const *line, struct keyfield const *key) /* The leading field separator itself is included in a field when -t is absent. */ @@ -2446,7 +3295,7 @@ index 05d00cc..eb51f20 100644 ++ptr; if (ptr < lim) ++ptr; -@@ -1718,12 +1868,71 @@ begfield (struct line const *line, struct keyfield const *key) +@@ -1653,12 +1802,71 @@ begfield (struct line const *line, struct keyfield const *key) return ptr; } @@ -2519,7 +3368,7 @@ index 05d00cc..eb51f20 100644 { char *ptr = line->text, *lim = ptr + line->length - 1; size_t eword = key->eword, echar = key->echar; -@@ -1738,10 +1947,10 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1673,10 +1881,10 @@ limfield (struct line const *line, struct keyfield const *key) 'beginning' is the first character following the delimiting TAB. Otherwise, leave PTR pointing at the first 'blank' character after the preceding field. */ @@ -2532,7 +3381,7 @@ index 05d00cc..eb51f20 100644 ++ptr; if (ptr < lim && (eword || echar)) ++ptr; -@@ -1787,10 +1996,10 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1722,10 +1930,10 @@ limfield (struct line const *line, struct keyfield const *key) */ /* Make LIM point to the end of (one byte past) the current field. */ @@ -2545,7 +3394,7 @@ index 05d00cc..eb51f20 100644 if (newlim) lim = newlim; } -@@ -1825,6 +2034,130 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1760,6 +1968,130 @@ limfield (struct line const *line, struct keyfield const *key) return ptr; } @@ -2676,7 +3525,7 @@ index 05d00cc..eb51f20 100644 /* Fill BUF reading from FP, moving buf->left bytes from the end of buf->buf to the beginning first. If EOF is reached and the file wasn't terminated by a newline, supply one. Set up BUF's line -@@ -1911,8 +2244,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) +@@ -1846,8 +2178,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) else { if (key->skipsblanks) @@ -2701,7 +3550,7 @@ index 05d00cc..eb51f20 100644 line->keybeg = line_start; } } -@@ -2050,12 +2397,10 @@ find_unit_order (char const *number) +@@ -1985,12 +2331,10 @@ find_unit_order (char const *number) ATTRIBUTE_PURE static int @@ -2717,7 +3566,7 @@ index 05d00cc..eb51f20 100644 int diff = find_unit_order (a) - find_unit_order (b); return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep)); -@@ -2067,7 +2412,7 @@ human_numcompare (char const *a, char const *b) +@@ -2002,7 +2346,7 @@ human_numcompare (char const *a, char const *b) ATTRIBUTE_PURE static int @@ -2726,7 +3575,7 @@ index 05d00cc..eb51f20 100644 { while (blanks[to_uchar (*a)]) a++; -@@ -2077,6 +2422,25 @@ numcompare (char const *a, char const *b) +@@ -2012,6 +2356,25 @@ numcompare (char const *a, char const *b) return strnumcmp (a, b, decimal_point, thousands_sep); } @@ -2752,7 +3601,7 @@ index 05d00cc..eb51f20 100644 static int nan_compare (long double a, long double b) { -@@ -2118,7 +2482,7 @@ general_numcompare (char const *sa, char const *sb) +@@ -2053,7 +2416,7 @@ general_numcompare (char const *sa, char const *sb) Return 0 if the name in S is not recognized. */ static int @@ -2761,7 +3610,7 @@ index 05d00cc..eb51f20 100644 { size_t lo = 0; size_t hi = MONTHS_PER_YEAR; -@@ -2457,15 +2821,14 @@ debug_key (struct line const *line, struct keyfield const *key) +@@ -2392,15 +2755,14 @@ debug_key (struct line const *line, struct keyfield const *key) char saved = *lim; *lim = '\0'; @@ -2779,7 +3628,7 @@ index 05d00cc..eb51f20 100644 else if (key->general_numeric) ignore_value (strtold (beg, &tighter_lim)); else if (key->numeric || key->human_numeric) -@@ -2611,7 +2974,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2546,7 +2908,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) /* Warn about significant leading blanks. */ bool implicit_skip = key_numeric (key) || key->month; bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ @@ -2788,7 +3637,7 @@ index 05d00cc..eb51f20 100644 && ((!key->skipsblanks && !implicit_skip) || (!key->skipsblanks && key->schar) || (!key->skipeblanks && key->echar))) -@@ -2659,9 +3022,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2594,9 +2956,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) bool number_locale_warned = false; if (basic_numeric_field_span) { @@ -2801,7 +3650,7 @@ index 05d00cc..eb51f20 100644 { error (0, 0, _("field separator %s is treated as a " -@@ -2672,9 +3035,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2607,9 +2969,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) } if (basic_numeric_field_span || general_numeric_field_span) { @@ -2814,7 +3663,7 @@ index 05d00cc..eb51f20 100644 { error (0, 0, _("field separator %s is treated as a " -@@ -2682,19 +3045,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2617,19 +2979,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) quote (((char []) {decimal_point, 0}))); number_locale_warned = true; } @@ -2838,8 +3687,8 @@ index 05d00cc..eb51f20 100644 } } -@@ -2746,11 +3109,87 @@ diff_reversed (int diff, bool reversed) - return reversed ? _GL_CMP (0, diff) : diff; +@@ -2681,11 +3043,87 @@ diff_reversed (int diff, bool reversed) + return reversed ? (diff < 0) - (diff > 0) : diff; } +#if HAVE_MBRTOWC @@ -2927,7 +3776,7 @@ index 05d00cc..eb51f20 100644 { struct keyfield *key = keylist; -@@ -2831,7 +3270,7 @@ keycompare (struct line const *a, struct line const *b) +@@ -2766,7 +3204,7 @@ keycompare (struct line const *a, struct line const *b) else if (key->human_numeric) diff = human_numcompare (ta, tb); else if (key->month) @@ -2936,7 +3785,7 @@ index 05d00cc..eb51f20 100644 else if (key->random) diff = compare_random (ta, tlena, tb, tlenb); else if (key->version) -@@ -2941,6 +3380,211 @@ keycompare (struct line const *a, struct line const *b) +@@ -2876,6 +3314,211 @@ keycompare (struct line const *a, struct line const *b) return diff_reversed (diff, key->reverse); } @@ -3148,7 +3997,7 @@ index 05d00cc..eb51f20 100644 /* Compare two lines A and B, returning negative, zero, or positive depending on whether A compares less than, equal to, or greater than B. */ -@@ -2968,7 +3612,7 @@ compare (struct line const *a, struct line const *b) +@@ -2903,7 +3546,7 @@ compare (struct line const *a, struct line const *b) diff = - NONZERO (blen); else if (blen == 0) diff = 1; @@ -3157,7 +4006,7 @@ index 05d00cc..eb51f20 100644 { /* xmemcoll0 is a performance enhancement as it will not unconditionally write '\0' after the -@@ -4340,6 +4984,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) +@@ -4291,6 +4934,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) break; case 'f': key->translate = fold_toupper; @@ -3165,7 +4014,7 @@ index 05d00cc..eb51f20 100644 break; case 'g': key->general_numeric = true; -@@ -4419,7 +5064,7 @@ main (int argc, char **argv) +@@ -4370,7 +5014,7 @@ main (int argc, char **argv) initialize_exit_failure (SORT_FAILURE); hard_LC_COLLATE = hard_locale (LC_COLLATE); @@ -3174,7 +4023,7 @@ index 05d00cc..eb51f20 100644 hard_LC_TIME = hard_locale (LC_TIME); #endif -@@ -4442,6 +5087,29 @@ main (int argc, char **argv) +@@ -4393,6 +5037,29 @@ main (int argc, char **argv) thousands_sep = NON_CHAR; } @@ -3204,7 +4053,7 @@ index 05d00cc..eb51f20 100644 have_read_stdin = false; inittables (); -@@ -4717,13 +5385,34 @@ main (int argc, char **argv) +@@ -4663,13 +5330,34 @@ main (int argc, char **argv) case 't': { @@ -3237,13 +4086,13 @@ index 05d00cc..eb51f20 100644 +#endif + if (newtab_length == 1 && optarg[1]) { - if (streq (optarg, "\\0")) + if (STREQ (optarg, "\\0")) - newtab = '\0'; + newtab[0] = '\0'; else { /* Provoke with 'sort -txx'. Complain about -@@ -4734,9 +5423,11 @@ main (int argc, char **argv) +@@ -4680,9 +5368,11 @@ main (int argc, char **argv) quote (optarg)); } } @@ -3700,10 +4549,10 @@ index 0000000..26c95de + +Exit $fail diff --git a/tests/local.mk b/tests/local.mk -index 53fc53e..0148422 100644 +index b68df41..0fe8193 100644 --- a/tests/local.mk +++ b/tests/local.mk -@@ -412,6 +412,8 @@ all_tests = \ +@@ -391,6 +391,8 @@ all_tests = \ tests/sort/sort-field-limit.sh \ tests/sort/sort-files0-from.pl \ tests/sort/sort-float.sh \ @@ -3712,7 +4561,7 @@ index 53fc53e..0148422 100644 tests/sort/sort-h-thousands-sep.sh \ tests/sort/sort-merge.pl \ tests/sort/sort-merge-fdlimit.sh \ -@@ -618,6 +620,7 @@ all_tests = \ +@@ -596,6 +598,7 @@ all_tests = \ tests/du/threshold.sh \ tests/du/trailing-slash.sh \ tests/du/two-args.sh \ @@ -3720,7 +4569,7 @@ index 53fc53e..0148422 100644 tests/id/gnu-zero-uids.sh \ tests/id/no-context.sh \ tests/id/context.sh \ -@@ -774,6 +777,7 @@ all_tests = \ +@@ -752,6 +755,7 @@ all_tests = \ tests/touch/read-only.sh \ tests/touch/relative.sh \ tests/touch/trailing-slash.sh \ @@ -3795,6 +4644,76 @@ index 4b07210..68b9ea1 100755 my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; +diff --git a/tests/misc/fold.pl b/tests/misc/fold.pl +index 877322e..ba889c8 100755 +--- a/tests/misc/fold.pl ++++ b/tests/misc/fold.pl +@@ -20,9 +20,17 @@ use strict; + + (my $program_name = $0) =~ s|.*/||; + ++my $prog = 'fold'; ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; +-my $prog = 'fold'; ++ ++# uncommented to enable multibyte paths ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; + + my @Tests = + ( +@@ -44,6 +52,46 @@ my @Tests = + {OUT=>"123456\n7890\nabcdef\nghij\n123456\n7890"}], + ); + ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether fold is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ ++@Tests = triple_test \@Tests; ++ ++# Remember that triple_test creates from each test with exactly one "IN" ++# file two more tests (.p and .r suffix on name) corresponding to reading ++# input from a file and from a pipe. The pipe-reading test would fail ++# due to a race condition about 1 in 20 times. ++# Remove the IN_PIPE version of the "output-is-input" test above. ++# The others aren't susceptible because they have three inputs each. ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + diff --git a/tests/misc/sort-mb-tests.sh b/tests/misc/sort-mb-tests.sh new file mode 100644 index 0000000..11836ba @@ -3847,7 +4766,7 @@ index 0000000..11836ba + +Exit $fail diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl -index bb7469c..c1dec95 100755 +index 27d9c17..4976335 100755 --- a/tests/misc/unexpand.pl +++ b/tests/misc/unexpand.pl @@ -27,6 +27,14 @@ my $limits = getlimits (); @@ -3865,7 +4784,7 @@ index bb7469c..c1dec95 100755 my @Tests = ( ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}], -@@ -132,6 +140,37 @@ my @Tests = +@@ -128,6 +136,37 @@ my @Tests = ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}], ); @@ -4033,7 +4952,7 @@ index a3204d3..40942a5 100755 my $verbose = $ENV{VERBOSE}; diff --git a/tests/sort/sort.pl b/tests/sort/sort.pl -index 5fa9d52..a66952a 100755 +index 2ee92c4..96c7965 100755 --- a/tests/sort/sort.pl +++ b/tests/sort/sort.pl @@ -24,10 +24,15 @@ my $prog = 'sort'; @@ -4053,7 +4972,7 @@ index 5fa9d52..a66952a 100755 # Since each test is run with a file name and with redirected stdin, # the name in the diagnostic is either the file name or "-". # Normalize each diagnostic to use '-'. -@@ -428,6 +433,38 @@ foreach my $t (@Tests) +@@ -423,6 +428,38 @@ foreach my $t (@Tests) } } @@ -4092,7 +5011,7 @@ index 5fa9d52..a66952a 100755 @Tests = triple_test \@Tests; # Remember that triple_test creates from each test with exactly one "IN" -@@ -437,6 +474,7 @@ foreach my $t (@Tests) +@@ -432,6 +469,7 @@ foreach my $t (@Tests) # Remove the IN_PIPE version of the "output-is-input" test above. # The others aren't susceptible because they have three inputs each. @Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; @@ -4279,5 +5198,5 @@ index 0000000..8a82d74 +LC_ALL=C unexpand in in > out || fail=1 +compare exp out > /dev/null 2>&1 || fail=1 -- -2.52.0 +2.50.0 diff --git a/coreutils-python3.patch b/coreutils-python3.patch index 447fdbc..e6ff471 100644 --- a/coreutils-python3.patch +++ b/coreutils-python3.patch @@ -1,4 +1,4 @@ -From 8927d505ecb5334f09c48ef98ef1f464f581d0f7 Mon Sep 17 00:00:00 2001 +From f1a6e8d840a28eb2ab7a488e0d06450b7192c76d Mon Sep 17 00:00:00 2001 From: rpm-build Date: Tue, 2 Apr 2024 14:11:26 +0100 Subject: [PATCH] coreutils-python3.patch @@ -10,10 +10,10 @@ Subject: [PATCH] coreutils-python3.patch 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/init.cfg b/init.cfg -index ac05f7b..26d9516 100644 +index 612d287..9a6fa2d 100644 --- a/init.cfg +++ b/init.cfg -@@ -601,10 +601,10 @@ seek_data_capable_() +@@ -597,10 +597,10 @@ seek_data_capable_() # Skip the current test if "." lacks d_type support. require_dirent_d_type_() { @@ -37,7 +37,7 @@ index 1a2f76f..42d3924 100644 # Intended to exit 0 only on Linux/GNU systems. import os diff --git a/tests/du/move-dir-while-traversing.sh b/tests/du/move-dir-while-traversing.sh -index adf482b..cf9214a 100755 +index 1d0a359..bd03542 100755 --- a/tests/du/move-dir-while-traversing.sh +++ b/tests/du/move-dir-while-traversing.sh @@ -21,8 +21,8 @@ print_ver_ du @@ -61,5 +61,5 @@ index adf482b..cf9214a 100755 import os,sys -- -2.51.0 +2.48.1 diff --git a/coreutils-selinux.patch b/coreutils-selinux.patch new file mode 100644 index 0000000..91bc5b5 --- /dev/null +++ b/coreutils-selinux.patch @@ -0,0 +1,87 @@ +From fc96cab095d704e8bf9934812dd8d6f87fbf4be4 Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Wed, 30 Aug 2023 17:19:58 +0200 +Subject: [PATCH] coreutils-selinux.patch + +--- + src/cp.c | 19 ++++++++++++++++++- + src/install.c | 12 +++++++++++- + 2 files changed, 29 insertions(+), 2 deletions(-) + +diff --git a/src/cp.c b/src/cp.c +index a0ec067..1169c6a 100644 +--- a/src/cp.c ++++ b/src/cp.c +@@ -996,7 +996,7 @@ main (int argc, char **argv) + selinux_enabled = (0 < is_selinux_enabled ()); + cp_option_init (&x); + +- while ((c = getopt_long (argc, argv, "abdfHilLnprst:uvxPRS:TZ", ++ while ((c = getopt_long (argc, argv, "abcdfHilLnprst:uvxPRS:TZ", + long_opts, nullptr)) + != -1) + { +@@ -1048,6 +1048,23 @@ main (int argc, char **argv) + copy_contents = true; + break; + ++ case 'c': ++ fprintf (stderr, "%s: warning: option '-c' is deprecated," ++ " please use '--preserve=context' instead\n", argv[0]); ++ if (x.set_security_context) ++ { ++ fprintf (stderr, ++ "%s: cannot force target context and preserve it\n", ++ argv[0]); ++ exit (1); ++ } ++ else if (selinux_enabled) ++ { ++ x.preserve_security_context = true; ++ x.require_preserve_context = true; ++ } ++ break; ++ + case 'd': + x.preserve_links = true; + x.dereference = DEREF_NEVER; +diff --git a/src/install.c b/src/install.c +index b3b26ab..2d2f072 100644 +--- a/src/install.c ++++ b/src/install.c +@@ -807,7 +807,7 @@ main (int argc, char **argv) + dir_arg = false; + umask (0); + +- while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pt:TvS:Z", long_options, ++ while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pPt:TvS:Z", long_options, + nullptr)) + != -1) + { +@@ -872,6 +872,9 @@ main (int argc, char **argv) + no_target_directory = true; + break; + ++ case 'P': ++ fprintf (stderr, "%s: warning: option '-P' is deprecated," ++ " please use '--preserve-context' instead\n", argv[0]); + case PRESERVE_CONTEXT_OPTION: + if (! selinux_enabled) + { +@@ -879,6 +882,13 @@ main (int argc, char **argv) + "this kernel is not SELinux-enabled")); + break; + } ++ if (x.set_security_context) ++ { ++ fprintf (stderr, ++ "%s: cannot force target context and preserve it\n", ++ argv[0]); ++ exit (1); ++ } + x.preserve_security_context = true; + use_default_selinux_context = false; + break; +-- +2.48.1 + diff --git a/coreutils.spec b/coreutils.spec index 6712263..d6b3632 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,7 +1,7 @@ Summary: A set of basic GNU tools commonly used in shell scripts Name: coreutils -Version: 9.9 -Release: 2%{?dist} +Version: 9.7 +Release: 6%{?dist} # some used parts of gnulib are under various variants of LGPL License: GPL-3.0-or-later AND GFDL-1.3-no-invariants-or-later AND LGPL-2.1-or-later AND LGPL-3.0-or-later Url: https://www.gnu.org/software/coreutils/ @@ -32,17 +32,29 @@ Patch103: coreutils-python3.patch # df --direct Patch104: coreutils-df-direct.patch -# gnulib C23 support -# https://github.com/coreutils/gnulib/commit/df17f4f37ed3ca373d23ad42eae51122bdb96626 -Patch105: coreutils-9.9-gnulib-c23.patch +# cp/mv: do not fail when copying of trivial NFSv4 ACLs fails (rhbz#2363149) +# https://git.savannah.gnu.org/cgit/gnulib.git/patch?id=8a356b77717a2e4f735ec06e326880ca1f61aadb +# https://git.savannah.gnu.org/cgit/gnulib.git/patch?id=955360a66c99bdd9ac3688519a8b521b06958fd3 +Patch105: coreutils-9.6-cp-improve-nfsv4-acl-support.patch -# fix cut test failure on aarch64 rawhide (rhbz#2424302) -# https://github.com/coreutils/coreutils/commit/95044cb5eaea83d02f768feb5ab79fcf5e6ad782 -Patch106: coreutils-9.9-fix-cut-test-aarch64.patch +# sort: fix buffer under-read (CVE-2025-5278) +# https://cgit.git.savannah.gnu.org/cgit/coreutils.git/patch/?id=8c9602e3a145e9596dc1a63c6ed67865814b6633 +Patch106: coreutils-CVE-2025-5278.patch + +# stty: add support for arbitrary baud rates (rhbz#2375439) +# https://cgit.git.savannah.gnu.org/cgit/coreutils.git/patch/?id=357fda90d15fd3f7dba61e1ab322b183a48d0081 +# https://cgit.git.savannah.gnu.org/cgit/coreutils.git/patch/?id=efaec8078142996d958b6720b85a13b12497c3d0 +# https://cgit.git.savannah.gnu.org/cgit/coreutils.git/patch/?id=b7db7757831e93ca44ae59e1921bc4ebbc87974f +# https://cgit.git.savannah.gnu.org/cgit/coreutils.git/patch/?id=8b05eca972f70858749a946ac24f08d0718c1be6 +# https://cgit.git.savannah.gnu.org/cgit/coreutils.git/patch/?id=3d35b3c0e56bd556c90dc98c3e5e2e7289b0eb0d +Patch107: coreutils-9.7-stty-arbitrary-baud-rates.patch # (sb) lin18nux/lsb compliance - multibyte functionality patch Patch800: coreutils-i18n.patch +# downstream SELinux options deprecated since 2009 +Patch950: coreutils-selinux.patch + Conflicts: filesystem < 3 # To avoid clobbering installs @@ -164,7 +176,7 @@ find tests -name '*.sh' -perm 0644 -print -exec chmod 0755 '{}' '+' # FIXME: Force a newer gettext version to workaround `autoreconf -i` errors # with coreutils 9.6 and bundled gettext 0.19.2 from gettext-common-devel. -sed -i "s/0.19.2/$(rpm -q --queryformat '%%{VERSION}\n' gettext-devel)/" bootstrap.conf configure.ac +sed -i 's/0.19.2/0.22.5/' bootstrap.conf configure.ac autoreconf -fiv @@ -286,22 +298,9 @@ rm -f $RPM_BUILD_ROOT%{_infodir}/dir %license COPYING %changelog -* Tue Jan 13 2026 Lukáš Zaoral - 9.9-2 -- fix cut test failure on aarch64 rawhide (rhbz#2424302) - -* Wed Nov 26 2025 Lukáš Zaoral - 9.9-1 -- rebase to latest upstream release (rhbz#2413803) - -* Mon Sep 29 2025 Lukáš Zaoral - 9.8-3 +* Mon Sep 29 2025 Lukáš Zaoral - 9.7-6 - require gnulib-l10n for translations of gnulib messages (rhbz#2393892) -* Thu Sep 25 2025 Lukáš Zaoral - 9.8-2 -- tail: fix tailing larger number of lines in regular files (rhbz#2398008) - -* Wed Sep 24 2025 Lukáš Zaoral - 9.8-1 -- rebase to latest upstream release (rhbz#2397467) -- remove downstream patch for selinux options deprecated since 2009 - * Wed Jul 23 2025 Fedora Release Engineering - 9.7-5 - Rebuilt for https://fedoraproject.org/wiki/Fedora_43_Mass_Rebuild diff --git a/sources b/sources index 0952ab1..0e9a66d 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -SHA512 (coreutils-9.9.tar.xz.sig) = 0a3dfdfa6b4234e2e1d42142269f959bdf3cf8f6605a50270a27eff84dd22588f182121f7dd3eeb04be45f5109d02690215065b3d3b43882874d0e165a1435d0 -SHA512 (coreutils-9.9.tar.xz) = e7b0e59f7732d2c098ea4934014f470248bd5c4764210e9200a698010a8e3b95bbb26e543f0cd73ed5a4b8e1f8cda932c73f39954d68175e4deaa47526610c65 +SHA512 (coreutils-9.7.tar.xz) = fe81e6ba4fb492095153d5baac1eca8f07ece0957849de746a2a858cf007893cc2ded595a31a5e5d43d13216cc44b9d74a3245d9f23221ecc8cd00f428f27414 +SHA512 (coreutils-9.7.tar.xz.sig) = 48d86a19cee3c153f01f7478847f4621685c02e59942540bb20b30e314df05230817b87d0e73acd953e79fab35718e5bea57f25fe511a2c275a85ced4b317bae