diff --git a/coreutils-8.32-DIR_COLORS.patch b/coreutils-8.32-DIR_COLORS.patch index ee037bf..37ce3e6 100644 --- a/coreutils-8.32-DIR_COLORS.patch +++ b/coreutils-8.32-DIR_COLORS.patch @@ -1,4 +1,4 @@ -From c7b13f5e1a7ad012c510a8bdd5a8943ab4b55833 Mon Sep 17 00:00:00 2001 +From bca11e30e8a6281a8cbddc9fb196dd86ab09c955 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Fri, 17 Jun 2016 16:58:18 +0200 Subject: [PATCH] downstream changes to default DIR_COLORS @@ -9,7 +9,7 @@ Subject: [PATCH] downstream changes to default DIR_COLORS 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/DIR_COLORS b/DIR_COLORS -index b465771..ad42b09 100644 +index 540f6cd..b4785b6 100644 --- a/DIR_COLORS +++ b/DIR_COLORS @@ -1,3 +1,7 @@ @@ -30,17 +30,17 @@ index b465771..ad42b09 100644 # =================================================================== # Terminal filters # =================================================================== -@@ -69,7 +76,7 @@ DOOR 01;35 # door +@@ -70,7 +77,7 @@ DOOR 01;35 # door BLK 40;33;01 # block device driver CHR 40;33;01 # character device driver ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file ... -MISSING 00 # ... and the files they point to +MISSING 01;37;41 # ... and the files they point to - SETUID 37;41 # file that is setuid (u+s) - SETGID 30;43 # file that is setgid (g+s) - CAPABILITY 00 # file with capability (very expensive to lookup) + SETUID 37;41 # regular file that is setuid (u+s) + SETGID 30;43 # regular file that is setgid (g+s) + CAPABILITY 00 # regular file with capability (very expensive to lookup) diff --git a/DIR_COLORS.lightbgcolor b/DIR_COLORS.lightbgcolor -index eab6258..1627b63 100644 +index e3b0ec3..39a0a4c 100644 --- a/DIR_COLORS.lightbgcolor +++ b/DIR_COLORS.lightbgcolor @@ -1,3 +1,9 @@ @@ -63,7 +63,7 @@ index eab6258..1627b63 100644 # =================================================================== # Terminal filters # =================================================================== -@@ -59,17 +68,17 @@ TERM xterm* +@@ -60,17 +69,17 @@ TERM xterm* #NORMAL 00 # no color code at all #FILE 00 # regular file: use no color at all RESET 0 # reset to "normal" color @@ -83,18 +83,18 @@ index eab6258..1627b63 100644 ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file ... -MISSING 00 # ... and the files they point to +MISSING 01;37;41 # ... and the files they point to - SETUID 37;41 # file that is setuid (u+s) - SETGID 30;43 # file that is setgid (g+s) - CAPABILITY 00 # file with capability (very expensive to lookup) -@@ -78,7 +87,7 @@ OTHER_WRITABLE 34;42 # dir that is other-writable (o+w) and not sticky + SETUID 37;41 # regular file that is setuid (u+s) + SETGID 30;43 # regular file that is setgid (g+s) + CAPABILITY 00 # regular file with capability (very expensive to lookup) +@@ -79,7 +88,7 @@ OTHER_WRITABLE 34;42 # dir that is other-writable (o+w) and not sticky STICKY 37;44 # dir with the sticky bit set (+t) and not other-writable - # This is for files with execute permission: + # This is for regular files with execute permission: -EXEC 01;32 +EXEC 00;32 # =================================================================== # File extension attributes -- -2.34.1 +2.49.0 diff --git a/coreutils-9.4-CVE-2024-0684.patch b/coreutils-9.4-CVE-2024-0684.patch deleted file mode 100644 index 64583af..0000000 --- a/coreutils-9.4-CVE-2024-0684.patch +++ /dev/null @@ -1,31 +0,0 @@ -From c4c5ed8f4e9cd55a12966d4f520e3a13101637d9 Mon Sep 17 00:00:00 2001 -From: Paul Eggert -Date: Tue, 16 Jan 2024 13:48:32 -0800 -Subject: [PATCH] split: do not shrink hold buffer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -* src/split.c (line_bytes_split): Do not shrink hold buffer. -If it’s large for this batch it’s likely to be large for the next -batch, and for ‘split’ it’s not worth the complexity/CPU hassle to -shrink it. Do not assume hold_size can be bufsize. ---- - src/split.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/src/split.c b/src/split.c -index 64020c859..037960a59 100644 ---- a/src/split.c -+++ b/src/split.c -@@ -809,10 +809,7 @@ line_bytes_split (intmax_t n_bytes, char *buf, idx_t bufsize) - { - cwrite (n_out == 0, hold, n_hold); - n_out += n_hold; -- if (n_hold > bufsize) -- hold = xirealloc (hold, bufsize); - n_hold = 0; -- hold_size = bufsize; - } - - /* Output to eol if present. */ diff --git a/coreutils-9.4-systemd-coredump.patch b/coreutils-9.4-systemd-coredump.patch deleted file mode 100644 index 13f69e0..0000000 --- a/coreutils-9.4-systemd-coredump.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 2616c6be1c244424617997151c67bcab2dacbcfe Mon Sep 17 00:00:00 2001 -From: rpm-build -Date: Thu, 31 Aug 2023 14:34:05 +0200 -Subject: [PATCH] coreutils-9.4-systemd-coredump.patch - -Cherry picked from gnulib upstream commits: -* 1e6a26f9312bb47e070f94b17b14dc1a6ffbb74f ("readutmp: fix core dump if --enable-systemd") -* 3af1d7b0ce3a8e3ae565e7cea10cee6fd7cb8109 ("readutmp: Fix memory leak introduced by last commit.") ---- - lib/readutmp.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/lib/readutmp.c b/lib/readutmp.c -index 0173b7e..ec09feb 100644 ---- a/lib/readutmp.c -+++ b/lib/readutmp.c -@@ -795,7 +795,7 @@ read_utmp_from_systemd (idx_t *n_entries, STRUCT_UTMP **utmp_buf, int options) - { - char **sessions; - int num_sessions = sd_get_sessions (&sessions); -- if (num_sessions >= 0) -+ if (num_sessions >= 0 && sessions != NULL) - { - char **session_ptr; - for (session_ptr = sessions; *session_ptr != NULL; session_ptr++) --- -2.41.0 - diff --git a/coreutils-9.4-tail-64k-pages.patch b/coreutils-9.4-tail-64k-pages.patch deleted file mode 100644 index 1165f7b..0000000 --- a/coreutils-9.4-tail-64k-pages.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 73d119f4f8052a9fb6cef13cd9e75d5a4e23311a Mon Sep 17 00:00:00 2001 -From: dann frazier -Date: Wed, 29 Nov 2023 18:32:34 -0700 -Subject: [PATCH] tail: fix tailing sysfs files where PAGE_SIZE > BUFSIZ - -* src/tail.c (file_lines): Ensure we use a buffer size >= PAGE_SIZE when -searching backwards to avoid seeking within a file, -which on sysfs files is accepted but also returns no data. -* tests/tail/tail-sysfs.sh: Add a new test. -* tests/local.mk: Reference the new test. -* NEWS: Mention the bug fix. -Fixes https://bugs.gnu.org/67490 - -Upstream-commit: 73d119f4f8052a9fb6cef13cd9e75d5a4e23311a -Cherry-picked-by: Lukáš Zaoral ---- - src/tail.c | 57 +++++++++++++++++++++++++++++----------- - tests/local.mk | 1 + - tests/tail/tail-sysfs.sh | 34 ++++++++++++++++++++++++ - 3 files changed, 77 insertions(+), 15 deletions(-) - create mode 100755 tests/tail/tail-sysfs.sh - -diff --git a/src/tail.c b/src/tail.c -index c45f3b65a..6979e0ba3 100644 ---- a/src/tail.c -+++ b/src/tail.c -@@ -208,6 +208,9 @@ static int nbpids = 0; - that is writing to all followed files. */ - static pid_t pid; - -+/* Used to determine the buffer size when scanning backwards in a file. */ -+static idx_t page_size; -+ - /* True if we have ever read standard input. */ - static bool have_read_stdin; - -@@ -515,22 +518,40 @@ xlseek (int fd, off_t offset, int whence, char const *filename) - Return true if successful. */ - - static bool --file_lines (char const *pretty_filename, int fd, uintmax_t n_lines, -- off_t start_pos, off_t end_pos, uintmax_t *read_pos) -+file_lines (char const *pretty_filename, int fd, struct stat const *sb, -+ uintmax_t n_lines, off_t start_pos, off_t end_pos, -+ uintmax_t *read_pos) - { -- char buffer[BUFSIZ]; -+ char *buffer; - size_t bytes_read; -+ blksize_t bufsize = BUFSIZ; - off_t pos = end_pos; -+ bool ok = true; - - if (n_lines == 0) - return true; - -+ /* Be careful with files with sizes that are a multiple of the page size, -+ as on /proc or /sys file systems these files accept seeking to within -+ the file, but then return no data when read. So use a buffer that's -+ at least PAGE_SIZE to avoid seeking within such files. -+ -+ We could also indirectly use a large enough buffer through io_blksize() -+ however this would be less efficient in the common case, as it would -+ generally pick a larger buffer size, resulting in reading more data -+ from the end of the file. */ -+ affirm (S_ISREG (sb->st_mode)); -+ if (sb->st_size % page_size == 0) -+ bufsize = MAX (BUFSIZ, page_size); -+ -+ buffer = xmalloc (bufsize); -+ - /* Set 'bytes_read' to the size of the last, probably partial, buffer; -- 0 < 'bytes_read' <= 'BUFSIZ'. */ -- bytes_read = (pos - start_pos) % BUFSIZ; -+ 0 < 'bytes_read' <= 'bufsize'. */ -+ bytes_read = (pos - start_pos) % bufsize; - if (bytes_read == 0) -- bytes_read = BUFSIZ; -- /* Make 'pos' a multiple of 'BUFSIZ' (0 if the file is short), so that all -+ bytes_read = bufsize; -+ /* Make 'pos' a multiple of 'bufsize' (0 if the file is short), so that all - reads will be on block boundaries, which might increase efficiency. */ - pos -= bytes_read; - xlseek (fd, pos, SEEK_SET, pretty_filename); -@@ -538,7 +559,8 @@ file_lines (char const *pretty_filename, int fd, uintmax_t n_lines, - if (bytes_read == SAFE_READ_ERROR) - { - error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); -- return false; -+ ok = false; -+ goto free_buffer; - } - *read_pos = pos + bytes_read; - -@@ -565,7 +587,7 @@ file_lines (char const *pretty_filename, int fd, uintmax_t n_lines, - xwrite_stdout (nl + 1, bytes_read - (n + 1)); - *read_pos += dump_remainder (false, pretty_filename, fd, - end_pos - (pos + bytes_read)); -- return true; -+ goto free_buffer; - } - } - -@@ -577,23 +599,26 @@ file_lines (char const *pretty_filename, int fd, uintmax_t n_lines, - xlseek (fd, start_pos, SEEK_SET, pretty_filename); - *read_pos = start_pos + dump_remainder (false, pretty_filename, fd, - end_pos); -- return true; -+ goto free_buffer; - } -- pos -= BUFSIZ; -+ pos -= bufsize; - xlseek (fd, pos, SEEK_SET, pretty_filename); - -- bytes_read = safe_read (fd, buffer, BUFSIZ); -+ bytes_read = safe_read (fd, buffer, bufsize); - if (bytes_read == SAFE_READ_ERROR) - { - error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); -- return false; -+ ok = false; -+ goto free_buffer; - } - - *read_pos = pos + bytes_read; - } - while (bytes_read > 0); - -- return true; -+free_buffer: -+ free (buffer); -+ return ok; - } - - /* Print the last N_LINES lines from the end of the standard input, -@@ -1915,7 +1940,7 @@ tail_lines (char const *pretty_filename, int fd, uintmax_t n_lines, - { - *read_pos = end_pos; - if (end_pos != 0 -- && ! file_lines (pretty_filename, fd, n_lines, -+ && ! file_lines (pretty_filename, fd, &stats, n_lines, - start_pos, end_pos, read_pos)) - return false; - } -@@ -2337,6 +2362,8 @@ main (int argc, char **argv) - - atexit (close_stdout); - -+ page_size = getpagesize (); -+ - have_read_stdin = false; - - count_lines = true; -diff --git a/tests/local.mk b/tests/local.mk -index db4ee7ed8..bf03238c3 100644 ---- a/tests/local.mk -+++ b/tests/local.mk -@@ -257,6 +257,7 @@ all_tests = \ - tests/seq/seq-precision.sh \ - tests/head/head.pl \ - tests/head/head-elide-tail.pl \ -+ tests/tail/tail-sysfs.sh \ - tests/tail/tail-n0f.sh \ - tests/ls/ls-misc.pl \ - tests/date/date.pl \ -diff --git a/tests/tail/tail-sysfs.sh b/tests/tail/tail-sysfs.sh -new file mode 100755 -index 000000000..00874b3dc ---- /dev/null -+++ b/tests/tail/tail-sysfs.sh -@@ -0,0 +1,34 @@ -+#!/bin/sh -+# sysfs files have weird properties that can be influenced by page size -+ -+# Copyright 2023 Free Software Foundation, Inc. -+ -+# This program is free software: you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation, either version 3 of the License, or -+# (at your option) any later version. -+ -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+ -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src -+print_ver_ tail -+ -+file='/sys/kernel/profiling' -+ -+test -r "$file" || skip_ "No $file file" -+ -+cp -f "$file" exp || framework_failure_ -+ -+tail -n1 "$file" > out || fail=1 -+compare exp out || fail=1 -+ -+tail -c2 "$file" > out || fail=1 -+compare exp out || fail=1 -+ -+Exit $fail diff --git a/coreutils-9.9-fix-cut-test-aarch64.patch b/coreutils-9.9-fix-cut-test-aarch64.patch new file mode 100644 index 0000000..600f87b --- /dev/null +++ b/coreutils-9.9-fix-cut-test-aarch64.patch @@ -0,0 +1,28 @@ +From 95044cb5eaea83d02f768feb5ab79fcf5e6ad782 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?P=C3=A1draig=20Brady?= +Date: Mon, 22 Dec 2025 17:12:48 +0000 +Subject: [PATCH] tests: avoid false failure due to ulimit on aarch64 + +* tests/cut/cut-huge-range.sh: Add an extra 1MiB headroom, +which was seen with aarch64. +Reported at https://bugzilla.redhat.com/2424302 + +Cherry-picked-by: Lukáš Zaoral +Upstream-commit: 95044cb5eaea83d02f768feb5ab79fcf5e6ad782 +--- + tests/cut/cut-huge-range.sh | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tests/cut/cut-huge-range.sh b/tests/cut/cut-huge-range.sh +index 4bd1b129d8..98d7e8f0b9 100755 +--- a/tests/cut/cut-huge-range.sh ++++ b/tests/cut/cut-huge-range.sh +@@ -22,6 +22,7 @@ getlimits_ + + vm=$(get_min_ulimit_v_ returns_ 0 cut -b1 /dev/null) \ + || skip_ 'shell lacks ulimit, or ASAN enabled' ++vm=$(($vm+1000)) # https://bugzilla.redhat.com/2424302 + + # Ensure we can cut up to our sentinel value. + # Don't use expr to subtract one, + diff --git a/coreutils-9.9-gnulib-c23.patch b/coreutils-9.9-gnulib-c23.patch new file mode 100644 index 0000000..82e3899 --- /dev/null +++ b/coreutils-9.9-gnulib-c23.patch @@ -0,0 +1,169 @@ +From 891761bca1aa78336e5b18c121075b6e4696c5d4 Mon Sep 17 00:00:00 2001 +From: Paul Eggert +Date: Sun, 23 Nov 2025 00:50:40 -0800 +Subject: [PATCH] Port to C23 qualifier-generic fns like strchr +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This ports Gnulib to strict C23 platforms that reject code +like ‘char *q = strchr (P, 'x');’ when P is a pointer to const, +because in C23 strchr is a qualifier-generic function so +strchr (P, 'x') returns char const *. +This patch does not attempt to do the following two things, +which might be useful in the future: +1. When compiling on non-C23 platforms, check user code for +portability to platforms that define qualifier-generic functions. +2. Port Gnulib to platforms that have qualifier-generic functions +not listed in the C23 standard, e.g., strchrnul. I don’t know +of any such platforms. +* lib/mbschr.c (mbschr): +* lib/memchr2.c (memchr2): +Port to C23, where functions like strchr are qualifier-generic. +* lib/c++defs.h (_GL_FUNCDECL_SYS_NAME): New macro. +* lib/c++defs.h (_GL_FUNCDECL_SYS): +* lib/stdlib.in.h (bsearch): +Use it, to prevent C23 names like strchr from acting like macros. +* lib/string.in.h (memchr, strchr, strpbrk, strrchr): +Do not #undef when GNULIB_POSIXCHECK is defined, as this could +cause conforming C23 code to fail to conform. It’s not clear why +_GL_WARN_ON_USE_CXX; perhaps it was needed but isn’t any more? +But for now, limit the removal of #undef to these four functions +where #undeffing is clearly undesirable in C23. +* lib/wchar.in.h (wmemchr): Parenthesize function name in decl, +to prevent it from acting like a macro. + +Cherry-picked-by: Lukáš Zaoral +Upstream-commit: df17f4f37ed3ca373d23ad42eae51122bdb96626 +--- + lib/c++defs.h | 12 +++++++++++- + lib/mbschr.c | 2 +- + lib/memchr2.c | 2 +- + lib/stdlib.in.h | 6 +++--- + lib/string.in.h | 4 ---- + lib/wchar.in.h | 2 +- + 6 files changed, 17 insertions(+), 11 deletions(-) + +diff --git a/lib/c++defs.h b/lib/c++defs.h +index b77979a..7384457 100644 +--- a/lib/c++defs.h ++++ b/lib/c++defs.h +@@ -127,6 +127,16 @@ + #define _GL_FUNCDECL_RPL_1(rpl_func,rettype,parameters,...) \ + _GL_EXTERN_C_FUNC __VA_ARGS__ rettype rpl_func parameters + ++/* _GL_FUNCDECL_SYS_NAME (func) expands to plain func if C++, and to ++ parenthsized func otherwise. Parenthesization is needed in C23 if ++ the function is like strchr and so is a qualifier-generic macro ++ that expands to something more complicated. */ ++#ifdef __cplusplus ++# define _GL_FUNCDECL_SYS_NAME(func) func ++#else ++# define _GL_FUNCDECL_SYS_NAME(func) (func) ++#endif ++ + /* _GL_FUNCDECL_SYS (func, rettype, parameters, [attributes]); + declares the system function, named func, with the given prototype, + consisting of return type, parameters, and attributes. +@@ -139,7 +149,7 @@ + _GL_FUNCDECL_SYS (posix_openpt, int, (int flags), _GL_ATTRIBUTE_NODISCARD); + */ + #define _GL_FUNCDECL_SYS(func,rettype,parameters,...) \ +- _GL_EXTERN_C_FUNC __VA_ARGS__ rettype func parameters ++ _GL_EXTERN_C_FUNC __VA_ARGS__ rettype _GL_FUNCDECL_SYS_NAME (func) parameters + + /* _GL_CXXALIAS_RPL (func, rettype, parameters); + declares a C++ alias called GNULIB_NAMESPACE::func +diff --git a/lib/mbschr.c b/lib/mbschr.c +index c9e14b5..6582134 100644 +--- a/lib/mbschr.c ++++ b/lib/mbschr.c +@@ -65,5 +65,5 @@ mbschr (const char *string, int c) + return NULL; + } + else +- return strchr (string, c); ++ return (char *) strchr (string, c); + } +diff --git a/lib/memchr2.c b/lib/memchr2.c +index 7493823..d7724ae 100644 +--- a/lib/memchr2.c ++++ b/lib/memchr2.c +@@ -55,7 +55,7 @@ memchr2 (void const *s, int c1_in, int c2_in, size_t n) + c2 = (unsigned char) c2_in; + + if (c1 == c2) +- return memchr (s, c1, n); ++ return (void *) memchr (s, c1, n); + + /* Handle the first few bytes by reading one byte at a time. + Do this until VOID_PTR is aligned on a longword boundary. */ +diff --git a/lib/stdlib.in.h b/lib/stdlib.in.h +index bef0aaa..fd0e1e0 100644 +--- a/lib/stdlib.in.h ++++ b/lib/stdlib.in.h +@@ -224,9 +224,9 @@ _GL_INLINE_HEADER_BEGIN + + /* Declarations for ISO C N3322. */ + #if defined __GNUC__ && __GNUC__ >= 15 && !defined __clang__ +-_GL_EXTERN_C void *bsearch (const void *__key, +- const void *__base, size_t __nmemb, size_t __size, +- int (*__compare) (const void *, const void *)) ++_GL_EXTERN_C void *_GL_FUNCDECL_SYS_NAME (bsearch) ++ (const void *__key, const void *__base, size_t __nmemb, size_t __size, ++ int (*__compare) (const void *, const void *)) + _GL_ATTRIBUTE_NONNULL_IF_NONZERO (2, 3) _GL_ARG_NONNULL ((5)); + _GL_EXTERN_C void qsort (void *__base, size_t __nmemb, size_t __size, + int (*__compare) (const void *, const void *)) +diff --git a/lib/string.in.h b/lib/string.in.h +index fdcdd21..8b56acf 100644 +--- a/lib/string.in.h ++++ b/lib/string.in.h +@@ -409,7 +409,6 @@ _GL_CXXALIASWARN1 (memchr, void const *, + _GL_CXXALIASWARN (memchr); + # endif + #elif defined GNULIB_POSIXCHECK +-# undef memchr + /* Assume memchr is always declared. */ + _GL_WARN_ON_USE (memchr, "memchr has platform-specific bugs - " + "use gnulib module memchr for portability" ); +@@ -674,7 +673,6 @@ _GL_WARN_ON_USE (stpncpy, "stpncpy is unportable - " + #if defined GNULIB_POSIXCHECK + /* strchr() does not work with multibyte strings if the locale encoding is + GB18030 and the character to be searched is a digit. */ +-# undef strchr + /* Assume strchr is always declared. */ + _GL_WARN_ON_USE_CXX (strchr, + const char *, char *, (const char *, int), +@@ -981,7 +979,6 @@ _GL_CXXALIASWARN (strpbrk); + Even in this simple case, it does not work with multibyte strings if the + locale encoding is GB18030 and one of the characters to be searched is a + digit. */ +-# undef strpbrk + _GL_WARN_ON_USE_CXX (strpbrk, + const char *, char *, (const char *, const char *), + "strpbrk cannot work correctly on character strings " +@@ -1011,7 +1008,6 @@ _GL_WARN_ON_USE (strspn, "strspn cannot work correctly on character strings " + #if defined GNULIB_POSIXCHECK + /* strrchr() does not work with multibyte strings if the locale encoding is + GB18030 and the character to be searched is a digit. */ +-# undef strrchr + /* Assume strrchr is always declared. */ + _GL_WARN_ON_USE_CXX (strrchr, + const char *, char *, (const char *, int), +diff --git a/lib/wchar.in.h b/lib/wchar.in.h +index ab602a2..6be4515 100644 +--- a/lib/wchar.in.h ++++ b/lib/wchar.in.h +@@ -301,7 +301,7 @@ _GL_EXTERN_C int wcsncmp (const wchar_t *__s1, const wchar_t *__s2, size_t __n) + _GL_ATTRIBUTE_NONNULL_IF_NONZERO (1, 3) + _GL_ATTRIBUTE_NONNULL_IF_NONZERO (2, 3); + # ifndef __cplusplus +-_GL_EXTERN_C wchar_t *wmemchr (const wchar_t *__s, wchar_t __wc, size_t __n) ++_GL_EXTERN_C wchar_t *(wmemchr) (const wchar_t *__s, wchar_t __wc, size_t __n) + _GL_ATTRIBUTE_NONNULL_IF_NONZERO (1, 3); + # endif + _GL_EXTERN_C wchar_t *wmemset (wchar_t *__s, wchar_t __wc, size_t __n) +-- +2.52.0 + diff --git a/coreutils-df-direct.patch b/coreutils-df-direct.patch index 9e3434a..341ee2c 100644 --- a/coreutils-df-direct.patch +++ b/coreutils-df-direct.patch @@ -1,4 +1,4 @@ -From 6e36198f10a2f63b89c89ebb5d5c185b20fb3a63 Mon Sep 17 00:00:00 2001 +From 91be1a584108a6a3d96f64382bbf206c4213b3db Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Mon, 29 Mar 2010 17:20:34 +0000 Subject: [PATCH] coreutils-df-direct.patch @@ -11,10 +11,10 @@ Subject: [PATCH] coreutils-df-direct.patch create mode 100755 tests/df/direct.sh diff --git a/doc/coreutils.texi b/doc/coreutils.texi -index 5b9a597..6810c15 100644 +index b420606..0ccb368 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi -@@ -12074,6 +12074,13 @@ some systems (notably Solaris), doing this yields more up to date results, +@@ -12597,6 +12597,13 @@ some systems (notably Solaris), doing this yields more up to date results, but in general this option makes @command{df} much slower, especially when there are many or very busy file systems. @@ -29,10 +29,10 @@ index 5b9a597..6810c15 100644 @opindex --total @cindex grand total of file system size, usage and available space diff --git a/src/df.c b/src/df.c -index 48025b9..c8efa5b 100644 +index 75e638c..ef9f0a7 100644 --- a/src/df.c +++ b/src/df.c -@@ -125,6 +125,9 @@ static bool print_type; +@@ -121,6 +121,9 @@ static bool print_type; /* If true, print a grand total at the end. */ static bool print_grand_total; @@ -42,7 +42,7 @@ index 48025b9..c8efa5b 100644 /* Grand total data. */ static struct fs_usage grand_fsu; -@@ -252,13 +255,15 @@ enum +@@ -248,13 +251,15 @@ enum NO_SYNC_OPTION = CHAR_MAX + 1, SYNC_OPTION, TOTAL_OPTION, @@ -59,10 +59,10 @@ index 48025b9..c8efa5b 100644 {"inodes", no_argument, nullptr, 'i'}, {"human-readable", no_argument, nullptr, 'h'}, {"si", no_argument, nullptr, 'H'}, -@@ -583,7 +588,10 @@ get_header (void) - for (col = 0; col < ncolumns; col++) +@@ -571,7 +576,10 @@ get_header (void) + for (idx_t col = 0; col < ncolumns; col++) { - char *cell = nullptr; + char *cell; - char const *header = _(columns[col]->caption); + char const *header = (columns[col]->field == TARGET_FIELD + && direct_statfs)? @@ -71,7 +71,7 @@ index 48025b9..c8efa5b 100644 if (columns[col]->field == SIZE_FIELD && (header_mode == DEFAULT_MODE -@@ -1486,6 +1494,17 @@ get_point (char const *point, const struct stat *statp) +@@ -1446,6 +1454,17 @@ get_point (char const *point, const struct stat *statp) static void get_entry (char const *name, struct stat const *statp) { @@ -89,7 +89,7 @@ index 48025b9..c8efa5b 100644 if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode)) && get_device (name)) return; -@@ -1556,6 +1575,7 @@ or all file systems by default.\n\ +@@ -1516,6 +1535,7 @@ or all file systems by default.\n\ -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\ '-BM' prints sizes in units of 1,048,576 bytes;\n\ see SIZE format below\n\ @@ -97,7 +97,7 @@ index 48025b9..c8efa5b 100644 -h, --human-readable print sizes in powers of 1024 (e.g., 1023M)\n\ -H, --si print sizes in powers of 1000 (e.g., 1.1G)\n\ "), stdout); -@@ -1646,6 +1666,9 @@ main (int argc, char **argv) +@@ -1610,6 +1630,9 @@ main (int argc, char **argv) xstrtol_fatal (e, oi, c, long_options, optarg); } break; @@ -107,7 +107,7 @@ index 48025b9..c8efa5b 100644 case 'i': if (header_mode == OUTPUT_MODE) { -@@ -1742,6 +1765,13 @@ main (int argc, char **argv) +@@ -1706,6 +1729,13 @@ main (int argc, char **argv) } } @@ -183,5 +183,5 @@ index 0000000..8e4cfb8 + +Exit $fail -- -2.31.1 +2.52.0 diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch index be8e0b1..83579e9 100644 --- a/coreutils-i18n.patch +++ b/coreutils-i18n.patch @@ -1,66 +1,61 @@ -From 3a1b92e80708319bcc89852e3da1029c3d1ff6b3 Mon Sep 17 00:00:00 2001 +From a81b096084524e9aeef5e8b81fc829eb9efec581 Mon Sep 17 00:00:00 2001 From: rpm-build Date: Wed, 30 Aug 2023 17:19:58 +0200 Subject: [PATCH] coreutils-i18n.patch --- - bootstrap.conf | 1 + + bootstrap.conf | 2 + configure.ac | 6 + lib/linebuffer.h | 8 + lib/mbfile.c | 20 + - lib/mbfile.h | 267 ++++++++++++ - m4/mbfile.m4 | 14 + + lib/mbfile.h | 283 +++++++++++++ + m4/mbfile.m4 | 16 + src/cut.c | 508 +++++++++++++++++++++-- src/expand-common.c | 114 ++++++ src/expand-common.h | 12 + src/expand.c | 90 +++- - src/fold.c | 312 ++++++++++++-- - src/join.c | 359 ++++++++++++++-- src/local.mk | 4 +- src/pr.c | 443 ++++++++++++++++++-- - src/sort.c | 792 +++++++++++++++++++++++++++++++++--- - src/unexpand.c | 102 ++++- - src/uniq.c | 119 +++++- + src/sort.c | 791 +++++++++++++++++++++++++++++++++--- + src/unexpand.c | 101 ++++- tests/Coreutils.pm | 3 + tests/expand/mb.sh | 183 +++++++++ tests/i18n/sort.sh | 29 ++ tests/local.mk | 4 + tests/misc/expand.pl | 42 ++ - tests/misc/fold.pl | 50 ++- - tests/misc/join.pl | 50 +++ tests/misc/sort-mb-tests.sh | 45 ++ tests/misc/unexpand.pl | 39 ++ tests/pr/pr-tests.pl | 49 +++ tests/sort/sort-merge.pl | 42 ++ tests/sort/sort.pl | 40 +- tests/unexpand/mb.sh | 172 ++++++++ - tests/uniq/uniq.pl | 55 +++ - 31 files changed, 3732 insertions(+), 242 deletions(-) + 25 files changed, 2879 insertions(+), 167 deletions(-) create mode 100644 lib/mbfile.c create mode 100644 lib/mbfile.h create mode 100644 m4/mbfile.m4 - create mode 100755 tests/expand/mb.sh - create mode 100755 tests/i18n/sort.sh - create mode 100755 tests/misc/sort-mb-tests.sh - create mode 100755 tests/unexpand/mb.sh + create mode 100644 tests/expand/mb.sh + create mode 100644 tests/i18n/sort.sh + create mode 100644 tests/misc/sort-mb-tests.sh + create mode 100644 tests/unexpand/mb.sh diff --git a/bootstrap.conf b/bootstrap.conf -index bd73ff2..0e450cd 100644 +index ec68ac8..ec2fbbe 100644 --- a/bootstrap.conf +++ b/bootstrap.conf -@@ -167,6 +167,7 @@ gnulib_modules=" - maintainer-makefile +@@ -171,6 +171,8 @@ gnulib_modules=" malloc-gnu manywarnings + mbbuf ++ mbchar + mbfile mbrlen + mbrtoc32 mbrtowc - mbsalign diff --git a/configure.ac b/configure.ac -index 8ffc0b7..ca3305d 100644 +index 5e99ef3..ac07577 100644 --- a/configure.ac +++ b/configure.ac -@@ -448,6 +448,12 @@ fi +@@ -465,6 +465,12 @@ fi # I'm leaving it here for now. This whole thing needs to be modernized... gl_WINSIZE_IN_PTEM @@ -74,12 +69,12 @@ index 8ffc0b7..ca3305d 100644 if test $gl_cv_sys_tiocgwinsz_needs_termios_h = no && \ diff --git a/lib/linebuffer.h b/lib/linebuffer.h -index b4cc8e4..f2bbb52 100644 +index ca56f80..509b7e6 100644 --- a/lib/linebuffer.h +++ b/lib/linebuffer.h -@@ -22,6 +22,11 @@ - # include "idx.h" - # include +@@ -27,6 +27,11 @@ extern "C" { + #endif + +/* Get mbstate_t. */ +# if HAVE_WCHAR_H @@ -89,7 +84,7 @@ index b4cc8e4..f2bbb52 100644 /* A 'struct linebuffer' holds a line of text. */ struct linebuffer -@@ -29,6 +34,9 @@ struct linebuffer +@@ -34,6 +39,9 @@ struct linebuffer idx_t size; /* Allocated. */ idx_t length; /* Used. */ char *buffer; @@ -101,12 +96,12 @@ index b4cc8e4..f2bbb52 100644 /* Initialize linebuffer LINEBUFFER for use. */ diff --git a/lib/mbfile.c b/lib/mbfile.c new file mode 100644 -index 0000000..8d2957b +index 0000000..f4e3e77 --- /dev/null +++ b/lib/mbfile.c @@ -0,0 +1,20 @@ +/* Multibyte character I/O: macros for multi-byte encodings. -+ Copyright (C) 2012-2023 Free Software Foundation, Inc. ++ Copyright (C) 2012-2025 Free Software Foundation, Inc. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as @@ -127,12 +122,12 @@ index 0000000..8d2957b +#include "mbfile.h" diff --git a/lib/mbfile.h b/lib/mbfile.h new file mode 100644 -index 0000000..ad61c19 +index 0000000..c852f31 --- /dev/null +++ b/lib/mbfile.h -@@ -0,0 +1,267 @@ +@@ -0,0 +1,283 @@ +/* Multibyte character I/O: macros for multi-byte encodings. -+ Copyright (C) 2001, 2005, 2009-2023 Free Software Foundation, Inc. ++ Copyright (C) 2001, 2005, 2009-2025 Free Software Foundation, Inc. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as @@ -187,6 +182,7 @@ index 0000000..ad61c19 + +#include +#include ++#include +#include +#include + @@ -197,14 +193,22 @@ index 0000000..ad61c19 +# define MBFILE_INLINE _GL_INLINE +#endif + ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++ ++/* Guarantee two characters of pushback. */ ++#define MBFILE_MAX_PUSHBACK 2 ++ +struct mbfile_multi { + FILE *fp; + bool eof_seen; -+ bool have_pushback; ++ unsigned int pushback_count; /* <= MBFILE_MAX_PUSHBACK */ + mbstate_t state; + unsigned int bufcount; + char buf[MBCHAR_BUF_SIZE]; -+ struct mbchar pushback; ++ struct mbchar pushback[MBFILE_MAX_PUSHBACK]; +}; + +MBFILE_INLINE void @@ -213,19 +217,19 @@ index 0000000..ad61c19 + unsigned int new_bufcount; + size_t bytes; + ++ /* Return character pushed back, if there is one. */ ++ if (mbf->pushback_count > 0) ++ { ++ mb_copy (mbc, &mbf->pushback[mbf->pushback_count - 1]); ++ mbf->pushback_count--; ++ return; ++ } ++ + /* If EOF has already been seen, don't use getc. This matters if + mbf->fp is connected to an interactive tty. */ + if (mbf->eof_seen) + goto eof; + -+ /* Return character pushed back, if there is one. */ -+ if (mbf->have_pushback) -+ { -+ mb_copy (mbc, &mbf->pushback); -+ mbf->have_pushback = false; -+ return; -+ } -+ + new_bufcount = mbf->bufcount; + + /* If mbf->state is not in an initial state, some more 32-bit wide character @@ -374,8 +378,10 @@ index 0000000..ad61c19 +MBFILE_INLINE void +mbfile_multi_ungetc (const struct mbchar *mbc, struct mbfile_multi *mbf) +{ -+ mb_copy (&mbf->pushback, mbc); -+ mbf->have_pushback = true; ++ if (mbf->pushback_count == MBFILE_MAX_PUSHBACK) ++ abort (); ++ mb_copy (&mbf->pushback[mbf->pushback_count], mbc); ++ mbf->pushback_count++; +} + +typedef struct mbfile_multi mb_file_t; @@ -385,7 +391,7 @@ index 0000000..ad61c19 +#define mbf_init(mbf, stream) \ + ((mbf).fp = (stream), \ + (mbf).eof_seen = false, \ -+ (mbf).have_pushback = false, \ ++ (mbf).pushback_count = 0, \ + mbszero (&(mbf).state), \ + (mbf).bufcount = 0) + @@ -395,20 +401,27 @@ index 0000000..ad61c19 + +#define mb_iseof(mbc) ((mbc).bytes == 0) + ++ ++#ifdef __cplusplus ++} ++#endif ++ +_GL_INLINE_HEADER_END + +#endif /* _MBFILE_H */ diff --git a/m4/mbfile.m4 b/m4/mbfile.m4 new file mode 100644 -index 0000000..83068a9 +index 0000000..1d126e0 --- /dev/null +++ b/m4/mbfile.m4 -@@ -0,0 +1,14 @@ -+# mbfile.m4 serial 7 -+dnl Copyright (C) 2005, 2008-2023 Free Software Foundation, Inc. +@@ -0,0 +1,16 @@ ++# mbfile.m4 ++# serial 7 ++dnl Copyright (C) 2005, 2008-2025 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. ++dnl This file is offered as-is, without any warranty. + +dnl autoconf tests required for use of mbfile.h +dnl From Bruno Haible. @@ -419,7 +432,7 @@ index 0000000..83068a9 + : +]) diff --git a/src/cut.c b/src/cut.c -index b4edbab..65e4658 100644 +index f0effb9..36479d6 100644 --- a/src/cut.c +++ b/src/cut.c @@ -27,6 +27,11 @@ @@ -1079,18 +1092,18 @@ index b4edbab..65e4658 100644 if (have_read_stdin && fclose (stdin) == EOF) diff --git a/src/expand-common.c b/src/expand-common.c -index 89fa56a..c102e6e 100644 +index 14dd804..0d8eaaa 100644 --- a/src/expand-common.c +++ b/src/expand-common.c -@@ -18,6 +18,7 @@ - +@@ -19,6 +19,7 @@ + #include #include #include +#include #include "system.h" + #include "c-ctype.h" #include "fadvise.h" - #include "quote.h" -@@ -122,6 +123,119 @@ set_increment_size (uintmax_t tabval) +@@ -132,6 +133,119 @@ set_increment_size (colno tabval) return ok; } @@ -1211,10 +1224,10 @@ index 89fa56a..c102e6e 100644 to the list of tab stops. */ extern void diff --git a/src/expand-common.h b/src/expand-common.h -index daed31e..f6b2f68 100644 +index 46ef4e3..e19469b 100644 --- a/src/expand-common.h +++ b/src/expand-common.h -@@ -25,6 +25,18 @@ extern size_t max_column_width; +@@ -29,6 +29,18 @@ extern idx_t max_column_width; /* The desired exit status. */ extern int exit_status; @@ -1232,12 +1245,12 @@ index daed31e..f6b2f68 100644 + /* Add tab stop TABVAL to the end of 'tab_list'. */ extern void - add_tab_stop (uintmax_t tabval); + add_tab_stop (colno tabval); diff --git a/src/expand.c b/src/expand.c -index 0e74d0c..7080c51 100644 +index 5ec7ce9..65ac315 100644 --- a/src/expand.c +++ b/src/expand.c -@@ -37,6 +37,9 @@ +@@ -38,6 +38,9 @@ #include #include #include @@ -1247,7 +1260,7 @@ index 0e74d0c..7080c51 100644 #include "system.h" #include "expand-common.h" -@@ -95,19 +98,41 @@ expand (void) +@@ -96,19 +99,41 @@ expand (void) { /* Input stream. */ FILE *fp = next_file (nullptr); @@ -1293,9 +1306,9 @@ index 0e74d0c..7080c51 100644 /* The following variables have valid values only when CONVERT is true: */ -@@ -117,17 +142,48 @@ expand (void) +@@ -118,17 +143,48 @@ expand (void) /* Index in TAB_LIST of next tab stop to examine. */ - size_t tab_index = 0; + idx_t tab_index = 0; - /* Convert a line of text. */ @@ -1345,8 +1358,8 @@ index 0e74d0c..7080c51 100644 + if (mb_iseq (c, '\t')) { /* Column the next input tab stop is on. */ - uintmax_t next_tab_column; -@@ -146,32 +202,34 @@ expand (void) + bool last_tab; +@@ -139,31 +195,33 @@ expand (void) if (putchar (' ') < 0) write_error (); @@ -1365,14 +1378,13 @@ index 0e74d0c..7080c51 100644 + /* A leading control character could make us trip over. */ + else if (!mb_iscntrl (c)) { -- column++; -+ column += mb_width (c); - if (!column) +- if (ckd_add (&column, column, 1)) ++ if (ckd_add (&column, column, mb_width (c))) error (EXIT_FAILURE, 0, _("input line is too long")); } - convert &= convert_entire_line || !! isblank (c); -+ convert &= convert_entire_line || mb_isblank (c); ++ convert &= convert_entire_line || !! mb_isblank (c); } - if (c < 0) @@ -1389,924 +1401,28 @@ index 0e74d0c..7080c51 100644 } } -diff --git a/src/fold.c b/src/fold.c -index 5c0428d..2372047 100644 ---- a/src/fold.c -+++ b/src/fold.c -@@ -22,10 +22,32 @@ - #include - #include - -+/* Get mbstate_t, mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include -+#endif -+ -+/* Get iswprint(), iswblank(), wcwidth(). */ -+#if HAVE_WCTYPE_H -+# include -+#endif -+ - #include "system.h" - #include "fadvise.h" - #include "xdectoint.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# undef MB_LEN_MAX -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - #define TAB_WIDTH 8 - - /* The official name of this program (e.g., no 'g' prefix). */ -@@ -33,20 +55,41 @@ - - #define AUTHORS proper_name ("David MacKenzie") - -+#define FATAL_ERROR(Message) \ -+ do \ -+ { \ -+ error (0, 0, (Message)); \ -+ usage (2); \ -+ } \ -+ while (0) -+ -+enum operating_mode -+{ -+ /* Fold texts by columns that are at the given positions. */ -+ column_mode, -+ -+ /* Fold texts by bytes that are at the given positions. */ -+ byte_mode, -+ -+ /* Fold texts by characters that are at the given positions. */ -+ character_mode, -+}; -+ -+/* The argument shows current mode. (Default: column_mode) */ -+static enum operating_mode operating_mode; -+ - /* If nonzero, try to break on whitespace. */ - static bool break_spaces; - --/* If nonzero, count bytes, not column positions. */ --static bool count_bytes; -- - /* If nonzero, at least one of the files we read was standard input. */ - static bool have_read_stdin; - --static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::"; -+static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; - - static struct option const longopts[] = - { - {"bytes", no_argument, nullptr, 'b'}, -+ {"characters", no_argument, nullptr, 'c'}, - {"spaces", no_argument, nullptr, 's'}, - {"width", required_argument, nullptr, 'w'}, - {GETOPT_HELP_OPTION_DECL}, -@@ -74,6 +117,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ - - fputs (_("\ - -b, --bytes count bytes rather than columns\n\ -+ -c, --characters count characters rather than columns\n\ - -s, --spaces break at spaces\n\ - -w, --width=WIDTH use WIDTH columns instead of 80\n\ - "), stdout); -@@ -91,7 +135,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ - static size_t - adjust_column (size_t column, char c) - { -- if (!count_bytes) -+ if (operating_mode != byte_mode) - { - if (c == '\b') - { -@@ -114,30 +158,14 @@ adjust_column (size_t column, char c) - to stdout, with maximum line length WIDTH. - Return true if successful. */ - --static bool --fold_file (char const *filename, size_t width) -+static void -+fold_text (FILE *istream, size_t width, int *saved_errno) - { -- FILE *istream; - int c; - size_t column = 0; /* Screen column where next char will go. */ - size_t offset_out = 0; /* Index in 'line_out' for next char. */ - static char *line_out = nullptr; - static size_t allocated_out = 0; -- int saved_errno; -- -- if (STREQ (filename, "-")) -- { -- istream = stdin; -- have_read_stdin = true; -- } -- else -- istream = fopen (filename, "r"); -- -- if (istream == nullptr) -- { -- error (0, errno, "%s", quotef (filename)); -- return false; -- } - - fadvise (istream, FADVISE_SEQUENTIAL); - -@@ -167,6 +195,15 @@ fold_file (char const *filename, size_t width) - bool found_blank = false; - size_t logical_end = offset_out; - -+ /* If LINE_OUT has no wide character, -+ put a new wide character in LINE_OUT -+ if column is bigger than width. */ -+ if (offset_out == 0) -+ { -+ line_out[offset_out++] = c; -+ continue; -+ } -+ - /* Look for the last blank. */ - while (logical_end) - { -@@ -213,13 +250,225 @@ fold_file (char const *filename, size_t width) - line_out[offset_out++] = c; - } - -- saved_errno = errno; -+ *saved_errno = errno; - if (!ferror (istream)) -- saved_errno = 0; -+ *saved_errno = 0; - - if (offset_out) - fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); - -+} -+ -+#if HAVE_MBRTOWC -+static void -+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) -+{ -+ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ -+ size_t buflen = 0; /* The length of the byte sequence in buf. */ -+ char *bufpos = buf; /* Next read position of BUF. */ -+ wint_t wc; /* A gotten wide character. */ -+ size_t mblength; /* The byte size of a multibyte character which shows -+ as same character as WC. */ -+ mbstate_t state, state_bak; /* State of the stream. */ -+ int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */ -+ -+ static char *line_out = NULL; -+ size_t offset_out = 0; /* Index in `line_out' for next char. */ -+ static size_t allocated_out = 0; -+ -+ int increment; -+ size_t column = 0; -+ -+ size_t last_blank_pos; -+ size_t last_blank_column; -+ int is_blank_seen; -+ int last_blank_increment = 0; -+ int is_bs_following_last_blank; -+ size_t bs_following_last_blank_num; -+ int is_cr_after_last_blank; -+ -+#define CLEAR_FLAGS \ -+ do \ -+ { \ -+ last_blank_pos = 0; \ -+ last_blank_column = 0; \ -+ is_blank_seen = 0; \ -+ is_bs_following_last_blank = 0; \ -+ bs_following_last_blank_num = 0; \ -+ is_cr_after_last_blank = 0; \ -+ } \ -+ while (0) -+ -+#define START_NEW_LINE \ -+ do \ -+ { \ -+ putchar ('\n'); \ -+ column = 0; \ -+ offset_out = 0; \ -+ CLEAR_FLAGS; \ -+ } \ -+ while (0) -+ -+ CLEAR_FLAGS; -+ memset (&state, '\0', sizeof(mbstate_t)); -+ -+ for (;; bufpos += mblength, buflen -= mblength) -+ { -+ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) -+ { -+ memmove (buf, bufpos, buflen); -+ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); -+ bufpos = buf; -+ } -+ -+ if (buflen < 1) -+ break; -+ -+ /* Get a wide character. */ -+ state_bak = state; -+ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); -+ -+ switch (mblength) -+ { -+ case (size_t)-1: -+ case (size_t)-2: -+ convfail++; -+ state = state_bak; -+ /* Fall through. */ -+ -+ case 0: -+ mblength = 1; -+ break; -+ } -+ -+rescan: -+ if (operating_mode == byte_mode) /* byte mode */ -+ increment = mblength; -+ else if (operating_mode == character_mode) /* character mode */ -+ increment = 1; -+ else /* column mode */ -+ { -+ if (convfail) -+ increment = 1; -+ else -+ { -+ switch (wc) -+ { -+ case L'\n': -+ fwrite (line_out, sizeof(char), offset_out, stdout); -+ START_NEW_LINE; -+ continue; -+ -+ case L'\b': -+ increment = (column > 0) ? -1 : 0; -+ break; -+ -+ case L'\r': -+ increment = -1 * column; -+ break; -+ -+ case L'\t': -+ increment = 8 - column % 8; -+ break; -+ -+ default: -+ increment = wcwidth (wc); -+ increment = (increment < 0) ? 0 : increment; -+ } -+ } -+ } -+ -+ if (column + increment > width && break_spaces && last_blank_pos) -+ { -+ fwrite (line_out, sizeof(char), last_blank_pos, stdout); -+ putchar ('\n'); -+ -+ offset_out = offset_out - last_blank_pos; -+ column = column - last_blank_column + ((is_cr_after_last_blank) -+ ? last_blank_increment : bs_following_last_blank_num); -+ memmove (line_out, line_out + last_blank_pos, offset_out); -+ CLEAR_FLAGS; -+ goto rescan; -+ } -+ -+ if (column + increment > width && column != 0) -+ { -+ fwrite (line_out, sizeof(char), offset_out, stdout); -+ START_NEW_LINE; -+ goto rescan; -+ } -+ -+ if (allocated_out < offset_out + mblength) -+ { -+ line_out = X2REALLOC (line_out, &allocated_out); -+ } -+ -+ memcpy (line_out + offset_out, bufpos, mblength); -+ offset_out += mblength; -+ column += increment; -+ -+ if (is_blank_seen && !convfail && wc == L'\r') -+ is_cr_after_last_blank = 1; -+ -+ if (is_bs_following_last_blank && !convfail && wc == L'\b') -+ ++bs_following_last_blank_num; -+ else -+ is_bs_following_last_blank = 0; -+ -+ if (break_spaces && !convfail && iswblank (wc)) -+ { -+ last_blank_pos = offset_out; -+ last_blank_column = column; -+ is_blank_seen = 1; -+ last_blank_increment = increment; -+ is_bs_following_last_blank = 1; -+ bs_following_last_blank_num = 0; -+ is_cr_after_last_blank = 0; -+ } -+ } -+ -+ *saved_errno = errno; -+ if (!ferror (istream)) -+ *saved_errno = 0; -+ -+ if (offset_out) -+ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); -+ -+} -+#endif -+ -+/* Fold file FILENAME, or standard input if FILENAME is "-", -+ to stdout, with maximum line length WIDTH. -+ Return 0 if successful, 1 if an error occurs. */ -+ -+static bool -+fold_file (char const *filename, size_t width) -+{ -+ FILE *istream; -+ int saved_errno; -+ -+ if (STREQ (filename, "-")) -+ { -+ istream = stdin; -+ have_read_stdin = 1; -+ } -+ else -+ istream = fopen (filename, "r"); -+ -+ if (istream == NULL) -+ { -+ error (0, errno, "%s", filename); -+ return 1; -+ } -+ -+ /* Define how ISTREAM is being folded. */ -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ fold_multibyte_text (istream, width, &saved_errno); -+ else -+#endif -+ fold_text (istream, width, &saved_errno); -+ - if (STREQ (filename, "-")) - clearerr (istream); - else if (fclose (istream) != 0 && !saved_errno) -@@ -250,7 +499,8 @@ main (int argc, char **argv) - - atexit (close_stdout); - -- break_spaces = count_bytes = have_read_stdin = false; -+ operating_mode = column_mode; -+ break_spaces = have_read_stdin = false; - - while ((optc = getopt_long (argc, argv, shortopts, longopts, nullptr)) != -1) - { -@@ -259,7 +509,15 @@ main (int argc, char **argv) - switch (optc) - { - case 'b': /* Count bytes rather than columns. */ -- count_bytes = true; -+ if (operating_mode != column_mode) -+ FATAL_ERROR (_("only one way of folding may be specified")); -+ operating_mode = byte_mode; -+ break; -+ -+ case 'c': -+ if (operating_mode != column_mode) -+ FATAL_ERROR (_("only one way of folding may be specified")); -+ operating_mode = character_mode; - break; - - case 's': /* Break at word boundaries. */ -diff --git a/src/join.c b/src/join.c -index 0bcfa75..8a3bcf1 100644 ---- a/src/join.c -+++ b/src/join.c -@@ -21,18 +21,32 @@ - #include - #include - -+/* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth(). */ -+#if HAVE_WCHAR_H -+# include -+#endif -+ -+/* Get iswblank(), towupper. */ -+#if HAVE_WCTYPE_H -+# include -+#endif -+ - #include "system.h" - #include "assure.h" - #include "fadvise.h" - #include "hard-locale.h" - #include "linebuffer.h" --#include "memcasecmp.h" - #include "quote.h" - #include "stdio--.h" - #include "xmemcoll.h" - #include "xstrtol.h" - #include "argmatch.h" - -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ - /* The official name of this program (e.g., no 'g' prefix). */ - #define PROGRAM_NAME "join" - -@@ -134,10 +148,12 @@ static struct outlist outlist_head; - /* Last element in 'outlist', where a new element can be added. */ - static struct outlist *outlist_end = &outlist_head; - --/* Tab character separating fields. If negative, fields are separated -- by any nonempty string of blanks, otherwise by exactly one -- tab character whose value (when cast to unsigned char) equals TAB. */ --static int tab = -1; -+/* Tab character separating fields. If NULL, fields are separated -+ by any nonempty string of blanks. */ -+static char *tab = NULL; -+ -+/* The number of bytes used for tab. */ -+static size_t tablen = 0; - - /* If nonzero, check that the input is correctly ordered. */ - static enum -@@ -277,13 +293,14 @@ xfields (struct line *line) - if (ptr == lim) - return; - -- if (0 <= tab && tab != '\n') -+ if (tab != NULL) - { -+ unsigned char t = tab[0]; - char *sep; -- for (; (sep = memchr (ptr, tab, lim - ptr)) != nullptr; ptr = sep + 1) -+ for (; (sep = memchr (ptr, t, lim - ptr)) != nullptr; ptr = sep + 1) - extract_field (line, ptr, sep - ptr); - } -- else if (tab < 0) -+ else - { - /* Skip leading blanks before the first field. */ - while (field_sep (*ptr)) -@@ -307,6 +324,147 @@ xfields (struct line *line) - extract_field (line, ptr, lim - ptr); - } - -+#if HAVE_MBRTOWC -+static void -+xfields_multibyte (struct line *line) -+{ -+ char *ptr = line->buf.buffer; -+ char const *lim = ptr + line->buf.length - 1; -+ wchar_t wc = 0; -+ size_t mblength = 1; -+ mbstate_t state, state_bak; -+ -+ memset (&state, 0, sizeof (mbstate_t)); -+ -+ if (ptr >= lim) -+ return; -+ -+ if (tab != NULL) -+ { -+ char *sep = ptr; -+ for (; ptr < lim; ptr = sep + mblength) -+ { -+ sep = ptr; -+ while (sep < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (mblength == tablen && !memcmp (sep, tab, mblength)) -+ break; -+ else -+ { -+ sep += mblength; -+ continue; -+ } -+ } -+ -+ if (sep >= lim) -+ break; -+ -+ extract_field (line, ptr, sep - ptr); -+ } -+ } -+ else -+ { -+ /* Skip leading blanks before the first field. */ -+ while(ptr < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (!iswblank(wc) && wc != '\n') -+ break; -+ ptr += mblength; -+ } -+ -+ do -+ { -+ char *sep; -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ sep = ptr + mblength; -+ while (sep < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (iswblank (wc) || wc == '\n') -+ break; -+ -+ sep += mblength; -+ } -+ -+ extract_field (line, ptr, sep - ptr); -+ if (sep >= lim) -+ return; -+ -+ state_bak = state; -+ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ ptr = sep + mblength; -+ while (ptr < lim) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); -+ if (mblength == (size_t)-1 || mblength == (size_t)-2) -+ { -+ mblength = 1; -+ state = state_bak; -+ break; -+ } -+ mblength = (mblength < 1) ? 1 : mblength; -+ -+ if (!iswblank (wc) && wc != '\n') -+ break; -+ -+ ptr += mblength; -+ } -+ } -+ while (ptr < lim); -+ } -+ -+ extract_field (line, ptr, lim - ptr); -+} -+#endif -+ - static void - freeline (struct line *line) - { -@@ -328,56 +486,133 @@ keycmp (struct line const *line1, struct line const *line2, - idx_t jf_1, idx_t jf_2) - { - /* Start of field to compare in each file. */ -- char *beg1; -- char *beg2; -- -- idx_t len1; -- idx_t len2; /* Length of fields to compare. */ -+ char *beg[2]; -+ char *copy[2]; -+ idx_t len[2]; /* Length of fields to compare. */ - int diff; -+ int i, j; -+ int mallocd = 0; - - if (jf_1 < line1->nfields) - { -- beg1 = line1->fields[jf_1].beg; -- len1 = line1->fields[jf_1].len; -+ beg[0] = line1->fields[jf_1].beg; -+ len[0] = line1->fields[jf_1].len; - } - else - { -- beg1 = nullptr; -- len1 = 0; -+ beg[0] = nullptr; -+ len[0] = 0; - } - - if (jf_2 < line2->nfields) - { -- beg2 = line2->fields[jf_2].beg; -- len2 = line2->fields[jf_2].len; -+ beg[1] = line2->fields[jf_2].beg; -+ len[1] = line2->fields[jf_2].len; - } - else - { -- beg2 = nullptr; -- len2 = 0; -+ beg[1] = nullptr; -+ len[1] = 0; - } - -- if (len1 == 0) -- return len2 == 0 ? 0 : -1; -- if (len2 == 0) -+ if (len[0] == 0) -+ return len[1] == 0 ? 0 : -1; -+ if (len[1] == 0) - return 1; - - if (ignore_case) - { -- /* FIXME: ignore_case does not work with NLS (in particular, -- with multibyte chars). */ -- diff = memcasecmp (beg1, beg2, MIN (len1, len2)); -+#ifdef HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ size_t mblength; -+ wchar_t wc, uwc; -+ mbstate_t state, state_bak; -+ -+ memset (&state, '\0', sizeof (mbstate_t)); -+ -+ for (i = 0; i < 2; i++) -+ { -+ mallocd = 1; -+ copy[i] = xmalloc (len[i] + 1); -+ memset (copy[i], '\0',len[i] + 1); -+ -+ for (j = 0; j < MIN (len[0], len[1]);) -+ { -+ state_bak = state; -+ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); -+ -+ switch (mblength) -+ { -+ case (size_t) -1: -+ case (size_t) -2: -+ state = state_bak; -+ /* Fall through */ -+ case 0: -+ mblength = 1; -+ break; -+ -+ default: -+ uwc = towupper (wc); -+ -+ if (uwc != wc) -+ { -+ mbstate_t state_wc; -+ size_t mblen; -+ -+ memset (&state_wc, '\0', sizeof (mbstate_t)); -+ mblen = wcrtomb (copy[i] + j, uwc, &state_wc); -+ assert (mblen != (size_t)-1); -+ } -+ else -+ memcpy (copy[i] + j, beg[i] + j, mblength); -+ } -+ j += mblength; -+ } -+ copy[i][j] = '\0'; -+ } -+ } -+ else -+#endif -+ { -+ for (i = 0; i < 2; i++) -+ { -+ mallocd = 1; -+ copy[i] = xmalloc (len[i] + 1); -+ -+ for (j = 0; j < MIN (len[0], len[1]); j++) -+ copy[i][j] = toupper (beg[i][j]); -+ -+ copy[i][j] = '\0'; -+ } -+ } - } - else - { -- if (hard_LC_COLLATE) -- return xmemcoll (beg1, len1, beg2, len2); -- diff = memcmp (beg1, beg2, MIN (len1, len2)); -+ copy[0] = beg[0]; -+ copy[1] = beg[1]; - } - -+ if (hard_LC_COLLATE) -+ { -+ diff = xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); -+ -+ if (mallocd) -+ for (i = 0; i < 2; i++) -+ free (copy[i]); -+ -+ return diff; -+ } -+ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); -+ -+ if (mallocd) -+ for (i = 0; i < 2; i++) -+ free (copy[i]); -+ -+ - if (diff) - return diff; -- return (len1 > len2) - (len1 < len2); -+ return len[0] - len[1]; - } - - /* Check that successive input lines PREV and CURRENT from input file -@@ -469,6 +704,11 @@ get_line (FILE *fp, struct line **linep, int which) - } - ++line_no[which - 1]; - -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ xfields_multibyte (line); -+ else -+#endif - xfields (line); - - if (prevline[which - 1]) -@@ -562,21 +802,28 @@ prfield (idx_t n, struct line const *line) - - /* Output all the fields in line, other than the join field. */ - -+#define PUT_TAB_CHAR \ -+ do \ -+ { \ -+ (tab != NULL) ? \ -+ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ -+ } \ -+ while (0) -+ - static void - prfields (struct line const *line, idx_t join_field, idx_t autocount) - { - idx_t i; - idx_t nfields = autoformat ? autocount : line->nfields; -- char output_separator = tab < 0 ? ' ' : tab; - - for (i = 0; i < join_field && i < nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line); - } - for (i = join_field + 1; i < nfields; ++i) - { -- putchar (output_separator); -+ PUT_TAB_CHAR; - prfield (i, line); - } - } -@@ -587,7 +834,6 @@ static void - prjoin (struct line const *line1, struct line const *line2) - { - const struct outlist *outlist; -- char output_separator = tab < 0 ? ' ' : tab; - idx_t field; - struct line const *line; - -@@ -621,7 +867,7 @@ prjoin (struct line const *line1, struct line const *line2) - o = o->next; - if (o == nullptr) - break; -- putchar (output_separator); -+ PUT_TAB_CHAR; - } - putchar (eolchar); - } -@@ -1086,20 +1332,43 @@ main (int argc, char **argv) - - case 't': - { -- unsigned char newtab = optarg[0]; -+ char *newtab = NULL; -+ size_t newtablen; -+ newtab = xstrdup (optarg); -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ mbstate_t state; -+ -+ memset (&state, 0, sizeof (mbstate_t)); -+ newtablen = mbrtowc (NULL, newtab, -+ strnlen (newtab, MB_LEN_MAX), -+ &state); -+ if (newtablen == (size_t) 0 -+ || newtablen == (size_t) -1 -+ || newtablen == (size_t) -2) -+ newtablen = 1; -+ } -+ else -+#endif -+ newtablen = 1; - if (! newtab) -- newtab = '\n'; /* '' => process the whole line. */ -+ newtab = (char*)"\n"; /* '' => process the whole line. */ - else if (optarg[1]) - { -- if (STREQ (optarg, "\\0")) -- newtab = '\0'; -- else -- error (EXIT_FAILURE, 0, _("multi-character tab %s"), -- quote (optarg)); -+ if (newtablen == 1 && newtab[1]) -+ { -+ if (STREQ (newtab, "\\0")) -+ newtab[0] = '\0'; -+ } -+ } -+ if (tab != NULL && strcmp (tab, newtab)) -+ { -+ free (newtab); -+ error (EXIT_FAILURE, 0, _("incompatible tabs")); - } -- if (0 <= tab && tab != newtab) -- error (EXIT_FAILURE, 0, _("incompatible tabs")); - tab = newtab; -+ tablen = newtablen; - } - break; - diff --git a/src/local.mk b/src/local.mk -index f45b911..6f7036a 100644 +index a8ad6b4..b0e61ec 100644 --- a/src/local.mk +++ b/src/local.mk -@@ -447,8 +447,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) +@@ -490,8 +490,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) src_basenc_SOURCES = src/basenc.c src_basenc_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS) -src_expand_SOURCES = src/expand.c src/expand-common.c -src_unexpand_SOURCES = src/unexpand.c src/expand-common.c -+src_expand_SOURCES = src/expand.c src/expand-common.c lib/mbfile.c -+src_unexpand_SOURCES = src/unexpand.c src/expand-common.c lib/mbfile.c ++src_expand_SOURCES = src/expand.c src/expand-common.c lib/mbfile.c lib/mbchar.c ++src_unexpand_SOURCES = src/unexpand.c src/expand-common.c lib/mbfile.c lib/mbchar.c src_wc_SOURCES = src/wc.c - if USE_AVX2_WC_LINECOUNT + if USE_AVX512_WC_LINECOUNT diff --git a/src/pr.c b/src/pr.c -index 419545c..702e025 100644 +index 10b8c52..079c86c 100644 --- a/src/pr.c +++ b/src/pr.c @@ -312,6 +312,24 @@ + #include #include - #include #include + +/* Get MB_LEN_MAX. */ @@ -2327,9 +1443,9 @@ index 419545c..702e025 100644 +#endif + #include "system.h" + #include "c-ctype.h" #include "fadvise.h" - #include "hard-locale.h" -@@ -324,6 +342,18 @@ +@@ -325,6 +343,18 @@ #include "xstrtol-error.h" #include "xdectoint.h" @@ -2348,7 +1464,7 @@ index 419545c..702e025 100644 /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "pr" -@@ -416,7 +446,20 @@ struct COLUMN +@@ -417,7 +447,20 @@ struct COLUMN typedef struct COLUMN COLUMN; @@ -2370,9 +1486,9 @@ index 419545c..702e025 100644 static bool read_line (COLUMN *p); static bool print_page (void); static bool print_stored (COLUMN *p); -@@ -428,6 +471,7 @@ static void add_line_number (COLUMN *p); - static void getoptnum (char const *n_str, int min, int *num, - char const *errfmt); +@@ -428,6 +471,7 @@ static void pad_across_to (int position); + static void add_line_number (COLUMN *p); + static int getoptnum (char const *n_str, int min, char const *errfmt); static void getoptarg (char *arg, char switch_char, char *character, + int *character_length, int *character_width, int *number); @@ -2505,7 +1621,7 @@ index 419545c..702e025 100644 /* Could check tab width > 0. */ tabify_output = true; break; -@@ -985,8 +1068,8 @@ main (int argc, char **argv) +@@ -986,8 +1069,8 @@ main (int argc, char **argv) case 'n': numbered_lines = true; if (optarg) @@ -2516,7 +1632,7 @@ index 419545c..702e025 100644 break; case 'N': skip_count = false; -@@ -1011,6 +1094,7 @@ main (int argc, char **argv) +@@ -1013,6 +1096,7 @@ main (int argc, char **argv) /* Reset an additional input of -s, -S dominates -s */ col_sep_string = ""; col_sep_length = 0; @@ -2524,7 +1640,7 @@ index 419545c..702e025 100644 use_col_separator = true; if (optarg) separator_string (optarg); -@@ -1165,7 +1249,8 @@ getoptnum (char const *n_str, int min, int *num, char const *err) +@@ -1168,7 +1252,8 @@ getoptnum (char const *n_str, int min, char const *err) a number. */ static void @@ -2534,10 +1650,10 @@ index 419545c..702e025 100644 { if (!*arg) { -@@ -1174,7 +1259,41 @@ getoptarg (char *arg, char switch_char, char *character, int *number) +@@ -1177,7 +1262,41 @@ getoptarg (char *arg, char switch_char, char *character, int *number) } - if (!ISDIGIT (*arg)) + if (!c_isdigit (*arg)) - *character = *arg++; + { +#ifdef HAVE_MBRTOWC @@ -2577,7 +1693,7 @@ index 419545c..702e025 100644 if (*arg) { long int tmp_long; -@@ -1203,6 +1322,11 @@ static void +@@ -1206,6 +1325,11 @@ static void init_parameters (int number_of_files) { int chars_used_by_number = 0; @@ -2589,7 +1705,7 @@ index 419545c..702e025 100644 lines_per_body = lines_per_page - lines_per_header - lines_per_footer; if (lines_per_body <= 0) -@@ -1240,7 +1364,7 @@ init_parameters (int number_of_files) +@@ -1243,7 +1367,7 @@ init_parameters (int number_of_files) else col_sep_string = column_separator; @@ -2598,7 +1714,7 @@ index 419545c..702e025 100644 use_col_separator = true; } /* It's rather pointless to define a TAB separator with column -@@ -1272,11 +1396,11 @@ init_parameters (int number_of_files) +@@ -1275,11 +1399,11 @@ init_parameters (int number_of_files) + TAB_WIDTH (chars_per_input_tab, chars_per_number); */ /* Estimate chars_per_text without any margin and keep it constant. */ @@ -2612,7 +1728,7 @@ index 419545c..702e025 100644 /* The number is part of the column width unless we are printing files in parallel. */ -@@ -1285,7 +1409,7 @@ init_parameters (int number_of_files) +@@ -1288,7 +1412,7 @@ init_parameters (int number_of_files) } int sep_chars, useful_chars; @@ -2621,7 +1737,7 @@ index 419545c..702e025 100644 sep_chars = INT_MAX; if (ckd_sub (&useful_chars, chars_per_line - chars_used_by_number, sep_chars)) -@@ -1308,7 +1432,7 @@ init_parameters (int number_of_files) +@@ -1311,7 +1435,7 @@ init_parameters (int number_of_files) We've to use 8 as the lower limit, if we use chars_per_default_tab = 8 to expand a tab which is not an input_tab-char. */ free (clump_buff); @@ -2630,7 +1746,7 @@ index 419545c..702e025 100644 } /* Open the necessary files, -@@ -1414,7 +1538,7 @@ init_funcs (void) +@@ -1417,7 +1541,7 @@ init_funcs (void) /* Enlarge p->start_position of first column to use the same form of padding_not_printed with all columns. */ @@ -2639,7 +1755,7 @@ index 419545c..702e025 100644 /* This loop takes care of all but the rightmost column. */ -@@ -1448,7 +1572,7 @@ init_funcs (void) +@@ -1451,7 +1575,7 @@ init_funcs (void) } else { @@ -2648,7 +1764,7 @@ index 419545c..702e025 100644 h_next = h + chars_per_column; } } -@@ -1745,9 +1869,9 @@ static void +@@ -1751,9 +1875,9 @@ static void align_column (COLUMN *p) { padding_not_printed = p->start_position; @@ -2660,9 +1776,9 @@ index 419545c..702e025 100644 padding_not_printed = ANYWHERE; } -@@ -2021,13 +2145,13 @@ store_char (char c) +@@ -2030,13 +2154,13 @@ store_char (char c) /* May be too generous. */ - buff = X2REALLOC (buff, &buff_allocated); + buff = xpalloc (buff, &buff_allocated, 1, -1, sizeof *buff); } - buff[buff_current++] = c; + buff[buff_current++] = (unsigned char) c; @@ -2676,7 +1792,7 @@ index 419545c..702e025 100644 char *s; int num_width; -@@ -2044,22 +2168,24 @@ add_line_number (COLUMN *p) +@@ -2053,22 +2177,24 @@ add_line_number (COLUMN *p) /* Tabification is assumed for multiple columns, also for n-separators, but 'default n-separator = TAB' hasn't been given priority over equal column_width also specified by POSIX. */ @@ -2705,7 +1821,7 @@ index 419545c..702e025 100644 output_position = POS_AFTER_TAB (chars_per_output_tab, output_position); } -@@ -2218,7 +2344,7 @@ print_white_space (void) +@@ -2227,7 +2353,7 @@ print_white_space (void) while (goal - h_old > 1 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) { @@ -2714,7 +1830,7 @@ index 419545c..702e025 100644 h_old = h_new; } while (++h_old <= goal) -@@ -2238,6 +2364,7 @@ print_sep_string (void) +@@ -2247,6 +2373,7 @@ print_sep_string (void) { char const *s = col_sep_string; int l = col_sep_length; @@ -2722,7 +1838,7 @@ index 419545c..702e025 100644 if (separators_not_printed <= 0) { -@@ -2249,6 +2376,7 @@ print_sep_string (void) +@@ -2258,6 +2385,7 @@ print_sep_string (void) { for (; separators_not_printed > 0; --separators_not_printed) { @@ -2730,7 +1846,7 @@ index 419545c..702e025 100644 while (l-- > 0) { /* 3 types of sep_strings: spaces only, spaces and chars, -@@ -2262,12 +2390,15 @@ print_sep_string (void) +@@ -2271,12 +2399,15 @@ print_sep_string (void) } else { @@ -2747,7 +1863,7 @@ index 419545c..702e025 100644 /* sep_string ends with some spaces */ if (spaces_not_printed > 0) print_white_space (); -@@ -2295,7 +2426,7 @@ print_clump (COLUMN *p, int n, char *clump) +@@ -2307,7 +2438,7 @@ print_clump (COLUMN *p, int n, char *clump) required number of tabs and spaces. */ static void @@ -2756,7 +1872,7 @@ index 419545c..702e025 100644 { if (tabify_output) { -@@ -2319,6 +2450,74 @@ print_char (char c) +@@ -2331,6 +2462,74 @@ print_char (char c) putchar (c); } @@ -2831,7 +1947,7 @@ index 419545c..702e025 100644 /* Skip to page PAGE before printing. PAGE may be larger than total number of pages. */ -@@ -2496,9 +2695,9 @@ read_line (COLUMN *p) +@@ -2507,9 +2706,9 @@ read_line (COLUMN *p) align_empty_cols = false; } @@ -2843,7 +1959,7 @@ index 419545c..702e025 100644 padding_not_printed = ANYWHERE; } -@@ -2567,7 +2766,7 @@ print_stored (COLUMN *p) +@@ -2578,7 +2777,7 @@ print_stored (COLUMN *p) COLUMN *q; int line = p->current_line++; @@ -2852,7 +1968,7 @@ index 419545c..702e025 100644 /* FIXME UMR: Uninitialized memory read: * This is occurring while in: -@@ -2579,7 +2778,7 @@ print_stored (COLUMN *p) +@@ -2590,7 +2789,7 @@ print_stored (COLUMN *p) xmalloc [xmalloc.c:94] init_store_cols [pr.c:1648] */ @@ -2861,7 +1977,7 @@ index 419545c..702e025 100644 pad_vertically = true; -@@ -2599,9 +2798,9 @@ print_stored (COLUMN *p) +@@ -2610,9 +2809,9 @@ print_stored (COLUMN *p) } } @@ -2873,7 +1989,7 @@ index 419545c..702e025 100644 padding_not_printed = ANYWHERE; } -@@ -2614,8 +2813,8 @@ print_stored (COLUMN *p) +@@ -2625,8 +2824,8 @@ print_stored (COLUMN *p) if (spaces_not_printed == 0) { output_position = p->start_position + end_vector[line]; @@ -2884,7 +2000,7 @@ index 419545c..702e025 100644 } return true; -@@ -2634,7 +2833,7 @@ print_stored (COLUMN *p) +@@ -2645,7 +2844,7 @@ print_stored (COLUMN *p) number of characters is 1.) */ static int @@ -2893,7 +2009,7 @@ index 419545c..702e025 100644 { unsigned char uc = c; char *s = clump_buff; -@@ -2644,10 +2843,10 @@ char_to_clump (char c) +@@ -2655,10 +2854,10 @@ char_to_clump (char c) int chars; int chars_per_c = 8; @@ -2906,7 +2022,7 @@ index 419545c..702e025 100644 { width = TAB_WIDTH (chars_per_c, input_position); -@@ -2728,6 +2927,164 @@ char_to_clump (char c) +@@ -2739,6 +2938,164 @@ char_to_clump (char c) return chars; } @@ -3072,13 +2188,14 @@ index 419545c..702e025 100644 looking for more options and printing the next batch of files. diff --git a/src/sort.c b/src/sort.c -index e779845..1f5c337 100644 +index 05d00cc..eb51f20 100644 --- a/src/sort.c +++ b/src/sort.c -@@ -28,6 +28,14 @@ - #include +@@ -30,6 +30,15 @@ #include #include + #include ++ +#if HAVE_WCHAR_H +# include +#endif @@ -3090,7 +2207,7 @@ index e779845..1f5c337 100644 #include "system.h" #include "argmatch.h" #include "assure.h" -@@ -157,14 +165,39 @@ static int thousands_sep; +@@ -160,14 +169,39 @@ static int thousands_sep; /* We currently ignore multi-byte grouping chars. */ static bool thousands_sep_ignored; @@ -3131,7 +2248,7 @@ index e779845..1f5c337 100644 /* The kind of blanks for '-b' to skip in various options. */ enum blanktype { bl_start, bl_end, bl_both }; -@@ -341,13 +374,11 @@ static bool stable; +@@ -344,13 +378,11 @@ static bool stable; /* An int value outside char range. */ enum { NON_CHAR = CHAR_MAX + 1 }; @@ -3148,9 +2265,9 @@ index e779845..1f5c337 100644 /* Flag to remove consecutive duplicate lines from the output. Only the last of a sequence of equal lines will be output. */ -@@ -803,6 +834,46 @@ reap_all (void) - reap (-1); - } +@@ -386,6 +418,46 @@ struct tempnode + static struct tempnode *volatile temphead; + static struct tempnode *volatile *temptail = &temphead; +/* Function pointers. */ +static void @@ -3195,7 +2312,7 @@ index e779845..1f5c337 100644 /* Clean up any remaining temporary files. */ static void -@@ -1270,7 +1341,7 @@ zaptemp (char const *name) +@@ -1343,7 +1415,7 @@ zaptemp (char const *name) free (node); } @@ -3204,7 +2321,7 @@ index e779845..1f5c337 100644 static int struct_month_cmp (void const *m1, void const *m2) -@@ -1285,7 +1356,7 @@ struct_month_cmp (void const *m1, void const *m2) +@@ -1358,7 +1430,7 @@ struct_month_cmp (void const *m1, void const *m2) /* Initialize the character class tables. */ static void @@ -3213,7 +2330,7 @@ index e779845..1f5c337 100644 { size_t i; -@@ -1297,7 +1368,7 @@ inittables (void) +@@ -1370,7 +1442,7 @@ inittables (void) fold_toupper[i] = toupper (i); } @@ -3222,7 +2339,7 @@ index e779845..1f5c337 100644 /* If we're not in the "C" locale, read different names for months. */ if (hard_LC_TIME) { -@@ -1379,6 +1450,84 @@ specify_nmerge (int oi, char c, char const *s) +@@ -1450,6 +1522,84 @@ specify_nmerge (int oi, char c, char const *s) xstrtol_fatal (e, oi, c, long_options, s); } @@ -3307,7 +2424,7 @@ index e779845..1f5c337 100644 /* Specify the amount of main memory to use when sorting. */ static void specify_sort_size (int oi, char c, char const *s) -@@ -1610,7 +1759,7 @@ buffer_linelim (struct buffer const *buf) +@@ -1676,7 +1826,7 @@ buffer_linelim (struct buffer const *buf) by KEY in LINE. */ static char * @@ -3316,7 +2433,7 @@ index e779845..1f5c337 100644 { char *ptr = line->text, *lim = ptr + line->length - 1; size_t sword = key->sword; -@@ -1619,10 +1768,10 @@ begfield (struct line const *line, struct keyfield const *key) +@@ -1685,10 +1835,10 @@ begfield (struct line const *line, struct keyfield const *key) /* The leading field separator itself is included in a field when -t is absent. */ @@ -3329,7 +2446,7 @@ index e779845..1f5c337 100644 ++ptr; if (ptr < lim) ++ptr; -@@ -1648,12 +1797,71 @@ begfield (struct line const *line, struct keyfield const *key) +@@ -1718,12 +1868,71 @@ begfield (struct line const *line, struct keyfield const *key) return ptr; } @@ -3402,7 +2519,7 @@ index e779845..1f5c337 100644 { char *ptr = line->text, *lim = ptr + line->length - 1; size_t eword = key->eword, echar = key->echar; -@@ -1668,10 +1876,10 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1738,10 +1947,10 @@ limfield (struct line const *line, struct keyfield const *key) 'beginning' is the first character following the delimiting TAB. Otherwise, leave PTR pointing at the first 'blank' character after the preceding field. */ @@ -3415,7 +2532,7 @@ index e779845..1f5c337 100644 ++ptr; if (ptr < lim && (eword || echar)) ++ptr; -@@ -1717,10 +1925,10 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1787,10 +1996,10 @@ limfield (struct line const *line, struct keyfield const *key) */ /* Make LIM point to the end of (one byte past) the current field. */ @@ -3428,7 +2545,7 @@ index e779845..1f5c337 100644 if (newlim) lim = newlim; } -@@ -1751,6 +1959,130 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1825,6 +2034,130 @@ limfield (struct line const *line, struct keyfield const *key) return ptr; } @@ -3559,7 +2676,7 @@ index e779845..1f5c337 100644 /* Fill BUF reading from FP, moving buf->left bytes from the end of buf->buf to the beginning first. If EOF is reached and the file wasn't terminated by a newline, supply one. Set up BUF's line -@@ -1837,8 +2169,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) +@@ -1911,8 +2244,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) else { if (key->skipsblanks) @@ -3584,7 +2701,7 @@ index e779845..1f5c337 100644 line->keybeg = line_start; } } -@@ -1976,12 +2322,10 @@ find_unit_order (char const *number) +@@ -2050,12 +2397,10 @@ find_unit_order (char const *number) ATTRIBUTE_PURE static int @@ -3600,7 +2717,7 @@ index e779845..1f5c337 100644 int diff = find_unit_order (a) - find_unit_order (b); return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep)); -@@ -1993,7 +2337,7 @@ human_numcompare (char const *a, char const *b) +@@ -2067,7 +2412,7 @@ human_numcompare (char const *a, char const *b) ATTRIBUTE_PURE static int @@ -3609,7 +2726,7 @@ index e779845..1f5c337 100644 { while (blanks[to_uchar (*a)]) a++; -@@ -2003,6 +2347,25 @@ numcompare (char const *a, char const *b) +@@ -2077,6 +2422,25 @@ numcompare (char const *a, char const *b) return strnumcmp (a, b, decimal_point, thousands_sep); } @@ -3635,7 +2752,7 @@ index e779845..1f5c337 100644 static int nan_compare (long double a, long double b) { -@@ -2044,7 +2407,7 @@ general_numcompare (char const *sa, char const *sb) +@@ -2118,7 +2482,7 @@ general_numcompare (char const *sa, char const *sb) Return 0 if the name in S is not recognized. */ static int @@ -3644,7 +2761,7 @@ index e779845..1f5c337 100644 { size_t lo = 0; size_t hi = MONTHS_PER_YEAR; -@@ -2320,15 +2683,14 @@ debug_key (struct line const *line, struct keyfield const *key) +@@ -2457,15 +2821,14 @@ debug_key (struct line const *line, struct keyfield const *key) char saved = *lim; *lim = '\0'; @@ -3662,7 +2779,7 @@ index e779845..1f5c337 100644 else if (key->general_numeric) ignore_value (strtold (beg, &tighter_lim)); else if (key->numeric || key->human_numeric) -@@ -2474,7 +2836,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2611,7 +2974,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) /* Warn about significant leading blanks. */ bool implicit_skip = key_numeric (key) || key->month; bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ @@ -3671,7 +2788,7 @@ index e779845..1f5c337 100644 && ((!key->skipsblanks && !implicit_skip) || (!key->skipsblanks && key->schar) || (!key->skipeblanks && key->echar))) -@@ -2522,9 +2884,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2659,9 +3022,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) bool number_locale_warned = false; if (basic_numeric_field_span) { @@ -3684,7 +2801,7 @@ index e779845..1f5c337 100644 { error (0, 0, _("field separator %s is treated as a " -@@ -2535,9 +2897,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2672,9 +3035,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) } if (basic_numeric_field_span || general_numeric_field_span) { @@ -3697,7 +2814,7 @@ index e779845..1f5c337 100644 { error (0, 0, _("field separator %s is treated as a " -@@ -2545,19 +2907,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2682,19 +3045,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) quote (((char []) {decimal_point, 0}))); number_locale_warned = true; } @@ -3721,17 +2838,8 @@ index e779845..1f5c337 100644 } } -@@ -2568,7 +2930,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) - { - error (0, 0, - _("%snumbers use %s as a decimal point in this locale"), -- tab == decimal_point ? "" : _("note "), -+ (tab_length && tab[0] == decimal_point) ? "" : _("note "), - quote (((char []) {decimal_point, 0}))); - - } -@@ -2610,11 +2972,87 @@ diff_reversed (int diff, bool reversed) - return reversed ? (diff < 0) - (diff > 0) : diff; +@@ -2746,11 +3109,87 @@ diff_reversed (int diff, bool reversed) + return reversed ? _GL_CMP (0, diff) : diff; } +#if HAVE_MBRTOWC @@ -3819,7 +2927,7 @@ index e779845..1f5c337 100644 { struct keyfield *key = keylist; -@@ -2695,7 +3133,7 @@ keycompare (struct line const *a, struct line const *b) +@@ -2831,7 +3270,7 @@ keycompare (struct line const *a, struct line const *b) else if (key->human_numeric) diff = human_numcompare (ta, tb); else if (key->month) @@ -3828,7 +2936,7 @@ index e779845..1f5c337 100644 else if (key->random) diff = compare_random (ta, tlena, tb, tlenb); else if (key->version) -@@ -2805,6 +3243,211 @@ keycompare (struct line const *a, struct line const *b) +@@ -2941,6 +3380,211 @@ keycompare (struct line const *a, struct line const *b) return diff_reversed (diff, key->reverse); } @@ -4040,7 +3148,7 @@ index e779845..1f5c337 100644 /* Compare two lines A and B, returning negative, zero, or positive depending on whether A compares less than, equal to, or greater than B. */ -@@ -2832,7 +3475,7 @@ compare (struct line const *a, struct line const *b) +@@ -2968,7 +3612,7 @@ compare (struct line const *a, struct line const *b) diff = - NONZERO (blen); else if (blen == 0) diff = 1; @@ -4049,7 +3157,7 @@ index e779845..1f5c337 100644 { /* xmemcoll0 is a performance enhancement as it will not unconditionally write '\0' after the -@@ -4220,6 +4863,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) +@@ -4340,6 +4984,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) break; case 'f': key->translate = fold_toupper; @@ -4057,7 +3165,7 @@ index e779845..1f5c337 100644 break; case 'g': key->general_numeric = true; -@@ -4299,7 +4943,7 @@ main (int argc, char **argv) +@@ -4419,7 +5064,7 @@ main (int argc, char **argv) initialize_exit_failure (SORT_FAILURE); hard_LC_COLLATE = hard_locale (LC_COLLATE); @@ -4066,7 +3174,7 @@ index e779845..1f5c337 100644 hard_LC_TIME = hard_locale (LC_TIME); #endif -@@ -4322,6 +4966,29 @@ main (int argc, char **argv) +@@ -4442,6 +5087,29 @@ main (int argc, char **argv) thousands_sep = NON_CHAR; } @@ -4096,7 +3204,7 @@ index e779845..1f5c337 100644 have_read_stdin = false; inittables (); -@@ -4592,13 +5259,34 @@ main (int argc, char **argv) +@@ -4717,13 +5385,34 @@ main (int argc, char **argv) case 't': { @@ -4129,13 +3237,13 @@ index e779845..1f5c337 100644 +#endif + if (newtab_length == 1 && optarg[1]) { - if (STREQ (optarg, "\\0")) + if (streq (optarg, "\\0")) - newtab = '\0'; + newtab[0] = '\0'; else { /* Provoke with 'sort -txx'. Complain about -@@ -4609,9 +5297,11 @@ main (int argc, char **argv) +@@ -4734,9 +5423,11 @@ main (int argc, char **argv) quote (optarg)); } } @@ -4150,10 +3258,10 @@ index e779845..1f5c337 100644 break; diff --git a/src/unexpand.c b/src/unexpand.c -index 5a2283f..f24ef76 100644 +index ff234d7..7c36ef6 100644 --- a/src/unexpand.c +++ b/src/unexpand.c -@@ -38,6 +38,9 @@ +@@ -39,6 +39,9 @@ #include #include #include @@ -4163,7 +3271,7 @@ index 5a2283f..f24ef76 100644 #include "system.h" #include "expand-common.h" -@@ -104,24 +107,47 @@ unexpand (void) +@@ -105,24 +108,46 @@ unexpand (void) { /* Input stream. */ FILE *fp = next_file (nullptr); @@ -4183,28 +3291,27 @@ index 5a2283f..f24ef76 100644 if (!fp) return; + + mbf_init (mbf, fp); + found_bom=check_bom(fp,&mbf); + + if (using_utf_locale == false && found_bom == true) -+ { -+ /*try using some predefined locale */ - -+ if (set_utf_locale () != 0) + { -+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale")); ++ /* Try using some predefined locale */ ++ if (set_utf_locale () != 0) ++ { ++ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale")); ++ } + } -+ } ++ /* The worst case is a non-blank character, then one blank, then a tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so allocate MAX_COLUMN_WIDTH bytes to store the blanks. */ -- pending_blank = xmalloc (max_column_width); -+ pending_blank = xmalloc (max_column_width * sizeof (mbf_char_t)); +- pending_blank = ximalloc (max_column_width); ++ pending_blank = ximalloc (max_column_width * sizeof (mbf_char_t)); + + if (found_bom == true) -+ { + print_bom(); -+ } while (true) { @@ -4214,7 +3321,7 @@ index 5a2283f..f24ef76 100644 /* If true, perform translations. */ bool convert = true; -@@ -155,12 +181,44 @@ unexpand (void) +@@ -156,12 +181,44 @@ unexpand (void) do { @@ -4262,10 +3369,10 @@ index 5a2283f..f24ef76 100644 if (blank) { -@@ -177,16 +235,16 @@ unexpand (void) - if (next_tab_column < column) - error (EXIT_FAILURE, 0, _("input line is too long")); +@@ -175,16 +232,16 @@ unexpand (void) + if (convert) + { - if (c == '\t') + if (mb_iseq (c, '\t')) { @@ -4282,7 +3389,7 @@ index 5a2283f..f24ef76 100644 if (! (prev_blank && column == next_tab_column)) { -@@ -194,13 +252,14 @@ unexpand (void) +@@ -192,13 +249,14 @@ unexpand (void) will be replaced by tabs. */ if (column == next_tab_column) one_blank_before_tab_stop = true; @@ -4299,7 +3406,7 @@ index 5a2283f..f24ef76 100644 } /* Discard pending blanks, unless it was a single -@@ -208,7 +267,7 @@ unexpand (void) +@@ -206,7 +264,7 @@ unexpand (void) pending = one_blank_before_tab_stop; } } @@ -4308,7 +3415,7 @@ index 5a2283f..f24ef76 100644 { /* Go back one column, and force recalculation of the next tab stop. */ -@@ -218,16 +277,20 @@ unexpand (void) +@@ -216,16 +274,20 @@ unexpand (void) } else { @@ -4333,7 +3440,7 @@ index 5a2283f..f24ef76 100644 write_error (); pending = 0; one_blank_before_tab_stop = false; -@@ -237,16 +300,17 @@ unexpand (void) +@@ -235,16 +297,17 @@ unexpand (void) convert &= convert_entire_line || blank; } @@ -4354,173 +3461,8 @@ index 5a2283f..f24ef76 100644 } } -diff --git a/src/uniq.c b/src/uniq.c -index fab04de..2e96dcb 100644 ---- a/src/uniq.c -+++ b/src/uniq.c -@@ -21,6 +21,17 @@ - #include - #include - -+/* Get mbstate_t, mbrtowc(). */ -+#if HAVE_WCHAR_H -+# include -+#endif -+ -+/* Get isw* functions. */ -+#if HAVE_WCTYPE_H -+# include -+#endif -+#include -+ - #include "system.h" - #include "argmatch.h" - #include "linebuffer.h" -@@ -31,6 +42,18 @@ - #include "memcasecmp.h" - #include "quote.h" - -+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC -+ installation; work around this configuration error. */ -+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 -+# define MB_LEN_MAX 16 -+#endif -+ -+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ -+#if HAVE_MBRTOWC && defined mbstate_t -+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) -+#endif -+ -+ - /* The official name of this program (e.g., no 'g' prefix). */ - #define PROGRAM_NAME "uniq" - -@@ -137,6 +160,10 @@ enum - GROUP_OPTION = CHAR_MAX + 1 - }; - -+/* Function pointers. */ -+static char * -+(*find_field) (struct linebuffer *line); -+ - static struct option const longopts[] = - { - {"count", no_argument, nullptr, 'c'}, -@@ -252,7 +279,7 @@ size_opt (char const *opt, char const *msgid) - - ATTRIBUTE_PURE - static char * --find_field (struct linebuffer const *line) -+find_field_uni (struct linebuffer *line) - { - size_t count; - char const *lp = line->buffer; -@@ -272,6 +299,83 @@ find_field (struct linebuffer const *line) - return line->buffer + i; - } - -+#if HAVE_MBRTOWC -+ -+# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ -+ do \ -+ { \ -+ mbstate_t state_bak; \ -+ \ -+ CONVFAIL = 0; \ -+ state_bak = *STATEP; \ -+ \ -+ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ -+ \ -+ switch (MBLENGTH) \ -+ { \ -+ case (size_t)-2: \ -+ case (size_t)-1: \ -+ *STATEP = state_bak; \ -+ CONVFAIL++; \ -+ /* Fall through */ \ -+ case 0: \ -+ MBLENGTH = 1; \ -+ } \ -+ } \ -+ while (0) -+ -+static char * -+find_field_multi (struct linebuffer *line) -+{ -+ size_t count; -+ char *lp = line->buffer; -+ size_t size = line->length - 1; -+ size_t pos; -+ size_t mblength; -+ wchar_t wc; -+ mbstate_t *statep; -+ int convfail = 0; -+ -+ pos = 0; -+ statep = &(line->state); -+ -+ /* skip fields. */ -+ for (count = 0; count < skip_fields && pos < size; count++) -+ { -+ while (pos < size) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ -+ if (convfail || !(iswblank (wc) || wc == '\n')) -+ { -+ pos += mblength; -+ break; -+ } -+ pos += mblength; -+ } -+ -+ while (pos < size) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ -+ if (!convfail && (iswblank (wc) || wc == '\n')) -+ break; -+ -+ pos += mblength; -+ } -+ } -+ -+ /* skip fields. */ -+ for (count = 0; count < skip_chars && pos < size; count++) -+ { -+ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); -+ pos += mblength; -+ } -+ -+ return lp + pos; -+} -+#endif -+ - /* Return false if two strings OLD and NEW match, true if not. - OLD and NEW point not to the beginnings of the lines - but rather to the beginnings of the fields to compare. -@@ -495,6 +599,19 @@ main (int argc, char **argv) - - atexit (close_stdout); - -+#if HAVE_MBRTOWC -+ if (MB_CUR_MAX > 1) -+ { -+ find_field = find_field_multi; -+ } -+ else -+#endif -+ { -+ find_field = find_field_uni; -+ } -+ -+ -+ - skip_chars = 0; - skip_fields = 0; - check_chars = SIZE_MAX; diff --git a/tests/Coreutils.pm b/tests/Coreutils.pm -index f147401..3ce5da9 100644 +index b55fb9d..ac80f49 100644 --- a/tests/Coreutils.pm +++ b/tests/Coreutils.pm @@ -269,6 +269,9 @@ sub run_tests ($$$$$) @@ -4534,7 +3476,7 @@ index f147401..3ce5da9 100644 { warn "$program_name: $test_name: test name is too long (> $max)\n"; diff --git a/tests/expand/mb.sh b/tests/expand/mb.sh -new file mode 100755 +new file mode 100644 index 0000000..dd6007c --- /dev/null +++ b/tests/expand/mb.sh @@ -4723,7 +3665,7 @@ index 0000000..dd6007c + +exit $fail diff --git a/tests/i18n/sort.sh b/tests/i18n/sort.sh -new file mode 100755 +new file mode 100644 index 0000000..26c95de --- /dev/null +++ b/tests/i18n/sort.sh @@ -4758,11 +3700,11 @@ index 0000000..26c95de + +Exit $fail diff --git a/tests/local.mk b/tests/local.mk -index b74a4a2..fe6e557 100644 +index 53fc53e..0148422 100644 --- a/tests/local.mk +++ b/tests/local.mk -@@ -384,6 +384,8 @@ all_tests = \ - tests/sort/sort-discrim.sh \ +@@ -412,6 +412,8 @@ all_tests = \ + tests/sort/sort-field-limit.sh \ tests/sort/sort-files0-from.pl \ tests/sort/sort-float.sh \ + tests/misc/sort-mb-tests.sh \ @@ -4770,7 +3712,7 @@ index b74a4a2..fe6e557 100644 tests/sort/sort-h-thousands-sep.sh \ tests/sort/sort-merge.pl \ tests/sort/sort-merge-fdlimit.sh \ -@@ -585,6 +587,7 @@ all_tests = \ +@@ -618,6 +620,7 @@ all_tests = \ tests/du/threshold.sh \ tests/du/trailing-slash.sh \ tests/du/two-args.sh \ @@ -4778,7 +3720,7 @@ index b74a4a2..fe6e557 100644 tests/id/gnu-zero-uids.sh \ tests/id/no-context.sh \ tests/id/context.sh \ -@@ -738,6 +741,7 @@ all_tests = \ +@@ -774,6 +777,7 @@ all_tests = \ tests/touch/read-only.sh \ tests/touch/relative.sh \ tests/touch/trailing-slash.sh \ @@ -4787,7 +3729,7 @@ index b74a4a2..fe6e557 100644 # See tests/factor/create-test.sh. diff --git a/tests/misc/expand.pl b/tests/misc/expand.pl -index 06261ac..7dd813e 100755 +index 4b07210..68b9ea1 100755 --- a/tests/misc/expand.pl +++ b/tests/misc/expand.pl @@ -27,6 +27,15 @@ my $prog = 'expand'; @@ -4853,151 +3795,8 @@ index 06261ac..7dd813e 100755 my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; -diff --git a/tests/misc/fold.pl b/tests/misc/fold.pl -index a94072f..136a82e 100755 ---- a/tests/misc/fold.pl -+++ b/tests/misc/fold.pl -@@ -20,9 +20,18 @@ use strict; - - (my $program_name = $0) =~ s|.*/||; - -+my $prog = 'fold'; -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; - -+# uncommented to enable multibyte paths -+my $mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ - my @Tests = - ( - ['s1', '-w2 -s', {IN=>"a\t"}, {OUT=>"a\n\t"}], -@@ -31,9 +40,48 @@ my @Tests = - ['s4', '-w4 -s', {IN=>"abc ef\n"}, {OUT=>"abc \nef\n"}], - ); - -+# Add _POSIX2_VERSION=199209 to the environment of each test -+# that uses an old-style option like +1. -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether fold is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ -+@Tests = triple_test \@Tests; -+ -+# Remember that triple_test creates from each test with exactly one "IN" -+# file two more tests (.p and .r suffix on name) corresponding to reading -+# input from a file and from a pipe. The pipe-reading test would fail -+# due to a race condition about 1 in 20 times. -+# Remove the IN_PIPE version of the "output-is-input" test above. -+# The others aren't susceptible because they have three inputs each. -+@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - --my $prog = 'fold'; - my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); - exit $fail; -diff --git a/tests/misc/join.pl b/tests/misc/join.pl -index 2ca8567..1d01a3d 100755 ---- a/tests/misc/join.pl -+++ b/tests/misc/join.pl -@@ -25,6 +25,15 @@ my $limits = getlimits (); - - my $prog = 'join'; - -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ -+my $mb_locale; -+#Comment out next line to disable multibyte tests -+$mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ - my $delim = chr 0247; - sub t_subst ($) - { -@@ -333,8 +342,49 @@ foreach my $t (@tv) - push @Tests, $new_ent; - } - -+# Add _POSIX2_VERSION=199209 to the environment of each test -+# that uses an old-style option like +1. -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether join is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ #Adjust the output some error messages including test_name for mb -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR}} -+ (@new_t)) -+ { -+ my $sub2 = {ERR_SUBST => "s/$test_name-mb/$test_name/"}; -+ push @new_t, $sub2; -+ push @$t, $sub2; -+ } -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ - @Tests = triple_test \@Tests; - -+#skip invalid-j-mb test, it is failing because of the format -+@Tests = grep {$_->[0] ne 'invalid-j-mb'} @Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - diff --git a/tests/misc/sort-mb-tests.sh b/tests/misc/sort-mb-tests.sh -new file mode 100755 +new file mode 100644 index 0000000..11836ba --- /dev/null +++ b/tests/misc/sort-mb-tests.sh @@ -5048,7 +3847,7 @@ index 0000000..11836ba + +Exit $fail diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl -index d78a1bc..2b9137d 100755 +index bb7469c..c1dec95 100755 --- a/tests/misc/unexpand.pl +++ b/tests/misc/unexpand.pl @@ -27,6 +27,14 @@ my $limits = getlimits (); @@ -5066,7 +3865,7 @@ index d78a1bc..2b9137d 100755 my @Tests = ( ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}], -@@ -128,6 +136,37 @@ my @Tests = +@@ -132,6 +140,37 @@ my @Tests = ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}], ); @@ -5105,7 +3904,7 @@ index d78a1bc..2b9137d 100755 my $verbose = $ENV{VERBOSE}; diff --git a/tests/pr/pr-tests.pl b/tests/pr/pr-tests.pl -index eafc13d..c1eca2a 100755 +index 60e6106..3c64a08 100755 --- a/tests/pr/pr-tests.pl +++ b/tests/pr/pr-tests.pl @@ -24,6 +24,15 @@ use strict; @@ -5174,7 +3973,7 @@ index eafc13d..c1eca2a 100755 my $verbose = $ENV{VERBOSE}; diff --git a/tests/sort/sort-merge.pl b/tests/sort/sort-merge.pl -index bd439ef..2ccdf87 100755 +index a3204d3..40942a5 100755 --- a/tests/sort/sort-merge.pl +++ b/tests/sort/sort-merge.pl @@ -26,6 +26,15 @@ my $prog = 'sort'; @@ -5234,7 +4033,7 @@ index bd439ef..2ccdf87 100755 my $verbose = $ENV{VERBOSE}; diff --git a/tests/sort/sort.pl b/tests/sort/sort.pl -index 46f1d7a..bb38f5b 100755 +index 5fa9d52..a66952a 100755 --- a/tests/sort/sort.pl +++ b/tests/sort/sort.pl @@ -24,10 +24,15 @@ my $prog = 'sort'; @@ -5254,7 +4053,7 @@ index 46f1d7a..bb38f5b 100755 # Since each test is run with a file name and with redirected stdin, # the name in the diagnostic is either the file name or "-". # Normalize each diagnostic to use '-'. -@@ -423,6 +428,38 @@ foreach my $t (@Tests) +@@ -428,6 +433,38 @@ foreach my $t (@Tests) } } @@ -5293,7 +4092,7 @@ index 46f1d7a..bb38f5b 100755 @Tests = triple_test \@Tests; # Remember that triple_test creates from each test with exactly one "IN" -@@ -432,6 +469,7 @@ foreach my $t (@Tests) +@@ -437,6 +474,7 @@ foreach my $t (@Tests) # Remove the IN_PIPE version of the "output-is-input" test above. # The others aren't susceptible because they have three inputs each. @Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; @@ -5302,7 +4101,7 @@ index 46f1d7a..bb38f5b 100755 my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh -new file mode 100755 +new file mode 100644 index 0000000..8a82d74 --- /dev/null +++ b/tests/unexpand/mb.sh @@ -5479,82 +4278,6 @@ index 0000000..8a82d74 + +LC_ALL=C unexpand in in > out || fail=1 +compare exp out > /dev/null 2>&1 || fail=1 -diff --git a/tests/uniq/uniq.pl b/tests/uniq/uniq.pl -index a6354dc..e43cd6e 100755 ---- a/tests/uniq/uniq.pl -+++ b/tests/uniq/uniq.pl -@@ -23,9 +23,17 @@ my $limits = getlimits (); - my $prog = 'uniq'; - my $try = "Try '$prog --help' for more information.\n"; - -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - # Turn off localization of executable's output. - @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; - -+my $mb_locale; -+#Comment out next line to disable multibyte tests -+$mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ - # When possible, create a "-z"-testing variant of each test. - sub add_z_variants($) - { -@@ -262,6 +270,53 @@ foreach my $t (@Tests) - and push @$t, {ENV=>'_POSIX2_VERSION=199209'}; - } - -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether uniq is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ # In test #145, replace the each ‘...’ by '...'. -+ if ($test_name =~ "145") -+ { -+ my $sub = { ERR_SUBST => "s/‘([^’]+)’/'\$1'/g"}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ next if ( $test_name =~ "schar" -+ or $test_name =~ "^obs-plus" -+ or $test_name =~ "119"); -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ -+# Remember that triple_test creates from each test with exactly one "IN" -+# file two more tests (.p and .r suffix on name) corresponding to reading -+# input from a file and from a pipe. The pipe-reading test would fail -+# due to a race condition about 1 in 20 times. -+# Remove the IN_PIPE version of the "output-is-input" test above. -+# The others aren't susceptible because they have three inputs each. -+ -+@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+ - @Tests = add_z_variants \@Tests; - @Tests = triple_test \@Tests; - -- -2.43.0 +2.52.0 diff --git a/coreutils-python3.patch b/coreutils-python3.patch new file mode 100644 index 0000000..447fdbc --- /dev/null +++ b/coreutils-python3.patch @@ -0,0 +1,65 @@ +From 8927d505ecb5334f09c48ef98ef1f464f581d0f7 Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Tue, 2 Apr 2024 14:11:26 +0100 +Subject: [PATCH] coreutils-python3.patch + +--- + init.cfg | 4 ++-- + tests/d_type-check | 2 +- + tests/du/move-dir-while-traversing.sh | 6 +++--- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/init.cfg b/init.cfg +index ac05f7b..26d9516 100644 +--- a/init.cfg ++++ b/init.cfg +@@ -601,10 +601,10 @@ seek_data_capable_() + # Skip the current test if "." lacks d_type support. + require_dirent_d_type_() + { +- python < /dev/null \ ++ python3 < /dev/null \ + || skip_ python missing: assuming no d_type support + +- python "$abs_srcdir"/tests/d_type-check \ ++ python3 "$abs_srcdir"/tests/d_type-check \ + || skip_ requires d_type support + } + +diff --git a/tests/d_type-check b/tests/d_type-check +index 1a2f76f..42d3924 100644 +--- a/tests/d_type-check ++++ b/tests/d_type-check +@@ -1,4 +1,4 @@ +-#!/usr/bin/python ++#!/usr/bin/python3 + # Exit 0 if "." and "./tempfile" have useful d_type information, else 1. + # Intended to exit 0 only on Linux/GNU systems. + import os +diff --git a/tests/du/move-dir-while-traversing.sh b/tests/du/move-dir-while-traversing.sh +index adf482b..cf9214a 100755 +--- a/tests/du/move-dir-while-traversing.sh ++++ b/tests/du/move-dir-while-traversing.sh +@@ -21,8 +21,8 @@ print_ver_ du + require_trap_signame_ + + # We use a python-inotify script, so... +-python -m pyinotify -h > /dev/null \ +- || skip_ 'python inotify package not installed' ++python3 -m pyinotify -h > /dev/null \ ++ || skip_ 'python3 inotify package not installed' + + # Move a directory "up" while du is processing its sub-directories. + # While du is processing a hierarchy .../B/C/D/... this script +@@ -33,7 +33,7 @@ python -m pyinotify -h > /dev/null \ + # rename syscall before du finishes processing the subtree under D/. + + cat <<'EOF' > inotify-watch-for-dir-access.py +-#!/usr/bin/env python ++#!/usr/bin/env python3 + import pyinotify as pn + import os,sys + +-- +2.51.0 + diff --git a/coreutils-selinux.patch b/coreutils-selinux.patch deleted file mode 100644 index 66e06e1..0000000 --- a/coreutils-selinux.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 88ba186955add2b230c017749d5622f7a0d62177 Mon Sep 17 00:00:00 2001 -From: rpm-build -Date: Wed, 30 Aug 2023 17:19:58 +0200 -Subject: [PATCH] coreutils-selinux.patch - ---- - src/cp.c | 19 ++++++++++++++++++- - src/install.c | 12 +++++++++++- - 2 files changed, 29 insertions(+), 2 deletions(-) - -diff --git a/src/cp.c b/src/cp.c -index 04a5cbe..7a364e5 100644 ---- a/src/cp.c -+++ b/src/cp.c -@@ -989,7 +989,7 @@ main (int argc, char **argv) - selinux_enabled = (0 < is_selinux_enabled ()); - cp_option_init (&x); - -- while ((c = getopt_long (argc, argv, "abdfHilLnprst:uvxPRS:TZ", -+ while ((c = getopt_long (argc, argv, "abcdfHilLnprst:uvxPRS:TZ", - long_opts, nullptr)) - != -1) - { -@@ -1041,6 +1041,23 @@ main (int argc, char **argv) - copy_contents = true; - break; - -+ case 'c': -+ fprintf (stderr, "%s: warning: option '-c' is deprecated," -+ " please use '--preserve=context' instead\n", argv[0]); -+ if (x.set_security_context) -+ { -+ fprintf (stderr, -+ "%s: cannot force target context and preserve it\n", -+ argv[0]); -+ exit (1); -+ } -+ else if (selinux_enabled) -+ { -+ x.preserve_security_context = true; -+ x.require_preserve_context = true; -+ } -+ break; -+ - case 'd': - x.preserve_links = true; - x.dereference = DEREF_NEVER; -diff --git a/src/install.c b/src/install.c -index 31a48f1..ce9fa2d 100644 ---- a/src/install.c -+++ b/src/install.c -@@ -807,7 +807,7 @@ main (int argc, char **argv) - dir_arg = false; - umask (0); - -- while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pt:TvS:Z", long_options, -+ while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pPt:TvS:Z", long_options, - nullptr)) - != -1) - { -@@ -872,6 +872,9 @@ main (int argc, char **argv) - no_target_directory = true; - break; - -+ case 'P': -+ fprintf (stderr, "%s: warning: option '-P' is deprecated," -+ " please use '--preserve-context' instead\n", argv[0]); - case PRESERVE_CONTEXT_OPTION: - if (! selinux_enabled) - { -@@ -879,6 +882,13 @@ main (int argc, char **argv) - "this kernel is not SELinux-enabled")); - break; - } -+ if (x.set_security_context) -+ { -+ fprintf (stderr, -+ "%s: cannot force target context and preserve it\n", -+ argv[0]); -+ exit (1); -+ } - x.preserve_security_context = true; - use_default_selinux_context = false; - break; --- -2.41.0 - diff --git a/coreutils.spec b/coreutils.spec index f1d68b3..6712263 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,7 +1,7 @@ Summary: A set of basic GNU tools commonly used in shell scripts Name: coreutils -Version: 9.4 -Release: 6%{?dist} +Version: 9.9 +Release: 2%{?dist} # some used parts of gnulib are under various variants of LGPL License: GPL-3.0-or-later AND GFDL-1.3-no-invariants-or-later AND LGPL-2.1-or-later AND LGPL-3.0-or-later Url: https://www.gnu.org/software/coreutils/ @@ -26,24 +26,23 @@ Patch101: coreutils-8.26-selinuxenable.patch # downstream changes to default DIR_COLORS Patch102: coreutils-8.32-DIR_COLORS.patch +# use python3 in tests +Patch103: coreutils-python3.patch + # df --direct Patch104: coreutils-df-direct.patch -# fix crash with --enable-systemd -Patch105: coreutils-9.4-systemd-coredump.patch +# gnulib C23 support +# https://github.com/coreutils/gnulib/commit/df17f4f37ed3ca373d23ad42eae51122bdb96626 +Patch105: coreutils-9.9-gnulib-c23.patch -# fix buffer overflow in split (CVE-2024-0684) -Patch106: coreutils-9.4-CVE-2024-0684.patch - -# fix tail on kernels with 64k pagesize -Patch107: coreutils-9.4-tail-64k-pages.patch +# fix cut test failure on aarch64 rawhide (rhbz#2424302) +# https://github.com/coreutils/coreutils/commit/95044cb5eaea83d02f768feb5ab79fcf5e6ad782 +Patch106: coreutils-9.9-fix-cut-test-aarch64.patch # (sb) lin18nux/lsb compliance - multibyte functionality patch Patch800: coreutils-i18n.patch -# downstream SELinux options deprecated since 2009 -Patch950: coreutils-selinux.patch - Conflicts: filesystem < 3 # To avoid clobbering installs @@ -64,19 +63,34 @@ BuildRequires: libselinux-utils BuildRequires: make BuildRequires: openssl-devel BuildRequires: strace +BuildRequires: systemd-devel BuildRequires: texinfo # For gpg verification of source tarball BuildRequires: gnupg2 # test-only dependencies +BuildRequires: acl +BuildRequires: gdb BuildRequires: perl-interpreter BuildRequires: perl(FileHandle) +BuildRequires: python3 +BuildRequires: tzdata +%ifarch %valgrind_arches +BuildRequires: valgrind +%endif + +%if 0%{?fedora} +BuildRequires: perl(Expect) +BuildRequires: python3-inotify +%endif + %if 23 < 0%{?fedora} || 7 < 0%{?rhel} # needed by i18n test-cases BuildRequires: glibc-langpack-en BuildRequires: glibc-langpack-fr BuildRequires: glibc-langpack-ko +BuildRequires: glibc-langpack-sv %endif Requires: %{name}-common = %{version}-%{release} @@ -111,6 +125,9 @@ packaged as a single multicall binary. # https://bugzilla.redhat.com/show_bug.cgi?id=1107973#c7 Obsoletes: %{name} < 8.24-100 +# Gnulib translations are maintained seprately since coreutils 9.6 (#2393892) +Requires: gnulib-l10n + # info doc refers to "Specifying the Time Zone" from glibc-doc (#959597) Suggests: glibc-doc @@ -145,16 +162,15 @@ sed src/dircolors.hin \ find tests -name '*.sh' -perm 0644 -print -exec chmod 0755 '{}' '+' (echo "<<< done") 2>/dev/null +# FIXME: Force a newer gettext version to workaround `autoreconf -i` errors +# with coreutils 9.6 and bundled gettext 0.19.2 from gettext-common-devel. +sed -i "s/0.19.2/$(rpm -q --queryformat '%%{VERSION}\n' gettext-devel)/" bootstrap.conf configure.ac + autoreconf -fiv %build export CFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing -fpic" -# disable -flto on ppc64le to make test-float pass (#1789115) -%ifarch ppc64le -CFLAGS="$CFLAGS -fno-lto" -%endif - # Upstream suggests to build with -Dlint for static analyzers: # https://lists.gnu.org/archive/html/coreutils/2018-06/msg00110.html # ... and even for production binary RPMs: @@ -210,14 +226,20 @@ for type in separate single; do fi (cd $type && make DESTDIR=$RPM_BUILD_ROOT/$subdir $install) +%if "%{_sbindir}" != "%{_bindir}" # chroot was in /usr/sbin : - mkdir -p $RPM_BUILD_ROOT/$subdir/{%{_bindir},%{_sbindir}} + mkdir -p $RPM_BUILD_ROOT/$subdir/%_sbindir mv $RPM_BUILD_ROOT/$subdir/{%_bindir,%_sbindir}/chroot +%endif # Move multicall variants to *.single. # RemovePathPostfixes will strip that later. if test $type = 'single'; then - for dir in %{_bindir} %{_sbindir} %{_libexecdir}/%{name}; do + for dir in %{_bindir} \ +%if "%{_sbindir}" != "%{_bindir}" +%{_sbindir} \ +%endif +%{_libexecdir}/%{name}; do for bin in $RPM_BUILD_ROOT/%{_libexecdir}/%{name}/$dir/*; do basebin=$(basename $bin) mv $bin $RPM_BUILD_ROOT/$dir/$basebin.single @@ -245,7 +267,9 @@ rm -f $RPM_BUILD_ROOT%{_infodir}/dir %files single %{_bindir}/*.single +%if "%{_sbindir}" != "%{_bindir}" %{_sbindir}/chroot.single +%endif %dir %{_libexecdir}/coreutils %{_libexecdir}/coreutils/*.so.single # duplicate the license because coreutils-common does not need to be installed @@ -262,6 +286,85 @@ rm -f $RPM_BUILD_ROOT%{_infodir}/dir %license COPYING %changelog +* Tue Jan 13 2026 Lukáš Zaoral - 9.9-2 +- fix cut test failure on aarch64 rawhide (rhbz#2424302) + +* Wed Nov 26 2025 Lukáš Zaoral - 9.9-1 +- rebase to latest upstream release (rhbz#2413803) + +* Mon Sep 29 2025 Lukáš Zaoral - 9.8-3 +- require gnulib-l10n for translations of gnulib messages (rhbz#2393892) + +* Thu Sep 25 2025 Lukáš Zaoral - 9.8-2 +- tail: fix tailing larger number of lines in regular files (rhbz#2398008) + +* Wed Sep 24 2025 Lukáš Zaoral - 9.8-1 +- rebase to latest upstream release (rhbz#2397467) +- remove downstream patch for selinux options deprecated since 2009 + +* Wed Jul 23 2025 Fedora Release Engineering - 9.7-5 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_43_Mass_Rebuild + +* Mon Jun 30 2025 Lukáš Zaoral - 9.7-4 +- stty: add support for arbitrary baud rates (rhbz#2375439) + +* Wed May 28 2025 Lukáš Zaoral - 9.7-3 +- sort: fix buffer under-read (CVE-2025-5278) + +* Mon May 19 2025 Lukáš Zaoral - 9.7-2 +- cp/mv: do not fail when copying of trivial NFSv4 ACLs fails (rhbz#2363149) + +* Wed Apr 09 2025 Lukáš Zaoral - 9.7-1 +- rebase to latest upstream release (rhbz#2358624) + +* Tue Feb 25 2025 Lukáš Zaoral - 9.6-2 +- fix 'who -m' with guessed tty names (rhbz#2343998) + +* Mon Jan 20 2025 Lukáš Zaoral - 9.6-1 +- rebase to latest upstream version (rhbz#2338620) +- sync i18n patch with SUSE (Kudos to Berny Völker!) + +* Thu Jan 16 2025 Fedora Release Engineering - 9.5-13 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_42_Mass_Rebuild + +* Sun Jan 12 2025 Zbigniew Jędrzejewski-Szmek - 9.5-12 +- Rebuilt for the bin-sbin merge (2nd attempt) + +* Wed Nov 13 2024 Florian Weimer - 9.5-11 +- Affinity mask handling in nproc for large CPU counts (rhbz#2325167) + +* Fri Sep 27 2024 Lukáš Zaoral - 9.5-10 +- fix fold -b with UTF8 locale (RHEL-60295) + +* Tue Aug 27 2024 Lukáš Zaoral - 9.5-9 +- show web sessions in who output (rhbz#2307847) + +* Wed Aug 21 2024 Lukáš Zaoral - 9.5-8 +- add missing systemd-devel buildrequires + +* Wed Jul 17 2024 Fedora Release Engineering - 9.5-7 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_41_Mass_Rebuild + +* Mon Jul 15 2024 Lukáš Zaoral - 9.5-6 +- Rebuilt for the bin-sbin merge + +* Mon Jul 15 2024 Sohum Mendon - 9.5-5 +- fix incorrect exit status when fold is called with a non-existent file + +* Tue Jul 09 2024 Zbigniew Jędrzejewski-Szmek - 9.5-4 +- Rebuilt for the bin-sbin merge + +* Thu Jul 04 2024 Lukáš Zaoral - 9.5-3 +- do not buildrequire perl(Expect) on ELN + +* Tue Jun 04 2024 Lukáš Zaoral - 9.5-2 +- enable LTO on ppc64le + +* Tue Apr 02 2024 Lukáš Zaoral - 9.5-1 +- rebase to latest upstream version (rhbz#2272063) +- sync i18n patch with SUSE (Kudos to Berny Völker!) +- add some test dependencies to execute additional part of the upstream test-suite + * Mon Jan 29 2024 Lukáš Zaoral - 9.4-6 - fix tail on kernels with 64k page sizes (RHEL-22866) diff --git a/sources b/sources index ac58478..0952ab1 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -SHA512 (coreutils-9.4.tar.xz) = 7c55ee23b685a0462bbbd118b04d25278c902604a0dcf3bf4f8bf81faa0500dee5a7813cba6f586d676c98e520cafd420f16479619305e94ea6798d8437561f5 -SHA512 (coreutils-9.4.tar.xz.sig) = 9674f783f592c4f3e5c708ff31426ac009bf132fd0005019571bf39c8a1627efb5351c6cecc7faecb1eff8fa2970318666593bffc0eda9c750159e174ef42524 +SHA512 (coreutils-9.9.tar.xz.sig) = 0a3dfdfa6b4234e2e1d42142269f959bdf3cf8f6605a50270a27eff84dd22588f182121f7dd3eeb04be45f5109d02690215065b3d3b43882874d0e165a1435d0 +SHA512 (coreutils-9.9.tar.xz) = e7b0e59f7732d2c098ea4934014f470248bd5c4764210e9200a698010a8e3b95bbb26e543f0cd73ed5a4b8e1f8cda932c73f39954d68175e4deaa47526610c65 diff --git a/tests/readlink-cannot-handle-recursive-symlinks/Makefile b/tests/readlink-cannot-handle-recursive-symlinks/Makefile deleted file mode 100644 index 49d37a7..0000000 --- a/tests/readlink-cannot-handle-recursive-symlinks/Makefile +++ /dev/null @@ -1,63 +0,0 @@ -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# Makefile of /CoreOS/coreutils/readlink-cannot-handle-recursive-symlink s -# Description: Test for readlink cannot handle recursive symlinks -# Author: Jan Scotka -# -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# Copyright (c) 2010 Red Hat, Inc. All rights reserved. -# -# This copyrighted material is made available to anyone wishing -# to use, modify, copy, or redistribute it subject to the terms -# and conditions of the GNU General Public License version 2. -# -# This program is distributed in the hope that it will be -# useful, but WITHOUT ANY WARRANTY; without even the implied -# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -# PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public -# License along with this program; if not, write to the Free -# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. -# -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -export TEST=/CoreOS/coreutils/readlink-cannot-handle-recursive-symlinks -export TESTVERSION=1.0 - -BUILT_FILES= - -FILES=$(METADATA) runtest.sh Makefile PURPOSE - -.PHONY: all install download clean - -run: $(FILES) build - ./runtest.sh - -build: $(BUILT_FILES) - chmod a+x runtest.sh - -clean: - rm -f *~ $(BUILT_FILES) - - -include /usr/share/rhts/lib/rhts-make.include - -$(METADATA): Makefile - @echo "Owner: Jan Scotka " > $(METADATA) - @echo "Name: $(TEST)" >> $(METADATA) - @echo "TestVersion: $(TESTVERSION)" >> $(METADATA) - @echo "Path: $(TEST_DIR)" >> $(METADATA) - @echo "Description: Test for readlink cannot handle recursive symlinks" >> $(METADATA) - @echo "Type: Sanity" >> $(METADATA) - @echo "TestTime: 5m" >> $(METADATA) - @echo "RunFor: coreutils" >> $(METADATA) - @echo "Requires: coreutils" >> $(METADATA) - @echo "Priority: Normal" >> $(METADATA) - @echo "License: GPLv2" >> $(METADATA) - @echo "Confidential: no" >> $(METADATA) - @echo "Destructive: no" >> $(METADATA) - - rhts-lint $(METADATA) diff --git a/tests/readlink-cannot-handle-recursive-symlinks/PURPOSE b/tests/readlink-cannot-handle-recursive-symlinks/PURPOSE deleted file mode 100644 index b9fd740..0000000 --- a/tests/readlink-cannot-handle-recursive-symlinks/PURPOSE +++ /dev/null @@ -1,54 +0,0 @@ -PURPOSE of /CoreOS/coreutils/readlink-cannot-handle-recursive-symlinks -Description: Test for readlink cannot handle recursive symlinks -Author: Jan Scotka -Bug summary: readlink cannot handle recursive symlinks - -Description: - -Description of problem: -The readlink command fails with an error "Too many levels of symbolic links", even if there are only 2 levels. - -The readlink command from RHEL 3 and RHEL 4 and from Fedora 11 all work fine. - -Among other changes between RHEL 4 and RHEL 5, RHEL 5's coreutils added calls to cycle_check() in lib/canonicalize.c, but in upstream canonicalize.c (now in gnulib instead of coreutils), we see the comment: - /* Detect loops. We cannot use the cycle-check module here, - since it's actually possible to encounter the same symlink - more than once in a given traversal. However, encountering - the same symlink,NAME pair twice does indicate a loop. */ - -http://git.savannah.gnu.org/gitweb/?p=gnulib.git;a=blob;f=lib/canonicalize.c;h=4f348398fd69ae516396313d18ac294a4ca3dde3;hb=b653eda3ac4864de205419d9f41eec267cb89eeb#l262 - -The latest canonicalize.c uses seen_triple() instead of cycle_check(). - - -Version-Release number of selected component (if applicable): -coreutils-5.97-19.el5 - -How reproducible: -every time - -Steps to Reproduce: -1. Create a directory with a symlink to itself - mkdir /tmp/dir - cd /tmp/dir - ln -s ../dir dirlink - -2. Run readlink using the 'dirlink' symlink recursively - readlink -v -f dirlink - readlink -v -f dirlink/dirlink - readlink -v -f dirlink/dirlink/dirlink - -Actual results: -The first readlink command on just dirlink succeeds, but the 2nd and 3rd commands fail with - readlink: dirlink/dirlink: Too many levels of symbolic links - -Expected results: -$ readlink -v -f dirlink -/tmp/dir -$ readlink -v -f dirlink/dirlink -/tmp/dir -$ readlink -v -f dirlink/dirlink/dirlink -/tmp/dir - - -Additional info: diff --git a/tests/readlink-cannot-handle-recursive-symlinks/runtest.sh b/tests/readlink-cannot-handle-recursive-symlinks/runtest.sh deleted file mode 100755 index 6ee251f..0000000 --- a/tests/readlink-cannot-handle-recursive-symlinks/runtest.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# vim: dict=/usr/share/beakerlib/dictionary.vim cpt=.,w,b,u,t,i,k -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# runtest.sh of /CoreOS/coreutils/readlink-cannot-handle-recursive-symlinks -# Description: Test for readlink cannot handle recursive symlinks -# Author: Jan Scotka -# -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# Copyright (c) 2010 Red Hat, Inc. All rights reserved. -# -# This copyrighted material is made available to anyone wishing -# to use, modify, copy, or redistribute it subject to the terms -# and conditions of the GNU General Public License version 2. -# -# This program is distributed in the hope that it will be -# useful, but WITHOUT ANY WARRANTY; without even the implied -# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -# PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public -# License along with this program; if not, write to the Free -# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301, USA. -# -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -# Include rhts environment -. /usr/bin/rhts-environment.sh -. /usr/lib/beakerlib/beakerlib.sh - -PACKAGE="coreutils" - -rlJournalStart - rlPhaseStartSetup - rlAssertRpm $PACKAGE - rlRun "TmpDir=\`mktemp -d\`" 0 "Creating tmp directory" - rlRun "pushd $TmpDir" - rlRun "mkdir link" 0 "Creating test directory" - rlRun "cd link" 0 "cd to this dir" - rlRun "ln -s ../link link" 0 "creating symlink to ../link -> link" - rlPhaseEnd - - rlPhaseStartTest - rlLog "Test of readlink up to 20 iteration" - export TMPVAR="link" - for foo in `seq 20` - do echo $TMPVAR - rlRun "readlink -v -f $TMPVAR" 0 "readlink of $TMPVAR" - TMPVAR="$TMPVAR/link" - done - rlPhaseEnd - - rlPhaseStartCleanup - rlRun "popd" - rlRun "rm -r $TmpDir" 0 "Removing tmp directory" - rlPhaseEnd -rlJournalPrintText -rlJournalEnd diff --git a/tests/test-basics b/tests/test-basics deleted file mode 100755 index 7324553..0000000 --- a/tests/test-basics +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh - -# Checks that touch ls rm and foo work -# https://www.mankier.com/1/beakerlib#Examples -. /usr/share/beakerlib/beakerlib.sh - -# Set the full test name -TEST="/examples/beakerlib/Sanity/phases" - -# Package being tested -PACKAGE="coreutils" - -rlJournalStart - # Setup phase: Prepare test directory - rlPhaseStartSetup - rlAssertRpm $PACKAGE - rlRun 'TmpDir=$(mktemp -d)' 0 'Creating tmp directory' # no-reboot - rlRun "pushd $TmpDir" - rlPhaseEnd - - # Test phase: Testing touch, ls and rm commands - rlPhaseStartTest - rlRun "touch foo" 0 "Creating the foo test file" - rlAssertExists "foo" - rlRun "ls -l foo" 0 "Listing the foo test file" - rlRun "rm foo" 0 "Removing the foo test file" - rlAssertNotExists "foo" - rlRun "ls -l foo" 2 "Listing foo should now report an error" - rlPhaseEnd - - # Cleanup phase: Remove test directory - rlPhaseStartCleanup - rlRun "popd" - rlRun "rm -r $TmpDir" 0 "Removing tmp directory" - rlPhaseEnd -rlJournalEnd - -# Print the test report -rlJournalPrintText diff --git a/tests/test_basics.yml b/tests/test_basics.yml deleted file mode 100644 index d5727cf..0000000 --- a/tests/test_basics.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -# This first play always runs on the local staging system -- hosts: localhost - tags: - - atomic - - classic - - container - roles: - - { role: standard-test-beakerlib, tests: [ test-basics, readlink-cannot-handle-recursive-symlinks ] } diff --git a/tests/tests.yml b/tests/tests.yml deleted file mode 100644 index 529263d..0000000 --- a/tests/tests.yml +++ /dev/null @@ -1,2 +0,0 @@ -# Fedora Continuous Integration: https://fedoraproject.org/wiki/CI -- include: test_basics.yml