diff --git a/.gitignore b/.gitignore index 46d0f92..6f14d2e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ /coreutils-[0-9.]*.tar.xz -/coreutils-[0-9.]*.tar.xz.sig +/coreutils-ss.tar.xz diff --git a/coreutils-8.2-uname-processortype.patch b/coreutils-8.2-uname-processortype.patch new file mode 100644 index 0000000..44c57f5 --- /dev/null +++ b/coreutils-8.2-uname-processortype.patch @@ -0,0 +1,44 @@ + src/uname.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/src/uname.c b/src/uname.c +index 6371ca2..1ad8fd7 100644 +--- a/src/uname.c ++++ b/src/uname.c +@@ -322,6 +322,12 @@ main (int argc, char **argv) + # elif defined __ppc__ || defined __ppc64__ + element = "powerpc"; + # endif ++#else ++ { ++ static struct utsname u; ++ uname(&u); ++ element = u.machine; ++ } + #endif + #if HAVE_SYSINFO && defined SI_ARCHITECTURE + if (element == unknown) +@@ -347,7 +353,7 @@ main (int argc, char **argv) + + if (toprint & PRINT_HARDWARE_PLATFORM) + { +- char const *element = unknown; ++ char *element = unknown; + #if HAVE_SYSINFO && defined SI_PLATFORM + { + static char hardware_platform[257]; +@@ -355,6 +361,14 @@ main (int argc, char **argv) + hardware_platform, sizeof hardware_platform)) + element = hardware_platform; + } ++#else ++ { ++ static struct utsname u; ++ uname(&u); ++ element = u.machine; ++ if(strlen(element)==4 && element[0]=='i' && element[2]=='8' && element[3]=='6') ++ element[1]='3'; ++ } + #endif + #ifdef UNAME_HARDWARE_PLATFORM + if (element == unknown) diff --git a/coreutils-8.32-DIR_COLORS.patch b/coreutils-8.32-DIR_COLORS.patch index 37ce3e6..3a0375d 100644 --- a/coreutils-8.32-DIR_COLORS.patch +++ b/coreutils-8.32-DIR_COLORS.patch @@ -1,4 +1,4 @@ -From bca11e30e8a6281a8cbddc9fb196dd86ab09c955 Mon Sep 17 00:00:00 2001 +From c7b13f5e1a7ad012c510a8bdd5a8943ab4b55833 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Fri, 17 Jun 2016 16:58:18 +0200 Subject: [PATCH] downstream changes to default DIR_COLORS @@ 
-9,61 +9,61 @@ Subject: [PATCH] downstream changes to default DIR_COLORS 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/DIR_COLORS b/DIR_COLORS -index 540f6cd..b4785b6 100644 +index b465771..ad42b09 100644 --- a/DIR_COLORS +++ b/DIR_COLORS @@ -1,3 +1,7 @@ +# This file goes in the /etc directory, and must be world readable. -+# You can override the system defaults by making a copy of this file -+# as ~/.dir_colors ++# You can copy this file to .dir_colors in your $HOME directory to override ++# the system defaults. + # Configuration file for dircolors, a utility to help you set the # LS_COLORS environment variable used by GNU ls with the --color option. -@@ -11,6 +15,9 @@ +@@ -10,6 +14,9 @@ # Global config options can be specified before TERM or COLORTERM entries +# For compatibility, the pattern "^COLOR.*none" is recognized as a way to +# disable colorization. See https://bugzilla.redhat.com/1349579 for details. + - # =================================================================== - # Terminal filters - # =================================================================== -@@ -70,7 +77,7 @@ DOOR 01;35 # door + # Below are TERM or COLORTERM entries, which can be glob patterns, which + # restrict following config to systems with matching environment variables. + COLORTERM ?* +@@ -62,7 +69,7 @@ DOOR 01;35 # door BLK 40;33;01 # block device driver CHR 40;33;01 # character device driver ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file ... -MISSING 00 # ... and the files they point to +MISSING 01;37;41 # ... 
and the files they point to - SETUID 37;41 # regular file that is setuid (u+s) - SETGID 30;43 # regular file that is setgid (g+s) - CAPABILITY 00 # regular file with capability (very expensive to lookup) + SETUID 37;41 # file that is setuid (u+s) + SETGID 30;43 # file that is setgid (g+s) + CAPABILITY 00 # file with capability (very expensive to lookup) diff --git a/DIR_COLORS.lightbgcolor b/DIR_COLORS.lightbgcolor -index e3b0ec3..39a0a4c 100644 +index eab6258..1627b63 100644 --- a/DIR_COLORS.lightbgcolor +++ b/DIR_COLORS.lightbgcolor @@ -1,3 +1,9 @@ +# Configuration file for the color ls utility - modified for lighter backgrounds + +# This file goes in the /etc directory, and must be world readable. -+# You can override the system defaults by making a copy of this file -+# as ~/.dir_colors ++# You can copy this file to .dir_colors in your $HOME directory to override ++# the system defaults. + # Configuration file for dircolors, a utility to help you set the # LS_COLORS environment variable used by GNU ls with the --color option. -@@ -11,6 +17,9 @@ +@@ -10,6 +16,9 @@ # Global config options can be specified before TERM or COLORTERM entries +# For compatibility, the pattern "^COLOR.*none" is recognized as a way to +# disable colorization. See https://bugzilla.redhat.com/1349579 for details. + - # =================================================================== - # Terminal filters - # =================================================================== -@@ -60,17 +69,17 @@ TERM xterm* + # Below are TERM or COLORTERM entries, which can be glob patterns, which + # restrict following config to systems with matching environment variables. + COLORTERM ?* +@@ -52,17 +61,17 @@ TERM xterm* #NORMAL 00 # no color code at all #FILE 00 # regular file: use no color at all RESET 0 # reset to "normal" color @@ -83,18 +83,18 @@ index e3b0ec3..39a0a4c 100644 ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file ... -MISSING 00 # ... 
and the files they point to +MISSING 01;37;41 # ... and the files they point to - SETUID 37;41 # regular file that is setuid (u+s) - SETGID 30;43 # regular file that is setgid (g+s) - CAPABILITY 00 # regular file with capability (very expensive to lookup) -@@ -79,7 +88,7 @@ OTHER_WRITABLE 34;42 # dir that is other-writable (o+w) and not sticky + SETUID 37;41 # file that is setuid (u+s) + SETGID 30;43 # file that is setgid (g+s) + CAPABILITY 00 # file with capability (very expensive to lookup) +@@ -71,7 +80,7 @@ OTHER_WRITABLE 34;42 # dir that is other-writable (o+w) and not sticky STICKY 37;44 # dir with the sticky bit set (+t) and not other-writable - # This is for regular files with execute permission: + # This is for files with execute permission: -EXEC 01;32 +EXEC 00;32 - # =================================================================== - # File extension attributes + # List any file extensions like '.gz' or '.tar' that you would like ls + # to color below. Put the extension, a space, and the color init string. -- -2.49.0 +2.34.1 diff --git a/coreutils-8.4-mkdir-modenote.patch b/coreutils-8.4-mkdir-modenote.patch new file mode 100644 index 0000000..3973d44 --- /dev/null +++ b/coreutils-8.4-mkdir-modenote.patch @@ -0,0 +1,13 @@ +diff --git a/doc/coreutils.texi b/doc/coreutils.texi +index 400e135..47e4480 100644 +--- a/doc/coreutils.texi ++++ b/doc/coreutils.texi +@@ -10829,6 +10829,8 @@ incorrect. @xref{Directory Setuid and Setgid}, for how the + set-user-ID and set-group-ID bits of directories are inherited unless + overridden in this way. + ++Note: The @option{--mode},@option{-m} option only applies to the right-most directories listed on the command line. When combined with @option{--parents}, @option{-p} option, any parent directories are created with @samp{u+wx} modified by umask. 
++ + @item -p + @itemx --parents + @opindex -p diff --git a/coreutils-9.0-autofs-no-mount.patch b/coreutils-9.0-autofs-no-mount.patch new file mode 100644 index 0000000..b6d6523 --- /dev/null +++ b/coreutils-9.0-autofs-no-mount.patch @@ -0,0 +1,87 @@ +From f4422844dbcd839ce486bcbc15b7bd5b72c9198d Mon Sep 17 00:00:00 2001 +From: Rohan Sable +Date: Mon, 7 Mar 2022 14:14:13 +0000 +Subject: [PATCH 1/2] ls: avoid triggering automounts + +statx() has different defaults wrt automounting +compared to stat() or lstat(), so explicitly +set the AT_NO_AUTOMOUNT flag to suppress that behavior, +and avoid unintended operations or potential errors. + +* src/ls.c (do_statx): Pass AT_NO_AUTOMOUNT to avoid this behavior. +Fixes https://bugs.gnu.org/54286 + +Signed-off-by: Rohan Sable + +Upstream-commit: 85c975df2c25bd799370b04bb294e568e001102f +Signed-off-by: Kamil Dudka +--- + src/ls.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/ls.c b/src/ls.c +index 1047801..fe0e9f8 100644 +--- a/src/ls.c ++++ b/src/ls.c +@@ -1175,7 +1175,7 @@ do_statx (int fd, char const *name, struct stat *st, int flags, + { + struct statx stx; + bool want_btime = mask & STATX_BTIME; +- int ret = statx (fd, name, flags, mask, &stx); ++ int ret = statx (fd, name, flags | AT_NO_AUTOMOUNT, mask, &stx); + if (ret >= 0) + { + statx_to_stat (&stx, st); +-- +2.34.1 + + +From 3d227f9e4f3fe806064721e4b9451ee06526bc80 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?P=C3=A1draig=20Brady?= +Date: Mon, 7 Mar 2022 23:29:20 +0000 +Subject: [PATCH 2/2] stat: only automount with --cached=never + +Revert to the default behavior before the introduction of statx(). + +* src/stat.c (do_stat): Set AT_NO_AUTOMOUNT without --cached=never. +* doc/coreutils.texi (stat invocation): Mention the automount +behavior with --cached=never. 
+ +Fixes https://bugs.gnu.org/54287 + +Upstream-commit: 92cb8427c537f37edd43c5cef1909585201372ab +Signed-off-by: Kamil Dudka +--- + doc/coreutils.texi | 1 + + src/stat.c | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/doc/coreutils.texi b/doc/coreutils.texi +index 19b535c..0f5c16a 100644 +--- a/doc/coreutils.texi ++++ b/doc/coreutils.texi +@@ -12564,6 +12564,7 @@ Always read the already cached attributes if available. + + @item never + Always sychronize with the latest file system attributes. ++This also mounts automounted files. + + @item default + Leave the caching behavior to the underlying file system. +diff --git a/src/stat.c b/src/stat.c +index 0c34501..803340a 100644 +--- a/src/stat.c ++++ b/src/stat.c +@@ -1381,6 +1381,9 @@ do_stat (char const *filename, char const *format, char const *format2) + else if (force_sync) + flags |= AT_STATX_FORCE_SYNC; + ++ if (! force_sync) ++ flags |= AT_NO_AUTOMOUNT; ++ + fd = statx (fd, pathname, flags, format_to_mask (format), &stx); + if (fd < 0) + { +-- +2.34.1 + diff --git a/coreutils-9.0-chmod-symlink.patch b/coreutils-9.0-chmod-symlink.patch new file mode 100644 index 0000000..fad3b7a --- /dev/null +++ b/coreutils-9.0-chmod-symlink.patch @@ -0,0 +1,114 @@ +From c76e70637e529481478e26683ebd73c40621c382 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?P=C3=A1draig=20Brady?= +Date: Fri, 24 Sep 2021 20:57:41 +0100 +Subject: [PATCH] chmod: fix exit status when ignoring symlinks + +* src/chmod.c: Reorder enum so CH_NOT_APPLIED +can be treated as a non error. +* tests/chmod/ignore-symlink.sh: A new test. +* tests/local.mk: Reference the new test. +* NEWS: Mention the bug fix. 
+Fixes https://bugs.gnu.org/50784 + +Upstream-commit: e8b56ebd536e82b15542a00c888109471936bfda +Signed-off-by: Kamil Dudka +--- + NEWS | 6 ++++++ + src/chmod.c | 4 ++-- + tests/chmod/ignore-symlink.sh | 31 +++++++++++++++++++++++++++++++ + tests/local.mk | 1 + + 4 files changed, 40 insertions(+), 2 deletions(-) + create mode 100755 tests/chmod/ignore-symlink.sh + +diff --git a/NEWS b/NEWS +index f2fbcbb..5722a8b 100644 +--- a/NEWS ++++ b/NEWS +@@ -143,6 +143,12 @@ GNU coreutils NEWS -*- outline -*- + where avx2 instructions are supported. + A new --debug option will indicate if avx2 is being used. + ++** Bug fixes ++ ++ chmod -R no longer exits with error status when encountering symlinks. ++ All files would be processed correctly, but the exit status was incorrect. ++ [bug introduced in coreutils-9.0] ++ + + * Noteworthy changes in release 8.32 (2020-03-05) [stable] + +diff --git a/src/chmod.c b/src/chmod.c +index 37b04f5..57ac47f 100644 +--- a/src/chmod.c ++++ b/src/chmod.c +@@ -44,8 +44,8 @@ struct change_status + enum + { + CH_NO_STAT, +- CH_NOT_APPLIED, + CH_FAILED, ++ CH_NOT_APPLIED, + CH_NO_CHANGE_REQUESTED, + CH_SUCCEEDED + } +@@ -322,7 +322,7 @@ process_file (FTS *fts, FTSENT *ent) + if ( ! recurse) + fts_set (fts, ent, FTS_SKIP); + +- return CH_NO_CHANGE_REQUESTED <= ch.status; ++ return CH_NOT_APPLIED <= ch.status; + } + + /* Recursively change the modes of the specified FILES (the last entry +diff --git a/tests/chmod/ignore-symlink.sh b/tests/chmod/ignore-symlink.sh +new file mode 100755 +index 0000000..5ce3de8 +--- /dev/null ++++ b/tests/chmod/ignore-symlink.sh +@@ -0,0 +1,31 @@ ++#!/bin/sh ++# Test for proper exit code of chmod on a processed symlink. ++ ++# Copyright (C) 2021 Free Software Foundation, Inc. 
++ ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation, either version 3 of the License, or ++# (at your option) any later version. ++ ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++ ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src ++print_ver_ chmod ++ ++mkdir dir || framework_failure_ ++touch dir/f || framework_failure_ ++ln -s f dir/l || framework_failure_ ++ ++# This operation ignores symlinks but should succeed. ++chmod u+w -R dir 2> out || fail=1 ++ ++compare /dev/null out || fail=1 ++ ++Exit $fail +diff --git a/tests/local.mk b/tests/local.mk +index a76c808..a2164c9 100644 +--- a/tests/local.mk ++++ b/tests/local.mk +@@ -458,6 +458,7 @@ all_tests = \ + tests/chmod/c-option.sh \ + tests/chmod/equal-x.sh \ + tests/chmod/equals.sh \ ++ tests/chmod/ignore-symlink.sh \ + tests/chmod/inaccessible.sh \ + tests/chmod/octal.sh \ + tests/chmod/setgid.sh \ +-- +2.31.1 + diff --git a/coreutils-9.9-fix-cut-test-aarch64.patch b/coreutils-9.9-fix-cut-test-aarch64.patch deleted file mode 100644 index 600f87b..0000000 --- a/coreutils-9.9-fix-cut-test-aarch64.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 95044cb5eaea83d02f768feb5ab79fcf5e6ad782 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?P=C3=A1draig=20Brady?= -Date: Mon, 22 Dec 2025 17:12:48 +0000 -Subject: [PATCH] tests: avoid false failure due to ulimit on aarch64 - -* tests/cut/cut-huge-range.sh: Add an extra 1MiB headroom, -which was seen with aarch64. 
-Reported at https://bugzilla.redhat.com/2424302 - -Cherry-picked-by: Lukáš Zaoral -Upstream-commit: 95044cb5eaea83d02f768feb5ab79fcf5e6ad782 ---- - tests/cut/cut-huge-range.sh | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tests/cut/cut-huge-range.sh b/tests/cut/cut-huge-range.sh -index 4bd1b129d8..98d7e8f0b9 100755 ---- a/tests/cut/cut-huge-range.sh -+++ b/tests/cut/cut-huge-range.sh -@@ -22,6 +22,7 @@ getlimits_ - - vm=$(get_min_ulimit_v_ returns_ 0 cut -b1 /dev/null) \ - || skip_ 'shell lacks ulimit, or ASAN enabled' -+vm=$(($vm+1000)) # https://bugzilla.redhat.com/2424302 - - # Ensure we can cut up to our sentinel value. - # Don't use expr to subtract one, - diff --git a/coreutils-9.9-gnulib-c23.patch b/coreutils-9.9-gnulib-c23.patch deleted file mode 100644 index 82e3899..0000000 --- a/coreutils-9.9-gnulib-c23.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 891761bca1aa78336e5b18c121075b6e4696c5d4 Mon Sep 17 00:00:00 2001 -From: Paul Eggert -Date: Sun, 23 Nov 2025 00:50:40 -0800 -Subject: [PATCH] Port to C23 qualifier-generic fns like strchr -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This ports Gnulib to strict C23 platforms that reject code -like ‘char *q = strchr (P, 'x');’ when P is a pointer to const, -because in C23 strchr is a qualifier-generic function so -strchr (P, 'x') returns char const *. -This patch does not attempt to do the following two things, -which might be useful in the future: -1. When compiling on non-C23 platforms, check user code for -portability to platforms that define qualifier-generic functions. -2. Port Gnulib to platforms that have qualifier-generic functions -not listed in the C23 standard, e.g., strchrnul. I don’t know -of any such platforms. -* lib/mbschr.c (mbschr): -* lib/memchr2.c (memchr2): -Port to C23, where functions like strchr are qualifier-generic. -* lib/c++defs.h (_GL_FUNCDECL_SYS_NAME): New macro. 
-* lib/c++defs.h (_GL_FUNCDECL_SYS): -* lib/stdlib.in.h (bsearch): -Use it, to prevent C23 names like strchr from acting like macros. -* lib/string.in.h (memchr, strchr, strpbrk, strrchr): -Do not #undef when GNULIB_POSIXCHECK is defined, as this could -cause conforming C23 code to fail to conform. It’s not clear why -_GL_WARN_ON_USE_CXX; perhaps it was needed but isn’t any more? -But for now, limit the removal of #undef to these four functions -where #undeffing is clearly undesirable in C23. -* lib/wchar.in.h (wmemchr): Parenthesize function name in decl, -to prevent it from acting like a macro. - -Cherry-picked-by: Lukáš Zaoral -Upstream-commit: df17f4f37ed3ca373d23ad42eae51122bdb96626 ---- - lib/c++defs.h | 12 +++++++++++- - lib/mbschr.c | 2 +- - lib/memchr2.c | 2 +- - lib/stdlib.in.h | 6 +++--- - lib/string.in.h | 4 ---- - lib/wchar.in.h | 2 +- - 6 files changed, 17 insertions(+), 11 deletions(-) - -diff --git a/lib/c++defs.h b/lib/c++defs.h -index b77979a..7384457 100644 ---- a/lib/c++defs.h -+++ b/lib/c++defs.h -@@ -127,6 +127,16 @@ - #define _GL_FUNCDECL_RPL_1(rpl_func,rettype,parameters,...) \ - _GL_EXTERN_C_FUNC __VA_ARGS__ rettype rpl_func parameters - -+/* _GL_FUNCDECL_SYS_NAME (func) expands to plain func if C++, and to -+ parenthsized func otherwise. Parenthesization is needed in C23 if -+ the function is like strchr and so is a qualifier-generic macro -+ that expands to something more complicated. */ -+#ifdef __cplusplus -+# define _GL_FUNCDECL_SYS_NAME(func) func -+#else -+# define _GL_FUNCDECL_SYS_NAME(func) (func) -+#endif -+ - /* _GL_FUNCDECL_SYS (func, rettype, parameters, [attributes]); - declares the system function, named func, with the given prototype, - consisting of return type, parameters, and attributes. -@@ -139,7 +149,7 @@ - _GL_FUNCDECL_SYS (posix_openpt, int, (int flags), _GL_ATTRIBUTE_NODISCARD); - */ - #define _GL_FUNCDECL_SYS(func,rettype,parameters,...) 
\ -- _GL_EXTERN_C_FUNC __VA_ARGS__ rettype func parameters -+ _GL_EXTERN_C_FUNC __VA_ARGS__ rettype _GL_FUNCDECL_SYS_NAME (func) parameters - - /* _GL_CXXALIAS_RPL (func, rettype, parameters); - declares a C++ alias called GNULIB_NAMESPACE::func -diff --git a/lib/mbschr.c b/lib/mbschr.c -index c9e14b5..6582134 100644 ---- a/lib/mbschr.c -+++ b/lib/mbschr.c -@@ -65,5 +65,5 @@ mbschr (const char *string, int c) - return NULL; - } - else -- return strchr (string, c); -+ return (char *) strchr (string, c); - } -diff --git a/lib/memchr2.c b/lib/memchr2.c -index 7493823..d7724ae 100644 ---- a/lib/memchr2.c -+++ b/lib/memchr2.c -@@ -55,7 +55,7 @@ memchr2 (void const *s, int c1_in, int c2_in, size_t n) - c2 = (unsigned char) c2_in; - - if (c1 == c2) -- return memchr (s, c1, n); -+ return (void *) memchr (s, c1, n); - - /* Handle the first few bytes by reading one byte at a time. - Do this until VOID_PTR is aligned on a longword boundary. */ -diff --git a/lib/stdlib.in.h b/lib/stdlib.in.h -index bef0aaa..fd0e1e0 100644 ---- a/lib/stdlib.in.h -+++ b/lib/stdlib.in.h -@@ -224,9 +224,9 @@ _GL_INLINE_HEADER_BEGIN - - /* Declarations for ISO C N3322. 
*/ - #if defined __GNUC__ && __GNUC__ >= 15 && !defined __clang__ --_GL_EXTERN_C void *bsearch (const void *__key, -- const void *__base, size_t __nmemb, size_t __size, -- int (*__compare) (const void *, const void *)) -+_GL_EXTERN_C void *_GL_FUNCDECL_SYS_NAME (bsearch) -+ (const void *__key, const void *__base, size_t __nmemb, size_t __size, -+ int (*__compare) (const void *, const void *)) - _GL_ATTRIBUTE_NONNULL_IF_NONZERO (2, 3) _GL_ARG_NONNULL ((5)); - _GL_EXTERN_C void qsort (void *__base, size_t __nmemb, size_t __size, - int (*__compare) (const void *, const void *)) -diff --git a/lib/string.in.h b/lib/string.in.h -index fdcdd21..8b56acf 100644 ---- a/lib/string.in.h -+++ b/lib/string.in.h -@@ -409,7 +409,6 @@ _GL_CXXALIASWARN1 (memchr, void const *, - _GL_CXXALIASWARN (memchr); - # endif - #elif defined GNULIB_POSIXCHECK --# undef memchr - /* Assume memchr is always declared. */ - _GL_WARN_ON_USE (memchr, "memchr has platform-specific bugs - " - "use gnulib module memchr for portability" ); -@@ -674,7 +673,6 @@ _GL_WARN_ON_USE (stpncpy, "stpncpy is unportable - " - #if defined GNULIB_POSIXCHECK - /* strchr() does not work with multibyte strings if the locale encoding is - GB18030 and the character to be searched is a digit. */ --# undef strchr - /* Assume strchr is always declared. */ - _GL_WARN_ON_USE_CXX (strchr, - const char *, char *, (const char *, int), -@@ -981,7 +979,6 @@ _GL_CXXALIASWARN (strpbrk); - Even in this simple case, it does not work with multibyte strings if the - locale encoding is GB18030 and one of the characters to be searched is a - digit. 
*/ --# undef strpbrk - _GL_WARN_ON_USE_CXX (strpbrk, - const char *, char *, (const char *, const char *), - "strpbrk cannot work correctly on character strings " -@@ -1011,7 +1008,6 @@ _GL_WARN_ON_USE (strspn, "strspn cannot work correctly on character strings " - #if defined GNULIB_POSIXCHECK - /* strrchr() does not work with multibyte strings if the locale encoding is - GB18030 and the character to be searched is a digit. */ --# undef strrchr - /* Assume strrchr is always declared. */ - _GL_WARN_ON_USE_CXX (strrchr, - const char *, char *, (const char *, int), -diff --git a/lib/wchar.in.h b/lib/wchar.in.h -index ab602a2..6be4515 100644 ---- a/lib/wchar.in.h -+++ b/lib/wchar.in.h -@@ -301,7 +301,7 @@ _GL_EXTERN_C int wcsncmp (const wchar_t *__s1, const wchar_t *__s2, size_t __n) - _GL_ATTRIBUTE_NONNULL_IF_NONZERO (1, 3) - _GL_ATTRIBUTE_NONNULL_IF_NONZERO (2, 3); - # ifndef __cplusplus --_GL_EXTERN_C wchar_t *wmemchr (const wchar_t *__s, wchar_t __wc, size_t __n) -+_GL_EXTERN_C wchar_t *(wmemchr) (const wchar_t *__s, wchar_t __wc, size_t __n) - _GL_ATTRIBUTE_NONNULL_IF_NONZERO (1, 3); - # endif - _GL_EXTERN_C wchar_t *wmemset (wchar_t *__s, wchar_t __wc, size_t __n) --- -2.52.0 - diff --git a/coreutils-df-direct.patch b/coreutils-df-direct.patch index 341ee2c..2571fa4 100644 --- a/coreutils-df-direct.patch +++ b/coreutils-df-direct.patch @@ -1,4 +1,4 @@ -From 91be1a584108a6a3d96f64382bbf206c4213b3db Mon Sep 17 00:00:00 2001 +From 6e36198f10a2f63b89c89ebb5d5c185b20fb3a63 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Mon, 29 Mar 2010 17:20:34 +0000 Subject: [PATCH] coreutils-df-direct.patch @@ -11,10 +11,10 @@ Subject: [PATCH] coreutils-df-direct.patch create mode 100755 tests/df/direct.sh diff --git a/doc/coreutils.texi b/doc/coreutils.texi -index b420606..0ccb368 100644 +index 5b9a597..6810c15 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi -@@ -12597,6 +12597,13 @@ some systems (notably Solaris), doing this yields more up to date results, +@@ -12074,6 
+12074,13 @@ some systems (notably Solaris), doing this yields more up to date results, but in general this option makes @command{df} much slower, especially when there are many or very busy file systems. @@ -29,10 +29,10 @@ index b420606..0ccb368 100644 @opindex --total @cindex grand total of file system size, usage and available space diff --git a/src/df.c b/src/df.c -index 75e638c..ef9f0a7 100644 +index 48025b9..c8efa5b 100644 --- a/src/df.c +++ b/src/df.c -@@ -121,6 +121,9 @@ static bool print_type; +@@ -125,6 +125,9 @@ static bool print_type; /* If true, print a grand total at the end. */ static bool print_grand_total; @@ -42,7 +42,7 @@ index 75e638c..ef9f0a7 100644 /* Grand total data. */ static struct fs_usage grand_fsu; -@@ -248,13 +251,15 @@ enum +@@ -252,13 +255,15 @@ enum NO_SYNC_OPTION = CHAR_MAX + 1, SYNC_OPTION, TOTAL_OPTION, @@ -53,16 +53,16 @@ index 75e638c..ef9f0a7 100644 static struct option const long_options[] = { - {"all", no_argument, nullptr, 'a'}, - {"block-size", required_argument, nullptr, 'B'}, -+ {"direct", no_argument, nullptr, DIRECT_OPTION}, - {"inodes", no_argument, nullptr, 'i'}, - {"human-readable", no_argument, nullptr, 'h'}, - {"si", no_argument, nullptr, 'H'}, -@@ -571,7 +576,10 @@ get_header (void) - for (idx_t col = 0; col < ncolumns; col++) + {"all", no_argument, NULL, 'a'}, + {"block-size", required_argument, NULL, 'B'}, ++ {"direct", no_argument, NULL, DIRECT_OPTION}, + {"inodes", no_argument, NULL, 'i'}, + {"human-readable", no_argument, NULL, 'h'}, + {"si", no_argument, NULL, 'H'}, +@@ -583,7 +588,10 @@ get_header (void) + for (col = 0; col < ncolumns; col++) { - char *cell; + char *cell = NULL; - char const *header = _(columns[col]->caption); + char const *header = (columns[col]->field == TARGET_FIELD + && direct_statfs)? 
@@ -71,7 +71,7 @@ index 75e638c..ef9f0a7 100644 if (columns[col]->field == SIZE_FIELD && (header_mode == DEFAULT_MODE -@@ -1446,6 +1454,17 @@ get_point (char const *point, const struct stat *statp) +@@ -1486,6 +1494,17 @@ get_point (char const *point, const struct stat *statp) static void get_entry (char const *name, struct stat const *statp) { @@ -79,17 +79,17 @@ index 75e638c..ef9f0a7 100644 + { + char *resolved = canonicalize_file_name (name); + if (resolved) -+ { -+ get_dev (NULL, resolved, name, NULL, NULL, false, false, NULL, false); -+ free (resolved); -+ return; -+ } ++ { ++ get_dev (NULL, resolved, name, NULL, NULL, false, false, NULL, false); ++ free (resolved); ++ return; ++ } + } + if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode)) && get_device (name)) return; -@@ -1516,6 +1535,7 @@ or all file systems by default.\n\ +@@ -1556,6 +1575,7 @@ or all file systems by default.\n\ -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\ '-BM' prints sizes in units of 1,048,576 bytes;\n\ see SIZE format below\n\ @@ -97,7 +97,7 @@ index 75e638c..ef9f0a7 100644 -h, --human-readable print sizes in powers of 1024 (e.g., 1023M)\n\ -H, --si print sizes in powers of 1000 (e.g., 1.1G)\n\ "), stdout); -@@ -1610,6 +1630,9 @@ main (int argc, char **argv) +@@ -1646,6 +1666,9 @@ main (int argc, char **argv) xstrtol_fatal (e, oi, c, long_options, optarg); } break; @@ -107,14 +107,14 @@ index 75e638c..ef9f0a7 100644 case 'i': if (header_mode == OUTPUT_MODE) { -@@ -1706,6 +1729,13 @@ main (int argc, char **argv) +@@ -1742,6 +1765,13 @@ main (int argc, char **argv) } } + if (direct_statfs && show_local_fs) + { + error (0, 0, _("options --direct and --local (-l) are mutually " -+ "exclusive")); ++ "exclusive")); + usage (EXIT_FAILURE); + } + @@ -183,5 +183,5 @@ index 0000000..8e4cfb8 + +Exit $fail -- -2.52.0 +2.31.1 diff --git a/coreutils-getgrouplist.patch b/coreutils-getgrouplist.patch new file mode 100644 index 0000000..93eef67 --- /dev/null +++ 
b/coreutils-getgrouplist.patch @@ -0,0 +1,94 @@ +diff --git a/lib/getugroups.c b/lib/getugroups.c +index 299bae6..8ece29b 100644 +--- a/lib/getugroups.c ++++ b/lib/getugroups.c +@@ -19,6 +19,9 @@ + + #include + ++/* We do not need this code if getgrouplist(3) is available. */ ++#ifndef HAVE_GETGROUPLIST ++ + #include "getugroups.h" + + #include +@@ -126,3 +129,4 @@ getugroups (int maxcount, gid_t *grouplist, char const *username, + } + + #endif /* HAVE_GRP_H */ ++#endif /* have getgrouplist */ +diff --git a/lib/mgetgroups.c b/lib/mgetgroups.c +index 76474c2..0a9d221 100644 +--- a/lib/mgetgroups.c ++++ b/lib/mgetgroups.c +@@ -31,6 +31,7 @@ + #endif + + #include "getugroups.h" ++#include "xalloc.h" + #include "xalloc-oversized.h" + + /* Work around an incompatibility of OS X 10.11: getgrouplist +@@ -119,9 +120,17 @@ mgetgroups (char const *username, gid_t gid, gid_t **groups) + /* else no username, so fall through and use getgroups. */ + #endif + +- max_n_groups = (username +- ? getugroups (0, NULL, username, gid) +- : getgroups (0, NULL)); ++ if (!username) ++ max_n_groups = getgroups(0, NULL); ++ else ++ { ++#ifdef HAVE_GETGROUPLIST ++ max_n_groups = 0; ++ getgrouplist (username, gid, NULL, &max_n_groups); ++#else ++ max_n_groups = getugroups (0, NULL, username, gid); ++#endif ++ } + + /* If we failed to count groups because there is no supplemental + group support, then return an array containing just GID. +@@ -143,10 +152,25 @@ mgetgroups (char const *username, gid_t gid, gid_t **groups) + if (g == NULL) + return -1; + +- ng = (username +- ? 
getugroups (max_n_groups, g, username, gid) +- : getgroups (max_n_groups - (gid != (gid_t) -1), +- g + (gid != (gid_t) -1))); ++ if (!username) ++ ng = getgroups (max_n_groups - (gid != (gid_t)-1), g + (gid != (gid_t)-1)); ++ else ++ { ++#ifdef HAVE_GETGROUPLIST ++ int e; ++ ng = max_n_groups; ++ while ((e = getgrouplist (username, gid, g, &ng)) == -1 ++ && ng > max_n_groups) ++ { ++ max_n_groups = ng; ++ g = xrealloc (g, max_n_groups * sizeof (GETGROUPS_T)); ++ } ++ if (e == -1) ++ ng = -1; ++#else ++ ng = getugroups (max_n_groups, g, username, gid); ++#endif ++ } + + if (ng < 0) + { +diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4 +index 62777c7..5180243 100644 +--- a/m4/jm-macros.m4 ++++ b/m4/jm-macros.m4 +@@ -68,6 +68,7 @@ AC_DEFUN([coreutils_MACROS], + fchown + fchmod + ftruncate ++ getgrouplist + iswspace + mkfifo + mbrlen diff --git a/coreutils-i18n.patch b/coreutils-i18n.patch index 83579e9..4ff0911 100644 --- a/coreutils-i18n.patch +++ b/coreutils-i18n.patch @@ -1,80 +1,81 @@ -From a81b096084524e9aeef5e8b81fc829eb9efec581 Mon Sep 17 00:00:00 2001 +From 01010419a6499768563e7b2f3fd56cf16edda75e Mon Sep 17 00:00:00 2001 From: rpm-build -Date: Wed, 30 Aug 2023 17:19:58 +0200 +Date: Mon, 4 Oct 2021 08:54:37 +0200 Subject: [PATCH] coreutils-i18n.patch --- - bootstrap.conf | 2 + - configure.ac | 6 + + bootstrap.conf | 1 + + configure.ac | 2 + lib/linebuffer.h | 8 + - lib/mbfile.c | 20 + - lib/mbfile.h | 283 +++++++++++++ - m4/mbfile.m4 | 16 + + lib/mbfile.c | 3 + + lib/mbfile.h | 255 ++++++++++++ + m4/mbfile.m4 | 14 + src/cut.c | 508 +++++++++++++++++++++-- src/expand-common.c | 114 ++++++ src/expand-common.h | 12 + src/expand.c | 90 +++- + src/fold.c | 312 ++++++++++++-- + src/join.c | 359 ++++++++++++++-- src/local.mk | 4 +- src/pr.c | 443 ++++++++++++++++++-- - src/sort.c | 791 +++++++++++++++++++++++++++++++++--- + src/sort.c | 792 +++++++++++++++++++++++++++++++++--- src/unexpand.c | 101 ++++- + src/uniq.c | 119 +++++- tests/Coreutils.pm | 3 + 
tests/expand/mb.sh | 183 +++++++++ tests/i18n/sort.sh | 29 ++ tests/local.mk | 4 + tests/misc/expand.pl | 42 ++ + tests/misc/fold.pl | 50 ++- + tests/misc/join.pl | 50 +++ tests/misc/sort-mb-tests.sh | 45 ++ + tests/misc/sort-merge.pl | 42 ++ + tests/misc/sort.pl | 40 +- tests/misc/unexpand.pl | 39 ++ + tests/misc/uniq.pl | 55 +++ tests/pr/pr-tests.pl | 49 +++ - tests/sort/sort-merge.pl | 42 ++ - tests/sort/sort.pl | 40 +- tests/unexpand/mb.sh | 172 ++++++++ - 25 files changed, 2879 insertions(+), 167 deletions(-) + 31 files changed, 3698 insertions(+), 242 deletions(-) create mode 100644 lib/mbfile.c create mode 100644 lib/mbfile.h create mode 100644 m4/mbfile.m4 - create mode 100644 tests/expand/mb.sh - create mode 100644 tests/i18n/sort.sh - create mode 100644 tests/misc/sort-mb-tests.sh - create mode 100644 tests/unexpand/mb.sh + create mode 100755 tests/expand/mb.sh + create mode 100755 tests/i18n/sort.sh + create mode 100755 tests/misc/sort-mb-tests.sh + create mode 100755 tests/unexpand/mb.sh diff --git a/bootstrap.conf b/bootstrap.conf -index ec68ac8..ec2fbbe 100644 +index c1399e3..60b39cf 100644 --- a/bootstrap.conf +++ b/bootstrap.conf -@@ -171,6 +171,8 @@ gnulib_modules=" +@@ -162,6 +162,7 @@ gnulib_modules=" + maintainer-makefile malloc-gnu manywarnings - mbbuf -+ mbchar + mbfile mbrlen - mbrtoc32 mbrtowc + mbsalign diff --git a/configure.ac b/configure.ac -index 5e99ef3..ac07577 100644 +index 7e4afc9..4656a35 100644 --- a/configure.ac +++ b/configure.ac -@@ -465,6 +465,12 @@ fi +@@ -476,6 +476,8 @@ fi # I'm leaving it here for now. This whole thing needs to be modernized... gl_WINSIZE_IN_PTEM +gl_MBFILE -+dnl Do not use gl_MODULE_INDICATOR([mbfile]) here: we don't want 'struct mbchar' -+dnl to have a different size in lib/ than in tests/. 
-+AC_DEFINE([GNULIB_MBFILE], [1], -+ [Define to 1 if the gnulib module 'mbfile' is in use.]) + gl_HEADER_TIOCGWINSZ_IN_TERMIOS_H if test $gl_cv_sys_tiocgwinsz_needs_termios_h = no && \ diff --git a/lib/linebuffer.h b/lib/linebuffer.h -index ca56f80..509b7e6 100644 +index 07d45ca..af62e6c 100644 --- a/lib/linebuffer.h +++ b/lib/linebuffer.h -@@ -27,6 +27,11 @@ extern "C" { - #endif - +@@ -22,6 +22,11 @@ + # include "idx.h" + # include +/* Get mbstate_t. */ +# if HAVE_WCHAR_H @@ -84,7 +85,7 @@ index ca56f80..509b7e6 100644 /* A 'struct linebuffer' holds a line of text. */ struct linebuffer -@@ -34,6 +39,9 @@ struct linebuffer +@@ -29,6 +34,9 @@ struct linebuffer idx_t size; /* Allocated. */ idx_t length; /* Used. */ char *buffer; @@ -96,51 +97,34 @@ index ca56f80..509b7e6 100644 /* Initialize linebuffer LINEBUFFER for use. */ diff --git a/lib/mbfile.c b/lib/mbfile.c new file mode 100644 -index 0000000..f4e3e77 +index 0000000..b0a468e --- /dev/null +++ b/lib/mbfile.c -@@ -0,0 +1,20 @@ -+/* Multibyte character I/O: macros for multi-byte encodings. -+ Copyright (C) 2012-2025 Free Software Foundation, Inc. -+ -+ This file is free software: you can redistribute it and/or modify -+ it under the terms of the GNU Lesser General Public License as -+ published by the Free Software Foundation, either version 3 of the -+ License, or (at your option) any later version. -+ -+ This file is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public License -+ along with this program. If not, see . 
*/ -+ +@@ -0,0 +1,3 @@ +#include -+ +#define MBFILE_INLINE _GL_EXTERN_INLINE +#include "mbfile.h" diff --git a/lib/mbfile.h b/lib/mbfile.h new file mode 100644 -index 0000000..c852f31 +index 0000000..11f1b12 --- /dev/null +++ b/lib/mbfile.h -@@ -0,0 +1,283 @@ +@@ -0,0 +1,255 @@ +/* Multibyte character I/O: macros for multi-byte encodings. -+ Copyright (C) 2001, 2005, 2009-2025 Free Software Foundation, Inc. ++ Copyright (C) 2001, 2005, 2009-2015 Free Software Foundation, Inc. + -+ This file is free software: you can redistribute it and/or modify -+ it under the terms of the GNU Lesser General Public License as -+ published by the Free Software Foundation, either version 3 of the -+ License, or (at your option) any later version. ++ This program is free software: you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3 of the License, or ++ (at your option) any later version. + -+ This file is distributed in the hope that it will be useful, ++ This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU Lesser General Public License for more details. ++ GNU General Public License for more details. + -+ You should have received a copy of the GNU Lesser General Public License -+ along with this program. If not, see . */ ++ You should have received a copy of the GNU General Public License ++ along with this program. If not, see . */ + +/* Written by Mitsuru Chinen + and Bruno Haible . */ @@ -175,125 +159,115 @@ index 0000000..c852f31 +#ifndef _MBFILE_H +#define _MBFILE_H 1 + -+/* This file uses _GL_INLINE_HEADER_BEGIN, _GL_INLINE. */ -+#if !_GL_CONFIG_H_INCLUDED -+ #error "Please include config.h first." 
-+#endif -+ +#include ++#include +#include -+#include +#include ++ ++/* Tru64 with Desktop Toolkit C has a bug: must be included before ++ . ++ BSD/OS 4.1 has a bug: and must be included before ++ . */ ++#include ++#include +#include + +#include "mbchar.h" + ++#ifndef _GL_INLINE_HEADER_BEGIN ++ #error "Please include config.h first." ++#endif +_GL_INLINE_HEADER_BEGIN +#ifndef MBFILE_INLINE +# define MBFILE_INLINE _GL_INLINE +#endif + -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+ -+/* Guarantee two characters of pushback. */ -+#define MBFILE_MAX_PUSHBACK 2 -+ +struct mbfile_multi { + FILE *fp; + bool eof_seen; -+ unsigned int pushback_count; /* <= MBFILE_MAX_PUSHBACK */ ++ bool have_pushback; + mbstate_t state; + unsigned int bufcount; + char buf[MBCHAR_BUF_SIZE]; -+ struct mbchar pushback[MBFILE_MAX_PUSHBACK]; ++ struct mbchar pushback; +}; + +MBFILE_INLINE void +mbfile_multi_getc (struct mbchar *mbc, struct mbfile_multi *mbf) +{ -+ unsigned int new_bufcount; + size_t bytes; + -+ /* Return character pushed back, if there is one. */ -+ if (mbf->pushback_count > 0) -+ { -+ mb_copy (mbc, &mbf->pushback[mbf->pushback_count - 1]); -+ mbf->pushback_count--; -+ return; -+ } -+ + /* If EOF has already been seen, don't use getc. This matters if + mbf->fp is connected to an interactive tty. */ + if (mbf->eof_seen) + goto eof; + -+ new_bufcount = mbf->bufcount; -+ -+ /* If mbf->state is not in an initial state, some more 32-bit wide character -+ may be hiding in the state. We need to call mbrtoc32 again. */ -+ #if GNULIB_MBRTOC32_REGULAR -+ assert (mbsinit (&mbf->state)); -+ #else -+ if (mbsinit (&mbf->state)) -+ #endif ++ /* Return character pushed back, if there is one. */ ++ if (mbf->have_pushback) + { -+ /* Before using mbrtoc32, we need at least one byte. 
*/ -+ if (new_bufcount == 0) -+ { -+ int c = getc (mbf->fp); -+ if (c == EOF) -+ { -+ mbf->eof_seen = true; -+ goto eof; -+ } -+ mbf->buf[0] = (unsigned char) c; -+ new_bufcount++; -+ } -+ -+ /* Handle most ASCII characters quickly, without calling mbrtoc32(). */ -+ if (new_bufcount == 1 && is_basic (mbf->buf[0])) -+ { -+ /* These characters are part of the POSIX portable character set. -+ For most of them, namely those in the ISO C basic character set, -+ ISO C 99 guarantees that their wide character code is identical to -+ their char code. For the few other ones, this is the case as well, -+ in all locale encodings that are in use. The 32-bit wide character -+ code is the same as well. */ -+ mbc->wc = mbc->buf[0] = mbf->buf[0]; -+ mbc->wc_valid = true; -+ mbc->ptr = &mbc->buf[0]; -+ mbc->bytes = 1; -+ mbf->bufcount = 0; -+ return; -+ } ++ mb_copy (mbc, &mbf->pushback); ++ mbf->have_pushback = false; ++ return; + } + -+ /* Use mbrtoc32 on an increasing number of bytes. Read only as many bytes ++ /* Before using mbrtowc, we need at least one byte. */ ++ if (mbf->bufcount == 0) ++ { ++ int c = getc (mbf->fp); ++ if (c == EOF) ++ { ++ mbf->eof_seen = true; ++ goto eof; ++ } ++ mbf->buf[0] = (unsigned char) c; ++ mbf->bufcount++; ++ } ++ ++ /* Handle most ASCII characters quickly, without calling mbrtowc(). */ ++ if (mbf->bufcount == 1 && mbsinit (&mbf->state) && is_basic (mbf->buf[0])) ++ { ++ /* These characters are part of the basic character set. ISO C 99 ++ guarantees that their wide character code is identical to their ++ char code. */ ++ mbc->wc = mbc->buf[0] = mbf->buf[0]; ++ mbc->wc_valid = true; ++ mbc->ptr = &mbc->buf[0]; ++ mbc->bytes = 1; ++ mbf->bufcount = 0; ++ return; ++ } ++ ++ /* Use mbrtowc on an increasing number of bytes. Read only as many bytes + from mbf->fp as needed. This is needed to give reasonable interactive + behaviour when mbf->fp is connected to an interactive tty. */ + for (;;) + { -+ /* Feed the bytes one by one into mbrtoc32. 
*/ -+ bytes = mbrtoc32 (&mbc->wc, &mbf->buf[mbf->bufcount], new_bufcount - mbf->bufcount, &mbf->state); ++ /* We don't know whether the 'mbrtowc' function updates the state when ++ it returns -2, - this is the ISO C 99 and glibc-2.2 behaviour - or ++ not - amended ANSI C, glibc-2.1 and Solaris 2.7 behaviour. We ++ don't have an autoconf test for this, yet. ++ The new behaviour would allow us to feed the bytes one by one into ++ mbrtowc. But the old behaviour forces us to feed all bytes since ++ the end of the last character into mbrtowc. Since we want to retry ++ with more bytes when mbrtowc returns -2, we must backup the state ++ before calling mbrtowc, because implementations with the new ++ behaviour will clobber it. */ ++ mbstate_t backup_state = mbf->state; ++ ++ bytes = mbrtowc (&mbc->wc, &mbf->buf[0], mbf->bufcount, &mbf->state); + + if (bytes == (size_t) -1) + { + /* An invalid multibyte sequence was encountered. */ -+ mbf->bufcount = new_bufcount; + /* Return a single byte. */ + bytes = 1; + mbc->wc_valid = false; -+ /* Allow the next invocation to continue from a sane state. */ -+ mbszero (&mbf->state); + break; + } + else if (bytes == (size_t) -2) + { + /* An incomplete multibyte character. */ -+ mbf->bufcount = new_bufcount; ++ mbf->state = backup_state; + if (mbf->bufcount == MBCHAR_BUF_SIZE) + { + /* An overlong incomplete multibyte sequence was encountered. */ @@ -304,42 +278,28 @@ index 0000000..c852f31 + } + else + { -+ /* Read one more byte and retry mbrtoc32. */ ++ /* Read one more byte and retry mbrtowc. */ + int c = getc (mbf->fp); + if (c == EOF) + { + /* An incomplete multibyte character at the end. 
*/ + mbf->eof_seen = true; -+ bytes = new_bufcount; ++ bytes = mbf->bufcount; + mbc->wc_valid = false; + break; + } -+ mbf->buf[new_bufcount] = (unsigned char) c; -+ new_bufcount++; ++ mbf->buf[mbf->bufcount] = (unsigned char) c; ++ mbf->bufcount++; + } + } + else + { -+ #if !GNULIB_MBRTOC32_REGULAR -+ if (bytes == (size_t) -3) ++ if (bytes == 0) + { -+ /* The previous multibyte sequence produced an additional 32-bit -+ wide character. */ -+ mbf->bufcount = new_bufcount; -+ bytes = 0; -+ } -+ else -+ #endif -+ { -+ bytes = mbf->bufcount + bytes; -+ mbf->bufcount = new_bufcount; -+ if (bytes == 0) -+ { -+ /* A null 32-bit wide character was encountered. */ -+ bytes = 1; -+ assert (mbf->buf[0] == '\0'); -+ assert (mbc->wc == 0); -+ } ++ /* A null wide character was encountered. */ ++ bytes = 1; ++ assert (mbf->buf[0] == '\0'); ++ assert (mbc->wc == 0); + } + mbc->wc_valid = true; + break; @@ -378,10 +338,8 @@ index 0000000..c852f31 +MBFILE_INLINE void +mbfile_multi_ungetc (const struct mbchar *mbc, struct mbfile_multi *mbf) +{ -+ if (mbf->pushback_count == MBFILE_MAX_PUSHBACK) -+ abort (); -+ mb_copy (&mbf->pushback[mbf->pushback_count], mbc); -+ mbf->pushback_count++; ++ mb_copy (&mbf->pushback, mbc); ++ mbf->have_pushback = true; +} + +typedef struct mbfile_multi mb_file_t; @@ -391,8 +349,8 @@ index 0000000..c852f31 +#define mbf_init(mbf, stream) \ + ((mbf).fp = (stream), \ + (mbf).eof_seen = false, \ -+ (mbf).pushback_count = 0, \ -+ mbszero (&(mbf).state), \ ++ (mbf).have_pushback = false, \ ++ memset (&(mbf).state, '\0', sizeof (mbstate_t)), \ + (mbf).bufcount = 0) + +#define mbf_getc(mbc, mbf) mbfile_multi_getc (&(mbc), &(mbf)) @@ -401,27 +359,23 @@ index 0000000..c852f31 + +#define mb_iseof(mbc) ((mbc).bytes == 0) + -+ -+#ifdef __cplusplus -+} ++#ifndef _GL_INLINE_HEADER_BEGIN ++ #error "Please include config.h first." 
+#endif -+ -+_GL_INLINE_HEADER_END ++_GL_INLINE_HEADER_BEGIN + +#endif /* _MBFILE_H */ diff --git a/m4/mbfile.m4 b/m4/mbfile.m4 new file mode 100644 -index 0000000..1d126e0 +index 0000000..8589902 --- /dev/null +++ b/m4/mbfile.m4 -@@ -0,0 +1,16 @@ -+# mbfile.m4 -+# serial 7 -+dnl Copyright (C) 2005, 2008-2025 Free Software Foundation, Inc. +@@ -0,0 +1,14 @@ ++# mbfile.m4 serial 7 ++dnl Copyright (C) 2005, 2008-2015 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. -+dnl This file is offered as-is, without any warranty. + +dnl autoconf tests required for use of mbfile.h +dnl From Bruno Haible. @@ -432,11 +386,11 @@ index 0000000..1d126e0 + : +]) diff --git a/src/cut.c b/src/cut.c -index f0effb9..36479d6 100644 +index 6fd8978..faef877 100644 --- a/src/cut.c +++ b/src/cut.c -@@ -27,6 +27,11 @@ - #include +@@ -28,6 +28,11 @@ + #include #include #include + @@ -446,8 +400,8 @@ index f0effb9..36479d6 100644 +#endif #include "system.h" - #include "assure.h" -@@ -35,6 +40,18 @@ + #include "error.h" +@@ -37,6 +42,18 @@ #include "set-fields.h" @@ -466,7 +420,7 @@ index f0effb9..36479d6 100644 /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "cut" -@@ -51,6 +68,52 @@ +@@ -53,6 +70,52 @@ } \ while (0) @@ -519,8 +473,8 @@ index f0effb9..36479d6 100644 /* Pointer inside RP. When checking if a byte or field is selected by a finite range, we check if it is between CURRENT_RP.LO -@@ -58,6 +121,9 @@ - CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ +@@ -60,6 +123,9 @@ + CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ static struct field_range_pair *current_rp; +/* Length of the delimiter given as argument to -d. 
*/ @@ -529,7 +483,7 @@ index f0effb9..36479d6 100644 /* This buffer is used to support the semantics of the -s option (or lack of same) when the specified field list includes (does not include) the first field. In both of those cases, the entire -@@ -70,6 +136,29 @@ static char *field_1_buffer; +@@ -72,6 +138,29 @@ static char *field_1_buffer; /* The number of bytes allocated for FIELD_1_BUFFER. */ static size_t field_1_bufsize; @@ -556,18 +510,18 @@ index f0effb9..36479d6 100644 + if this program runs on multibyte locale. */ +static int force_singlebyte_mode; + - /* If true, do not output lines containing no delimiter characters. + /* If true do not output lines containing no delimiter characters. Otherwise, all such lines are printed. This option is valid only with field mode. */ -@@ -81,10 +170,16 @@ static bool complement; +@@ -83,10 +172,16 @@ static bool complement; - /* The delimiter character for field mode. */ + /* The delimiter character for field mode. */ static unsigned char delim; +#if HAVE_WCHAR_H +static wchar_t wcdelim; +#endif - /* The delimiter for each line/record. */ + /* The delimiter for each line/record. */ static unsigned char line_delim = '\n'; +/* True if the --output-delimiter=STRING option was specified. */ @@ -576,17 +530,17 @@ index f0effb9..36479d6 100644 /* The length of output_delimiter_string. */ static size_t output_delimiter_length; -@@ -92,9 +187,6 @@ static size_t output_delimiter_length; +@@ -94,9 +189,6 @@ static size_t output_delimiter_length; string consisting of the input delimiter. */ static char *output_delimiter_string; -/* The output delimiter string contents, if the default. */ -static char output_delimiter_default[1]; - - /* True if we have ever read standard input. */ + /* True if we have ever read standard input. 
*/ static bool have_read_stdin; -@@ -148,7 +240,7 @@ Print selected parts of lines from each FILE to standard output.\n\ +@@ -150,7 +242,7 @@ Print selected parts of lines from each FILE to standard output.\n\ -f, --fields=LIST select only these fields; also print any line\n\ that contains no delimiter character, unless\n\ the -s option is specified\n\ @@ -595,7 +549,7 @@ index f0effb9..36479d6 100644 "), stdout); fputs (_("\ --complement complement the set of selected bytes, characters\n\ -@@ -252,7 +344,7 @@ cut_bytes (FILE *stream) +@@ -250,7 +342,7 @@ cut_bytes (FILE *stream) next_item (&byte_idx); if (print_kth (byte_idx)) { @@ -604,7 +558,7 @@ index f0effb9..36479d6 100644 { if (print_delimiter && is_range_start_index (byte_idx)) { -@@ -271,6 +363,82 @@ cut_bytes (FILE *stream) +@@ -266,6 +358,82 @@ cut_bytes (FILE *stream) } } @@ -687,7 +641,7 @@ index f0effb9..36479d6 100644 /* Read from stream STREAM, printing to standard output any selected fields. */ static void -@@ -433,11 +601,218 @@ cut_fields (FILE *stream) +@@ -411,11 +579,218 @@ cut_fields (FILE *stream) } } @@ -908,18 +862,18 @@ index f0effb9..36479d6 100644 { FILE *stream; -@@ -482,8 +857,8 @@ main (int argc, char **argv) +@@ -459,8 +834,8 @@ main (int argc, char **argv) int optc; bool ok; bool delim_specified = false; - bool byte_mode = false; -- char *spec_list_string = nullptr; -+ char *spec_list_string IF_LINT ( = nullptr); +- char *spec_list_string = NULL; ++ char *spec_list_string IF_LINT ( = NULL); + char mbdelim[MB_LEN_MAX + 1]; initialize_main (&argc, &argv); set_program_name (argv[0]); -@@ -493,6 +868,8 @@ main (int argc, char **argv) +@@ -470,6 +845,8 @@ main (int argc, char **argv) atexit (close_stdout); @@ -928,12 +882,12 @@ index f0effb9..36479d6 100644 /* By default, all non-delimited lines are printed. 
*/ suppress_non_delimited = false; -@@ -505,35 +882,77 @@ main (int argc, char **argv) +@@ -481,35 +858,77 @@ main (int argc, char **argv) switch (optc) { case 'b': - case 'c': - /* Build the byte list. */ + /* Build the byte list. */ - byte_mode = true; - FALLTHROUGH; + if (operating_mode != undefined_mode) @@ -943,7 +897,7 @@ index f0effb9..36479d6 100644 + break; + + case 'c': -+ /* Build the character list. */ ++ /* Build the character list. */ + if (operating_mode != undefined_mode) + FATAL_ERROR (_("only one type of list may be specified")); + operating_mode = character_mode; @@ -951,7 +905,7 @@ index f0effb9..36479d6 100644 + break; + case 'f': - /* Build the field list. */ + /* Build the field list. */ - if (spec_list_string) - FATAL_ERROR (_("only one list may be specified")); + if (operating_mode != undefined_mode) @@ -961,7 +915,7 @@ index f0effb9..36479d6 100644 break; case 'd': - /* New delimiter. */ + /* New delimiter. */ /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' 
*/ - if (optarg[0] != '\0' && optarg[1] != '\0') - FATAL_ERROR (_("the delimiter must be a single character")); @@ -1016,7 +970,7 @@ index f0effb9..36479d6 100644 break; case 's': -@@ -555,40 +974,57 @@ main (int argc, char **argv) +@@ -533,40 +952,57 @@ main (int argc, char **argv) } } @@ -1057,7 +1011,7 @@ index f0effb9..36479d6 100644 +#endif + } - if (output_delimiter_string == nullptr) + if (output_delimiter_string == NULL) { - output_delimiter_default[0] = delim; - output_delimiter_string = output_delimiter_default; @@ -1092,18 +1046,18 @@ index f0effb9..36479d6 100644 if (have_read_stdin && fclose (stdin) == EOF) diff --git a/src/expand-common.c b/src/expand-common.c -index 14dd804..0d8eaaa 100644 +index deec1bd..b39f740 100644 --- a/src/expand-common.c +++ b/src/expand-common.c @@ -19,6 +19,7 @@ - #include + #include #include #include +#include #include "system.h" - #include "c-ctype.h" - #include "fadvise.h" -@@ -132,6 +133,119 @@ set_increment_size (colno tabval) + #include "die.h" + #include "error.h" +@@ -125,6 +126,119 @@ set_increment_size (uintmax_t tabval) return ok; } @@ -1224,10 +1178,10 @@ index 14dd804..0d8eaaa 100644 to the list of tab stops. */ extern void diff --git a/src/expand-common.h b/src/expand-common.h -index 46ef4e3..e19469b 100644 +index 5f59a0e..835b9d5 100644 --- a/src/expand-common.h +++ b/src/expand-common.h -@@ -29,6 +29,18 @@ extern idx_t max_column_width; +@@ -25,6 +25,18 @@ extern size_t max_column_width; /* The desired exit status. */ extern int exit_status; @@ -1245,12 +1199,12 @@ index 46ef4e3..e19469b 100644 + /* Add tab stop TABVAL to the end of 'tab_list'. 
*/ extern void - add_tab_stop (colno tabval); + add_tab_stop (uintmax_t tabval); diff --git a/src/expand.c b/src/expand.c -index 5ec7ce9..65ac315 100644 +index ed78ca8..a4cefa1 100644 --- a/src/expand.c +++ b/src/expand.c -@@ -38,6 +38,9 @@ +@@ -37,6 +37,9 @@ #include #include #include @@ -1258,12 +1212,12 @@ index 5ec7ce9..65ac315 100644 +#include + #include "system.h" - #include "expand-common.h" + #include "die.h" -@@ -96,19 +99,41 @@ expand (void) +@@ -97,19 +100,41 @@ expand (void) { /* Input stream. */ - FILE *fp = next_file (nullptr); + FILE *fp = next_file (NULL); + mb_file_t mbf; + mbf_char_t c; + /* True if the starting locale is utf8. */ @@ -1306,9 +1260,9 @@ index 5ec7ce9..65ac315 100644 /* The following variables have valid values only when CONVERT is true: */ -@@ -118,17 +143,48 @@ expand (void) +@@ -119,17 +144,48 @@ expand (void) /* Index in TAB_LIST of next tab stop to examine. */ - idx_t tab_index = 0; + size_t tab_index = 0; - /* Convert a line of text. */ @@ -1358,10 +1312,10 @@ index 5ec7ce9..65ac315 100644 + if (mb_iseq (c, '\t')) { /* Column the next input tab stop is on. */ - bool last_tab; -@@ -139,31 +195,33 @@ expand (void) + uintmax_t next_tab_column; +@@ -148,32 +204,34 @@ expand (void) if (putchar (' ') < 0) - write_error (); + die (EXIT_FAILURE, errno, _("write error")); - c = ' '; + mb_setascii (&c, ' '); @@ -1378,13 +1332,14 @@ index 5ec7ce9..65ac315 100644 + /* A leading control character could make us trip over. */ + else if (!mb_iscntrl (c)) { -- if (ckd_add (&column, column, 1)) -+ if (ckd_add (&column, column, mb_width (c))) - error (EXIT_FAILURE, 0, _("input line is too long")); +- column++; ++ column += mb_width (c); + if (!column) + die (EXIT_FAILURE, 0, _("input line is too long")); } - convert &= convert_entire_line || !! isblank (c); -+ convert &= convert_entire_line || !! 
mb_isblank (c); ++ convert &= convert_entire_line || mb_isblank (c); } - if (c < 0) @@ -1394,34 +1349,933 @@ index 5ec7ce9..65ac315 100644 - if (putchar (c) < 0) + mb_putc (c, stdout); + if (ferror (stdout)) - write_error (); + die (EXIT_FAILURE, errno, _("write error")); } - while (c != '\n'); + while (!mb_iseq (c, '\n')); } } +diff --git a/src/fold.c b/src/fold.c +index f07a90b..d32dbfd 100644 +--- a/src/fold.c ++++ b/src/fold.c +@@ -22,12 +22,34 @@ + #include + #include + ++/* Get mbstate_t, mbrtowc(), wcwidth(). */ ++#if HAVE_WCHAR_H ++# include ++#endif ++ ++/* Get iswprint(), iswblank(), wcwidth(). */ ++#if HAVE_WCTYPE_H ++# include ++#endif ++ + #include "system.h" + #include "die.h" + #include "error.h" + #include "fadvise.h" + #include "xdectoint.h" + ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC ++ installation; work around this configuration error. */ ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 ++# undef MB_LEN_MAX ++# define MB_LEN_MAX 16 ++#endif ++ ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ ++#if HAVE_MBRTOWC && defined mbstate_t ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) ++#endif ++ + #define TAB_WIDTH 8 + + /* The official name of this program (e.g., no 'g' prefix). */ +@@ -35,20 +57,41 @@ + + #define AUTHORS proper_name ("David MacKenzie") + ++#define FATAL_ERROR(Message) \ ++ do \ ++ { \ ++ error (0, 0, (Message)); \ ++ usage (2); \ ++ } \ ++ while (0) ++ ++enum operating_mode ++{ ++ /* Fold texts by columns that are at the given positions. */ ++ column_mode, ++ ++ /* Fold texts by bytes that are at the given positions. */ ++ byte_mode, ++ ++ /* Fold texts by characters that are at the given positions. */ ++ character_mode, ++}; ++ ++/* The argument shows current mode. (Default: column_mode) */ ++static enum operating_mode operating_mode; ++ + /* If nonzero, try to break on whitespace. */ + static bool break_spaces; + +-/* If nonzero, count bytes, not column positions. 
*/ +-static bool count_bytes; +- + /* If nonzero, at least one of the files we read was standard input. */ + static bool have_read_stdin; + +-static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::"; ++static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::"; + + static struct option const longopts[] = + { + {"bytes", no_argument, NULL, 'b'}, ++ {"characters", no_argument, NULL, 'c'}, + {"spaces", no_argument, NULL, 's'}, + {"width", required_argument, NULL, 'w'}, + {GETOPT_HELP_OPTION_DECL}, +@@ -76,6 +119,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ + + fputs (_("\ + -b, --bytes count bytes rather than columns\n\ ++ -c, --characters count characters rather than columns\n\ + -s, --spaces break at spaces\n\ + -w, --width=WIDTH use WIDTH columns instead of 80\n\ + "), stdout); +@@ -93,7 +137,7 @@ Wrap input lines in each FILE, writing to standard output.\n\ + static size_t + adjust_column (size_t column, char c) + { +- if (!count_bytes) ++ if (operating_mode != byte_mode) + { + if (c == '\b') + { +@@ -116,30 +160,14 @@ adjust_column (size_t column, char c) + to stdout, with maximum line length WIDTH. + Return true if successful. */ + +-static bool +-fold_file (char const *filename, size_t width) ++static void ++fold_text (FILE *istream, size_t width, int *saved_errno) + { +- FILE *istream; + int c; + size_t column = 0; /* Screen column where next char will go. */ + size_t offset_out = 0; /* Index in 'line_out' for next char. 
*/ + static char *line_out = NULL; + static size_t allocated_out = 0; +- int saved_errno; +- +- if (STREQ (filename, "-")) +- { +- istream = stdin; +- have_read_stdin = true; +- } +- else +- istream = fopen (filename, "r"); +- +- if (istream == NULL) +- { +- error (0, errno, "%s", quotef (filename)); +- return false; +- } + + fadvise (istream, FADVISE_SEQUENTIAL); + +@@ -169,6 +197,15 @@ fold_file (char const *filename, size_t width) + bool found_blank = false; + size_t logical_end = offset_out; + ++ /* If LINE_OUT has no wide character, ++ put a new wide character in LINE_OUT ++ if column is bigger than width. */ ++ if (offset_out == 0) ++ { ++ line_out[offset_out++] = c; ++ continue; ++ } ++ + /* Look for the last blank. */ + while (logical_end) + { +@@ -215,13 +252,225 @@ fold_file (char const *filename, size_t width) + line_out[offset_out++] = c; + } + +- saved_errno = errno; ++ *saved_errno = errno; + if (!ferror (istream)) +- saved_errno = 0; ++ *saved_errno = 0; + + if (offset_out) + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); + ++} ++ ++#if HAVE_MBRTOWC ++static void ++fold_multibyte_text (FILE *istream, size_t width, int *saved_errno) ++{ ++ char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */ ++ size_t buflen = 0; /* The length of the byte sequence in buf. */ ++ char *bufpos = buf; /* Next read position of BUF. */ ++ wint_t wc; /* A gotten wide character. */ ++ size_t mblength; /* The byte size of a multibyte character which shows ++ as same character as WC. */ ++ mbstate_t state, state_bak; /* State of the stream. */ ++ int convfail = 0; /* 1, when conversion is failed. Otherwise 0. */ ++ ++ static char *line_out = NULL; ++ size_t offset_out = 0; /* Index in `line_out' for next char. 
*/ ++ static size_t allocated_out = 0; ++ ++ int increment; ++ size_t column = 0; ++ ++ size_t last_blank_pos; ++ size_t last_blank_column; ++ int is_blank_seen; ++ int last_blank_increment = 0; ++ int is_bs_following_last_blank; ++ size_t bs_following_last_blank_num; ++ int is_cr_after_last_blank; ++ ++#define CLEAR_FLAGS \ ++ do \ ++ { \ ++ last_blank_pos = 0; \ ++ last_blank_column = 0; \ ++ is_blank_seen = 0; \ ++ is_bs_following_last_blank = 0; \ ++ bs_following_last_blank_num = 0; \ ++ is_cr_after_last_blank = 0; \ ++ } \ ++ while (0) ++ ++#define START_NEW_LINE \ ++ do \ ++ { \ ++ putchar ('\n'); \ ++ column = 0; \ ++ offset_out = 0; \ ++ CLEAR_FLAGS; \ ++ } \ ++ while (0) ++ ++ CLEAR_FLAGS; ++ memset (&state, '\0', sizeof(mbstate_t)); ++ ++ for (;; bufpos += mblength, buflen -= mblength) ++ { ++ if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream)) ++ { ++ memmove (buf, bufpos, buflen); ++ buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream); ++ bufpos = buf; ++ } ++ ++ if (buflen < 1) ++ break; ++ convfail = 0; /* CONVFAIL describes only the current character. */ ++ /* Get a wide character. */ ++ state_bak = state; ++ mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state); ++ ++ switch (mblength) ++ { ++ case (size_t)-1: ++ case (size_t)-2: ++ convfail++; ++ state = state_bak; ++ /* Fall through. */ ++ ++ case 0: ++ mblength = 1; ++ break; ++ } ++ ++rescan: ++ if (operating_mode == byte_mode) /* byte mode */ ++ increment = mblength; ++ else if (operating_mode == character_mode) /* character mode */ ++ increment = 1; ++ else /* column mode */ ++ { ++ if (convfail) ++ increment = 1; ++ else ++ { ++ switch (wc) ++ { ++ case L'\n': ++ fwrite (line_out, sizeof(char), offset_out, stdout); ++ START_NEW_LINE; ++ continue; ++ ++ case L'\b': ++ increment = (column > 0) ? -1 : 0; ++ break; ++ ++ case L'\r': ++ increment = -1 * column; ++ break; ++ ++ case L'\t': ++ increment = 8 - column % 8; ++ break; ++ ++ default: ++ increment = wcwidth (wc); ++ increment = (increment < 0) ? 
0 : increment; ++ } ++ } ++ } ++ ++ if (column + increment > width && break_spaces && last_blank_pos) ++ { ++ fwrite (line_out, sizeof(char), last_blank_pos, stdout); ++ putchar ('\n'); ++ ++ offset_out = offset_out - last_blank_pos; ++ column = column - last_blank_column + ((is_cr_after_last_blank) ++ ? last_blank_increment : bs_following_last_blank_num); ++ memmove (line_out, line_out + last_blank_pos, offset_out); ++ CLEAR_FLAGS; ++ goto rescan; ++ } ++ ++ if (column + increment > width && column != 0) ++ { ++ fwrite (line_out, sizeof(char), offset_out, stdout); ++ START_NEW_LINE; ++ goto rescan; ++ } ++ ++ if (allocated_out < offset_out + mblength) ++ { ++ line_out = X2REALLOC (line_out, &allocated_out); ++ } ++ ++ memcpy (line_out + offset_out, bufpos, mblength); ++ offset_out += mblength; ++ column += increment; ++ ++ if (is_blank_seen && !convfail && wc == L'\r') ++ is_cr_after_last_blank = 1; ++ ++ if (is_bs_following_last_blank && !convfail && wc == L'\b') ++ ++bs_following_last_blank_num; ++ else ++ is_bs_following_last_blank = 0; ++ ++ if (break_spaces && !convfail && iswblank (wc)) ++ { ++ last_blank_pos = offset_out; ++ last_blank_column = column; ++ is_blank_seen = 1; ++ last_blank_increment = increment; ++ is_bs_following_last_blank = 1; ++ bs_following_last_blank_num = 0; ++ is_cr_after_last_blank = 0; ++ } ++ } ++ ++ *saved_errno = errno; ++ if (!ferror (istream)) ++ *saved_errno = 0; ++ ++ if (offset_out) ++ fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); ++ ++} ++#endif ++ ++/* Fold file FILENAME, or standard input if FILENAME is "-", ++ to stdout, with maximum line length WIDTH. ++ Return true if successful, false if an error occurs. 
*/ ++ ++static bool ++fold_file (char const *filename, size_t width) ++{ ++ FILE *istream; ++ int saved_errno; ++ ++ if (STREQ (filename, "-")) ++ { ++ istream = stdin; ++ have_read_stdin = true; ++ } ++ else ++ istream = fopen (filename, "r"); ++ ++ if (istream == NULL) ++ { ++ error (0, errno, "%s", quotef (filename)); ++ return false; ++ } ++ ++ /* Define how ISTREAM is being folded. */ ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ fold_multibyte_text (istream, width, &saved_errno); ++ else ++#endif ++ fold_text (istream, width, &saved_errno); ++ + if (STREQ (filename, "-")) + clearerr (istream); + else if (fclose (istream) != 0 && !saved_errno) +@@ -252,7 +501,8 @@ main (int argc, char **argv) + + atexit (close_stdout); + +- break_spaces = count_bytes = have_read_stdin = false; ++ operating_mode = column_mode; ++ break_spaces = have_read_stdin = false; + + while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) + { +@@ -261,7 +511,15 @@ main (int argc, char **argv) + + switch (optc) + { + case 'b': /* Count bytes rather than columns. */ +- count_bytes = true; ++ if (operating_mode != column_mode) ++ FATAL_ERROR (_("only one way of folding may be specified")); ++ operating_mode = byte_mode; ++ break; ++ ++ case 'c': ++ if (operating_mode != column_mode) ++ FATAL_ERROR (_("only one way of folding may be specified")); ++ operating_mode = character_mode; + break; + + case 's': /* Break at word boundaries. */ +diff --git a/src/join.c b/src/join.c +index f2fd172..6c7d1ed 100644 +--- a/src/join.c ++++ b/src/join.c +@@ -22,19 +22,33 @@ + #include + #include + ++/* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth(). */ ++#if HAVE_WCHAR_H ++# include ++#endif ++ ++/* Get iswblank(), towupper. 
*/ ++#if HAVE_WCTYPE_H ++# include ++#endif ++ + #include "system.h" + #include "die.h" + #include "error.h" + #include "fadvise.h" + #include "hard-locale.h" + #include "linebuffer.h" +-#include "memcasecmp.h" + #include "quote.h" + #include "stdio--.h" + #include "xmemcoll.h" + #include "xstrtol.h" + #include "argmatch.h" + ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ ++#if HAVE_MBRTOWC && defined mbstate_t ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) ++#endif ++ + /* The official name of this program (e.g., no 'g' prefix). */ + #define PROGRAM_NAME "join" + +@@ -136,10 +150,12 @@ static struct outlist outlist_head; + /* Last element in 'outlist', where a new element can be added. */ + static struct outlist *outlist_end = &outlist_head; + +-/* Tab character separating fields. If negative, fields are separated +- by any nonempty string of blanks, otherwise by exactly one +- tab character whose value (when cast to unsigned char) equals TAB. */ +-static int tab = -1; ++/* Tab character separating fields. If NULL, fields are separated ++ by any nonempty string of blanks. */ ++static char *tab = NULL; ++ ++/* The number of bytes used for tab. */ ++static size_t tablen = 0; + + /* If nonzero, check that the input is correctly ordered. */ + static enum +@@ -280,13 +296,14 @@ xfields (struct line *line) + if (ptr == lim) + return; + +- if (0 <= tab && tab != '\n') ++ if (tab != NULL) + { ++ unsigned char t = tab[0]; + char *sep; +- for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) ++ for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1) + extract_field (line, ptr, sep - ptr); + } +- else if (tab < 0) ++ else + { + /* Skip leading blanks before the first field. 
*/ + while (field_sep (*ptr)) +@@ -310,6 +327,147 @@ xfields (struct line *line) + extract_field (line, ptr, lim - ptr); + } + ++#if HAVE_MBRTOWC ++static void ++xfields_multibyte (struct line *line) ++{ ++ char *ptr = line->buf.buffer; ++ char const *lim = ptr + line->buf.length - 1; ++ wchar_t wc = 0; ++ size_t mblength = 1; ++ mbstate_t state, state_bak; ++ ++ memset (&state, 0, sizeof (mbstate_t)); ++ ++ if (ptr >= lim) ++ return; ++ ++ if (tab != NULL) ++ { ++ char *sep = ptr; ++ for (; ptr < lim; ptr = sep + mblength) ++ { ++ sep = ptr; ++ while (sep < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ if (mblength == tablen && !memcmp (sep, tab, mblength)) ++ break; ++ else ++ { ++ sep += mblength; ++ continue; ++ } ++ } ++ ++ if (sep >= lim) ++ break; ++ ++ extract_field (line, ptr, sep - ptr); ++ } ++ } ++ else ++ { ++ /* Skip leading blanks before the first field. */ ++ while(ptr < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ if (!iswblank(wc) && wc != '\n') ++ break; ++ ptr += mblength; ++ } ++ ++ do ++ { ++ char *sep; ++ state_bak = state; ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ sep = ptr + mblength; ++ while (sep < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 
1 : mblength; ++ ++ if (iswblank (wc) || wc == '\n') ++ break; ++ ++ sep += mblength; ++ } ++ ++ extract_field (line, ptr, sep - ptr); ++ if (sep >= lim) ++ return; ++ ++ state_bak = state; ++ mblength = mbrtowc (&wc, sep, lim - sep + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ ptr = sep + mblength; ++ while (ptr < lim) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state); ++ if (mblength == (size_t)-1 || mblength == (size_t)-2) ++ { ++ mblength = 1; ++ state = state_bak; ++ break; ++ } ++ mblength = (mblength < 1) ? 1 : mblength; ++ ++ if (!iswblank (wc) && wc != '\n') ++ break; ++ ++ ptr += mblength; ++ } ++ } ++ while (ptr < lim); ++ } ++ ++ extract_field (line, ptr, lim - ptr); ++} ++#endif ++ + static void + freeline (struct line *line) + { +@@ -331,56 +489,133 @@ keycmp (struct line const *line1, struct line const *line2, + size_t jf_1, size_t jf_2) + { + /* Start of field to compare in each file. */ +- char *beg1; +- char *beg2; +- +- size_t len1; +- size_t len2; /* Length of fields to compare. */ ++ char *beg[2]; ++ char *copy[2]; ++ size_t len[2]; /* Length of fields to compare. */ + int diff; ++ int i, j; ++ int mallocd = 0; + + if (jf_1 < line1->nfields) + { +- beg1 = line1->fields[jf_1].beg; +- len1 = line1->fields[jf_1].len; ++ beg[0] = line1->fields[jf_1].beg; ++ len[0] = line1->fields[jf_1].len; + } + else + { +- beg1 = NULL; +- len1 = 0; ++ beg[0] = NULL; ++ len[0] = 0; + } + + if (jf_2 < line2->nfields) + { +- beg2 = line2->fields[jf_2].beg; +- len2 = line2->fields[jf_2].len; ++ beg[1] = line2->fields[jf_2].beg; ++ len[1] = line2->fields[jf_2].len; + } + else + { +- beg2 = NULL; +- len2 = 0; ++ beg[1] = NULL; ++ len[1] = 0; + } + +- if (len1 == 0) +- return len2 == 0 ? 0 : -1; +- if (len2 == 0) ++ if (len[0] == 0) ++ return len[1] == 0 ? 
0 : -1; ++ if (len[1] == 0) + return 1; + + if (ignore_case) + { +- /* FIXME: ignore_case does not work with NLS (in particular, +- with multibyte chars). */ +- diff = memcasecmp (beg1, beg2, MIN (len1, len2)); ++#ifdef HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ size_t mblength; ++ wchar_t wc, uwc; ++ mbstate_t state, state_bak; ++ ++ memset (&state, '\0', sizeof (mbstate_t)); ++ ++ for (i = 0; i < 2; i++) ++ { ++ mallocd = 1; ++ copy[i] = xmalloc (len[i] + 1); ++ memset (copy[i], '\0',len[i] + 1); ++ ++ for (j = 0; j < MIN (len[0], len[1]);) ++ { ++ state_bak = state; ++ mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state); ++ ++ switch (mblength) ++ { ++ case (size_t) -1: ++ case (size_t) -2: ++ state = state_bak; ++ /* Fall through */ ++ case 0: ++ mblength = 1; ++ break; ++ ++ default: ++ uwc = towupper (wc); ++ ++ if (uwc != wc) ++ { ++ mbstate_t state_wc; ++ size_t mblen; ++ ++ memset (&state_wc, '\0', sizeof (mbstate_t)); ++ mblen = wcrtomb (copy[i] + j, uwc, &state_wc); ++ assert (mblen != (size_t)-1); ++ } ++ else ++ memcpy (copy[i] + j, beg[i] + j, mblength); ++ } ++ j += mblength; ++ } ++ copy[i][j] = '\0'; ++ } ++ } ++ else ++#endif ++ { ++ for (i = 0; i < 2; i++) ++ { ++ mallocd = 1; ++ copy[i] = xmalloc (len[i] + 1); ++ ++ for (j = 0; j < MIN (len[0], len[1]); j++) ++ copy[i][j] = toupper (beg[i][j]); ++ ++ copy[i][j] = '\0'; ++ } ++ } + } + else + { +- if (hard_LC_COLLATE) +- return xmemcoll (beg1, len1, beg2, len2); +- diff = memcmp (beg1, beg2, MIN (len1, len2)); ++ copy[0] = beg[0]; ++ copy[1] = beg[1]; + } + ++ if (hard_LC_COLLATE) ++ { ++ diff = xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]); ++ ++ if (mallocd) ++ for (i = 0; i < 2; i++) ++ free (copy[i]); ++ ++ return diff; ++ } ++ diff = memcmp (copy[0], copy[1], MIN (len[0], len[1])); ++ ++ if (mallocd) ++ for (i = 0; i < 2; i++) ++ free (copy[i]); ++ ++ + if (diff) + return diff; +- return len1 < len2 ? 
-1 : len1 != len2; ++ return len[0] - len[1]; + } + + /* Check that successive input lines PREV and CURRENT from input file +@@ -472,6 +707,11 @@ get_line (FILE *fp, struct line **linep, int which) + } + ++line_no[which - 1]; + ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ xfields_multibyte (line); ++ else ++#endif + xfields (line); + + if (prevline[which - 1]) +@@ -567,21 +807,28 @@ prfield (size_t n, struct line const *line) + + /* Output all the fields in line, other than the join field. */ + ++#define PUT_TAB_CHAR \ ++ do \ ++ { \ ++ (tab != NULL) ? \ ++ fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \ ++ } \ ++ while (0) ++ + static void + prfields (struct line const *line, size_t join_field, size_t autocount) + { + size_t i; + size_t nfields = autoformat ? autocount : line->nfields; +- char output_separator = tab < 0 ? ' ' : tab; + + for (i = 0; i < join_field && i < nfields; ++i) + { +- putchar (output_separator); ++ PUT_TAB_CHAR; + prfield (i, line); + } + for (i = join_field + 1; i < nfields; ++i) + { +- putchar (output_separator); ++ PUT_TAB_CHAR; + prfield (i, line); + } + } +@@ -592,7 +839,6 @@ static void + prjoin (struct line const *line1, struct line const *line2) + { + const struct outlist *outlist; +- char output_separator = tab < 0 ? 
' ' : tab; + size_t field; + struct line const *line; + +@@ -626,7 +872,7 @@ prjoin (struct line const *line1, struct line const *line2) + o = o->next; + if (o == NULL) + break; +- putchar (output_separator); ++ PUT_TAB_CHAR; + } + putchar (eolchar); + } +@@ -1102,20 +1348,43 @@ main (int argc, char **argv) + + case 't': + { +- unsigned char newtab = optarg[0]; ++ char *newtab = NULL; ++ size_t newtablen; ++ newtab = xstrdup (optarg); ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ mbstate_t state; ++ ++ memset (&state, 0, sizeof (mbstate_t)); ++ newtablen = mbrtowc (NULL, newtab, ++ strnlen (newtab, MB_LEN_MAX), ++ &state); ++ if (newtablen == (size_t) 0 ++ || newtablen == (size_t) -1 ++ || newtablen == (size_t) -2) ++ newtablen = 1; ++ } ++ else ++#endif ++ newtablen = 1; + if (! newtab) +- newtab = '\n'; /* '' => process the whole line. */ ++ newtab = (char*)"\n"; /* '' => process the whole line. */ + else if (optarg[1]) + { +- if (STREQ (optarg, "\\0")) +- newtab = '\0'; +- else +- die (EXIT_FAILURE, 0, _("multi-character tab %s"), +- quote (optarg)); ++ if (newtablen == 1 && newtab[1]) ++ { ++ if (STREQ (newtab, "\\0")) ++ newtab[0] = '\0'; ++ } ++ } ++ if (tab != NULL && strcmp (tab, newtab)) ++ { ++ free (newtab); ++ die (EXIT_FAILURE, 0, _("incompatible tabs")); + } +- if (0 <= tab && tab != newtab) +- die (EXIT_FAILURE, 0, _("incompatible tabs")); + tab = newtab; ++ tablen = newtablen; + } + break; + diff --git a/src/local.mk b/src/local.mk -index a8ad6b4..b0e61ec 100644 +index e1d15ce..1a5ffaa 100644 --- a/src/local.mk +++ b/src/local.mk -@@ -490,8 +490,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) +@@ -434,8 +434,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) src_basenc_SOURCES = src/basenc.c src_basenc_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS) -src_expand_SOURCES = src/expand.c src/expand-common.c -src_unexpand_SOURCES = src/unexpand.c src/expand-common.c -+src_expand_SOURCES = src/expand.c src/expand-common.c lib/mbfile.c 
lib/mbchar.c -+src_unexpand_SOURCES = src/unexpand.c src/expand-common.c lib/mbfile.c lib/mbchar.c ++src_expand_SOURCES = src/expand.c src/expand-common.c lib/mbfile.c ++src_unexpand_SOURCES = src/unexpand.c src/expand-common.c lib/mbfile.c src_wc_SOURCES = src/wc.c - if USE_AVX512_WC_LINECOUNT + if USE_AVX2_WC_LINECOUNT diff --git a/src/pr.c b/src/pr.c -index 10b8c52..079c86c 100644 +index 4c17c00..b4fab1c 100644 --- a/src/pr.c +++ b/src/pr.c -@@ -312,6 +312,24 @@ - #include +@@ -311,6 +311,24 @@ + #include #include + @@ -1443,8 +2297,8 @@ index 10b8c52..079c86c 100644 +#endif + #include "system.h" - #include "c-ctype.h" - #include "fadvise.h" + #include "die.h" + #include "error.h" @@ -325,6 +343,18 @@ #include "xstrtol-error.h" #include "xdectoint.h" @@ -1486,15 +2340,15 @@ index 10b8c52..079c86c 100644 static bool read_line (COLUMN *p); static bool print_page (void); static bool print_stored (COLUMN *p); -@@ -428,6 +471,7 @@ static void pad_across_to (int position); - static void add_line_number (COLUMN *p); - static int getoptnum (char const *n_str, int min, char const *errfmt); +@@ -429,6 +472,7 @@ static void add_line_number (COLUMN *p); + static void getoptnum (char const *n_str, int min, int *num, + char const *errfmt); static void getoptarg (char *arg, char switch_char, char *character, + int *character_length, int *character_width, int *number); static void print_files (int number_of_files, char **av); static void init_parameters (int number_of_files); -@@ -441,7 +485,6 @@ static void store_char (char c); +@@ -442,7 +486,6 @@ static void store_char (char c); static void pad_down (unsigned int lines); static void read_rest_of_line (COLUMN *p); static void skip_read (COLUMN *p, int column_number); @@ -1502,7 +2356,7 @@ index 10b8c52..079c86c 100644 static void cleanup (void); static void print_sep_string (void); static void separator_string (char const *optarg_S); -@@ -453,7 +496,7 @@ static COLUMN *column_vector; +@@ -454,7 +497,7 @@ static COLUMN 
*column_vector; we store the leftmost columns contiguously in buff. To print a line from buff, get the index of the first character from line_vector[i], and print up to line_vector[i + 1]. */ @@ -1511,7 +2365,7 @@ index 10b8c52..079c86c 100644 /* Index of the position in buff where the next character will be stored. */ -@@ -557,7 +600,7 @@ static int chars_per_column; +@@ -558,7 +601,7 @@ static int chars_per_column; static bool untabify_input = false; /* (-e) The input tab character. */ @@ -1520,7 +2374,7 @@ index 10b8c52..079c86c 100644 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... where the leftmost column is 1. */ -@@ -567,7 +610,10 @@ static int chars_per_input_tab = 8; +@@ -568,7 +611,10 @@ static int chars_per_input_tab = 8; static bool tabify_output = false; /* (-i) The output tab character. */ @@ -1532,7 +2386,7 @@ index 10b8c52..079c86c 100644 /* (-i) The width of the output tab. */ static int chars_per_output_tab = 8; -@@ -637,7 +683,13 @@ static int line_number; +@@ -638,7 +684,13 @@ static int line_number; static bool numbered_lines = false; /* (-n) Character which follows each line number. */ @@ -1547,7 +2401,7 @@ index 10b8c52..079c86c 100644 /* (-n) line counting starts with 1st line of input file (not with 1st line of 1st page printed). */ -@@ -690,6 +742,7 @@ static bool use_col_separator = false; +@@ -691,6 +743,7 @@ static bool use_col_separator = false; -a|COLUMN|-m is a 'space' and with the -J option a 'tab'. 
*/ static char const *col_sep_string = ""; static int col_sep_length = 0; @@ -1555,7 +2409,7 @@ index 10b8c52..079c86c 100644 static char *column_separator = (char *) " "; static char *line_separator = (char *) "\t"; -@@ -852,6 +905,13 @@ separator_string (char const *optarg_S) +@@ -853,6 +906,13 @@ separator_string (char const *optarg_S) integer_overflow (); col_sep_length = len; col_sep_string = optarg_S; @@ -1569,7 +2423,7 @@ index 10b8c52..079c86c 100644 } int -@@ -876,6 +936,21 @@ main (int argc, char **argv) +@@ -877,6 +937,21 @@ main (int argc, char **argv) atexit (close_stdout); @@ -1591,7 +2445,7 @@ index 10b8c52..079c86c 100644 n_files = 0; file_names = (argc > 1 ? xnmalloc (argc - 1, sizeof (char *)) -@@ -952,8 +1027,12 @@ main (int argc, char **argv) +@@ -953,8 +1028,12 @@ main (int argc, char **argv) break; case 'e': if (optarg) @@ -1606,7 +2460,7 @@ index 10b8c52..079c86c 100644 /* Could check tab width > 0. */ untabify_input = true; break; -@@ -966,8 +1045,12 @@ main (int argc, char **argv) +@@ -967,8 +1046,12 @@ main (int argc, char **argv) break; case 'i': if (optarg) @@ -1632,7 +2486,7 @@ index 10b8c52..079c86c 100644 break; case 'N': skip_count = false; -@@ -1013,6 +1096,7 @@ main (int argc, char **argv) +@@ -1012,6 +1095,7 @@ main (int argc, char **argv) /* Reset an additional input of -s, -S dominates -s */ col_sep_string = ""; col_sep_length = 0; @@ -1640,7 +2494,7 @@ index 10b8c52..079c86c 100644 use_col_separator = true; if (optarg) separator_string (optarg); -@@ -1168,7 +1252,8 @@ getoptnum (char const *n_str, int min, char const *err) +@@ -1166,10 +1250,45 @@ getoptnum (char const *n_str, int min, int *num, char const *err) a number. 
*/ static void @@ -1648,12 +2502,7 @@ index 10b8c52..079c86c 100644 +getoptarg (char *arg, char switch_char, char *character, int *character_length, + int *character_width, int *number) { - if (!*arg) - { -@@ -1177,7 +1262,41 @@ getoptarg (char *arg, char switch_char, char *character, int *number) - } - - if (!c_isdigit (*arg)) + if (!ISDIGIT (*arg)) - *character = *arg++; + { +#ifdef HAVE_MBRTOWC @@ -1693,7 +2542,7 @@ index 10b8c52..079c86c 100644 if (*arg) { long int tmp_long; -@@ -1206,6 +1325,11 @@ static void +@@ -1191,6 +1310,11 @@ static void init_parameters (int number_of_files) { int chars_used_by_number = 0; @@ -1705,7 +2554,7 @@ index 10b8c52..079c86c 100644 lines_per_body = lines_per_page - lines_per_header - lines_per_footer; if (lines_per_body <= 0) -@@ -1243,7 +1367,7 @@ init_parameters (int number_of_files) +@@ -1228,7 +1352,7 @@ init_parameters (int number_of_files) else col_sep_string = column_separator; @@ -1714,7 +2563,7 @@ index 10b8c52..079c86c 100644 use_col_separator = true; } /* It's rather pointless to define a TAB separator with column -@@ -1275,11 +1399,11 @@ init_parameters (int number_of_files) +@@ -1260,11 +1384,11 @@ init_parameters (int number_of_files) + TAB_WIDTH (chars_per_input_tab, chars_per_number); */ /* Estimate chars_per_text without any margin and keep it constant. */ @@ -1728,16 +2577,16 @@ index 10b8c52..079c86c 100644 /* The number is part of the column width unless we are printing files in parallel. 
*/ -@@ -1288,7 +1412,7 @@ init_parameters (int number_of_files) +@@ -1273,7 +1397,7 @@ init_parameters (int number_of_files) } int sep_chars, useful_chars; -- if (ckd_mul (&sep_chars, columns - 1, col_sep_length)) -+ if (ckd_mul (&sep_chars, columns - 1, col_sep_width)) +- if (INT_MULTIPLY_WRAPV (columns - 1, col_sep_length, &sep_chars)) ++ if (INT_MULTIPLY_WRAPV (columns - 1, col_sep_width, &sep_chars)) sep_chars = INT_MAX; - if (ckd_sub (&useful_chars, chars_per_line - chars_used_by_number, - sep_chars)) -@@ -1311,7 +1435,7 @@ init_parameters (int number_of_files) + if (INT_SUBTRACT_WRAPV (chars_per_line - chars_used_by_number, sep_chars, + &useful_chars)) +@@ -1296,7 +1420,7 @@ init_parameters (int number_of_files) We've to use 8 as the lower limit, if we use chars_per_default_tab = 8 to expand a tab which is not an input_tab-char. */ free (clump_buff); @@ -1746,7 +2595,7 @@ index 10b8c52..079c86c 100644 } /* Open the necessary files, -@@ -1417,7 +1541,7 @@ init_funcs (void) +@@ -1402,7 +1526,7 @@ init_funcs (void) /* Enlarge p->start_position of first column to use the same form of padding_not_printed with all columns. */ @@ -1755,7 +2604,7 @@ index 10b8c52..079c86c 100644 /* This loop takes care of all but the rightmost column. */ -@@ -1451,7 +1575,7 @@ init_funcs (void) +@@ -1436,7 +1560,7 @@ init_funcs (void) } else { @@ -1764,7 +2613,7 @@ index 10b8c52..079c86c 100644 h_next = h + chars_per_column; } } -@@ -1751,9 +1875,9 @@ static void +@@ -1733,9 +1857,9 @@ static void align_column (COLUMN *p) { padding_not_printed = p->start_position; @@ -1776,9 +2625,9 @@ index 10b8c52..079c86c 100644 padding_not_printed = ANYWHERE; } -@@ -2030,13 +2154,13 @@ store_char (char c) +@@ -2010,13 +2134,13 @@ store_char (char c) /* May be too generous. 
*/ - buff = xpalloc (buff, &buff_allocated, 1, -1, sizeof *buff); + buff = X2REALLOC (buff, &buff_allocated); } - buff[buff_current++] = c; + buff[buff_current++] = (unsigned char) c; @@ -1792,7 +2641,7 @@ index 10b8c52..079c86c 100644 char *s; int num_width; -@@ -2053,22 +2177,24 @@ add_line_number (COLUMN *p) +@@ -2033,22 +2157,24 @@ add_line_number (COLUMN *p) /* Tabification is assumed for multiple columns, also for n-separators, but 'default n-separator = TAB' hasn't been given priority over equal column_width also specified by POSIX. */ @@ -1821,7 +2670,7 @@ index 10b8c52..079c86c 100644 output_position = POS_AFTER_TAB (chars_per_output_tab, output_position); } -@@ -2227,7 +2353,7 @@ print_white_space (void) +@@ -2207,7 +2333,7 @@ print_white_space (void) while (goal - h_old > 1 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) { @@ -1830,7 +2679,7 @@ index 10b8c52..079c86c 100644 h_old = h_new; } while (++h_old <= goal) -@@ -2247,6 +2373,7 @@ print_sep_string (void) +@@ -2227,6 +2353,7 @@ print_sep_string (void) { char const *s = col_sep_string; int l = col_sep_length; @@ -1838,7 +2687,7 @@ index 10b8c52..079c86c 100644 if (separators_not_printed <= 0) { -@@ -2258,6 +2385,7 @@ print_sep_string (void) +@@ -2238,6 +2365,7 @@ print_sep_string (void) { for (; separators_not_printed > 0; --separators_not_printed) { @@ -1846,7 +2695,7 @@ index 10b8c52..079c86c 100644 while (l-- > 0) { /* 3 types of sep_strings: spaces only, spaces and chars, -@@ -2271,12 +2399,15 @@ print_sep_string (void) +@@ -2251,12 +2379,15 @@ print_sep_string (void) } else { @@ -1863,7 +2712,7 @@ index 10b8c52..079c86c 100644 /* sep_string ends with some spaces */ if (spaces_not_printed > 0) print_white_space (); -@@ -2307,7 +2438,7 @@ print_clump (COLUMN *p, int n, char *clump) +@@ -2284,7 +2415,7 @@ print_clump (COLUMN *p, int n, char *clump) required number of tabs and spaces. 
*/ static void @@ -1872,7 +2721,7 @@ index 10b8c52..079c86c 100644 { if (tabify_output) { -@@ -2331,6 +2462,74 @@ print_char (char c) +@@ -2308,6 +2439,74 @@ print_char (char c) putchar (c); } @@ -1947,7 +2796,7 @@ index 10b8c52..079c86c 100644 /* Skip to page PAGE before printing. PAGE may be larger than total number of pages. */ -@@ -2507,9 +2706,9 @@ read_line (COLUMN *p) +@@ -2485,9 +2684,9 @@ read_line (COLUMN *p) align_empty_cols = false; } @@ -1959,7 +2808,7 @@ index 10b8c52..079c86c 100644 padding_not_printed = ANYWHERE; } -@@ -2578,7 +2777,7 @@ print_stored (COLUMN *p) +@@ -2556,7 +2755,7 @@ print_stored (COLUMN *p) COLUMN *q; int line = p->current_line++; @@ -1968,7 +2817,7 @@ index 10b8c52..079c86c 100644 /* FIXME UMR: Uninitialized memory read: * This is occurring while in: -@@ -2590,7 +2789,7 @@ print_stored (COLUMN *p) +@@ -2568,7 +2767,7 @@ print_stored (COLUMN *p) xmalloc [xmalloc.c:94] init_store_cols [pr.c:1648] */ @@ -1977,7 +2826,7 @@ index 10b8c52..079c86c 100644 pad_vertically = true; -@@ -2610,9 +2809,9 @@ print_stored (COLUMN *p) +@@ -2588,9 +2787,9 @@ print_stored (COLUMN *p) } } @@ -1989,7 +2838,7 @@ index 10b8c52..079c86c 100644 padding_not_printed = ANYWHERE; } -@@ -2625,8 +2824,8 @@ print_stored (COLUMN *p) +@@ -2603,8 +2802,8 @@ print_stored (COLUMN *p) if (spaces_not_printed == 0) { output_position = p->start_position + end_vector[line]; @@ -2000,7 +2849,7 @@ index 10b8c52..079c86c 100644 } return true; -@@ -2645,7 +2844,7 @@ print_stored (COLUMN *p) +@@ -2623,7 +2822,7 @@ print_stored (COLUMN *p) number of characters is 1.) 
*/ static int @@ -2009,7 +2858,7 @@ index 10b8c52..079c86c 100644 { unsigned char uc = c; char *s = clump_buff; -@@ -2655,10 +2854,10 @@ char_to_clump (char c) +@@ -2633,10 +2832,10 @@ char_to_clump (char c) int chars; int chars_per_c = 8; @@ -2022,7 +2871,7 @@ index 10b8c52..079c86c 100644 { width = TAB_WIDTH (chars_per_c, input_position); -@@ -2739,6 +2938,164 @@ char_to_clump (char c) +@@ -2717,6 +2916,164 @@ char_to_clump (char c) return chars; } @@ -2188,14 +3037,13 @@ index 10b8c52..079c86c 100644 looking for more options and printing the next batch of files. diff --git a/src/sort.c b/src/sort.c -index 05d00cc..eb51f20 100644 +index 3b775d6..a0ba243 100644 --- a/src/sort.c +++ b/src/sort.c -@@ -30,6 +30,15 @@ +@@ -29,6 +29,14 @@ #include #include - #include -+ + #include +#if HAVE_WCHAR_H +# include +#endif @@ -2206,8 +3054,8 @@ index 05d00cc..eb51f20 100644 + #include "system.h" #include "argmatch.h" - #include "assure.h" -@@ -160,14 +169,39 @@ static int thousands_sep; + #include "die.h" +@@ -159,14 +167,39 @@ static int thousands_sep; /* We currently ignore multi-byte grouping chars. */ static bool thousands_sep_ignored; @@ -2248,7 +3096,7 @@ index 05d00cc..eb51f20 100644 /* The kind of blanks for '-b' to skip in various options. */ enum blanktype { bl_start, bl_end, bl_both }; -@@ -344,13 +378,11 @@ static bool stable; +@@ -343,13 +376,11 @@ static bool stable; /* An int value outside char range. */ enum { NON_CHAR = CHAR_MAX + 1 }; @@ -2265,9 +3113,9 @@ index 05d00cc..eb51f20 100644 /* Flag to remove consecutive duplicate lines from the output. Only the last of a sequence of equal lines will be output. */ -@@ -386,6 +418,46 @@ struct tempnode - static struct tempnode *volatile temphead; - static struct tempnode *volatile *temptail = &temphead; +@@ -805,6 +836,46 @@ reap_all (void) + reap (-1); + } +/* Function pointers. */ +static void @@ -2312,7 +3160,7 @@ index 05d00cc..eb51f20 100644 /* Clean up any remaining temporary files. 
*/ static void -@@ -1343,7 +1415,7 @@ zaptemp (char const *name) +@@ -1272,7 +1343,7 @@ zaptemp (char const *name) free (node); } @@ -2321,7 +3169,7 @@ index 05d00cc..eb51f20 100644 static int struct_month_cmp (void const *m1, void const *m2) -@@ -1358,7 +1430,7 @@ struct_month_cmp (void const *m1, void const *m2) +@@ -1287,7 +1358,7 @@ struct_month_cmp (void const *m1, void const *m2) /* Initialize the character class tables. */ static void @@ -2330,7 +3178,7 @@ index 05d00cc..eb51f20 100644 { size_t i; -@@ -1370,7 +1442,7 @@ inittables (void) +@@ -1299,7 +1370,7 @@ inittables (void) fold_toupper[i] = toupper (i); } @@ -2339,7 +3187,7 @@ index 05d00cc..eb51f20 100644 /* If we're not in the "C" locale, read different names for months. */ if (hard_LC_TIME) { -@@ -1450,6 +1522,84 @@ specify_nmerge (int oi, char c, char const *s) +@@ -1381,6 +1452,84 @@ specify_nmerge (int oi, char c, char const *s) xstrtol_fatal (e, oi, c, long_options, s); } @@ -2424,7 +3272,7 @@ index 05d00cc..eb51f20 100644 /* Specify the amount of main memory to use when sorting. */ static void specify_sort_size (int oi, char c, char const *s) -@@ -1676,7 +1826,7 @@ buffer_linelim (struct buffer const *buf) +@@ -1612,7 +1761,7 @@ buffer_linelim (struct buffer const *buf) by KEY in LINE. */ static char * @@ -2433,7 +3281,7 @@ index 05d00cc..eb51f20 100644 { char *ptr = line->text, *lim = ptr + line->length - 1; size_t sword = key->sword; -@@ -1685,10 +1835,10 @@ begfield (struct line const *line, struct keyfield const *key) +@@ -1621,10 +1770,10 @@ begfield (struct line const *line, struct keyfield const *key) /* The leading field separator itself is included in a field when -t is absent. 
*/ @@ -2446,7 +3294,7 @@ index 05d00cc..eb51f20 100644 ++ptr; if (ptr < lim) ++ptr; -@@ -1718,12 +1868,71 @@ begfield (struct line const *line, struct keyfield const *key) +@@ -1650,12 +1799,71 @@ begfield (struct line const *line, struct keyfield const *key) return ptr; } @@ -2519,7 +3367,7 @@ index 05d00cc..eb51f20 100644 { char *ptr = line->text, *lim = ptr + line->length - 1; size_t eword = key->eword, echar = key->echar; -@@ -1738,10 +1947,10 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1670,10 +1878,10 @@ limfield (struct line const *line, struct keyfield const *key) 'beginning' is the first character following the delimiting TAB. Otherwise, leave PTR pointing at the first 'blank' character after the preceding field. */ @@ -2532,7 +3380,7 @@ index 05d00cc..eb51f20 100644 ++ptr; if (ptr < lim && (eword || echar)) ++ptr; -@@ -1787,10 +1996,10 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1719,10 +1927,10 @@ limfield (struct line const *line, struct keyfield const *key) */ /* Make LIM point to the end of (one byte past) the current field. */ @@ -2545,7 +3393,7 @@ index 05d00cc..eb51f20 100644 if (newlim) lim = newlim; } -@@ -1825,6 +2034,130 @@ limfield (struct line const *line, struct keyfield const *key) +@@ -1753,6 +1961,130 @@ limfield (struct line const *line, struct keyfield const *key) return ptr; } @@ -2676,7 +3524,7 @@ index 05d00cc..eb51f20 100644 /* Fill BUF reading from FP, moving buf->left bytes from the end of buf->buf to the beginning first. If EOF is reached and the file wasn't terminated by a newline, supply one. 
Set up BUF's line -@@ -1911,8 +2244,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) +@@ -1839,8 +2171,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file) else { if (key->skipsblanks) @@ -2701,7 +3549,7 @@ index 05d00cc..eb51f20 100644 line->keybeg = line_start; } } -@@ -2050,12 +2397,10 @@ find_unit_order (char const *number) +@@ -1976,12 +2322,10 @@ find_unit_order (char const *number) ATTRIBUTE_PURE static int @@ -2717,7 +3565,7 @@ index 05d00cc..eb51f20 100644 int diff = find_unit_order (a) - find_unit_order (b); return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep)); -@@ -2067,7 +2412,7 @@ human_numcompare (char const *a, char const *b) +@@ -1993,7 +2337,7 @@ human_numcompare (char const *a, char const *b) ATTRIBUTE_PURE static int @@ -2726,7 +3574,7 @@ index 05d00cc..eb51f20 100644 { while (blanks[to_uchar (*a)]) a++; -@@ -2077,6 +2422,25 @@ numcompare (char const *a, char const *b) +@@ -2003,6 +2347,25 @@ numcompare (char const *a, char const *b) return strnumcmp (a, b, decimal_point, thousands_sep); } @@ -2749,10 +3597,10 @@ index 05d00cc..eb51f20 100644 +} +#endif /* HAV_EMBRTOWC */ + - static int - nan_compare (long double a, long double b) - { -@@ -2118,7 +2482,7 @@ general_numcompare (char const *sa, char const *sb) + /* Work around a problem whereby the long double value returned by glibc's + strtold ("NaN", ...) contains uninitialized bits: clear all bytes of + A and B before calling strtold. FIXME: remove this function if +@@ -2053,7 +2416,7 @@ general_numcompare (char const *sa, char const *sb) Return 0 if the name in S is not recognized. 
*/ static int @@ -2761,7 +3609,7 @@ index 05d00cc..eb51f20 100644 { size_t lo = 0; size_t hi = MONTHS_PER_YEAR; -@@ -2457,15 +2821,14 @@ debug_key (struct line const *line, struct keyfield const *key) +@@ -2329,15 +2692,14 @@ debug_key (struct line const *line, struct keyfield const *key) char saved = *lim; *lim = '\0'; @@ -2779,7 +3627,7 @@ index 05d00cc..eb51f20 100644 else if (key->general_numeric) ignore_value (strtold (beg, &tighter_lim)); else if (key->numeric || key->human_numeric) -@@ -2611,7 +2974,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2483,7 +2845,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) /* Warn about significant leading blanks. */ bool implicit_skip = key_numeric (key) || key->month; bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ @@ -2788,7 +3636,7 @@ index 05d00cc..eb51f20 100644 && ((!key->skipsblanks && !implicit_skip) || (!key->skipsblanks && key->schar) || (!key->skipeblanks && key->echar))) -@@ -2659,9 +3022,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2531,9 +2893,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) bool number_locale_warned = false; if (basic_numeric_field_span) { @@ -2801,7 +3649,7 @@ index 05d00cc..eb51f20 100644 { error (0, 0, _("field separator %s is treated as a " -@@ -2672,9 +3035,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2544,9 +2906,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) } if (basic_numeric_field_span || general_numeric_field_span) { @@ -2814,7 +3662,7 @@ index 05d00cc..eb51f20 100644 { error (0, 0, _("field separator %s is treated as a " -@@ -2682,19 +3045,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) +@@ -2554,19 +2916,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) quote (((char []) {decimal_point, 0}))); number_locale_warned = true; } @@ -2838,8 +3686,17 @@ index 05d00cc..eb51f20 100644 } } -@@ -2746,11 +3109,87 @@ 
diff_reversed (int diff, bool reversed) - return reversed ? _GL_CMP (0, diff) : diff; +@@ -2577,7 +2939,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) + { + error (0, 0, + _("%snumbers use %s as a decimal point in this locale"), +- tab == decimal_point ? "" : _("note "), ++ (tab_length && tab[0] == decimal_point) ? "" : _("note "), + quote (((char []) {decimal_point, 0}))); + + } +@@ -2610,11 +2972,87 @@ key_warnings (struct keyfield const *gkey, bool gkey_only) + error (0, 0, _("option '-r' only applies to last-resort comparison")); } +#if HAVE_MBRTOWC @@ -2927,17 +3784,17 @@ index 05d00cc..eb51f20 100644 { struct keyfield *key = keylist; -@@ -2831,7 +3270,7 @@ keycompare (struct line const *a, struct line const *b) +@@ -2699,7 +3137,7 @@ keycompare (struct line const *a, struct line const *b) else if (key->human_numeric) diff = human_numcompare (ta, tb); else if (key->month) -- diff = getmonth (ta, nullptr) - getmonth (tb, nullptr); -+ diff = getmonth (ta, tlena, nullptr) - getmonth (tb, tlenb, nullptr); +- diff = getmonth (ta, NULL) - getmonth (tb, NULL); ++ diff = getmonth (ta, tlena, NULL) - getmonth (tb, tlenb, NULL); else if (key->random) diff = compare_random (ta, tlena, tb, tlenb); else if (key->version) -@@ -2941,6 +3380,211 @@ keycompare (struct line const *a, struct line const *b) - return diff_reversed (diff, key->reverse); +@@ -2815,6 +3253,211 @@ keycompare (struct line const *a, struct line const *b) + return key->reverse ? -diff : diff; } +#if HAVE_MBRTOWC @@ -3148,7 +4005,7 @@ index 05d00cc..eb51f20 100644 /* Compare two lines A and B, returning negative, zero, or positive depending on whether A compares less than, equal to, or greater than B. 
*/ -@@ -2968,7 +3612,7 @@ compare (struct line const *a, struct line const *b) +@@ -2842,7 +3485,7 @@ compare (struct line const *a, struct line const *b) diff = - NONZERO (blen); else if (blen == 0) diff = 1; @@ -3157,7 +4014,7 @@ index 05d00cc..eb51f20 100644 { /* xmemcoll0 is a performance enhancement as it will not unconditionally write '\0' after the -@@ -4340,6 +4984,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) +@@ -4226,6 +4869,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) break; case 'f': key->translate = fold_toupper; @@ -3165,7 +4022,7 @@ index 05d00cc..eb51f20 100644 break; case 'g': key->general_numeric = true; -@@ -4419,7 +5064,7 @@ main (int argc, char **argv) +@@ -4305,7 +4949,7 @@ main (int argc, char **argv) initialize_exit_failure (SORT_FAILURE); hard_LC_COLLATE = hard_locale (LC_COLLATE); @@ -3174,7 +4031,7 @@ index 05d00cc..eb51f20 100644 hard_LC_TIME = hard_locale (LC_TIME); #endif -@@ -4442,6 +5087,29 @@ main (int argc, char **argv) +@@ -4328,6 +4972,29 @@ main (int argc, char **argv) thousands_sep = NON_CHAR; } @@ -3204,7 +4061,7 @@ index 05d00cc..eb51f20 100644 have_read_stdin = false; inittables (); -@@ -4717,13 +5385,34 @@ main (int argc, char **argv) +@@ -4602,13 +5269,34 @@ main (int argc, char **argv) case 't': { @@ -3214,7 +4071,7 @@ index 05d00cc..eb51f20 100644 + size_t newtab_length = 1; + strncpy (newtab, optarg, MB_LEN_MAX); + if (! newtab[0]) - error (SORT_FAILURE, 0, _("empty tab")); + die (SORT_FAILURE, 0, _("empty tab")); - if (optarg[1]) +#if HAVE_MBRTOWC + if (MB_CUR_MAX > 1) @@ -3237,20 +4094,20 @@ index 05d00cc..eb51f20 100644 +#endif + if (newtab_length == 1 && optarg[1]) { - if (streq (optarg, "\\0")) + if (STREQ (optarg, "\\0")) - newtab = '\0'; + newtab[0] = '\0'; else { /* Provoke with 'sort -txx'. 
Complain about -@@ -4734,9 +5423,11 @@ main (int argc, char **argv) - quote (optarg)); +@@ -4619,9 +5307,11 @@ main (int argc, char **argv) + quote (optarg)); } } - if (tab != TAB_DEFAULT && tab != newtab) + if (tab_length && (tab_length != newtab_length + || memcmp (tab, newtab, tab_length) != 0)) - error (SORT_FAILURE, 0, _("incompatible tabs")); + die (SORT_FAILURE, 0, _("incompatible tabs")); - tab = newtab; + memcpy (tab, newtab, newtab_length); + tab_length = newtab_length; @@ -3258,10 +4115,10 @@ index 05d00cc..eb51f20 100644 break; diff --git a/src/unexpand.c b/src/unexpand.c -index ff234d7..7c36ef6 100644 +index 7d6100f..04cd646 100644 --- a/src/unexpand.c +++ b/src/unexpand.c -@@ -39,6 +39,9 @@ +@@ -38,6 +38,9 @@ #include #include #include @@ -3269,12 +4126,12 @@ index ff234d7..7c36ef6 100644 +#include + #include "system.h" - #include "expand-common.h" + #include "die.h" -@@ -105,24 +108,46 @@ unexpand (void) +@@ -106,24 +109,47 @@ unexpand (void) { /* Input stream. */ - FILE *fp = next_file (nullptr); + FILE *fp = next_file (NULL); + mb_file_t mbf; /* The array of pending blanks. In non-POSIX locales, blanks can @@ -3291,27 +4148,28 @@ index ff234d7..7c36ef6 100644 if (!fp) return; - + mbf_init (mbf, fp); + found_bom=check_bom(fp,&mbf); + + if (using_utf_locale == false && found_bom == true) ++ { ++ /*try using some predefined locale */ + ++ if (set_utf_locale () != 0) + { -+ /* Try using some predefined locale */ -+ if (set_utf_locale () != 0) -+ { -+ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale")); -+ } ++ error (EXIT_FAILURE, errno, _("cannot set UTF-8 locale")); + } -+ ++ } /* The worst case is a non-blank character, then one blank, then a tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so allocate MAX_COLUMN_WIDTH bytes to store the blanks. 
*/ -- pending_blank = ximalloc (max_column_width); -+ pending_blank = ximalloc (max_column_width * sizeof (mbf_char_t)); +- pending_blank = xmalloc (max_column_width); ++ pending_blank = xmalloc (max_column_width * sizeof (mbf_char_t)); + + if (found_bom == true) ++ { + print_bom(); ++ } while (true) { @@ -3321,7 +4179,7 @@ index ff234d7..7c36ef6 100644 /* If true, perform translations. */ bool convert = true; -@@ -156,12 +181,44 @@ unexpand (void) +@@ -157,12 +183,44 @@ unexpand (void) do { @@ -3369,10 +4227,10 @@ index ff234d7..7c36ef6 100644 if (blank) { -@@ -175,16 +232,16 @@ unexpand (void) +@@ -179,16 +237,16 @@ unexpand (void) + if (next_tab_column < column) + die (EXIT_FAILURE, 0, _("input line is too long")); - if (convert) - { - if (c == '\t') + if (mb_iseq (c, '\t')) { @@ -3389,7 +4247,7 @@ index ff234d7..7c36ef6 100644 if (! (prev_blank && column == next_tab_column)) { -@@ -192,13 +249,14 @@ unexpand (void) +@@ -196,13 +254,14 @@ unexpand (void) will be replaced by tabs. */ if (column == next_tab_column) one_blank_before_tab_stop = true; @@ -3406,7 +4264,7 @@ index ff234d7..7c36ef6 100644 } /* Discard pending blanks, unless it was a single -@@ -206,7 +264,7 @@ unexpand (void) +@@ -210,7 +269,7 @@ unexpand (void) pending = one_blank_before_tab_stop; } } @@ -3415,18 +4273,19 @@ index ff234d7..7c36ef6 100644 { /* Go back one column, and force recalculation of the next tab stop. 
*/ -@@ -216,16 +274,20 @@ unexpand (void) +@@ -218,9 +277,9 @@ unexpand (void) + next_tab_column = column; + tab_index -= !!tab_index; } - else +- else ++ else if (!mb_iseq (c, '\n')) { - column++; -- if (!column) -+ const uintmax_t orig_column = column; + column += mb_width (c); -+ if (column < orig_column) - error (EXIT_FAILURE, 0, _("input line is too long")); + if (!column) + die (EXIT_FAILURE, 0, _("input line is too long")); } - +@@ -228,8 +287,11 @@ unexpand (void) if (pending) { if (pending > 1 && one_blank_before_tab_stop) @@ -3437,10 +4296,10 @@ index ff234d7..7c36ef6 100644 + for (int n = 0; n < pending; ++n) + mb_putc (pending_blank[n], stdout); + if (ferror (stdout)) - write_error (); + die (EXIT_FAILURE, errno, _("write error")); pending = 0; one_blank_before_tab_stop = false; -@@ -235,16 +297,17 @@ unexpand (void) +@@ -239,16 +301,17 @@ unexpand (void) convert &= convert_entire_line || blank; } @@ -3454,18 +4313,183 @@ index ff234d7..7c36ef6 100644 - if (putchar (c) < 0) + mb_putc (c, stdout); + if (ferror (stdout)) - write_error (); + die (EXIT_FAILURE, errno, _("write error")); } - while (c != '\n'); + while (!mb_iseq (c, '\n')); } } +diff --git a/src/uniq.c b/src/uniq.c +index e5996f0..871d47c 100644 +--- a/src/uniq.c ++++ b/src/uniq.c +@@ -21,6 +21,17 @@ + #include + #include + ++/* Get mbstate_t, mbrtowc(). */ ++#if HAVE_WCHAR_H ++# include ++#endif ++ ++/* Get isw* functions. */ ++#if HAVE_WCTYPE_H ++# include ++#endif ++#include ++ + #include "system.h" + #include "argmatch.h" + #include "linebuffer.h" +@@ -33,6 +44,18 @@ + #include "memcasecmp.h" + #include "quote.h" + ++/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC ++ installation; work around this configuration error. */ ++#if !defined MB_LEN_MAX || MB_LEN_MAX < 2 ++# define MB_LEN_MAX 16 ++#endif ++ ++/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. 
*/ ++#if HAVE_MBRTOWC && defined mbstate_t ++# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) ++#endif ++ ++ + /* The official name of this program (e.g., no 'g' prefix). */ + #define PROGRAM_NAME "uniq" + +@@ -139,6 +162,10 @@ enum + GROUP_OPTION = CHAR_MAX + 1 + }; + ++/* Function pointers. */ ++static char * ++(*find_field) (struct linebuffer *line); ++ + static struct option const longopts[] = + { + {"count", no_argument, NULL, 'c'}, +@@ -254,7 +281,7 @@ size_opt (char const *opt, char const *msgid) + + ATTRIBUTE_PURE + static char * +-find_field (struct linebuffer const *line) ++find_field_uni (struct linebuffer *line) + { + size_t count; + char const *lp = line->buffer; +@@ -274,6 +301,83 @@ find_field (struct linebuffer const *line) + return line->buffer + i; + } + ++#if HAVE_MBRTOWC ++ ++# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \ ++ do \ ++ { \ ++ mbstate_t state_bak; \ ++ \ ++ CONVFAIL = 0; \ ++ state_bak = *STATEP; \ ++ \ ++ MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \ ++ \ ++ switch (MBLENGTH) \ ++ { \ ++ case (size_t)-2: \ ++ case (size_t)-1: \ ++ *STATEP = state_bak; \ ++ CONVFAIL++; \ ++ /* Fall through */ \ ++ case 0: \ ++ MBLENGTH = 1; \ ++ } \ ++ } \ ++ while (0) ++ ++static char * ++find_field_multi (struct linebuffer *line) ++{ ++ size_t count; ++ char *lp = line->buffer; ++ size_t size = line->length - 1; ++ size_t pos; ++ size_t mblength; ++ wchar_t wc; ++ mbstate_t *statep; ++ int convfail = 0; ++ ++ pos = 0; ++ statep = &(line->state); ++ ++ /* skip fields. 
*/ ++ for (count = 0; count < skip_fields && pos < size; count++) ++ { ++ while (pos < size) ++ { ++ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); ++ ++ if (convfail || !(iswblank (wc) || wc == '\n')) ++ { ++ pos += mblength; ++ break; ++ } ++ pos += mblength; ++ } ++ ++ while (pos < size) ++ { ++ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); ++ ++ if (!convfail && (iswblank (wc) || wc == '\n')) ++ break; ++ ++ pos += mblength; ++ } ++ } ++ ++ /* skip fields. */ ++ for (count = 0; count < skip_chars && pos < size; count++) ++ { ++ MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail); ++ pos += mblength; ++ } ++ ++ return lp + pos; ++} ++#endif ++ + /* Return false if two strings OLD and NEW match, true if not. + OLD and NEW point not to the beginnings of the lines + but rather to the beginnings of the fields to compare. +@@ -494,6 +598,19 @@ main (int argc, char **argv) + + atexit (close_stdout); + ++#if HAVE_MBRTOWC ++ if (MB_CUR_MAX > 1) ++ { ++ find_field = find_field_multi; ++ } ++ else ++#endif ++ { ++ find_field = find_field_uni; ++ } ++ ++ ++ + skip_chars = 0; + skip_fields = 0; + check_chars = SIZE_MAX; diff --git a/tests/Coreutils.pm b/tests/Coreutils.pm -index b55fb9d..ac80f49 100644 +index fad7ab9..c9021a6 100644 --- a/tests/Coreutils.pm +++ b/tests/Coreutils.pm -@@ -269,6 +269,9 @@ sub run_tests ($$$$$) +@@ -264,6 +264,9 @@ sub run_tests ($$$$$) # Yes, this is an arbitrary limit. If it causes trouble, # consider removing it. 
my $max = 30; @@ -3476,7 +4500,7 @@ index b55fb9d..ac80f49 100644 { warn "$program_name: $test_name: test name is too long (> $max)\n"; diff --git a/tests/expand/mb.sh b/tests/expand/mb.sh -new file mode 100644 +new file mode 100755 index 0000000..dd6007c --- /dev/null +++ b/tests/expand/mb.sh @@ -3665,7 +4689,7 @@ index 0000000..dd6007c + +exit $fail diff --git a/tests/i18n/sort.sh b/tests/i18n/sort.sh -new file mode 100644 +new file mode 100755 index 0000000..26c95de --- /dev/null +++ b/tests/i18n/sort.sh @@ -3700,19 +4724,19 @@ index 0000000..26c95de + +Exit $fail diff --git a/tests/local.mk b/tests/local.mk -index 53fc53e..0148422 100644 +index 0f77786..dbe1843 100644 --- a/tests/local.mk +++ b/tests/local.mk -@@ -412,6 +412,8 @@ all_tests = \ - tests/sort/sort-field-limit.sh \ - tests/sort/sort-files0-from.pl \ - tests/sort/sort-float.sh \ +@@ -377,6 +377,8 @@ all_tests = \ + tests/misc/sort-discrim.sh \ + tests/misc/sort-files0-from.pl \ + tests/misc/sort-float.sh \ + tests/misc/sort-mb-tests.sh \ + tests/i18n/sort.sh \ - tests/sort/sort-h-thousands-sep.sh \ - tests/sort/sort-merge.pl \ - tests/sort/sort-merge-fdlimit.sh \ -@@ -618,6 +620,7 @@ all_tests = \ + tests/misc/sort-h-thousands-sep.sh \ + tests/misc/sort-merge.pl \ + tests/misc/sort-merge-fdlimit.sh \ +@@ -576,6 +578,7 @@ all_tests = \ tests/du/threshold.sh \ tests/du/trailing-slash.sh \ tests/du/two-args.sh \ @@ -3720,7 +4744,7 @@ index 53fc53e..0148422 100644 tests/id/gnu-zero-uids.sh \ tests/id/no-context.sh \ tests/id/context.sh \ -@@ -774,6 +777,7 @@ all_tests = \ +@@ -727,6 +730,7 @@ all_tests = \ tests/touch/read-only.sh \ tests/touch/relative.sh \ tests/touch/trailing-slash.sh \ @@ -3729,7 +4753,7 @@ index 53fc53e..0148422 100644 # See tests/factor/create-test.sh. 
diff --git a/tests/misc/expand.pl b/tests/misc/expand.pl -index 4b07210..68b9ea1 100755 +index 7a77e6f..27f6652 100755 --- a/tests/misc/expand.pl +++ b/tests/misc/expand.pl @@ -27,6 +27,15 @@ my $prog = 'expand'; @@ -3795,8 +4819,151 @@ index 4b07210..68b9ea1 100755 my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; +diff --git a/tests/misc/fold.pl b/tests/misc/fold.pl +index 2834f92..bc1616a 100755 +--- a/tests/misc/fold.pl ++++ b/tests/misc/fold.pl +@@ -20,9 +20,18 @@ use strict; + + (my $program_name = $0) =~ s|.*/||; + ++my $prog = 'fold'; ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + ++# uncommented to enable multibyte paths ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ + my @Tests = + ( + ['s1', '-w2 -s', {IN=>"a\t"}, {OUT=>"a\n\t"}], +@@ -31,9 +40,48 @@ my @Tests = + ['s4', '-w4 -s', {IN=>"abc ef\n"}, {OUT=>"abc \nef\n"}], + ); + ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether fold is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. 
++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ ++@Tests = triple_test \@Tests; ++ ++# Remember that triple_test creates from each test with exactly one "IN" ++# file two more tests (.p and .r suffix on name) corresponding to reading ++# input from a file and from a pipe. The pipe-reading test would fail ++# due to a race condition about 1 in 20 times. ++# Remove the IN_PIPE version of the "output-is-input" test above. ++# The others aren't susceptible because they have three inputs each. ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + +-my $prog = 'fold'; + my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose); + exit $fail; +diff --git a/tests/misc/join.pl b/tests/misc/join.pl +index 06ad777..be40204 100755 +--- a/tests/misc/join.pl ++++ b/tests/misc/join.pl +@@ -25,6 +25,15 @@ my $limits = getlimits (); + + my $prog = 'join'; + ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ ++my $mb_locale; ++#Comment out next line to disable multibyte tests ++$mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ + my $delim = chr 0247; + sub t_subst ($) + { +@@ -333,8 +342,49 @@ foreach my $t (@tv) + push @Tests, $new_ent; + } + ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. 
++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether join is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ #Adjust the output some error messages including test_name for mb ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR}} ++ (@new_t)) ++ { ++ my $sub2 = {ERR_SUBST => "s/$test_name-mb/$test_name/"}; ++ push @new_t, $sub2; ++ push @$t, $sub2; ++ } ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ + @Tests = triple_test \@Tests; + ++#skip invalid-j-mb test, it is failing because of the format ++@Tests = grep {$_->[0] ne 'invalid-j-mb'} @Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + diff --git a/tests/misc/sort-mb-tests.sh b/tests/misc/sort-mb-tests.sh -new file mode 100644 +new file mode 100755 index 0000000..11836ba --- /dev/null +++ b/tests/misc/sort-mb-tests.sh @@ -3846,136 +5013,10 @@ index 0000000..11836ba +compare exp out || { fail=1; cat out; } + +Exit $fail -diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl -index bb7469c..c1dec95 100755 ---- a/tests/misc/unexpand.pl -+++ b/tests/misc/unexpand.pl -@@ -27,6 +27,14 @@ my $limits = getlimits (); - - my $prog = 'unexpand'; - -+# comment out next line to disable multibyte tests -+my $mb_locale = $ENV{LOCALE_FR_UTF8}; -+! 
defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - my @Tests = - ( - ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}], -@@ -132,6 +140,37 @@ my @Tests = - ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}], - ); - -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether unexpand is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ next if ($test_name =~ 'b-1'); -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ -+@Tests = triple_test \@Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - -diff --git a/tests/pr/pr-tests.pl b/tests/pr/pr-tests.pl -index 60e6106..3c64a08 100755 ---- a/tests/pr/pr-tests.pl -+++ b/tests/pr/pr-tests.pl -@@ -24,6 +24,15 @@ use strict; - my $prog = 'pr'; - my $normalize_strerror = "s/': .*/'/"; - -+my $mb_locale; -+#Uncomment the following line to enable multibyte tests -+$mb_locale = $ENV{LOCALE_FR_UTF8}; -+! defined $mb_locale || $mb_locale eq 'none' -+ and $mb_locale = 'C'; -+ -+my $try = "Try \`$prog --help' for more information.\n"; -+my $inval = "$prog: invalid byte, character or field list\n$try"; -+ - my @tv = ( - - # -b option is no longer an official option. 
But it's still working to -@@ -515,8 +524,48 @@ push @Tests, - {IN=>"x\tx\tx\tx\tx\nx\tx\tx\tx\tx\n"}, - {OUT=>"x\tx\tx\tx\tx\tx\tx\tx\tx\tx\n"} ]; - -+# Add _POSIX2_VERSION=199209 to the environment of each test -+# that uses an old-style option like +1. -+if ($mb_locale ne 'C') -+ { -+ # Duplicate each test vector, appending "-mb" to the test name and -+ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we -+ # provide coverage for the distro-added multi-byte code paths. -+ my @new; -+ foreach my $t (@Tests) -+ { -+ my @new_t = @$t; -+ my $test_name = shift @new_t; -+ -+ # Depending on whether pr is multi-byte-patched, -+ # it emits different diagnostics: -+ # non-MB: invalid byte or field list -+ # MB: invalid byte, character or field list -+ # Adjust the expected error output accordingly. -+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} -+ (@new_t)) -+ { -+ my $sub = {ERR_SUBST => 's/, character//'}; -+ push @new_t, $sub; -+ push @$t, $sub; -+ } -+ #temporarily skip some failing tests -+ next if ($test_name =~ "col-0" or $test_name =~ "col-inval" or $test_name =~ "asan1"); -+ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; -+ } -+ push @Tests, @new; -+ } -+ - @Tests = triple_test \@Tests; - -+# Remember that triple_test creates from each test with exactly one "IN" -+# file two more tests (.p and .r suffix on name) corresponding to reading -+# input from a file and from a pipe. The pipe-reading test would fail -+# due to a race condition about 1 in 20 times. -+# Remove the IN_PIPE version of the "output-is-input" test above. -+# The others aren't susceptible because they have three inputs each. 
-+@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; -+ - my $save_temps = $ENV{DEBUG}; - my $verbose = $ENV{VERBOSE}; - -diff --git a/tests/sort/sort-merge.pl b/tests/sort/sort-merge.pl -index a3204d3..40942a5 100755 ---- a/tests/sort/sort-merge.pl -+++ b/tests/sort/sort-merge.pl +diff --git a/tests/misc/sort-merge.pl b/tests/misc/sort-merge.pl +index 7eb4574..eda884c 100755 +--- a/tests/misc/sort-merge.pl ++++ b/tests/misc/sort-merge.pl @@ -26,6 +26,15 @@ my $prog = 'sort'; # Turn off localization of executable's output. @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; @@ -4032,10 +5073,10 @@ index a3204d3..40942a5 100755 my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; -diff --git a/tests/sort/sort.pl b/tests/sort/sort.pl -index 5fa9d52..a66952a 100755 ---- a/tests/sort/sort.pl -+++ b/tests/sort/sort.pl +diff --git a/tests/misc/sort.pl b/tests/misc/sort.pl +index 0b0adca..fd27821 100755 +--- a/tests/misc/sort.pl ++++ b/tests/misc/sort.pl @@ -24,10 +24,15 @@ my $prog = 'sort'; # Turn off localization of executable's output. @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; @@ -4053,7 +5094,7 @@ index 5fa9d52..a66952a 100755 # Since each test is run with a file name and with redirected stdin, # the name in the diagnostic is either the file name or "-". # Normalize each diagnostic to use '-'. -@@ -428,6 +433,38 @@ foreach my $t (@Tests) +@@ -423,6 +428,38 @@ foreach my $t (@Tests) } } @@ -4092,7 +5133,7 @@ index 5fa9d52..a66952a 100755 @Tests = triple_test \@Tests; # Remember that triple_test creates from each test with exactly one "IN" -@@ -437,6 +474,7 @@ foreach my $t (@Tests) +@@ -432,6 +469,7 @@ foreach my $t (@Tests) # Remove the IN_PIPE version of the "output-is-input" test above. # The others aren't susceptible because they have three inputs each. 
@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; @@ -4100,8 +5141,210 @@ index 5fa9d52..a66952a 100755 my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; +diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl +index 2e1906f..fe66012 100755 +--- a/tests/misc/unexpand.pl ++++ b/tests/misc/unexpand.pl +@@ -27,6 +27,14 @@ my $limits = getlimits (); + + my $prog = 'unexpand'; + ++# comment out next line to disable multibyte tests ++my $mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + my @Tests = + ( + ['a1', {IN=> ' 'x 1 ."y\n"}, {OUT=> ' 'x 1 ."y\n"}], +@@ -128,6 +136,37 @@ my @Tests = + ['ts2', '-t5,8', {IN=>"x\t \t y\n"}, {OUT=>"x\t\t y\n"}], + ); + ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether unexpand is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. 
++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ next if ($test_name =~ 'b-1'); ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ ++@Tests = triple_test \@Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + +diff --git a/tests/misc/uniq.pl b/tests/misc/uniq.pl +index aa163cd..91d617d 100755 +--- a/tests/misc/uniq.pl ++++ b/tests/misc/uniq.pl +@@ -23,9 +23,17 @@ my $limits = getlimits (); + my $prog = 'uniq'; + my $try = "Try '$prog --help' for more information.\n"; + ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + # Turn off localization of executable's output. + @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; + ++my $mb_locale; ++#Comment out next line to disable multibyte tests ++$mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ + # When possible, create a "-z"-testing variant of each test. + sub add_z_variants($) + { +@@ -262,6 +270,53 @@ foreach my $t (@Tests) + and push @$t, {ENV=>'_POSIX2_VERSION=199209'}; + } + ++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether uniq is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. 
++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ # In test #145, replace the each ‘...’ by '...'. ++ if ($test_name =~ "145") ++ { ++ my $sub = { ERR_SUBST => "s/‘([^’]+)’/'\$1'/g"}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ next if ( $test_name =~ "schar" ++ or $test_name =~ "^obs-plus" ++ or $test_name =~ "119"); ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ ++# Remember that triple_test creates from each test with exactly one "IN" ++# file two more tests (.p and .r suffix on name) corresponding to reading ++# input from a file and from a pipe. The pipe-reading test would fail ++# due to a race condition about 1 in 20 times. ++# Remove the IN_PIPE version of the "output-is-input" test above. ++# The others aren't susceptible because they have three inputs each. ++ ++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++ + @Tests = add_z_variants \@Tests; + @Tests = triple_test \@Tests; + +diff --git a/tests/pr/pr-tests.pl b/tests/pr/pr-tests.pl +index 7ac6d4c..ae6cc35 100755 +--- a/tests/pr/pr-tests.pl ++++ b/tests/pr/pr-tests.pl +@@ -24,6 +24,15 @@ use strict; + my $prog = 'pr'; + my $normalize_strerror = "s/': .*/'/"; + ++my $mb_locale; ++#Uncomment the following line to enable multibyte tests ++$mb_locale = $ENV{LOCALE_FR_UTF8}; ++! defined $mb_locale || $mb_locale eq 'none' ++ and $mb_locale = 'C'; ++ ++my $try = "Try \`$prog --help' for more information.\n"; ++my $inval = "$prog: invalid byte, character or field list\n$try"; ++ + my @tv = ( + + # -b option is no longer an official option. But it's still working to +@@ -512,8 +521,48 @@ push @Tests, + {IN=>"x\tx\tx\tx\tx\nx\tx\tx\tx\tx\n"}, + {OUT=>"x\tx\tx\tx\tx\tx\tx\tx\tx\tx\n"} ]; + ++# Add _POSIX2_VERSION=199209 to the environment of each test ++# that uses an old-style option like +1. 
++if ($mb_locale ne 'C') ++ { ++ # Duplicate each test vector, appending "-mb" to the test name and ++ # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we ++ # provide coverage for the distro-added multi-byte code paths. ++ my @new; ++ foreach my $t (@Tests) ++ { ++ my @new_t = @$t; ++ my $test_name = shift @new_t; ++ ++ # Depending on whether pr is multi-byte-patched, ++ # it emits different diagnostics: ++ # non-MB: invalid byte or field list ++ # MB: invalid byte, character or field list ++ # Adjust the expected error output accordingly. ++ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} ++ (@new_t)) ++ { ++ my $sub = {ERR_SUBST => 's/, character//'}; ++ push @new_t, $sub; ++ push @$t, $sub; ++ } ++ #temporarily skip some failing tests ++ next if ($test_name =~ "col-0" or $test_name =~ "col-inval" or $test_name =~ "asan1"); ++ push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; ++ } ++ push @Tests, @new; ++ } ++ + @Tests = triple_test \@Tests; + ++# Remember that triple_test creates from each test with exactly one "IN" ++# file two more tests (.p and .r suffix on name) corresponding to reading ++# input from a file and from a pipe. The pipe-reading test would fail ++# due to a race condition about 1 in 20 times. ++# Remove the IN_PIPE version of the "output-is-input" test above. ++# The others aren't susceptible because they have three inputs each. 
++@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; ++ + my $save_temps = $ENV{DEBUG}; + my $verbose = $ENV{VERBOSE}; + diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh -new file mode 100644 +new file mode 100755 index 0000000..8a82d74 --- /dev/null +++ b/tests/unexpand/mb.sh @@ -4279,5 +5522,5 @@ index 0000000..8a82d74 +LC_ALL=C unexpand in in > out || fail=1 +compare exp out > /dev/null 2>&1 || fail=1 -- -2.52.0 +2.34.1 diff --git a/coreutils-provides.inc b/coreutils-provides.inc new file mode 100644 index 0000000..8314a4e --- /dev/null +++ b/coreutils-provides.inc @@ -0,0 +1,9 @@ +Provides: bundled(gnulib) + +# make it possible to install the latest available Adobe Reader RPM for Linux +Provides: /bin/cat +Provides: /bin/chmod +Provides: /bin/echo +Provides: /bin/ln +Provides: /bin/rm +Provides: /bin/touch diff --git a/coreutils-python3.patch b/coreutils-python3.patch deleted file mode 100644 index 447fdbc..0000000 --- a/coreutils-python3.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 8927d505ecb5334f09c48ef98ef1f464f581d0f7 Mon Sep 17 00:00:00 2001 -From: rpm-build -Date: Tue, 2 Apr 2024 14:11:26 +0100 -Subject: [PATCH] coreutils-python3.patch - ---- - init.cfg | 4 ++-- - tests/d_type-check | 2 +- - tests/du/move-dir-while-traversing.sh | 6 +++--- - 3 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/init.cfg b/init.cfg -index ac05f7b..26d9516 100644 ---- a/init.cfg -+++ b/init.cfg -@@ -601,10 +601,10 @@ seek_data_capable_() - # Skip the current test if "." lacks d_type support. 
- require_dirent_d_type_() - { -- python < /dev/null \ -+ python3 < /dev/null \ - || skip_ python missing: assuming no d_type support - -- python "$abs_srcdir"/tests/d_type-check \ -+ python3 "$abs_srcdir"/tests/d_type-check \ - || skip_ requires d_type support - } - -diff --git a/tests/d_type-check b/tests/d_type-check -index 1a2f76f..42d3924 100644 ---- a/tests/d_type-check -+++ b/tests/d_type-check -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/python3 - # Exit 0 if "." and "./tempfile" have useful d_type information, else 1. - # Intended to exit 0 only on Linux/GNU systems. - import os -diff --git a/tests/du/move-dir-while-traversing.sh b/tests/du/move-dir-while-traversing.sh -index adf482b..cf9214a 100755 ---- a/tests/du/move-dir-while-traversing.sh -+++ b/tests/du/move-dir-while-traversing.sh -@@ -21,8 +21,8 @@ print_ver_ du - require_trap_signame_ - - # We use a python-inotify script, so... --python -m pyinotify -h > /dev/null \ -- || skip_ 'python inotify package not installed' -+python3 -m pyinotify -h > /dev/null \ -+ || skip_ 'python3 inotify package not installed' - - # Move a directory "up" while du is processing its sub-directories. - # While du is processing a hierarchy .../B/C/D/... this script -@@ -33,7 +33,7 @@ python -m pyinotify -h > /dev/null \ - # rename syscall before du finishes processing the subtree under D/. 
- - cat <<'EOF' > inotify-watch-for-dir-access.py --#!/usr/bin/env python -+#!/usr/bin/env python3 - import pyinotify as pn - import os,sys - --- -2.51.0 - diff --git a/coreutils-selinux.patch b/coreutils-selinux.patch new file mode 100644 index 0000000..1f7f3a3 --- /dev/null +++ b/coreutils-selinux.patch @@ -0,0 +1,136 @@ +From d70ddb3eb845c494280e7365e2b889242e7e1bb9 Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Mon, 4 Oct 2021 08:45:53 +0200 +Subject: [PATCH] coreutils-selinux.patch + +--- + doc/coreutils.texi | 5 +++++ + man/chcon.x | 2 +- + man/runcon.x | 2 +- + src/cp.c | 16 +++++++++++++++- + src/install.c | 10 ++++++++-- + 5 files changed, 30 insertions(+), 5 deletions(-) + +diff --git a/doc/coreutils.texi b/doc/coreutils.texi +index 6810c15..19b535c 100644 +--- a/doc/coreutils.texi ++++ b/doc/coreutils.texi +@@ -8766,6 +8766,11 @@ done + exit $fail + @end example + ++@item -c ++@cindex SELinux security context information, preserving ++Preserve SELinux security context of the original files if possible. ++Some file systems don't support storing of SELinux security context. 
++ + @item --copy-contents + @cindex directories, copying recursively + @cindex copying directories recursively +diff --git a/man/chcon.x b/man/chcon.x +index 8c1ff6f..c84fb96 100644 +--- a/man/chcon.x ++++ b/man/chcon.x +@@ -1,4 +1,4 @@ + [NAME] +-chcon \- change file security context ++chcon \- change file SELinux security context + [DESCRIPTION] + .\" Add any additional description here +diff --git a/man/runcon.x b/man/runcon.x +index d2df13e..5c5f5d8 100644 +--- a/man/runcon.x ++++ b/man/runcon.x +@@ -1,5 +1,5 @@ + [NAME] +-runcon \- run command with specified security context ++runcon \- run command with specified SELinux security context + [DESCRIPTION] + Run COMMAND with completely-specified CONTEXT, or with current or + transitioned security context modified by one or more of LEVEL, +diff --git a/src/cp.c b/src/cp.c +index c97a675..89fb8ec 100644 +--- a/src/cp.c ++++ b/src/cp.c +@@ -191,6 +191,9 @@ Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY.\n\ + additional attributes: context, links, xattr,\ + \n\ + all\n\ ++"), stdout); ++ fputs (_("\ ++ -c deprecated, same as --preserve=context\n\ + "), stdout); + fputs (_("\ + --no-preserve=ATTR_LIST don't preserve the specified attributes\n\ +@@ -954,7 +957,7 @@ main (int argc, char **argv) + selinux_enabled = (0 < is_selinux_enabled ()); + cp_option_init (&x); + +- while ((c = getopt_long (argc, argv, "abdfHilLnprst:uvxPRS:TZ", ++ while ((c = getopt_long (argc, argv, "abcdfHilLnprst:uvxPRS:TZ", + long_opts, NULL)) + != -1) + { +@@ -1002,6 +1005,17 @@ main (int argc, char **argv) + copy_contents = true; + break; + ++ case 'c': ++ fprintf (stderr, "%s: warning: option '-c' is deprecated, please use '--preserve=context' instead\n", argv[0]); ++ if ( x.set_security_context ) { ++ (void) fprintf(stderr, "%s: cannot force target context and preserve it\n", argv[0]); ++ exit( 1 ); ++ } ++ else if (selinux_enabled) { ++ x.preserve_security_context = true; ++ x.require_preserve_context = true; ++ } ++ break; + 
case 'd': + x.preserve_links = true; + x.dereference = DEREF_NEVER; +diff --git a/src/install.c b/src/install.c +index c9456fe..2b1bee9 100644 +--- a/src/install.c ++++ b/src/install.c +@@ -638,7 +638,7 @@ In the 4th form, create all components of the given DIRECTORY(ies).\n\ + -v, --verbose print the name of each directory as it is created\n\ + "), stdout); + fputs (_("\ +- --preserve-context preserve SELinux security context\n\ ++ -P, --preserve-context preserve SELinux security context (-P deprecated)\n\ + -Z set SELinux security context of destination\n\ + file and each created directory to default type\n\ + --context[=CTX] like -Z, or if CTX is specified then set the\n\ +@@ -790,7 +790,7 @@ main (int argc, char **argv) + dir_arg = false; + umask (0); + +- while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pt:TvS:Z", long_options, ++ while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pPt:TvS:Z", long_options, + NULL)) != -1) + { + switch (optc) +@@ -851,6 +851,8 @@ main (int argc, char **argv) + no_target_directory = true; + break; + ++ case 'P': ++ fprintf (stderr, "%s: warning: option '-P' is deprecated, please use '--preserve-context' instead\n", argv[0]); + case PRESERVE_CONTEXT_OPTION: + if (! 
selinux_enabled) + { +@@ -858,6 +860,10 @@ main (int argc, char **argv) + "this kernel is not SELinux-enabled")); + break; + } ++ if ( x.set_security_context ) { ++ (void) fprintf(stderr, "%s: cannot force target context and preserve it\n", argv[0]); ++ exit( 1 ); ++ } + x.preserve_security_context = true; + use_default_selinux_context = false; + break; +-- +2.31.1 + diff --git a/coreutils-ss.tar.xz.sig b/coreutils-ss.tar.xz.sig new file mode 100644 index 0000000..34c9c21 --- /dev/null +++ b/coreutils-ss.tar.xz.sig @@ -0,0 +1,16 @@ +-----BEGIN PGP SIGNATURE----- + +iQIzBAABCAAdFiEEbDfcEhIaUAa8HbgE32/ZcTBgN9kFAmJXLFQACgkQ32/ZcTBg +N9nZHA/9FYyhdcGBRb38sXL9H8eDZVgyyU9n0vosx54C4fkZGLb94yJIKTkdtSJC +buBycS5PGBGZwx1TC7U5dYj9I/WB27ZIdrH4qEHnKLtPfGfDy1en0+JpJle4FWWi +uViW5xxqt33ME+bE7DXZUMimvfQR6uEEkL2UU7hJdWDikf3VgSQJllCViS92zDsg +v9Y4Wz2dvctLaGu4jYd+WRKgJ+N1kliJKDaz6KHKrWEjfFH2ECXYttm21y2fZ3Ib +PZZSHKnSzzw4yyrJFDiLELKEILdsqxVr7Bxbv52ZqBTLsSOW0mjR+LMC5yU/gjMT +DstU38k6L0SSSYh+6T0NCeW0Plxq17fLxDmQa8lfrnbW6rhtGu8eW4FdWa7n7qPC +qwsvRCLxc6Kd4Wpw5qOioyzCzV20bpwsJvvylfn2wnln7Pr0Xb2iEyh2Oi/KWWo5 +MiGdW8WR9vLScHSwzMyp8iTnwOZW6IBIfPkq3Nr3ExTz5m4bFPho7tDvHn+/BY3W +XXyJ/QJqyJqtOcC6wR8VK1eWtmgDDBKWDkJn1XTMAVVPmeF7BLH0WLCSpp4vVvQI +U9PKF+y6Gt3yqtoLbFbl5SyAmvH62fyvvoOSozO/evXADTMCZVKUgBywg6zM0yEA +FBM1GTzPoLa9pXStVFmqeT0UKo9YFez5E+m4bGlrbo2VPJh1Zws= +=94MD +-----END PGP SIGNATURE----- diff --git a/coreutils.spec b/coreutils.spec index 6712263..8cf4c64 100644 --- a/coreutils.spec +++ b/coreutils.spec @@ -1,16 +1,12 @@ Summary: A set of basic GNU tools commonly used in shell scripts Name: coreutils -Version: 9.9 -Release: 2%{?dist} -# some used parts of gnulib are under various variants of LGPL -License: GPL-3.0-or-later AND GFDL-1.3-no-invariants-or-later AND LGPL-2.1-or-later AND LGPL-3.0-or-later +Version: 9.0 +Release: 9999.2%{?dist} +License: GPLv3+ Url: https://www.gnu.org/software/coreutils/ -Source0: https://ftp.gnu.org/gnu/%{name}/%{name}-%{version}.tar.xz -Source1: 
https://ftp.gnu.org/gnu/%{name}/%{name}-%{version}.tar.xz.sig -# From https://savannah.gnu.org/project/release-gpgkeys.php?group=coreutils&download=1 -# which is linked as project keyring on https://savannah.gnu.org/projects/coreutils -Source2: coreutils-keyring.gpg +Source0: https://pixelbeat.org/cu/coreutils-ss.tar.xz Source50: supported_utils +Source51: coreutils-provides.inc Source105: coreutils-colorls.sh Source106: coreutils-colorls.csh @@ -21,30 +17,32 @@ Source106: coreutils-colorls.csh Patch100: coreutils-8.26-test-lock.patch # require_selinux_(): use selinuxenabled(8) if available -Patch101: coreutils-8.26-selinuxenable.patch +Patch105: coreutils-8.26-selinuxenable.patch # downstream changes to default DIR_COLORS Patch102: coreutils-8.32-DIR_COLORS.patch - -# use python3 in tests -Patch103: coreutils-python3.patch - -# df --direct +# to be removed (#548834) +Patch103: coreutils-8.2-uname-processortype.patch +#df --direct Patch104: coreutils-df-direct.patch +#add note about mkdir --mode behaviour into info documentation(#610559) +Patch107: coreutils-8.4-mkdir-modenote.patch -# gnulib C23 support -# https://github.com/coreutils/gnulib/commit/df17f4f37ed3ca373d23ad42eae51122bdb96626 -Patch105: coreutils-9.9-gnulib-c23.patch - -# fix cut test failure on aarch64 rawhide (rhbz#2424302) -# https://github.com/coreutils/coreutils/commit/95044cb5eaea83d02f768feb5ab79fcf5e6ad782 -Patch106: coreutils-9.9-fix-cut-test-aarch64.patch +# sh-utils +#add info about TZ envvar to date manpage +Patch703: sh-utils-2.0.11-dateman.patch # (sb) lin18nux/lsb compliance - multibyte functionality patch Patch800: coreutils-i18n.patch -Conflicts: filesystem < 3 +#getgrouplist() patch from Ulrich Drepper. 
+Patch908: coreutils-getgrouplist.patch +#SELINUX Patch - implements Redhat changes +#(upstream did some SELinux implementation unlike with RedHat patch) +Patch950: coreutils-selinux.patch + +Conflicts: filesystem < 3 # To avoid clobbering installs Conflicts: coreutils-single @@ -63,40 +61,23 @@ BuildRequires: libselinux-utils BuildRequires: make BuildRequires: openssl-devel BuildRequires: strace -BuildRequires: systemd-devel BuildRequires: texinfo -# For gpg verification of source tarball -BuildRequires: gnupg2 # test-only dependencies -BuildRequires: acl -BuildRequires: gdb BuildRequires: perl-interpreter BuildRequires: perl(FileHandle) -BuildRequires: python3 -BuildRequires: tzdata -%ifarch %valgrind_arches -BuildRequires: valgrind -%endif - -%if 0%{?fedora} -BuildRequires: perl(Expect) -BuildRequires: python3-inotify -%endif - %if 23 < 0%{?fedora} || 7 < 0%{?rhel} # needed by i18n test-cases BuildRequires: glibc-langpack-en BuildRequires: glibc-langpack-fr BuildRequires: glibc-langpack-ko -BuildRequires: glibc-langpack-sv %endif Requires: %{name}-common = %{version}-%{release} Provides: coreutils-full = %{version}-%{release} -Provides: bundled(gnulib) +%include %{SOURCE51} Obsoletes: %{name} < 8.24-100 %description @@ -108,7 +89,7 @@ Summary: coreutils multicall binary Suggests: coreutils-common Provides: coreutils = %{version}-%{release} Provides: coreutils%{?_isa} = %{version}-%{release} -Provides: bundled(gnulib) +%include %{SOURCE51} # To avoid clobbering installs Conflicts: coreutils < 8.24-100 # Note RPM doesn't support separate buildroots for %files @@ -125,9 +106,6 @@ packaged as a single multicall binary. 
# https://bugzilla.redhat.com/show_bug.cgi?id=1107973#c7 Obsoletes: %{name} < 8.24-100 -# Gnulib translations are maintained seprately since coreutils 9.6 (#2393892) -Requires: gnulib-l10n - # info doc refers to "Specifying the Time Zone" from glibc-doc (#959597) Suggests: glibc-doc @@ -137,8 +115,7 @@ Optional though recommended components, including documentation and translations. %prep -%{gpgverify} --keyring='%{SOURCE2}' --signature='%{SOURCE1}' --data='%{SOURCE0}' -%autosetup -N +%autosetup -N -n %{name}-%{version}.209-87d5 # will be regenerated in the build directories rm -f src/fs.h @@ -162,15 +139,16 @@ sed src/dircolors.hin \ find tests -name '*.sh' -perm 0644 -print -exec chmod 0755 '{}' '+' (echo "<<< done") 2>/dev/null -# FIXME: Force a newer gettext version to workaround `autoreconf -i` errors -# with coreutils 9.6 and bundled gettext 0.19.2 from gettext-common-devel. -sed -i "s/0.19.2/$(rpm -q --queryformat '%%{VERSION}\n' gettext-devel)/" bootstrap.conf configure.ac - autoreconf -fiv %build export CFLAGS="$RPM_OPT_FLAGS -fno-strict-aliasing -fpic" +# disable -flto on ppc64le to make test-float pass (#1789115) +%ifarch ppc64le +CFLAGS="$CFLAGS -fno-lto" +%endif + # Upstream suggests to build with -Dlint for static analyzers: # https://lists.gnu.org/archive/html/coreutils/2018-06/msg00110.html # ... 
and even for production binary RPMs: @@ -199,7 +177,6 @@ for type in separate single; do --cache-file=../config.cache \ --enable-install-program=arch \ --enable-no-install-program=kill,uptime \ - --enable-systemd \ --with-tty-group \ DEFAULT_POSIX2_VERSION=200112 alternative=199209 || : %make_build all V=1 @@ -226,20 +203,14 @@ for type in separate single; do fi (cd $type && make DESTDIR=$RPM_BUILD_ROOT/$subdir $install) -%if "%{_sbindir}" != "%{_bindir}" # chroot was in /usr/sbin : - mkdir -p $RPM_BUILD_ROOT/$subdir/%_sbindir + mkdir -p $RPM_BUILD_ROOT/$subdir/{%{_bindir},%{_sbindir}} mv $RPM_BUILD_ROOT/$subdir/{%_bindir,%_sbindir}/chroot -%endif # Move multicall variants to *.single. # RemovePathPostfixes will strip that later. if test $type = 'single'; then - for dir in %{_bindir} \ -%if "%{_sbindir}" != "%{_bindir}" -%{_sbindir} \ -%endif -%{_libexecdir}/%{name}; do + for dir in %{_bindir} %{_sbindir} %{_libexecdir}/%{name}; do for bin in $RPM_BUILD_ROOT/%{_libexecdir}/%{name}/$dir/*; do basebin=$(basename $bin) mv $bin $RPM_BUILD_ROOT/$dir/$basebin.single @@ -267,9 +238,7 @@ rm -f $RPM_BUILD_ROOT%{_infodir}/dir %files single %{_bindir}/*.single -%if "%{_sbindir}" != "%{_bindir}" %{_sbindir}/chroot.single -%endif %dir %{_libexecdir}/coreutils %{_libexecdir}/coreutils/*.so.single # duplicate the license because coreutils-common does not need to be installed @@ -286,271 +255,6 @@ rm -f $RPM_BUILD_ROOT%{_infodir}/dir %license COPYING %changelog -* Tue Jan 13 2026 Lukáš Zaoral - 9.9-2 -- fix cut test failure on aarch64 rawhide (rhbz#2424302) - -* Wed Nov 26 2025 Lukáš Zaoral - 9.9-1 -- rebase to latest upstream release (rhbz#2413803) - -* Mon Sep 29 2025 Lukáš Zaoral - 9.8-3 -- require gnulib-l10n for translations of gnulib messages (rhbz#2393892) - -* Thu Sep 25 2025 Lukáš Zaoral - 9.8-2 -- tail: fix tailing larger number of lines in regular files (rhbz#2398008) - -* Wed Sep 24 2025 Lukáš Zaoral - 9.8-1 -- rebase to latest upstream release (rhbz#2397467) -- remove 
downstream patch for selinux options deprecated since 2009 - -* Wed Jul 23 2025 Fedora Release Engineering - 9.7-5 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_43_Mass_Rebuild - -* Mon Jun 30 2025 Lukáš Zaoral - 9.7-4 -- stty: add support for arbitrary baud rates (rhbz#2375439) - -* Wed May 28 2025 Lukáš Zaoral - 9.7-3 -- sort: fix buffer under-read (CVE-2025-5278) - -* Mon May 19 2025 Lukáš Zaoral - 9.7-2 -- cp/mv: do not fail when copying of trivial NFSv4 ACLs fails (rhbz#2363149) - -* Wed Apr 09 2025 Lukáš Zaoral - 9.7-1 -- rebase to latest upstream release (rhbz#2358624) - -* Tue Feb 25 2025 Lukáš Zaoral - 9.6-2 -- fix 'who -m' with guessed tty names (rhbz#2343998) - -* Mon Jan 20 2025 Lukáš Zaoral - 9.6-1 -- rebase to latest upstream version (rhbz#2338620) -- sync i18n patch with SUSE (Kudos to Berny Völker!) - -* Thu Jan 16 2025 Fedora Release Engineering - 9.5-13 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_42_Mass_Rebuild - -* Sun Jan 12 2025 Zbigniew Jędrzejewski-Szmek - 9.5-12 -- Rebuilt for the bin-sbin merge (2nd attempt) - -* Wed Nov 13 2024 Florian Weimer - 9.5-11 -- Affinity mask handling in nproc for large CPU counts (rhbz#2325167) - -* Fri Sep 27 2024 Lukáš Zaoral - 9.5-10 -- fix fold -b with UTF8 locale (RHEL-60295) - -* Tue Aug 27 2024 Lukáš Zaoral - 9.5-9 -- show web sessions in who output (rhbz#2307847) - -* Wed Aug 21 2024 Lukáš Zaoral - 9.5-8 -- add missing systemd-devel buildrequires - -* Wed Jul 17 2024 Fedora Release Engineering - 9.5-7 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_41_Mass_Rebuild - -* Mon Jul 15 2024 Lukáš Zaoral - 9.5-6 -- Rebuilt for the bin-sbin merge - -* Mon Jul 15 2024 Sohum Mendon - 9.5-5 -- fix incorrect exit status when fold is called with a non-existent file - -* Tue Jul 09 2024 Zbigniew Jędrzejewski-Szmek - 9.5-4 -- Rebuilt for the bin-sbin merge - -* Thu Jul 04 2024 Lukáš Zaoral - 9.5-3 -- do not buildrequire perl(Expect) on ELN - -* Tue Jun 04 2024 Lukáš Zaoral - 9.5-2 -- enable LTO on 
ppc64le - -* Tue Apr 02 2024 Lukáš Zaoral - 9.5-1 -- rebase to latest upstream version (rhbz#2272063) -- sync i18n patch with SUSE (Kudos to Berny Völker!) -- add some test dependencies to execute additional part of the upstream test-suite - -* Mon Jan 29 2024 Lukáš Zaoral - 9.4-6 -- fix tail on kernels with 64k page sizes (RHEL-22866) - -* Wed Jan 24 2024 Fedora Release Engineering - 9.4-5 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild - -* Fri Jan 19 2024 Fedora Release Engineering - 9.4-4 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild - -* Thu Jan 18 2024 Lukáš Zaoral - 9.4-3 -- fix compilation on i686 - -* Thu Jan 18 2024 Lukáš Zaoral - 9.4-2 -- fix buffer overflow in split (CVE-2024-0684) - -* Fri Sep 15 2023 Lukáš Zaoral - 9.4-1 -- new upstream release 9.4 (#2235759) -- enable integration with systemd -- fix the license field - -* Wed Jul 19 2023 Fedora Release Engineering - 9.3-2 -- Rebuilt for https://fedoraproject.org/wiki/Fedora_39_Mass_Rebuild - -* Tue Apr 18 2023 Kamil Dudka - 9.3-1 -- remove obsolete Provides for absolute paths -- new upstream release 9.3 - -* Tue Apr 11 2023 Lukáš Zaoral - 9.2-4 -- migrate to SPDX license format - -* Fri Mar 24 2023 Kamil Dudka - 9.2-3 -- copy: fix --reflink=auto to fallback in more cases (#2180056) -- cksum: fix reporting of failed checks (#2180056) - -* Wed Mar 22 2023 Kamil Dudka - 9.2-2 -- coreutils-getgrouplist.patch: drop a patch no longer needed - -* Wed Mar 22 2023 Kamil Dudka - 9.2-1 -- new upstream release 9.2 - -* Thu Jan 19 2023 Fedora Release Engineering -- Rebuilt for https://fedoraproject.org/wiki/Fedora_38_Mass_Rebuild - -* Mon Jan 02 2023 Kamil Dudka - 9.1-10 -- drop obsolete downstream-only extension of date(1) man page -- undocument downstream SELinux options deprecated since 2009 - -* Mon Jan 02 2023 Kamil Dudka - 9.1-9 -- basic support for checking NFSv4 ACLs (#2137866) - -* Mon Sep 19 2022 Kamil Dudka - 9.1-8 -- remove obsolete extension of mkdir(1) 
info documentation - -* Tue Aug 23 2022 Kamil Dudka - 9.1-7 -- remove non-upstream patch for uname -i/-p (#548834) - -* Mon Aug 08 2022 Kamil Dudka - 9.1-6 -- improve wording of a comment in /etc/DIR_COLORS (#2112593) - -* Mon Aug 08 2022 Kamil Dudka - 9.1-5 -- improve handling of control characters in unexpand (#2112870) - -* Mon Aug 01 2022 Kamil Dudka - 9.1-4 -- prevent unexpand from failing on control characters (#2112870) - -* Wed Jul 20 2022 Fedora Release Engineering -- Rebuilt for https://fedoraproject.org/wiki/Fedora_37_Mass_Rebuild - -* Sat Apr 23 2022 Pádraig Brady - 9.1-2 -- make simple backups in correct dir; broken in 9.1 - -* Tue Apr 19 2022 Kamil Dudka - 9.1-1 -- new upstream release 9.1 - -* Mon Mar 21 2022 Kamil Dudka - 9.0-5 -- ls, stat: avoid triggering automounts (#2044981) - -* Tue Mar 01 2022 Kamil Dudka - 9.0-4 -- make `df --direct` work again (#2058686) - -* Wed Jan 19 2022 Fedora Release Engineering -- Rebuilt for https://fedoraproject.org/wiki/Fedora_36_Mass_Rebuild - -* Mon Oct 04 2021 Kamil Dudka - 9.0-2 -- chmod: fix exit status when ignoring symlinks - -* Sun Sep 26 2021 Kamil Dudka - 9.0-1 -- new upstream release 9.0 - -* Tue Sep 14 2021 Sahana Prasad -- Rebuilt with OpenSSL 3.0.0 - -* Wed Jul 21 2021 Fedora Release Engineering -- Rebuilt for https://fedoraproject.org/wiki/Fedora_35_Mass_Rebuild - -* Wed Jul 07 2021 Kamil Dudka - 8.32-30 -- df: fix duplicated remote entries due to bind mounts (#1979814) - -* Thu Jul 01 2021 Kamil Dudka - 8.32-28 -- tail: fix stack out-of-bounds write with --follow - -* Tue Jun 08 2021 Kamil Dudka - 8.32-27 -- mountlist: recognize fuse.portal as dummy file system (#1913358) - -* Mon May 17 2021 Kamil Dudka - 8.32-26 -- cp: pick additional copy_file_range()-related fixes from upstream - -* Mon May 03 2021 Kamil Dudka - 8.32-24 -- copy: ensure we enforce --reflink=never (#1956080) - -* Tue Apr 27 2021 Kamil Dudka - 8.32-23 -- copy: do not refuse to copy a swap file - -* Fri Apr 09 2021 Kamil Dudka - 
8.32-22 -- weaken the dependency on glibc-doc to reduce minimal installations -- drop the last use of ncurses no longer needed (#1830318) -- utimens: fix confusing arg type in internal func - -* Fri Mar 26 2021 Kamil Dudka - 8.32-21 -- hostname,ln: fix memory leaks detected by Coverity - -* Wed Mar 24 2021 Kamil Dudka - 8.32-20 -- cp: use copy_file_range if available - -* Thu Feb 18 2021 Kamil Dudka - 8.32-19 -- stat: add support for the exfat file system (#1921427) - -* Wed Feb 03 2021 Kamil Dudka - 8.32-18 -- make coreutils-common recommend glibc-doc for info doc refs (#959597) - -* Tue Feb 02 2021 Kamil Dudka - 8.32-17 -- ls: fix crash printing SELinux context for unstatable files (#1921249) -- split: fix --number=K/N to output correct part of file (#1921246) -- expr: fix invalid read with unmatched \(...\) (#1919775) - -* Tue Jan 26 2021 Fedora Release Engineering -- Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild - -* Tue Dec 08 2020 Kamil Dudka - 8.32-15 -- rm: do not skip files upon failure to remove an empty dir (#1905481) - -* Tue Nov 03 2020 Kamil Dudka - 8.32-14 -- df,stat,tail: recognize more file system types - -* Wed Oct 14 2020 Kamil Dudka - 8.32-13 -- make the %%build section idempotent - -* Mon Aug 17 2020 Kamil Dudka - 8.32-12 -- do not install /etc/DIR_COLORS.256color (#1830318) - -* Thu Jul 30 2020 Kamil Dudka - 8.32-11 -- cp: default to --reflink=auto (#1861108) - -* Mon Jul 27 2020 Fedora Release Engineering -- Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild - -* Fri Jul 24 2020 Kamil Dudka - 8.32-9 -- disable -flto on ppc64le to make test-float pass (#1789115) - -* Mon Jul 13 2020 Tom Stellard - 8.32-8 -- Use make macros -- https://fedoraproject.org/wiki/Changes/UseMakeBuildInstallMacro - -* Fri Jun 26 2020 James Cassell - 8.32-7 -- move ncurses to -common package since it's needed for colorls.sh -- make ncurses optional - -* Fri May 15 2020 Kamil Dudka - 8.32-6 -- compile with -Dlint to enable optional 
initialization and cleanup code - -* Thu Apr 23 2020 Kamil Dudka - 8.32-5 -- du: simplify leaf optimization for XFS (#1823247) - -* Fri Apr 17 2020 Tom Stellard - 8.32-4 -- Fix missing inline function definition - -* Wed Mar 11 2020 Kamil Dudka - 8.32-3 -- uniq: remove collation handling as required by newer POSIX - -* Mon Mar 09 2020 Kamil Dudka - 8.32-2 -- make mknod work again in chroot without /proc being mounted (#1811038) -- ls: restore 8.31 behavior on removed directories - -* Thu Mar 05 2020 Kamil Dudka - 8.32-1 -- new upstream release 8.32 - * Tue Feb 11 2020 Kamil Dudka - 8.31-10 - make upstream test-suite work with root privileges (#1800597) diff --git a/sh-utils-2.0.11-dateman.patch b/sh-utils-2.0.11-dateman.patch new file mode 100644 index 0000000..60cdaa6 --- /dev/null +++ b/sh-utils-2.0.11-dateman.patch @@ -0,0 +1,12 @@ +diff -urNp coreutils-5.97-orig/man/date.x coreutils-5.97/man/date.x +--- coreutils-5.97-orig/man/date.x 1999-11-02 15:07:36.000000000 +0100 ++++ coreutils-5.97/man/date.x 2008-10-15 10:13:31.000000000 +0200 +@@ -11,3 +11,8 @@ calendar date, time of day, time zone, day of week, relative time, + relative date, and numbers. An empty string indicates the beginning + of the day. The date string format is more complex than is easily + documented here but is fully described in the info documentation. ++[ENVIRONMENT] ++.TP ++TZ ++Specifies the timezone, unless overridden by command line parameters. ++If neither is specified, the setting from /etc/localtime is used. 
diff --git a/sources b/sources index 0952ab1..04795af 100644 --- a/sources +++ b/sources @@ -1,2 +1 @@ -SHA512 (coreutils-9.9.tar.xz.sig) = 0a3dfdfa6b4234e2e1d42142269f959bdf3cf8f6605a50270a27eff84dd22588f182121f7dd3eeb04be45f5109d02690215065b3d3b43882874d0e165a1435d0 -SHA512 (coreutils-9.9.tar.xz) = e7b0e59f7732d2c098ea4934014f470248bd5c4764210e9200a698010a8e3b95bbb26e543f0cd73ed5a4b8e1f8cda932c73f39954d68175e4deaa47526610c65 +SHA512 (coreutils-ss.tar.xz) = 3d51477d0f5a2082dd723974554185c855eea28189875b9b7ce88605db044897b0b8b0244d06ff6eb9f6fee5c2279437539dd8518d5a30d02e824d3a54b51303 diff --git a/tests/readlink-cannot-handle-recursive-symlinks/Makefile b/tests/readlink-cannot-handle-recursive-symlinks/Makefile new file mode 100644 index 0000000..49d37a7 --- /dev/null +++ b/tests/readlink-cannot-handle-recursive-symlinks/Makefile @@ -0,0 +1,63 @@ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# Makefile of /CoreOS/coreutils/readlink-cannot-handle-recursive-symlink s +# Description: Test for readlink cannot handle recursive symlinks +# Author: Jan Scotka +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# Copyright (c) 2010 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing +# to use, modify, copy, or redistribute it subject to the terms +# and conditions of the GNU General Public License version 2. +# +# This program is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public +# License along with this program; if not, write to the Free +# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. 
+# +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +export TEST=/CoreOS/coreutils/readlink-cannot-handle-recursive-symlinks +export TESTVERSION=1.0 + +BUILT_FILES= + +FILES=$(METADATA) runtest.sh Makefile PURPOSE + +.PHONY: all install download clean + +run: $(FILES) build + ./runtest.sh + +build: $(BUILT_FILES) + chmod a+x runtest.sh + +clean: + rm -f *~ $(BUILT_FILES) + + +include /usr/share/rhts/lib/rhts-make.include + +$(METADATA): Makefile + @echo "Owner: Jan Scotka " > $(METADATA) + @echo "Name: $(TEST)" >> $(METADATA) + @echo "TestVersion: $(TESTVERSION)" >> $(METADATA) + @echo "Path: $(TEST_DIR)" >> $(METADATA) + @echo "Description: Test for readlink cannot handle recursive symlinks" >> $(METADATA) + @echo "Type: Sanity" >> $(METADATA) + @echo "TestTime: 5m" >> $(METADATA) + @echo "RunFor: coreutils" >> $(METADATA) + @echo "Requires: coreutils" >> $(METADATA) + @echo "Priority: Normal" >> $(METADATA) + @echo "License: GPLv2" >> $(METADATA) + @echo "Confidential: no" >> $(METADATA) + @echo "Destructive: no" >> $(METADATA) + + rhts-lint $(METADATA) diff --git a/tests/readlink-cannot-handle-recursive-symlinks/PURPOSE b/tests/readlink-cannot-handle-recursive-symlinks/PURPOSE new file mode 100644 index 0000000..b9fd740 --- /dev/null +++ b/tests/readlink-cannot-handle-recursive-symlinks/PURPOSE @@ -0,0 +1,54 @@ +PURPOSE of /CoreOS/coreutils/readlink-cannot-handle-recursive-symlinks +Description: Test for readlink cannot handle recursive symlinks +Author: Jan Scotka +Bug summary: readlink cannot handle recursive symlinks + +Description: + +Description of problem: +The readlink command fails with an error "Too many levels of symbolic links", even if there are only 2 levels. + +The readlink command from RHEL 3 and RHEL 4 and from Fedora 11 all work fine. 
+ +Among other changes between RHEL 4 and RHEL 5, RHEL 5's coreutils added calls to cycle_check() in lib/canonicalize.c, but in upstream canonicalize.c (now in gnulib instead of coreutils), we see the comment: + /* Detect loops. We cannot use the cycle-check module here, + since it's actually possible to encounter the same symlink + more than once in a given traversal. However, encountering + the same symlink,NAME pair twice does indicate a loop. */ + +http://git.savannah.gnu.org/gitweb/?p=gnulib.git;a=blob;f=lib/canonicalize.c;h=4f348398fd69ae516396313d18ac294a4ca3dde3;hb=b653eda3ac4864de205419d9f41eec267cb89eeb#l262 + +The latest canonicalize.c uses seen_triple() instead of cycle_check(). + + +Version-Release number of selected component (if applicable): +coreutils-5.97-19.el5 + +How reproducible: +every time + +Steps to Reproduce: +1. Create a directory with a symlink to itself + mkdir /tmp/dir + cd /tmp/dir + ln -s ../dir dirlink + +2. Run readlink using the 'dirlink' symlink recursively + readlink -v -f dirlink + readlink -v -f dirlink/dirlink + readlink -v -f dirlink/dirlink/dirlink + +Actual results: +The first readlink command on just dirlink succeeds, but the 2nd and 3rd commands fail with + readlink: dirlink/dirlink: Too many levels of symbolic links + +Expected results: +$ readlink -v -f dirlink +/tmp/dir +$ readlink -v -f dirlink/dirlink +/tmp/dir +$ readlink -v -f dirlink/dirlink/dirlink +/tmp/dir + + +Additional info: diff --git a/tests/readlink-cannot-handle-recursive-symlinks/runtest.sh b/tests/readlink-cannot-handle-recursive-symlinks/runtest.sh new file mode 100755 index 0000000..6ee251f --- /dev/null +++ b/tests/readlink-cannot-handle-recursive-symlinks/runtest.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# vim: dict=/usr/share/beakerlib/dictionary.vim cpt=.,w,b,u,t,i,k +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# runtest.sh of /CoreOS/coreutils/readlink-cannot-handle-recursive-symlinks +# Description: Test for readlink 
cannot handle recursive symlinks +# Author: Jan Scotka +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# Copyright (c) 2010 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing +# to use, modify, copy, or redistribute it subject to the terms +# and conditions of the GNU General Public License version 2. +# +# This program is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public +# License along with this program; if not, write to the Free +# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +# Include rhts environment +. /usr/bin/rhts-environment.sh +. /usr/lib/beakerlib/beakerlib.sh + +PACKAGE="coreutils" + +rlJournalStart + rlPhaseStartSetup + rlAssertRpm $PACKAGE + rlRun "TmpDir=\`mktemp -d\`" 0 "Creating tmp directory" + rlRun "pushd $TmpDir" + rlRun "mkdir link" 0 "Creating test directory" + rlRun "cd link" 0 "cd to this dir" + rlRun "ln -s ../link link" 0 "creating symlink to ../link -> link" + rlPhaseEnd + + rlPhaseStartTest + rlLog "Test of readlink up to 20 iteration" + export TMPVAR="link" + for foo in `seq 20` + do echo $TMPVAR + rlRun "readlink -v -f $TMPVAR" 0 "readlink of $TMPVAR" + TMPVAR="$TMPVAR/link" + done + rlPhaseEnd + + rlPhaseStartCleanup + rlRun "popd" + rlRun "rm -r $TmpDir" 0 "Removing tmp directory" + rlPhaseEnd +rlJournalPrintText +rlJournalEnd diff --git a/tests/test-basics b/tests/test-basics new file mode 100755 index 0000000..7324553 --- /dev/null +++ b/tests/test-basics @@ -0,0 +1,39 @@ +#!/bin/sh + +# Checks that touch ls rm and foo work +# https://www.mankier.com/1/beakerlib#Examples +. 
/usr/share/beakerlib/beakerlib.sh + +# Set the full test name +TEST="/examples/beakerlib/Sanity/phases" + +# Package being tested +PACKAGE="coreutils" + +rlJournalStart + # Setup phase: Prepare test directory + rlPhaseStartSetup + rlAssertRpm $PACKAGE + rlRun 'TmpDir=$(mktemp -d)' 0 'Creating tmp directory' # no-reboot + rlRun "pushd $TmpDir" + rlPhaseEnd + + # Test phase: Testing touch, ls and rm commands + rlPhaseStartTest + rlRun "touch foo" 0 "Creating the foo test file" + rlAssertExists "foo" + rlRun "ls -l foo" 0 "Listing the foo test file" + rlRun "rm foo" 0 "Removing the foo test file" + rlAssertNotExists "foo" + rlRun "ls -l foo" 2 "Listing foo should now report an error" + rlPhaseEnd + + # Cleanup phase: Remove test directory + rlPhaseStartCleanup + rlRun "popd" + rlRun "rm -r $TmpDir" 0 "Removing tmp directory" + rlPhaseEnd +rlJournalEnd + +# Print the test report +rlJournalPrintText diff --git a/tests/test_basics.yml b/tests/test_basics.yml new file mode 100644 index 0000000..d5727cf --- /dev/null +++ b/tests/test_basics.yml @@ -0,0 +1,9 @@ +--- +# This first play always runs on the local staging system +- hosts: localhost + tags: + - atomic + - classic + - container + roles: + - { role: standard-test-beakerlib, tests: [ test-basics, readlink-cannot-handle-recursive-symlinks ] } diff --git a/tests/tests.yml b/tests/tests.yml new file mode 100644 index 0000000..529263d --- /dev/null +++ b/tests/tests.yml @@ -0,0 +1,2 @@ +# Fedora Continuous Integration: https://fedoraproject.org/wiki/CI +- include: test_basics.yml