bids-schema/get_test_data
Benjamin A. Beasley bf8e9a5ac8 Trivial improvements to the get_test_data script
[skip changelog]
2025-01-02 09:03:26 -05:00

91 lines
2.6 KiB
Bash
Executable file

#!/bin/sh
# Reads the %global macros from bids-schema.spec (located next to this
# script), then prepares a scratch directory that is removed again on exit.
# Takes no arguments; anything on the command line prints usage and fails.
set -o errexit
set -o nounset

if [ "$#" != '0' ]
then
  cat 1>&2 <<EOF
Usage: $0
Parses bids-schema.spec in the same directory as this script; downloads test
data and produces two archives, filtered to include only selected datasets with
audited license terms, in the current working directory.
EOF
  exit 1
fi

# Remember where the caller started before any directory change
OUTDIR="${PWD}"
TMPDIR="$(mktemp -d)"

# The cleanup itself lives on EXIT only. The signal traps just turn the signal
# into an exit (128 + signal number): a signal trap that returns normally lets
# the shell carry on with the next command, which here would mean continuing
# inside a directory that has just been deleted.
# Single quotes defer the expansion of TMPDIR until the trap runs, so a path
# containing a quote character cannot break the command.
trap 'rm -rf -- "${TMPDIR}"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Resolve the directory holding this script. "&&" keeps a failed cd from
# silently yielding the current directory instead, and the empty CDPATH stops
# cd from searching (and printing) other locations for a relative path.
SPEC="$(CDPATH='' cd -- "$(dirname -- "${0}")" && pwd)/bids-schema.spec"
# Only the %global lines are needed to evaluate macros later on
MACROS="$(grep -E '^%global' "${SPEC}")"
# Expand one RPM macro using the %global definitions held in MACROS.
# Globals:   MACROS (read)
# Arguments: $1 - macro name, without the surrounding %{...}
# Outputs:   the expansion on stdout; a diagnostic on stderr on failure
# Returns:   non-zero when rpm fails or leaves the macro unexpanded
get_macro()
(
  # The body is a subshell so the helper variable cannot leak to the caller.
  expansion="$(rpm -E "${MACROS}
%{${1}}")" || exit
  case "${expansion}" in
    *"%{${1}}"*)
      # rpm echoes an undefined macro back verbatim and still exits 0;
      # treat that as an error instead of handing the literal to the caller.
      echo "Error: macro %{${1}} is not defined" 1>&2
      exit 1
      ;;
  esac
  printf '%s\n' "${expansion}"
)
# Look up the repository URL, pinned commit, and dataset list for each of the
# two test-data repositories via the spec file's macros.
EX_URL="$(get_macro examples_url)"
EX_COMMIT="$(get_macro examples_commit)"
EX_LIST="$(get_macro examples)"
ERREX_URL="$(get_macro error_examples_url)"
ERREX_COMMIT="$(get_macro error_examples_commit)"
ERREX_LIST="$(get_macro error_examples)"

# Everything below works inside the scratch directory
cd "${TMPDIR}"

# --fail makes curl exit non-zero on an HTTP error response. Without it the
# server's error page would be saved under the archive name and the problem
# would only show up later as a confusing tar failure.
EX_ARCHIVE="bids-examples-${EX_COMMIT}.tar.gz"
EX_DL="${EX_URL}/archive/${EX_COMMIT}/${EX_ARCHIVE}"
echo "--> Downloading: ${EX_DL}" 1>&2
curl --fail -L -O "${EX_DL}"

ERREX_ARCHIVE="bids-error-examples-${ERREX_COMMIT}.tar.gz"
ERREX_DL="${ERREX_URL}/archive/${ERREX_COMMIT}/${ERREX_ARCHIVE}"
echo "--> Downloading: ${ERREX_DL}" 1>&2
curl --fail -L -O "${ERREX_DL}"

echo "--> Extracting: ${EX_ARCHIVE}" 1>&2
tar -xzf "${EX_ARCHIVE}"
echo "--> Extracting: ${ERREX_ARCHIVE}" 1>&2
tar -xzf "${ERREX_ARCHIVE}"
# Replace an extracted tree with a directory of the same name that contains
# only the listed datasets. The complete tree stays next to it under an
# "-original" suffix.
#   $1 - name of the extracted directory
#   $2 - whitespace-separated names of the datasets to keep
keep_listed_datasets()
{
  mv "${1}" "${1}-original"
  mkdir "${1}"
  # Deliberately unquoted: the list is split into one word per dataset
  for ds in ${2}
  do
    mv "${1}-original/${ds}" "${1}"
  done
  # Carry the mtime of the replaced directory over to its replacement
  touch -r "${1}-original" "${1}"
}

echo '--> Removing all but “whitelisted” datasets' 1>&2
# Each archive name minus its .tar.gz suffix is used as the directory name
EX_ARCHDIR="$(basename "${EX_ARCHIVE}" '.tar.gz')"
ERREX_ARCHDIR="$(basename "${ERREX_ARCHIVE}" '.tar.gz')"
keep_listed_datasets "${EX_ARCHDIR}" "${EX_LIST}"
keep_listed_datasets "${ERREX_ARCHDIR}" "${ERREX_LIST}"
# Pack a directory into a reproducible, zstd-compressed tar archive.
# Globals:   ARCHDIR, FILTERED (written)
# Arguments: $1 - directory to archive
#            $2 - path of the compressed archive to write
# Outputs:   a progress message on stderr
# Returns:   non-zero when tar or the compressor it runs fails
rearchive()
{
  ARCHDIR="${1}"
  FILTERED="${2}"
  echo "--> Re-archiving: ${FILTERED}" 1>&2
  # tar launches the compressor itself instead of being piped into it. Plain
  # sh has no pipefail, so in "tar | zstdmt" a tar failure would be hidden
  # behind zstdmt's exit status and a truncated archive would look like
  # success. --file=- keeps the archive on stdout for the redirect below.
  # The remaining options follow
  # https://www.gnu.org/software/tar/manual/html_section/Reproducibility.html
  TZ=UTC LC_ALL=C tar \
    --create \
    --file=- \
    --use-compress-program='zstdmt --ultra -22' \
    --sort=name \
    --format=posix \
    --numeric-owner --owner=0 --group=0 \
    --mode=go+u,go-w \
    --pax-option='delete=atime,delete=ctime' \
    "${ARCHDIR}/" > "${FILTERED}"
}
# Compress each filtered tree in turn, then deliver both archives to the
# directory recorded in OUTDIR.
EX_FILTERED="${EX_ARCHDIR}-filtered.tar.zst"
rearchive "${EX_ARCHDIR}" "${EX_FILTERED}"

ERREX_FILTERED="${ERREX_ARCHDIR}-filtered.tar.zst"
rearchive "${ERREX_ARCHDIR}" "${ERREX_FILTERED}"

mv "${EX_FILTERED}" "${ERREX_FILTERED}" "${OUTDIR}"
echo 'Done.' >&2