diff --git a/lib/functions/compilation/ccache.sh b/lib/functions/compilation/ccache.sh index 9ed869ffef..0dbe517952 100644 --- a/lib/functions/compilation/ccache.sh +++ b/lib/functions/compilation/ccache.sh @@ -21,9 +21,9 @@ function do_with_ccache_statistics() { display_alert "ccache dir size before" "${ccache_dir_size_before_human}" "ccache" # Show the ccache configuration + wait_for_disk_sync "before ccache config" display_alert "ccache configuration" "ccache" "ccache" run_host_command_logged ccache --show-config "&&" sync - sync fi display_alert "Running ccache'd build..." "ccache" "ccache" diff --git a/lib/functions/general/chroot-helpers.sh b/lib/functions/general/chroot-helpers.sh index a218695e6b..61adedba05 100644 --- a/lib/functions/general/chroot-helpers.sh +++ b/lib/functions/general/chroot-helpers.sh @@ -7,7 +7,7 @@ mount_chroot() { local target target="$(realpath "$1")" # normalize, remove last slash if dir display_alert "mount_chroot" "$target" "debug" - mount -t tmpfs tmpfs "${target}/tmp" + mount -t tmpfs tmpfs "${target}/tmp" # @TODO: maybe /run/user/0 etc and XDG_RUNTIME_DIR mount -t proc chproc "${target}"/proc mount -t sysfs chsys "${target}"/sys mount -t devtmpfs chdev "${target}"/dev || mount --bind /dev "${target}"/dev @@ -28,8 +28,8 @@ umount_chroot() { umount --recursive "${target}"/dev || true umount "${target}"/proc || true umount "${target}"/sys || true - umount "${target}"/tmp || true - sync + umount "${target}"/tmp || true # @TODO: maybe /run/user/0 etc and XDG_RUNTIME_DIR + wait_for_disk_sync "after umount chroot" run_host_command_logged grep -E "'${target}/(dev|proc|sys|tmp)'" /proc/mounts "||" true done } @@ -49,7 +49,7 @@ function umount_chroot_recursive() { return 0 fi display_alert "Unmounting recursively" "${description} - be patient" "" - sync # sync. coalesce I/O. wait for writes to flush to disk. it might take a second. + wait_for_disk_sync "before recursive umount ${description}" # sync. coalesce I/O. wait for writes to flush to disk. it might take a second. # First, try to umount some well-known dirs, in a certain order. for speed. local -a well_known_list=("dev/pts" "dev" "proc" "sys" "boot/efi" "boot/firmware" "boot" "tmp" ".") for well_known in "${well_known_list[@]}"; do @@ -64,7 +64,7 @@ function umount_chroot_recursive() { display_alert "${#current_mount_list[@]} dirs still mounted after ${tries} tries:" "${current_mount_list[*]}" "wrn" fi cut -d " " -f 2 "/proc/mounts" | grep "^${target}" | xargs -n1 umount --recursive &> /dev/null || true # ignore errors - sync # wait for fsync, then count again for next loop. + wait_for_disk_sync "during recursive umount ${description}" # sync. coalesce I/O. wait for writes to flush to disk. it might take a second. mapfile -t current_mount_list < <(cut -d " " -f 2 "/proc/mounts" | grep "^${target}") tries=$((tries + 1)) done diff --git a/lib/functions/host/host-utils.sh b/lib/functions/host/host-utils.sh index 0a5e3ff013..5f36841dbe 100644 --- a/lib/functions/host/host-utils.sh +++ b/lib/functions/host/host-utils.sh @@ -293,3 +293,31 @@ function get_descendants_of_pid_array() { descendants_of_pid_array_result=(${descendants}) display_alert "Descendants of PID $1: ${descendants_of_pid_array_result[*]}" "array = get_descendants_of_pid_array" "debug" } + +# This essentially runs "sync". It's a bit more complicated than that, though. +# If "sync" takes more than 5 seconds, we want to let the user know. +# So run sync under "timeout" in a loop, until it completes successfully. +# call: wait_for_disk_sync "after writing a huge file to disk" +function wait_for_disk_sync() { + declare -i timeout_seconds=10 + declare -i sync_worked=0 + declare -i sync_timeout_count=0 + declare -i total_wait=0 + + while [[ "${sync_worked}" -eq 0 ]]; do + declare -i sync_timeout=0 + display_alert "wait_for_disk_sync: $*" "sync start, timeout_seconds:${timeout_seconds}" "debug" + # run a bash sending all to /dev/null, then send to /dev/null again, so we don't get "Killed" messages + bash -c "timeout --signal=9 ${timeout_seconds} sync &> /dev/null" &> /dev/null && sync_worked=1 || sync_timeout=1 + display_alert "wait_for_disk_sync: $*" "sync done worked: ${sync_worked} sync_timeout: ${sync_timeout}" "debug" + if [[ "${sync_timeout}" -eq 1 ]]; then + total_wait=$((total_wait + timeout_seconds)) + sync_timeout_count=$((sync_timeout_count + 1)) + display_alert "Waiting sync to storage $*" "fsync taking more than ${total_wait}s - be patient" "warn" + else + if [[ "${sync_timeout_count}" -gt 0 ]]; then + display_alert "Sync to storage OK $*" "fsync took more than ${total_wait}s" "info" + fi + fi + done +} diff --git a/lib/functions/host/tmpfs-utils.sh b/lib/functions/host/tmpfs-utils.sh index e317260fb0..06b2462b38 100644 --- a/lib/functions/host/tmpfs-utils.sh +++ b/lib/functions/host/tmpfs-utils.sh @@ -71,13 +71,13 @@ function cleanup_tmpfs_for() { if [[ "${umount_tmpfs}" == "umount_tmpfs" ]]; then display_alert "cleanup_tmpfs_for: umount tmpfs" "${tmpfs_name}" "cleanup" cd "${SRC}" || echo "cleanup_tmpfs_for: cd failed to ${SRC}" >&2 # avoid cwd in use error - sync # let disk coalesce + wait_for_disk_sync "before tmps cleanup ${tmpfs_name}" # let disk coalesce umount "${tmpfs_path}" &> /dev/null || { display_alert "cleanup_tmpfs_for: umount failed" "${tmpfs_name} tmpfs umount failed, will try lazy" "cleanup" # Do a lazy umount... last-resort... - sync + wait_for_disk_sync "before tmps cleanup ${tmpfs_name} lazy" # let disk coalesce umount -l "${tmpfs_path}" &> /dev/null || display_alert "cleanup_tmpfs_for: lazy umount failed" "${tmpfs_name} tmpfs lazy umount also failed" "cleanup" - sync + wait_for_disk_sync "after tmps cleanup ${tmpfs_name} lazy" # let disk coalesce } # Check if the tmpfs is still mounted after all that trying, log error, show debug, and give up with error diff --git a/lib/functions/image/partitioning.sh b/lib/functions/image/partitioning.sh index d0bd757bfe..8f50713cd2 100644 --- a/lib/functions/image/partitioning.sh +++ b/lib/functions/image/partitioning.sh @@ -134,7 +134,7 @@ function prepare_partitions() { # stage: create blank image display_alert "Creating blank image for rootfs" "truncate: $sdsize MiB" "info" run_host_command_logged truncate "--size=${sdsize}M" "${SDCARD}".raw # please provide EVIDENCE of problems with this; using dd is very slow - run_host_command_logged sync + wait_for_disk_sync "after truncate SDCARD.raw" # stage: calculate boot partition size local bootstart=$(($OFFSET * 2048)) @@ -240,7 +240,7 @@ function prepare_partitions() { if [[ $ROOTFS_TYPE == btrfs && $BTRFS_COMPRESSION != none ]]; then local fscreateopt="-o compress-force=${BTRFS_COMPRESSION}" fi - sync # force writes to be really flushed + wait_for_disk_sync "after mkfs" # force writes to be really flushed display_alert "Mounting rootfs" "$rootdevice" run_host_command_logged mount ${fscreateopt} $rootdevice $MOUNT/ # create fstab (and crypttab) entry diff --git a/lib/functions/image/rootfs-to-image.sh b/lib/functions/image/rootfs-to-image.sh index 1091056535..e42d8a1d49 100644 --- a/lib/functions/image/rootfs-to-image.sh +++ b/lib/functions/image/rootfs-to-image.sh @@ -80,7 +80,7 @@ function create_image_from_sdcard_rootfs() { run_host_command_logged sfdisk -l "${LOOP}" # @TODO: use asset.. fi - run_host_command_logged sync + wait_for_disk_sync "before umount MOUNT" umount_chroot_recursive "${MOUNT}" "MOUNT" [[ $CRYPTROOT_ENABLE == yes ]] && cryptsetup luksClose "$ROOT_MAPPER" diff --git a/lib/functions/logging/logging.sh b/lib/functions/logging/logging.sh index 5808401875..6bb073796a 100644 --- a/lib/functions/logging/logging.sh +++ b/lib/functions/logging/logging.sh @@ -18,9 +18,9 @@ function logging_init() { declare -g bright_red_color="\e[1;31m" red_color="\e[0;31m" declare -g bright_blue_color="\e[1;34m" blue_color="\e[0;34m" declare -g bright_magenta_color="\e[1;35m" magenta_color="\e[0;35m" - declare -g -i logging_section_counter=0 # -i: integer - declare -g tool_color="${gray_color}" # default to gray... (should be ok on terminals, @TODO: I've seen it too dark on a few random screenshots though - if [[ "${CI}" == "true" ]]; then # ... but that is too dark for Github Actions + declare -g -i logging_section_counter=0 # -i: integer + declare -g tool_color="${gray_color}" # default to gray... (should be ok on terminals, @TODO: I've seen it too dark on a few random screenshots though + if [[ "${CI}" == "true" ]]; then # ... but that is too dark for Github Actions declare -g tool_color="${normal_color}" declare -g SHOW_LOG="${SHOW_LOG:-"yes"}" # if in CI/GHA, default to showing log fi @@ -112,11 +112,11 @@ function print_current_asset_log_base_file() { } function check_and_close_fd_13() { - sync # let the disk catch up + wait_for_disk_sync "before closing fd 13" # let the disk catch up if [[ -e /proc/self/fd/13 ]]; then display_alert "Closing fd 13" "log still open" "cleanup" # no reason to be alarmed exec 13>&- || true # close the file descriptor, lest sed keeps running forever. - sync # make sure the file is written to disk + wait_for_disk_sync "after fd 13 closure" # make sure the file is written to disk else display_alert "Not closing fd 13" "log already closed" "cleanup" fi @@ -146,7 +146,7 @@ function check_and_close_fd_13() { else display_alert "global_tee_pid already dead after descendants killed" "${global_tee_pid}" "cleanup" fi - sync # wait for the disk to catch up + wait_for_disk_sync "after killing tee pid" # wait for the disk to catch up else display_alert "Not killing global_tee_pid" "${global_tee_pid} not running" "cleanup" fi diff --git a/lib/functions/main/default-build.sh b/lib/functions/main/default-build.sh index 9a6045973c..20b64f414b 100644 --- a/lib/functions/main/default-build.sh +++ b/lib/functions/main/default-build.sh @@ -1,6 +1,8 @@ # This does NOT run under the logging manager. We should invoke the do_with_logging wrapper for # strategic parts of this. Attention: rootfs does it's own logging, so just let that be. function main_default_build_single() { + wait_for_disk_sync "before starting build" # fsync, wait for disk to sync, and then continue. alert user if takes too long. + # Check that WORKDIR_BASE_TMP exists; if not, create it. if [[ ! -d "${WORKDIR_BASE_TMP}" ]]; then mkdir -p "${WORKDIR_BASE_TMP}" @@ -21,7 +23,8 @@ function main_default_build_single() { declare -g -x CCACHE_TEMPDIR="${WORKDIR}/ccache_tmp" # Export CCACHE_TEMPDIR, under Workdir, which is hopefully under tmpfs. Thanks @the-Going for this. declare -g -x XDG_RUNTIME_DIR="${WORKDIR}/xdg_tmp" # XDG_RUNTIME_DIR is used by the likes of systemd/freedesktop centric apps. - declare start=$(date +%s) + declare start + start=$(date +%s) ### Write config summary LOG_SECTION="config_summary" do_with_logging write_config_summary_output_file @@ -188,10 +191,9 @@ function main_default_build_single() { display_alert "Target directory" "${DEB_STORAGE}/" "info" display_alert "File name" "${CHOSEN_KERNEL}_${REVISION}_${ARCH}.deb" "info" fi - + # At this point, the WORKDIR should be clean. Add debug info. debug_tmpfs_show_usage "AFTER ALL PKGS BUILT" - # build rootfs, if not only kernel. if [[ $KERNEL_ONLY != yes ]]; then diff --git a/lib/functions/rootfs/create-cache.sh b/lib/functions/rootfs/create-cache.sh index 28e244035b..0ce5c2074f 100644 --- a/lib/functions/rootfs/create-cache.sh +++ b/lib/functions/rootfs/create-cache.sh @@ -297,7 +297,7 @@ function create_new_rootfs_cache() { # stage: make rootfs cache archive display_alert "Ending debootstrap process and preparing cache" "$RELEASE" "info" - sync + wait_for_disk_sync "before tar rootfs" # the only reason to unmount here is compression progress display based on rootfs size calculation # also, it doesn't make sense to copy stuff like "/dev" etc. those are filtered below anyway