diff --git a/lib/functions/host/host-utils.sh b/lib/functions/host/host-utils.sh index 56e41ae25e..0a5e3ff013 100644 --- a/lib/functions/host/host-utils.sh +++ b/lib/functions/host/host-utils.sh @@ -272,3 +272,24 @@ function trap_handler_reset_output_owner() { display_alert "Resetting tmp directory owner" "${SRC}/.tmp" "debug" reset_uid_owner_non_recursive "${SRC}/.tmp" } + +# Recursive function to find all descendant processes of a given PID. Writes to stdout. +function list_descendants_of_pid() { + local children + children=$(ps -o "pid=" --ppid "$1" | xargs echo -n) + + for pid in $children; do + list_descendants_of_pid "$pid" + done + + echo -n "${children} " +} + +function get_descendants_of_pid_array() { + local descendants + descendants="$(list_descendants_of_pid "$1")" + display_alert "Descendants of PID $1: ${descendants}" "string - get_descendants_of_pid_array" "debug" + # shellcheck disable=SC2206 # lets expand! + descendants_of_pid_array_result=(${descendants}) + display_alert "Descendants of PID $1: ${descendants_of_pid_array_result[*]}" "array = get_descendants_of_pid_array" "debug" +} diff --git a/lib/functions/logging/logging.sh b/lib/functions/logging/logging.sh index bceaf83015..480be55889 100644 --- a/lib/functions/logging/logging.sh +++ b/lib/functions/logging/logging.sh @@ -30,17 +30,18 @@ function logging_init() { else declare wsl2_type wsl2_detect_type - if [[ "${wsl2_type}" != "none" ]]; then - local windows_emoji="💲" # 💰 or 💲 for M$ -- get it? + if [[ "${wsl2_type}" != "none" ]]; then + local windows_emoji="💲" # 💰 or 💲 for M$ -- get it? declare -g left_marker="${gray_color}[${windows_emoji}|${normal_color}" fi fi } function logging_error_show_log() { + [[ "${SHOW_LOG}" == "yes" ]] && return 0 # Do nothing if we're already showing the log on stderr. + # Do NOT unset CURRENT_LOGFILE here... it's used by traps. + local logfile_to_show="${CURRENT_LOGFILE}" # store current logfile in separate var - unset CURRENT_LOGFILE # stop logging, otherwise crazy - [[ "${SHOW_LOG}" == "yes" ]] && return 0 # Do nothing if we're already showing the log on stderr. if [[ "${CI}" == "true" ]]; then # Close opened CI group, even if there is none; errors would be buried otherwise. echo "::endgroup::" fi @@ -48,14 +49,14 @@ function logging_error_show_log() { if [[ -f "${logfile_to_show}" ]]; then local prefix_sed_contents="${normal_color}${left_marker}${padding}👉${padding}${right_marker} " local prefix_sed_cmd="s/^/${prefix_sed_contents}/;" - display_alert " 👇👇👇 Showing logfile below 👇👇👇" "${logfile_to_show}" "err" + CURRENT_LOGFILE="" display_alert " 👇👇👇 Showing logfile below 👇👇👇" "${logfile_to_show}" "err" # shellcheck disable=SC2002 # my cat is great. thank you, shellcheck. cat "${logfile_to_show}" | grep -v -e "^$" | sed -e "${prefix_sed_cmd}" 1>&2 # write it to stderr!! - display_alert " 👆👆👆 Showing logfile above 👆👆👆" "${logfile_to_show}" "err" + CURRENT_LOGFILE="" display_alert " 👆👆👆 Showing logfile above 👆👆👆" "${logfile_to_show}" "err" else - display_alert "✋ Error log not available at this stage of build" "check messages above" "debug" + CURRENT_LOGFILE="" display_alert "✋ Error log not available at this stage of build" "check messages above" "debug" fi return 0 } @@ -108,25 +109,44 @@ function print_current_asset_log_base_file() { } function check_and_close_fd_13() { + sync # let the disk catch up if [[ -e /proc/self/fd/13 ]]; then - sync # let the disk catch up display_alert "Closing fd 13" "log still open" "cleanup" # no reason to be alarmed exec 13>&- || true # close the file descriptor, lest sed keeps running forever. sync # make sure the file is written to disk - sleep 1 # give it a second to die. else display_alert "Not closing fd 13" "log already closed" "cleanup" fi + # "tee_pid" is a misnomer: it in reality is a shell pid with tee and sed children. display_alert "Checking if global_tee_pid is set and running" "global_tee_pid: ${global_tee_pid}" "cleanup" if [[ -n "${global_tee_pid}" && ${global_tee_pid} -gt 1 ]] && ps -p "${global_tee_pid}" > /dev/null; then - display_alert "Killing global_tee_pid" "${global_tee_pid}" "cleanup" - kill "${global_tee_pid}" && wait "${global_tee_pid}" + display_alert "Killing global_tee_pid's children" "global_tee_pid: ${global_tee_pid}" "cleanup" + + declare -a descendants_of_pid_array_result=() + get_descendants_of_pid_array "${global_tee_pid}" || true + # loop over descendants_of_pid_array_result and kill'em'all + for descendant_pid in "${descendants_of_pid_array_result[@]}"; do + # check if PID is still alive before killing; it might have died already due to death of parent. + if ps -p "${descendant_pid}" > /dev/null; then + display_alert "Killing descendant pid" "${descendant_pid}" "cleanup" + { kill "${descendant_pid}" && wait "${global_tee_pid}"; } || true + else + display_alert "Descendant PID already dead" "${descendant_pid}" "cleanup" + fi + done + + # If the global_tee_pid is still alive, kill it. + if ps -p "${global_tee_pid}" > /dev/null; then + display_alert "Killing global_tee_pid" "${global_tee_pid}" "cleanup" + kill "${global_tee_pid}" && wait "${global_tee_pid}" + else + display_alert "global_tee_pid already dead after descendants killed" "${global_tee_pid}" "cleanup" + fi sync # wait for the disk to catch up else display_alert "Not killing global_tee_pid" "${global_tee_pid} not running" "cleanup" fi - } function discard_logs_tmp_dir() { diff --git a/lib/functions/logging/runners.sh b/lib/functions/logging/runners.sh index 925af27b0a..e03f3b1d3c 100644 --- a/lib/functions/logging/runners.sh +++ b/lib/functions/logging/runners.sh @@ -211,9 +211,8 @@ function run_host_command_logged_raw() { if [[ -f "${CURRENT_LOGFILE}" ]]; then # echo -e "\033[91mBright Red\033[0m" echo -e "\033[91m-->--> command failed with error code ${exit_code} after $((SECONDS - seconds_start)) seconds${normal_color:-}" >> "${CURRENT_LOGFILE}" fi - # This is very specific; remove CURRENT_LOGFILE's value when calling display_alert here otherwise logged twice. - CURRENT_LOGFILE="" display_alert "cmd exited with code ${exit_code}" "$*" "wrn" - CURRENT_LOGFILE="" display_alert "stacktrace for failed command" "$(show_caller_full)" "wrn" # @TODO logfile might be useless without this + display_alert "cmd exited with code ${exit_code}" "$*" "wrn" + display_alert "stacktrace for failed command" "$(show_caller_full)" "wrn" # Obtain extra info about error, eg, log files produced, extra messages set by caller, etc. logging_enrich_run_command_error_info diff --git a/lib/functions/logging/section-logging.sh b/lib/functions/logging/section-logging.sh index 25bcf2118f..ea8030a014 100644 --- a/lib/functions/logging/section-logging.sh +++ b/lib/functions/logging/section-logging.sh @@ -1,11 +1,11 @@ function start_logging_section() { - export logging_section_counter=$((logging_section_counter + 1)) # increment counter, used in filename - export CURRENT_LOGGING_COUNTER + declare -g logging_section_counter=$((logging_section_counter + 1)) # increment counter, used in filename + declare -g CURRENT_LOGGING_COUNTER CURRENT_LOGGING_COUNTER="$(printf "%03d" "$logging_section_counter")" - export CURRENT_LOGGING_SECTION=${LOG_SECTION:-early} # default to "early", should be overwritten soon enough - export CURRENT_LOGGING_SECTION_START=${SECONDS} - export CURRENT_LOGGING_DIR="${LOGDIR}" # set in cli-entrypoint.sh - export CURRENT_LOGFILE="${CURRENT_LOGGING_DIR}/${CURRENT_LOGGING_COUNTER}.000.${CURRENT_LOGGING_SECTION}.log" + declare -g CURRENT_LOGGING_SECTION=${LOG_SECTION:-early} # default to "early", should be overwritten soon enough + declare -g CURRENT_LOGGING_SECTION_START=${SECONDS} + declare -g CURRENT_LOGGING_DIR="${LOGDIR}" # set in cli-entrypoint.sh + declare -g CURRENT_LOGFILE="${CURRENT_LOGGING_DIR}/${CURRENT_LOGGING_COUNTER}.000.${CURRENT_LOGGING_SECTION}.log" mkdir -p "${CURRENT_LOGGING_DIR}" touch "${CURRENT_LOGFILE}" # Touch it, make sure it's writable. @@ -44,14 +44,14 @@ function do_with_logging() { # So whatever is being called, should prevent rogue stuff writing to stderr. # this is mostly handled by redirecting stderr to stdout: 2>&1 + # global var to store the pid of spawned logging process. + declare -g -i global_tee_pid=0 + if [[ "${SHOW_LOG}" == "yes" ]]; then local prefix_sed_contents prefix_sed_contents="$(logging_echo_prefix_for_pv "tool") $(echo -n -e "${tool_color}")" # spaces are significant local prefix_sed_cmd="s/^/${prefix_sed_contents}/;" - # global var to store the pid of spawned logging process. - declare -g -i global_tee_pid=0 - # Create a 13rd file descriptor sending it to sed. https://unix.stackexchange.com/questions/174849/redirecting-stdout-to-terminal-and-file-without-using-a-pipe # Also terrible: don't hold a reference to cwd by changing to SRC always # There's handling of this fd 13 in check_and_close_fd_13() diff --git a/lib/functions/logging/trap-logging.sh b/lib/functions/logging/trap-logging.sh index 8c4d7bcf04..8be303105d 100644 --- a/lib/functions/logging/trap-logging.sh +++ b/lib/functions/logging/trap-logging.sh @@ -19,9 +19,6 @@ function trap_handler_cleanup_logging() { local target_path="${DEST}/logs" mkdir_recursive_and_set_uid_owner "${target_path}" # @TODO: this might be full of logs and is slow - # Check if fd 13 is still open; close it and wait for tee to die. This is done again in discard_logs_tmp_dir() - check_and_close_fd_13 - # Before writing new logfile, compress and move existing ones to archive folder. # - Unless running under CI. # - Also not if signalled via SKIP_LOG_ARCHIVE=yes @@ -53,6 +50,16 @@ function trap_handler_cleanup_logging() { display_alert "Not archiving old logs." "CI=${CI:-false}, SKIP_LOG_ARCHIVE=${SKIP_LOG_ARCHIVE:-no}" "debug" fi + ## Here -- we need to definitely stop logging, cos we're gonna consolidate and delete the logs. + display_alert "End of logging" "STOP LOGGING: CURRENT_LOGFILE: ${CURRENT_LOGFILE}" "debug" + + # Stop logging to file... + CURRENT_LOGFILE="" + unset CURRENT_LOGFILE + + # Check if fd 13 is still open; close it and wait for tee to die. This is done again in discard_logs_tmp_dir() + check_and_close_fd_13 + # Export Markdown assets. local target_file="${target_path}/armbian-${ARMBIAN_LOG_CLI_ID}-${ARMBIAN_BUILD_UUID}.md" export_markdown_logs diff --git a/lib/functions/logging/traps.sh b/lib/functions/logging/traps.sh index 9e68c1875a..e32b831f70 100644 --- a/lib/functions/logging/traps.sh +++ b/lib/functions/logging/traps.sh @@ -78,6 +78,9 @@ function main_trap_handler() { # Run the cleanup handlers, always. run_cleanup_handlers || true ;; + *) + display_alert "main_trap_handler" "Unknown trap type '${trap_type}'" "err" + ;; esac } @@ -163,8 +166,7 @@ function exit_with_error() { #exec {FD}> /var/lock/armbian-debootstrap-losetup #flock -u "${FD}" - # let's try early to close tee/sed and fd 13 which might be opened if this happened in a logging section - check_and_close_fd_13 + # do NOT close the fd 13 here, otherwise the error will not be logged to logfile... exit 43 }