#!/bin/bash

# Copyright (C) 2014-2017 Neil Brown
#
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    Author: Neil Brown
#    Email:

# This script should be run periodically to automatically
# perform a 'check' on any md arrays.
#
# It supports a 'time budget' such that any incomplete 'check'
# will be checkpointed when that time has expired.
# A subsequent invocation will allow the 'check' to continue.
#
# Arrays are identified by UUID and the 'sync_completed' value is stored
# in /var/lib/mdcheck/MD_UUID_$UUID. When the script has finished checking
# an array, it creates a file /var/lib/mdcheck/Checked_$UUID.
#
# Modes are:
#   --continue   Don't start new checks, only continue previously started
#                ones for which MD_UUID_$UUID already exists.
#   --start      Like --continue, but also start new checks for arrays
#                for which the file Checked_$UUID does not exist.
#   --restart    Enable restarting checks that were previously finished
#                (remove all Checked_* files) and exit immediately.
#                This mode doesn't start any array checks.
#
# With none of these options given, a new check from 0 will be started on
# all arrays, even those that hadn't finished before.
#
# Options are:
#   --duration   This is passed to "date --date=$duration" to find out
#                when to finish

# If the script is run from systemd, simply write to the journal on stderr.
# Otherwise, use logger.
log() {
    # Emit one message.  When INVOCATION_ID is set, write it to stderr;
    # otherwise pass it to logger(1) with an "mdcheck: " prefix.
    if [[ "$INVOCATION_ID" ]]; then
        echo "$@" >&2
    else
        logger -p daemon.info "mdcheck: $*"
    fi
}

# devname SYSFS_DIR
#
# Print "/dev/<DEVNAME>" (no trailing newline) for the block device whose
# sysfs directory is SYSFS_DIR, taking DEVNAME from SYSFS_DIR/uevent.
# Returns 1, after logging, when DEVNAME cannot be read or /dev/<DEVNAME>
# is not a block device.
devname() {
    local dev= key value

    if [[ -f "$1/uevent" ]]; then
        # The file is expected to hold KEY=VALUE lines; pick out DEVNAME by
        # parsing instead of eval'ing the file contents.  The last DEVNAME
        # line wins.
        while IFS='=' read -r key value || [[ -n "$key" ]]; do
            if [[ "$key" == DEVNAME ]]; then
                dev=$value
            fi
        done < "$1/uevent"
    fi

    [[ "$dev" && -b "/dev/$dev" ]] || {
        log "failed to read DEVNAME from $1"
        return 1
    }
    echo -n "/dev/$dev"
}

# Print the command-line help on stderr.
usage() {
    echo >&2 'Usage: mdcheck [mode] [options]

Mode:
   --help         print this help
   --continue     only continue previously started checks
   --start        continue, and start check on arrays that have not been checked
   --restart      re-enable checking previously finished arrays
   <EMPTY>        start check from position 0 on all arrays

Options:
   --duration <time-offset>
                  set the amount of time to run the checks for
                  (<time-offset> must be understood by "date --date")'
}

# set_mode MODE_OPTION
#
# Record the selected mode in the global MODE.  Selecting a second mode is
# an error: print a message and the usage text, then exit 1.
set_mode() {
    [[ "$MODE" ]] && {
        echo >&2 'ERROR: only one of --continue, --start, or --restart may be set
'
        usage
        exit 1
    }
    MODE=$1
}

args=$(getopt -o "" -l help,continue,start,restart,duration: -n mdcheck -- "$@")
rv=$?
if [[ $rv -ne 0 ]]; then
    usage
    exit $rv
fi

# getopt emits shell-quoted words; the expansion must be quoted so that the
# quoting is interpreted by eval only, not word-split or globbed beforehand.
eval set -- "$args"

MODE=
endtime=
while [[ "$1" != "--" ]]; do
    case $1 in
    --help)
        usage
        exit 0
        ;;
    --continue | --start | --restart)
        set_mode "$1"
        ;;
    --duration)
        shift
        dur=$1
        # Refuse an unparsable duration instead of silently running as if
        # no --duration had been given.
        endtime=$(date --date "$dur" "+%s") || {
            echo >&2 "ERROR: cannot parse --duration '$dur'"
            usage
            exit 1
        }
        ;;
    esac
    shift
done
shift

# No positional arguments are accepted.
[[ $# -eq 0 ]] || {
    usage
    exit 1
}

case $MODE in
--restart)
    log 'Re-enabling array checks for previously finished arrays'
    rm -f /var/lib/mdcheck/Checked_*
    exit 0
    ;;
"")
    log 'Starting new check from 0 on all MD RAID arrays'
    rm -f /var/lib/mdcheck/Checked_* /var/lib/mdcheck/MD_UUID_*
    ;;
esac

# We need a temp file occasionally...
tmp=/var/lib/mdcheck/.md-check-$$

# Per-array state for the checks started by this run, indexed 1..cnt:
#   md_fl[i]   state file (emptied once array i needs no further handling)
#   md_sys[i]  sysfs directory of the array
#   md_dev[i]  device node, used in log messages
cnt=0
md_fl=()
md_sys=()
md_dev=()

# EXIT handler: pause every check started by this run that is still running,
# saving the array's sync_min value to its state file, then remove the temp
# file.  State files of arrays that are no longer in 'check' are removed.
cleanup() {
    local i fl sys dev

    for ((i = 1; i <= cnt; i++)); do
        fl=${md_fl[i]}
        sys=${md_sys[i]}
        dev=${md_dev[i]}
        [[ -n "$fl" ]] || continue

        if [[ "$(cat "$sys/md/sync_action")" != 'check' ]]; then
            md_fl[i]=
            rm -f "$fl"
            continue
        fi
        echo idle > "$sys/md/sync_action"
        cat "$sys/md/sync_min" > "$fl"
        log "pause checking $dev at $(cat "$fl")"
    done
    rm -f "$tmp"
}

# Turn INT/QUIT/TERM into a normal exit so that the EXIT trap runs cleanup.
trap 'exit 129' INT QUIT TERM
trap 'cleanup' EXIT

# firstly, clean out really old state files
mkdir -p /var/lib/mdcheck
find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \;

# Now look at each md device.
for sync_act in /sys/block/*/md/sync_action; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$sync_act" ]] || continue

    if [[ "$(cat "$sync_act")" != 'idle' ]]; then
        # This array is busy
        continue
    fi

    sys=${sync_act%/md/*}
    dev=$(devname "$sys") || continue

    # Reset first so a value from a previous array can never be reused.
    MD_UUID=
    BINDIR/mdadm --detail --export "$dev" | grep '^MD_UUID=' > "$tmp" || continue
    # shellcheck source=/dev/null
    source "$tmp"
    [[ "$MD_UUID" ]] || continue

    fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
    checked="${fl/MD_UUID_/Checked_}"
    if [[ -f "$fl" ]]; then
        # A check was started earlier: resume from the saved position.
        [[ ! -f "$checked" ]] || {
            log "WARNING: $checked exists, continuing anyway"
        }
        start=$(cat "$fl")
        # A state file that does not hold a plain number cannot be written
        # to sync_min; fall back to the beginning.
        [[ "$start" =~ ^[0-9]+$ ]] || start=0
    elif [[ ! -f "$checked" && "$MODE" != --continue ]]; then
        start=0
    else
        # nothing to do
        continue
    fi

    cnt=$((cnt + 1))
    md_fl[cnt]=$fl
    md_sys[cnt]=$sys
    md_dev[cnt]=$dev

    echo "$start" > "$fl"
    echo "$start" > "$sys/md/sync_min"
    echo check > "$sys/md/sync_action"
    log "checking $dev from $start"
done

if [[ -z "$endtime" ]]; then
    # No time budget: the checks just started are left running.  Disarm the
    # EXIT trap first, otherwise cleanup would immediately set them back to
    # 'idle'; only the temp file needs removing.
    trap - EXIT
    rm -f "$tmp"
    exit 0
fi

while [[ "$(date +%s)" -lt "$endtime" ]]; do
    any=
    for ((i = 1; i <= cnt; i++)); do
        fl=${md_fl[i]}
        sys=${md_sys[i]}
        dev=${md_dev[i]}
        [[ -n "$fl" ]] || continue

        if [[ "$(cat "$sys/md/sync_action")" != 'check' ]]; then
            log "finished checking $dev"
            md_fl[i]=
            rm -f "$fl"
            touch "${fl/MD_UUID_/Checked_}"
            continue
        fi
        # Checkpoint: keep only the first word of sync_completed.
        read -r a rest < "$sys/md/sync_completed"
        echo "$a" > "$fl"
        any=yes
    done

    # mdcheck_continue.timer is started by mdcheck_start.timer.
    # When the check action can be finished in mdcheck_start.service,
    # it doesn't need mdcheck_continue anymore.
    if [[ -z "$any" ]]; then
        systemctl stop mdcheck_continue.timer
        exit 0
    fi

    # Sleep in the background and wait for it: the wait builtin is
    # interrupted by a trapped signal, so the traps above run promptly.
    sleep 220 &
    wait "$!"
done