mirror of
https://github.com/morgan9e/mirror
synced 2026-04-14 00:14:15 +09:00
1762 lines
57 KiB
Bash
1762 lines
57 KiB
Bash
#!/bin/zsh
# Simple script to grab the file list from Fedora and rsync everything that's
# changed since the last time we pulled.
#
# Originally written by Jason Tibbitts <tibbs@math.uh.edu> in 2016.
# Donated to the public domain.  If you require a statement of license, please
# consider this work to be licensed as "CC0 Universal", any version you choose.

# Variables in upper case are user configurables.

# ZSHISM: turn on empty globs (NULL_GLOB, single-letter option -G) and
# globbing of dots (GLOB_DOTS, single-letter option -4).
setopt NULL_GLOB GLOB_DOTS
export LANG=C
# ZSHISM? Use a newline as the only field separator.
IFS=$'\n'

# Declare globals
typeset -A tcounts # Transfer counts

# Record the start of the run; do this very early.
starttime=$(date +%s)

# Debug output, selected by the VERBOSE level:
#   Level 0: nothing except errors.
#   Level 1: as level 0 unless there is a transfer, and then basic info and
#            times.  Output goes to a file which may be spit out at the end
#            of the run.
#   Level >= 2: always some info, output to the terminal.
db1 () {
    # Emit a level-1 message made of all arguments.
    # Reads the globals VERBOSE and, at level 1 only, outfile.

    # Below level 1: output nothing.
    (( VERBOSE >= 1 )) || return 0

    if (( VERBOSE >= 2 )); then
        echo $*
    else
        echo $* >> $outfile
    fi
}

# printf-style front end for db1: the first argument is the format string.
db1f () {
    db1 $(printf $*)
}

# Level-gated output helpers.  Each one returns nonzero without printing
# anything when VERBOSE is below its threshold.

# Print the arguments at VERBOSE >= 2.
db2 () {
    (( VERBOSE >= 2 )) || return
    echo $*
}

# printf the arguments (format first) at VERBOSE >= 2.
db2f () {
    (( VERBOSE >= 2 )) || return
    printf $*
}

# Print the arguments prefixed with '>>' at VERBOSE >= 3.
db3 () {
    (( VERBOSE >= 3 )) || return
    echo '>>' $*
}

# Print the arguments prefixed with '>>>>' at VERBOSE >= 4.
db4 () {
    (( VERBOSE >= 4 )) || return
    echo '>>>>' $*
}

# Print a separator rule at VERBOSE >= 2.
sep () {
    (( VERBOSE >= 2 )) || return
    echo '============================================================'
}

logwrite () {
    # Send one log record (all arguments) to the right place:
    #   - file descriptor 3 when LOGJOURNAL is non-empty (the descriptor is
    #     expected to have been opened by the caller), otherwise
    #   - a timestamped line appended to LOGFILE, if LOGFILE is set and
    #     writable.
    # When neither applies the record is silently dropped.
    if [[ -n $LOGJOURNAL ]]; then
        echo $* >&3
        return
    fi

    [[ -n $LOGFILE && -w $LOGFILE ]] || return 0
    echo $(date '+%b %d %T') $* >> $LOGFILE
}

logit () {
    # Basic logging function.
    #
    # The first argument is a one-character item class; the remaining
    # arguments form the message.  Classes 'E' and 'e' get an 'ERR:' / 'Err:'
    # tag put in front of the message.
    #
    # Every record is appended to $sessionlog.  It is also handed to logwrite
    # when the class matches LOGITEMS (or LOGITEMS contains '@'), and echoed
    # through db3 when VERBOSE >= 3.
    local class=$1
    shift
    local tag=''
    case $class in
        E) tag='ERR:' ;;
        e) tag='Err:' ;;
    esac

    echo "$class $tag $@" >> $sessionlog

    if [[ $LOGITEMS =~ $class || $LOGITEMS =~ '@' ]]; then
        logwrite $tag $*
    fi
    if (( VERBOSE >= 3 )); then
        db3 Log: $tag $*
    fi

    # XXX Consider sending errors to stdout
    #if [[ -n $tag ]]; then
    #    (>&2 echo $*)
    #fi
}

retcheck () {
    # Report a failed command.
    #
    # Arguments:
    #   1) the return value to check
    #   2) optional program name, used as a prefix in the messages
    #
    # Does nothing when the return value is zero; otherwise the failure is
    # reported through db1 and logged as an error.  ZSHISM: $functrace[1] is
    # used to name the location this function was called from.
    local ret=$1
    local prg="${2:+$2 }"

    [[ $ret -ne 0 ]] || return 0

    db1 "${prg}failed at $functrace[1]: with return $ret"
    logit E "${prg}call failed at $functrace[1]: with return $ret"
}

lock () {
    # Take an exclusive, non-blocking flock on the file named by $1.
    #
    # The file is opened for appending on file descriptor 9 and the descriptor
    # is left open in the current shell.
    # Returns 0 if the lock was obtained and 1 otherwise.
    #
    # The descriptor is opened with a plain redirection instead of going
    # through eval, so a path containing whitespace or shell metacharacters is
    # always treated as a file name and never re-parsed as shell code.
    exec 9>>"$1" || return 1
    flock -n 9 || return 1
    return 0
}

save_state () {
    # Record the start time of this run in $TIMEFILE as LASTTIME=<epoch>,
    # keeping the previous file as $TIMEFILE.prev.  Nothing is written when
    # skiptimestamp is set.  Exits the script with status 1 if the file cannot
    # be written.
    #
    # Doing an mv here actually undoes the locking.  Could use cp instead.
    # Currently the unlocking is a good thing because it allows the checkin to
    # proceed without the next run waiting.  But this should be audited.
    if [[ -n $skiptimestamp ]]; then
        db2 Skipping timestamp save.
        return
    fi

    db2 Saving mirror time to $TIMEFILE
    [[ -e $TIMEFILE ]] && mv $TIMEFILE $TIMEFILE.prev

    if ! echo LASTTIME=$starttime > $TIMEFILE; then
        (>&2 echo Problem saving timestamp file $TIMEFILE)
        logit E "Failed to update timestamp file"
        exit 1
    fi
}

append_state () {
|
|
# Think about how to save extra state in the timestamp file or some
|
|
# associated file. Should we even do this?
|
|
# Should this be saved to a separate status file instead?
|
|
|
|
|
|
# Cannot rewrite the file or else the locking breaks. Updating it should
|
|
# be OK.
|
|
# Save things in a format that can be sourced (VAR=value).
|
|
# Repeated uses (VAR=value2) are OK and overwrite the previous value when the file is sourced.
|
|
|
|
# What would use this? A separate status program or some other monitor?
|
|
#
|
|
# Save data about the current transfer:
|
|
# The current point in the process (
|
|
# Counts
|
|
# The current tempdir
|
|
# Important transfer list files
|
|
# The current rsync output file (for tailing and counting) since this is random.
|
|
|
|
}
|
|
|
|
cat_or_email () {
    # Output the contents of the file named by $1.
    #
    # The file is mailed to $EMAILDEST (subject $EMAILSUBJECT) when stdin is
    # not a terminal and EMAILDEST is set; in every other case it is written
    # to stdout.
    local file=$1

    if [[ -t 0 || -z "$EMAILDEST" ]]; then
        cat $file
    else
        mail -E -s "$EMAILSUBJECT" "$EMAILDEST" < $file
    fi
}

finish () {
    # Finish up and exit the script.
    #
    # Takes two optional arguments:
    #   1) the value to exit with; a value above zero also causes the
    #      collected output file to be dumped
    #   2) if non-empty, the output is dumped regardless of the exit value
    #
    # "Dumped" means handed to cat_or_email, which prints it or, if email is
    # configured and we are not running interactively, mails it.
    local rc=$1
    local always=$2

    db1 "========================="
    db1 "Mirror finished: $(date) ($rc)"
    logit R "Run end; exiting $rc."

    if [[ -n $always || $rc -gt 0 ]]; then
        cat_or_email $outfile
    fi
    exit $rc
}

filter () {
    # Client-side file list filtering.
    #
    # When FILTEREXP is set, every line of the file named by $1 that matches
    # it (as an extended regular expression) is deleted in place; sed keeps
    # the unfiltered original with a "-prefilter" suffix.  Without FILTEREXP
    # the file is left alone.
    [[ -z $FILTEREXP ]] && return

    db4 filtering $1
    sed --in-place=-prefilter --regexp-extended --expression="\,$FILTEREXP,d" $1
}

hr_b () {
    # Produce a human-readable byte count on stdout.
    #
    # Takes a byte count; a missing or empty argument is treated as 0.
    # Values below 1024 are printed unchanged with a "B" suffix.  Larger
    # values are divided by 1024 until they drop below 1024 and are printed
    # with two decimals and the matching unit (KB ... EB).  EB is the largest
    # unit, so anything beyond it is printed as a large number of EB instead
    # of being scaled one step too far.
    typeset -F2 out          # ZSHISM: float, formatted with two decimals
    local unit               # keep the loop variable out of the global scope
    local bytes=${1:-0}

    if [[ $bytes -lt 1024 ]]; then
        echo ${bytes}B
        return
    fi

    out=$(( bytes / 1024. ))
    for unit in KB MB GB TB PB EB; do
        (( $out < 1024 )) && break
        # Already at the last unit: stop rather than dividing again.
        [[ $unit == EB ]] && break
        out=$(( out / 1024. ))
    done

    echo ${out}${unit}
}

hr_s () {
    # Produce a human-readable count of seconds on stdout.
    #
    # Values below 60 are printed unchanged with an "s" suffix; otherwise the
    # value is converted to minutes ("m") or, from 60 minutes up, to hours
    # ("h"), each printed with two decimals.
    typeset -F2 out=$1       # ZSHISM: float, formatted with two decimals

    if [[ $1 -lt 60 ]]; then
        echo ${1}s
    else
        out=$(( $1 / 60. ))
        if [[ $out -lt 60 ]]; then
            echo ${out}m
        else
            out=$(( $out / 60. ))
            echo ${out}h
        fi
    fi
}

parse_rsync_stats () {
    # Parse some of the statistics that rsync gives us.
    #
    # Takes an rsync output log (stdout) as an argument.
    # No return value, but sets several global variables:
    #   rsfilestransferred      (kept exactly as printed, thousands
    #                            separators included)
    #   rsfilesize              (commas removed)
    #   rstotalbytesreceived    (commas removed)
    #   rstotalbytessent        (commas removed)
    #   rsfilelistgentime
    #   rsfilelisttransfertime
    #   rstransferspeed         (commas removed)
    #   rsspeedup
    # A variable whose line is not present in the given log ends up empty.
    #
    # The part of the rsync --stats output that is parsed looks like this:
    #
    #   Number of files: 11,140 (reg: 9,344, dir: 1,796)
    #   Number of created files: 1,329 (reg: 1,327, dir: 2)
    #   Number of deleted files: 0
    #   Number of regular files transferred: 1,182
    #   Total file size: 165,405,056,029 bytes
    #   Total transferred file size: 3,615,178,247 bytes
    #   Literal data: 3,229,943,512 bytes
    #   Matched data: 385,234,735 bytes
    #   File list size: 468,791
    #   File list generation time: 0.217 seconds
    #   File list transfer time: 0.000 seconds
    #   Total bytes sent: 1,249,286
    #   Total bytes received: 3,231,373,895
    #
    #   sent 1,249,286 bytes  received 3,231,373,895 bytes  81,838,561.54 bytes/sec
    #   total size is 165,405,056,029  speedup is 51.17
    #
    # (rsync may print per-process heap statistics before this block; they
    # are ignored.)  Only the first matching line of each kind is used.  The
    # patterns for the two summary lines accept any number of spaces between
    # the columns.
    local log=$1

    unset rsfilestransferred rsfilesize rstotalbytesreceived rstotalbytessent \
        rsfilelistgentime rsfilelisttransfertime rstransferspeed rsspeedup

    # Number of regular files transferred: 1
    rsfilestransferred=$(awk '/^Number of regular files transferred:/ {print $6; exit}' "$log")

    # Total file size: 10,174,746 bytes
    rsfilesize=$(awk '/^Total file size: .* bytes/ {gsub(/,/, "", $4); print $4; exit}' "$log")

    # Total bytes received: 2,425,728
    rstotalbytesreceived=$(awk '/^Total bytes received: / {gsub(/,/, "", $4); print $4; exit}' "$log")

    # Total bytes sent: 384,602
    rstotalbytessent=$(awk '/^Total bytes sent: / {gsub(/,/, "", $4); print $4; exit}' "$log")

    # File list generation time: 0.308 seconds
    rsfilelistgentime=$(awk '/^File list generation time: .* seconds/ {print $5; exit}' "$log")

    # File list transfer time: 0.000 seconds
    rsfilelisttransfertime=$(awk '/^File list transfer time: .* seconds/ {print $5; exit}' "$log")

    # sent 71 bytes  received 2,425,728 bytes  156,503.16 bytes/sec
    rstransferspeed=$(awk '/^sent .* bytes +received .* bytes +.* bytes\/sec$/ {gsub(/,/, "", $7); print $7; exit}' "$log")

    # total size is 10,174,746  speedup is 4.19
    rsspeedup=$(awk '/^total size is .* +speedup is .*$/ {print $7; exit}' "$log")
}

do_rsync () {
    # The main function to do a transfer.
    #
    # Accepts four arguments:
    #   1) The source repository
    #   2) The destination directory
    #   3) The list of files (passed to rsync as --files-from)
    #   4) The name of an array containing additional rsync options
    #
    # rsync's stdout and stderr are appended to newly created
    # rsync-out-*.log and rsync-err-*.log files in the current directory.
    #
    # This may sleep and retry when receiving specific errors.
    # Returns the rsync return code (where 0 indicates full success, but other
    # values may indicate a finished copy).  After a fully successful run the
    # statistics globals are filled in by parse_rsync_stats.
    local src=$1 dest=$2 files=$3 opts=$4
    local attempt=0
    local log=$(mktemp -p . rsync-out-XXXXXX.log)
    local errlog=$(mktemp -p . rsync-err-XXXXXX.log)
    local delay rc
    local -a verboseopts flopts allopts

    # These add to the default rsync verbosity
    (( VERBOSE >= 7 )) && verboseopts+=(--progress)
    (( VERBOSE >= 5 )) && verboseopts+=(-v)
    (( VERBOSE >= 4 )) && verboseopts+=(-v)

    # Usually we won't want to see this.
    (( VERBOSE <= 3 )) && verboseopts+=(--no-motd)

    verboseopts+=(--progress)
    # verboseopts+=(--info=progress2)

    flopts=("--files-from=$files")
    # ZSHISM: (P) flag to act on a variable by name.  Sadly, bash has broken
    # array handling.  bash 4.3 has local -n for this.  Older bash needs
    # hacks, or eval.  More info:
    # https://stackoverflow.com/questions/1063347/passing-arrays-as-parameters-in-bash
    # Or just use a freaking global.
    allopts=(--timeout=$RSYNCTIMEOUT $RSYNCOPTS $verboseopts $flopts ${(P)opts} $src $dest)

    while true; do
        attempt=$(( attempt + 1 ))

        db3 Calling $RSYNC $allopts
        logit c calling $RSYNC $allopts

        # We have to do this separately because you can't redirect to
        # /dev/stderr when running under sudo.
        # ZSHISM Teeing both stderr and stdout while keeping the return code
        # is easy in zsh with multios but seems to be terribly difficult under
        # bash.
        # XXX background, then save $!, write it to the session log and wait on it.
        if (( VERBOSE >= 5 )); then
            $RSYNC $allopts 1>&1 2>&2 >> $log 2>> $errlog
        elif (( VERBOSE >= 2 )); then
            $RSYNC $allopts >> $log 2>&2 2>> $errlog
        else
            $RSYNC $allopts >> $log 2>> $errlog
        fi
        rc=$?

        # Check return values
        case $rc in
            0)
                logit C rsync call completed succesfully with return $rc
                parse_rsync_stats $log
                return 0
                ;;

            24)
                # 24: Partial transfer due to vanished source files
                logit e "rsync says source files vanished."
                return $rc
                ;;

            5|10|23|30|35)
                # Most of these are retryable network issues
                #    5: Error starting client-server protocol
                #   10: Error in socket I/O
                #   30: Timeout in data send/receive
                #   35: Timeout waiting for daemon connection
                #   23: Partial transfer due to error
                #       (could be a file list problem)
                if [[ $rc -eq 23 && -f $errlog ]] &&
                    grep -q '^rsync: link_stat .* failed: No such file or directory (2)$' $errlog
                then
                    # We tried to transfer files that don't exist
                    logit e "Looks like the file list is outdated."
                    (>&2 echo "Looks like the file list is outdated.")
                    [[ -f $errlog ]] && (>&2 cat $errlog)
                    return $rc
                fi

                # It's not one of those special 23 errors, so we may retry.
                # First see if we've already tried too many times.
                if (( attempt >= MAXRETRIES )); then
                    logit E rsync from $REMOTE failed
                    (>&2 echo "Could not sync from $REMOTE")
                    [[ -f $errlog ]] && (>&2 cat $errlog)
                    return $rc
                fi

                # Then sleep for a bit before going around again
                delay=$(( 2 ** attempt ))
                logit e "rsync returned $rc (retryable), sleeping for $delay"
                db2 rsync failed: sleeping for $delay
                sleep $delay
                ;;

            *)
                # We only get here if we got a return we didn't expect
                logit E "rsync returned $rc, which was not expected."
                (>&2 echo "rsync returned $rc, which was not expected."
                    [[ -f $errlog ]] && cat $errlog
                )
                return $rc
                ;;
        esac
    done
}

usage () {
    # Print the command-line help text to stdout.
    cat <<END
Usage: quick-fedora-mirror [OPTION]

Update a local mirror of Fedora content via rsync and perform a mirrormanager
checkin.

Requires a configuration file; will search for this file in the following
locations:

    The path provided by -c/--config.
    /etc/quick-fedora-mirror/quick-fedora-mirror.conf
    /etc/quick-fedora-mirror.conf
    ~/.config/quick-fedora-mirror.conf
    quick-fedora-mirror.conf in the same directory as this script.
    quick-fedora-mirror.conf in the current directory.

Options:
    -a, --alwayscheck   Always compare local content with file lists, even if
                        file lists have not changed.
    -c, --config PATH   Specify configuration file instead of searching.
    -d LEVEL            Specify debugging level (0-9).
    -h, --help          This message.
    -n, --dry-run       Show what would be transferred, but do not actually
                        transfer, delete or check in.
    -N, --transfer-only Download, but do not delete or check in.
    -t TIMESTAMP        Use TIMESTAMP (in seconds since epoch) as the last
                        mirror time.
    -T, --backdate TIME Use TIME (a human readable date) as the last mirror
                        time.
END
    # Options which are deliberately left out of the help text:
    #--checkin-only    Force a mirrormanager checkin for all modules, but do
    #                  not transfer, delete or update the timestamp.
    #--dir-times       Update all directory times.  (Not implemented.)
    #--refresh REGEX   Re-transfer all paths matching REGEX.  (Not implemented.)
}

parse_args () {
    # Process the command-line arguments, setting all sorts of globals:
    #   alwayscheck, cfgfile, verboseopt, rsyncdryrun, skipdelete,
    #   skiptimestamp, skiptransfer, skipcheckin, forcecheckin, backdate,
    #   updatealldirtimes, refreshpattern, dumpmmcheckin, noparanoia
    #
    # Exits with status 1 for an unreadable -c/--config file, for -h/--help
    # (after printing the usage text) and for an unrecognized argument.
    local opt    # scratch variable; kept out of the global scope

    while (( $# > 0 )); do
        opt=$1
        case $opt in
            -a | --alwayscheck)
                alwayscheck=1
                ;;
            -c | --config)
                cfgfile=$2
                shift
                if [[ ! -r $cfgfile ]]; then
                    (>&2 echo Cannot read $cfgfile)
                    exit 1
                fi
                ;;
            -d) # Debugging
                verboseopt=$2
                shift
                ;;
            -h | --help)
                usage
                exit 1
                ;;
            -n | --dry-run)
                rsyncdryrun=1
                skipdelete=1
                skiptimestamp=1
                ;;
            -N | --transfer-only)
                skipdelete=1
                skiptimestamp=1
                ;;
            -t )
                # Seconds since the epoch, used as given
                backdate=$2
                alwayscheck=1
                shift
                ;;
            -T | --backdate)
                # Human-readable date, converted by date(1)
                backdate=$(date -d "$2" +%s)
                alwayscheck=1
                shift
                ;;
            --checkin-only)
                skiptransfer=1
                skipdelete=1
                skiptimestamp=1
                forcecheckin=1
                ;;
            --dir-times)
                updatealldirtimes=1
                alwayscheck=1
                ;;
            --refresh)
                skipdelete=1
                skiptimestamp=1
                skipcheckin=1
                refreshpattern=$2
                shift
                ;;
            --dump-mm-checkin)
                # Just for the test suite; dump the raw payload to the given
                # filename with the module name appended.
                dumpmmcheckin=$2
                shift
                ;;
            --no-paranoia)
                # Don't backdate the last mirrortime
                noparanoia=1
                ;;
            *)
                (>&2 echo "Unrecognized argument.")
                exit 1
                ;;
        esac
        shift
    done
}

read_config () {
    # Load the configuration file and derive a few settings from it.
    #
    # The first readable file among the following is sourced, and cfgfile is
    # set to its path:
    #   $cfgfile (as set by -c/--config, if any)
    #   /etc/quick-fedora-mirror/quick-fedora-mirror.conf
    #   /etc/quick-fedora-mirror.conf
    #   ~/.config/quick-fedora-mirror.conf
    #   quick-fedora-mirror.conf in the same directory as this script
    #   quick-fedora-mirror.conf in the current directory
    #
    # Afterwards a -d value from the command line overrides VERBOSE.
    # DESTD and TIMEFILE are required; if either is missing, every missing
    # setting is reported on stderr and the script exits with status 1.
    # skipcheckin is set when CHECKIN_SITE is empty, and MAXCHECKINRETRIES
    # defaults to MAXRETRIES.

    # As a convenience, make sure $HOSTNAME is set
    if [[ -z "$HOSTNAME" ]]; then
        HOSTNAME=$(hostname)
    fi

    # Load up the configuration file from any of a number of locations.
    # $cfgfile is deliberately unquoted so that it drops out of the list when
    # it is empty.  ZSHISM: inside a function zsh normally sets $0 to the
    # function name, so the script location is taken from ZSH_ARGZERO where
    # that is available.
    local file
    for file in \
        $cfgfile \
        /etc/quick-fedora-mirror/quick-fedora-mirror.conf \
        /etc/quick-fedora-mirror.conf \
        ~/.config/quick-fedora-mirror.conf \
        "$(dirname "${ZSH_ARGZERO:-$0}")/quick-fedora-mirror.conf" \
        ./quick-fedora-mirror.conf
    do
        if [[ -r $file ]]; then
            source $file
            cfgfile=$file
            break
        fi
    done

    # Override some settings with previously parsed command-line options
    [[ -n $verboseopt ]] && VERBOSE=$verboseopt

    # Check that the required parameters were provided.  Report all of the
    # missing ones before giving up, since nothing useful can be done
    # without them.
    local missing=''
    if [[ -z $DESTD ]]; then
        (>&2 echo "You must define DESTD in your configuration file ($cfgfile).")
        missing=1
    fi
    if [[ -z $TIMEFILE ]]; then
        (>&2 echo "You must define TIMEFILE in your configuration file ($cfgfile).")
        missing=1
    fi
    [[ -n $missing ]] && exit 1

    # Set some other general variables based on the value of provided
    # configuration settings
    [[ -z $CHECKIN_SITE ]] && skipcheckin=1
    [[ -z $MAXCHECKINRETRIES ]] && MAXCHECKINRETRIES=$MAXRETRIES
}

set_default_vars () {
    # Set various defaults before the configuration file is loaded.

    # ZSHISM (initialize associative arrays)
    typeset -g -A MODULEMAPPING MIRRORMANAGERMAPPING MIRRORMANAGERMODULEMAPPING

    # Mapping from module names to directories under fedora-buffet
    MODULEMAPPING=(
        fedora-alt          alt
        fedora-archive      archive
        fedora-enchilada    fedora
        fedora-epel         epel
        fedora-secondary    fedora-secondary
    )

    # Mapping from module names to the names used in the mirrormanager
    # checkin payload
    MIRRORMANAGERMAPPING=(
        fedora-alt          'fedora other'
        fedora-archive      'fedora archive'
        fedora-enchilada    'fedora linux'
        fedora-epel         'fedora epel'
        fedora-secondary    'fedora secondary arches'
    )

    # Mirrormanager has a weird prefix for "fedora-enchilada", so copy the
    # existing module mapping and alter it
    MIRRORMANAGERMODULEMAPPING=(${(kv)MODULEMAPPING})
    MIRRORMANAGERMODULEMAPPING[fedora-enchilada]="fedora/linux"

    # Default arguments; override in quick-fedora-mirror.conf
    VERBOSE=0
    LOGITEMS=aeElrR

    # Required settings; these have no usable default
    DESTD=
    TIMEFILE=

    CHECKIN_HOST=$(hostname)
    CURL=/usr/bin/curl
    FILELIST='fullfiletimelist-$mdir'
    EXTRAFILES=(fullfilelist imagelist-\$mdir)
    MIRRORMANAGER=https://admin.fedoraproject.org/mirrormanager/xmlrpc
    REMOTE=rsync://dl.fedoraproject.org
    RSYNC=/usr/bin/rsync
    RSYNCTIMEOUT=$((60 * 10))
    WARNDELAY=$((60 * 60 * 24))
    MAXRETRIES=10

    # --preallocate is only used with the 3.1 series, and not with 3.1.3,
    # which has broken support for --preallocate and -S (--sparse) together.
    rsyncver=$(rsync --version | head -1 | awk '{print $3}')
    RSYNCOPTS=(-aSH -f 'R .~tmp~' --stats)
    if [[ $rsyncver == 3.1* && $rsyncver != 3.1.3 ]]; then
        RSYNCOPTS+=(--preallocate)
    fi
    RSYNCOPTS+=(--delay-updates --out-format='@ %i %10l %n%L')

    MASTERMODULE=fedora-buffet
    MODULES=(fedora-enchilada fedora-epel)
}

check_file_list_version () {
    # Look at the file list to see if we can handle it.
    #
    # Takes the file list name.
    # Returns 0 if we can handle it.  Exits the script with status 1 if the
    # file does not exist, if no plain decimal version number can be read
    # from its [Version] section, or if that version is newer than this
    # script supports.
    local max_fl_version=3
    local fl=$1
    local flversion

    if [[ ! -f $fl ]]; then
        (>&2 echo "Cannot check file list \"$fl\". Exiting.")
        exit 1
    fi

    # The version is whatever follows the [Version] header, up to the first
    # blank line.
    flversion=$(awk '/^\[Version/ {s=1; next} /^$/ {exit} s {print}' < "$fl")

    # Insist on digits only: an empty or non-numeric value would otherwise
    # evaluate to 0 in the comparison and be accepted by mistake.
    if [[ $flversion =~ ^[0-9]+$ ]] && (( flversion <= max_fl_version )); then
        return 0
    fi

    # Either it is too new or we just can't parse it, so quit.
    (>&2 echo "File list from the mirror cannot be processed by this script. Exiting.")
    exit 1
}

clean_all_transfer_temps () {
    # Delete temporary transfer files, but not any log files.
    #
    # Removes *.old and every file named <prefix>-* in the current directory,
    # where <prefix> is one of the module directories in MODULEMAPPING or one
    # of the fixed names listed below.
    # Be sure to add any extra generated temporaries here.
    # XXX Is it OK that this doesn't delete the file lists?  They will just
    # get copied over.
    local prefix    # loop variable; kept out of the global scope

    rm -f *.old
    # ZSHISM: (v) expands to the values of the associative array.
    for prefix in ${(v)MODULEMAPPING} alldirs allfiles allfilesizes changedpaths \
        changes checksumfailed checksums deletedirs deletefiles flist \
        localdirs localfiles localfilesizes localfulllist master missingdirs \
        missingfiles newdirs newfiles staletmpdirs staletmpfiles \
        transferlist updatedfiles updatetimestamps; do
        rm -f $prefix-*
    done
}

clean_single_rsync_temp () {
    # Deal with a single rsync temporary file in the current directory:
    # move it one directory up in the hierarchy, or delete it.
    #
    # rsync (at least version 3.2.3) appears to have some sort of bug which
    # causes it to fail to sync some files.  The working theory is that this
    # happens for small files which need timestamp changes.  It has been
    # observed for various .treeinfo (max 1550b), .discinfo (46b max) and a
    # README.html (479b) file.  When this manifests, a run will never complete
    # because rsync will fail to transfer the file and move it into the .~tmp~
    # directory, while q-f-m will move it back where it will fail to transfer
    # again.
    #
    # As a workaround for this, we simply delete "small" files (under 2kb)
    # instead of moving them.  Since the number of problem files appears to be
    # small and small files will transfer quickly, this should have little
    # effect on the overall transfer.
    #
    # Outcome, by case:
    #   RSYNC_PARTIAL_DIR_BUG set:   small file            -> deleted
    #                                no ../<file> exists   -> moved up
    #                                otherwise             -> deleted
    #   RSYNC_PARTIAL_DIR_BUG unset: no ../<file> exists   -> moved up
    #                                otherwise             -> left in place
    local file=$1
    local size=$(stat -c '%s' $1)

    db3 "XXXXXX $file $size"

    if [[ -n $RSYNC_PARTIAL_DIR_BUG ]]; then
        if [[ "$size" -lt 2048 ]]; then
            logit A Deleting small previous download $file
            db3 Deleting small previous download: $file
            rm -f $file
        elif [[ ! -f ../$file ]]; then
            logit A Saving previous download $file
            db3 Saving previous download: $file
            mv $file ..
        else
            logit A Deleting partial download $file
            db3 Deleting partial download: $file
            rm -f $file
        fi
    elif [[ ! -f ../$file ]]; then
        logit A Saving previous download $file
        db3 Saving previous download: $file
        mv $file ..
    fi
}

clean_stale_rsync_temps () {
    # Clean up temporaries left over from a previous aborted rsync run.
    #
    # Takes the module name.  Reads staletmpdirs-<module> (one directory per
    # line, relative to DESTD) and staletmpfiles-<module> (only counted, for
    # the log message) from the current directory.
    #
    # Each file in those tmpdirs is handed to clean_single_rsync_temp, which
    # moves it a level up if a file with the same name doesn't exist there.
    # We don't update the file lists because we want rsync to re-check those
    # files and possibly fix up the permissions.  The dirs will be cleaned up
    # later.
    #
    # Note that this _may_ leave a few files around which should not be there,
    # and of course the content (and even partial content) will be visible
    # before it technically should be.  But that's better than nothing getting
    # done because aborted runs cause an endless buildup of partial transfers.
    # Extra files, if present, will of course be cleaned up at the next run and
    # rsync will handle completing any partial downloads.
    #
    # XXX We could do better by comparing the stale files against the
    # to-be-transferred list and only move things which are going to be
    # downloaded in the run, but it's probably not worth it.
    local mod=$1
    local dir file    # loop variables; kept out of the global scope

    db2 Possibly aborted rsync run. Cleaning up.
    logit a "cleaning up previous aborted run: $(wc -l < staletmpfiles-$mod) file(s)."

    for dir in $(cat staletmpdirs-$mod); do
        # If the directory cannot be entered we must not touch anything:
        # the loop below would otherwise act on the files of whatever
        # directory we happen to be in, and popd would unbalance the stack.
        if ! pushd "$DESTD/$dir"; then
            logit e "Cannot enter stale temporary directory $DESTD/$dir; skipping."
            continue
        fi
        for file in *; do
            clean_single_rsync_temp $file
        done
        popd
        # It may be useful to clean up the temp directory, but in many cases
        # rsync will just recreate it and in any case it really should now be
        # empty.
        # rmdir $DESTD/$dir
    done
}

fetch_file_lists () {
    # Download the file list for each configured module into the current
    # directory.
    #
    # Will set the global variable "checksums" containing the checksum of the
    # file list of each module that exists on the client at the beginning of
    # the transfer.  For those modules the existing list is also copied here
    # and hard-linked as <list>.old before the download.
    #
    # Calls finish 1 (ending the script) if rsync fails, and the script also
    # ends if a downloaded list has a version we cannot process.
    #
    # NOTE: moduledir is assigned here without being declared local.
    local extra flname module rsyncreturn
    local existing

    sep
    logit o Remote file list download start
    db2 Downloading file lists

    # ZSHISM (declare associative array)
    typeset -g -A checksums
    checksums=()

    for module in $MODULES; do
        # ZSHISM? (associative array indexing)
        moduledir=$MODULEMAPPING[$module]
        mkdir $moduledir
        flname=${FILELIST/'$mdir'/$moduledir}
        existing=$DESTD/$moduledir/$flname

        if [[ -f $existing ]]; then
            cp -p $existing $moduledir
            ln $moduledir/$flname $moduledir/$flname.old
            # ZSHISM (assign assoc. array value)
            checksums[$module]=$(sha1sum $existing | cut -d' ' -f1)
        fi

        echo $moduledir/$flname >> filelist-transferlist
    done

    # The array is handed to do_rsync by name.
    extra=(--no-dirs --relative --compress)
    do_rsync $REMOTE/$MASTERMODULE/ . filelist-transferlist extra
    rsyncreturn=$?
    if (( rsyncreturn != 0 )); then
        (>&2 echo "rsync finished with nonzero exit status.\nCould not retrieve file lists.")
        logit E Aborting due to rsync failure while retrieving file lists
        finish 1
    fi

    # Log very basic stats
    logit s "File list download: $(hr_b $rstotalbytesreceived) received, $(hr_b $rstransferspeed)/s"

    # Check that we can handle the downloaded lists
    for module in $MODULES; do
        moduledir=$MODULEMAPPING[$module]
        flname=${FILELIST/'$mdir'/$moduledir}
        check_file_list_version $moduledir/$flname
    done

    # rsync won't transfer those files to the current directory, so move them
    # and clean up.
    mv */* .
    rmdir * 2> /dev/null
    logit o Remote file list download: end
}

checkin_build_inner_payload () {
    # Build the inner json payload.
    #
    # Arguments:
    #   1) the module name
    #   2) the name of the output file to use (overwritten)
    #   3) the host name to report in the "host" section
    #
    # Reads the directory list from alldirs-<module> in the current
    # directory, and the globals MIRRORMANAGER, CHECKIN_SITE and
    # CHECKIN_PASSWORD.  Values are inserted as they are, without any JSON
    # escaping.
    local module=$1
    local mm=$2
    local checkinhost=$3

    local moduledir=${MIRRORMANAGERMODULEMAPPING[$module]}
    local mmcheckin=${MIRRORMANAGERMAPPING[$module]}
    local dirpath    # loop variable; kept out of the global scope

    {
        cat <<EOF
{
"$mmcheckin": {
"dirtree": {
EOF

        # Output the data for each directory.  MM doesn't want the
        # directory name.
        for dirpath in $(<alldirs-$module); do
            echo " \"${dirpath/$moduledir\/}\": {},"
        done

        # The data sent by report_mirror always includes a blank directory;
        # add it manually here which conveniently means we don't have to deal
        # with the trailing comma.  And after that, the various parameters
        # mirrormanager wants.
        cat <<EOF
"": {}
},
"enabled": "1"
},
"global": {
"enabled": "1",
"server": "$MIRRORMANAGER"
},
"host": {
"enabled": "1",
"name": "$checkinhost"
},
"site": {
"enabled": "1",
"name": "$CHECKIN_SITE",
"password": "$CHECKIN_PASSWORD"
},
"stats": {},
"version": 0
}
EOF
    } >$mm
}

checkin_encode_inner_payload () {
    # Compress and encode the inner payload.
    #
    # Takes the input and output filenames.  The input file is consumed:
    # bzip2 replaces it with <input>.bz2, and that and the intermediate
    # <input>.bz2.b64 file are removed once the output has been written.
    local in=$1
    local out=$2

    # The xmlrpc endpoint requires that the payload be bzip2 compressed.
    # Compress the file we were given, not whatever the caller happens to
    # keep in a variable called mm.
    bzip2 "$in"

    # base64 encode
    base64 --wrap=0 "$in.bz2" > "$in.bz2.b64"

    # change '+' to '-' and '/' to '_'
    tr '+/' '-_' < "$in.bz2.b64" > "$out"

    rm "$in.bz2" "$in.bz2.b64"
}

checkin_build_outer_payload () {
|
|
# Wrap the encoded payload in just the right xml
|
|
# Takes input and output filenames
|
|
|
|
local in=$1
|
|
local out=$2
|
|
|
|
cat >>$out <<EOF
|
|
<?xml version='1.0'?>
|
|
<methodCall>
|
|
<methodName>checkin</methodName>
|
|
<params>
|
|
<param>
|
|
EOF
|
|
echo -n "<value><string>" >>$out
|
|
|
|
cat <$in >>$out
|
|
|
|
cat >>$out <<EOF
|
|
</string></value>
|
|
</param>
|
|
</params>
|
|
</methodCall>
|
|
EOF
|
|
}
|
|
|
|
checkin_upload_payload () {
    # Now actually upload the payload.
    #
    # Arguments:
    #   1) the file holding the xmlrpc request body
    #   2) the module name (used in messages only)
    #
    # Returns 0 when the response contains "successful" (case-insensitive),
    # 1 when it does not, and 2 when curl itself fails.  The raw response is
    # left in curl.out and a tag-stripped copy in curl.noxml, both in the
    # current directory.
    #
    # We have to remove the Expect: header that curl sends but which
    # mirrormanager cannot handle.
    local payload=$1
    local module=$2
    local -a curlopts
    local curlret

    logit M "Making xmlrpc call for $module"

    # Verbose output replaces --silent, but must not drop anything else.
    if (( VERBOSE >= 4 )); then
        curlopts=(--verbose)
    else
        curlopts=(--silent)
    fi

    # Use --http1.1 when the configured curl knows the option.  Newer curl
    # only lists every option under "--help all", so both listings are
    # searched.
    if { $CURL --help all; $CURL --help; } 2>/dev/null | grep -q -- '--http1\.1'; then
        curlopts+=(--http1.1)
    fi

    db3 "$CURL $curlopts -H \"Expect:\" -H \"Content-Type: text/xml\" --data @$payload $MIRRORMANAGER"
    $CURL $curlopts -H "Expect:" -H "Content-Type: text/xml" --data @$payload $MIRRORMANAGER > curl.out
    curlret=$?
    if [[ $curlret -ne 0 ]]; then
        logit e "Checkin failure: curl returned $curlret"
        (>&2 echo "Checkin failure: curl returned $curlret")
        return 2
    fi

    # Parse the output to see if we got any useful return.
    # The sed call attempts to strip xml tags.  Easily fooled but we don't
    # expect any complicated return from mirrormanager.
    sed -e 's/<[^>]*>//g' curl.out > curl.noxml
    if ! grep -q -i successful curl.noxml; then
        db1 "Mirrormanager checkin for $module did not appear to succeed."
        logit e "Doesn't look like we got a good return from mirrormanager."
        logit e $(cat curl.noxml)
        return 1
    fi
    return 0
}

checkin_module () {
    # Perform the mirrormanager checkin for a particular module.
    #
    # Takes the module name.  Builds the payload files
    # mirrormanager-payload-<module>* and mirrormanager-xmlrpc-<module> in the
    # current directory from alldirs-<module>, then uploads the result,
    # retrying until it succeeds or MAXCHECKINRETRIES attempts (MAXRETRIES if
    # that is unset) have been made.  When dumpmmcheckin is set, the request
    # is copied to <dumpmmcheckin>-<module> instead and nothing is uploaded.
    local module=$1

    local mm=mirrormanager-payload-$module
    local mx=mirrormanager-xmlrpc-$module
    local moduledir=$MODULEMAPPING[$module]

    if [[ ! -f alldirs-$module ]]; then
        # We were asked to check in a module that we hadn't previously
        # processed, which should not happen.
        logit E "Cannot perform checkin for $module; no directory list exists."
        return
    fi

    # Determine the "mirrormanager hostname" to use for this checkin.
    # Different modules can be set up under different "hosts" in
    # mirrormanager, even though these might all be on the same machine.  This
    # works around problems mirrormanager has when crawling machines which
    # mirror everything.  The per-module override lives in
    # CHECKIN_HOST_<module>, with dashes turned into underscores.
    # ZSHISM: This uses "(P)"; the equivalent in bash is "!".
    local checkinhost=$CHECKIN_HOST
    local hostspecificvar=CHECKIN_HOST_${module//-/_}
    if [[ -n ${(P)hostspecificvar} ]]; then
        checkinhost=${(P)hostspecificvar}
    fi

    db3 "Performing mirrormanager checkin for $module (in $moduledir) as $checkinhost"
    logit M "Processing $module (in $moduledir) as $checkinhost"

    # Construct the checkin payload
    checkin_build_inner_payload $module $mm $checkinhost
    checkin_encode_inner_payload $mm $mm.enc
    checkin_build_outer_payload $mm.enc $mx

    # For the test suite, just dump the checkin info and bail
    if [[ -n $dumpmmcheckin ]]; then
        cat $mx > $dumpmmcheckin-$module
        return
    fi

    # Try to check in until we've retried too often.  The limit is the
    # checkin-specific one, which is also what the failure message reports.
    local maxtries=${MAXCHECKINRETRIES:-$MAXRETRIES}
    local retries=1
    while true; do
        checkin_upload_payload $mx $module

        if [[ $? -eq 0 ]]; then
            break
        fi

        if (( retries >= maxtries )); then
            logit E "Could not complete checkin after $maxtries tries."
            break
        fi

        logit e "Checkin attempt $retries failed. Will retry."
        retries=$(( retries +1 ))
        sleep $(( 2*retries ))
    done

    logit M "Processing $module: end"
}

awk_extract_file_list () {
    # Copy the body of the [Files] section of a file list to another file.
    #
    # Arguments:
    #   1) the file list to read
    #   2) optional output file; defaults to <input>.flist
    #
    # Output starts after the line beginning with "[Files" and stops at the
    # first empty line that follows it.  The awk result is passed to
    # retcheck.
    local inf=$1
    local outf=${2:-$inf.flist}

    awk '
        /^\[Files/     { s = 1; next }
        /^$/ && s == 1 { exit }
        s              { print }
    ' < $inf > $outf
    retcheck $? awk
}

awk_extract_paths_from_file_list_restricted () {
    # Write the path of every entry in an extracted file list, prefixed with
    # the module directory.
    #
    # Arguments:
    #   1) input file: tab-separated entries with the path in field 4
    #   2) output file
    #   3) module directory to put in front of each path
    #
    # We can just ignore the type and permissions completely.
    # The module directory is handed to awk with -v rather than being pasted
    # into the program text, so it cannot alter the awk code.  (awk does
    # interpret backslash escapes in -v values.)
    local inf=$1
    local outf=$2
    local mdir=$3

    awk -F '\t' -v mdir="$mdir" '{ print mdir "/" $4 }' < "$inf" > "$outf"
    retcheck $? awk
}

awk_extract_paths_from_file_list_norestricted () {
    # Write the path of every entry of type "d", "f" or "l" in an extracted
    # file list, prefixed with the module directory.  Entries of any other
    # type are left out.
    #
    # Arguments:
    #   1) input file: tab-separated entries with the type in field 2 and
    #      the path in field 4
    #   2) output file
    #   3) module directory to put in front of each path
    #
    # The module directory is handed to awk with -v rather than being pasted
    # into the program text, so it cannot alter the awk code.  (awk does
    # interpret backslash escapes in -v values.)
    local inf=$1
    local outf=$2
    local mdir=$3

    awk -F '\t' -v mdir="$mdir" '
        $2 == "d" || $2 == "f" || $2 == "l" { print mdir "/" $4 }
    ' < "$inf" > "$outf"
    retcheck $? awk
}

awk_extract_newer_dirs_restricted () {
    # Write the paths of the directories in a file list that are at least as
    # new as a given time, including those with the restricted-type markers
    # "d-" and "d*".
    #
    # Arguments:
    #   1) input file list; only lines inside a [Files] section are used
    #      (tab-separated: time, type, size, path)
    #   2) output file
    #   3) module directory to put in front of each path
    #   4) optional minimum time (compared against field 1); defaults to 0
    #
    # The module directory and the time are handed to awk with -v rather
    # than being pasted into the program text, so they cannot alter the awk
    # code.  (awk does interpret backslash escapes in -v values.)
    local inf=$1
    local outf=$2
    local mdir=$3
    local last=${4:-0}

    awk -F '\t' -v mdir="$mdir" -v last="$last" '
        /\[Files/ { s = 1; next }
        /^$/      { s = 0; next }
        s && $1 >= last && ($2 == "d" || $2 == "d-" || $2 == "d*") {
            print mdir "/" $4
        }
    ' < "$inf" > "$outf"
    retcheck $? awk
}

awk_extract_newer_dirs_no_restricted () {
    # List the directories in the [Files] section of a file list, keeping
    # only entries whose type field is exactly "d".
    # Takes:
    #   file list to read
    #   output file (one "<module dir>/<path>" per line)
    #   directory of the current module
    #   timestamp (optional, default 0); entries whose first field is smaller
    #       are left out
    local inf=$1
    local outf=$2
    local mdir=$3

    local last=0
    [[ -n $4 ]] && last=$4

    # Values go in with -v instead of being pasted into the program text.
    # The section header is anchored to the start of the line (as in
    # awk_extract_file_list) so that a path which merely contains "[Files"
    # is not mistaken for the header and dropped.
    awk -F '\t' -v mdir="$mdir" -v last="$last" '
        /^\[Files/ { s = 1; next }
        /^$/       { s = 0; next }
        s && $1 >= last + 0 && $2 == "d" { print mdir "/" $4 }
    ' < "$inf" > "$outf"
    retcheck $? awk
}
|
|
|
|
awk_extract_newer_files_restricted () {
    # List the files and links in the [Files] section of a file list,
    # including those whose type carries a "-" or "*" suffix.
    # Takes:
    #   file list to read
    #   output file; each line is "<module dir>/<path>", a tab, and field 3
    #       of the entry
    #   directory of the current module
    #   timestamp (optional, default 0); entries whose first field is smaller
    #       are left out
    local inf=$1
    local outf=$2
    local mdir=$3

    local last=0
    [[ -n $4 ]] && last=$4

    # Values go in with -v instead of being pasted into the program text.
    # The section header is anchored to the start of the line (as in
    # awk_extract_file_list) so that a path which merely contains "[Files"
    # is not mistaken for the header and dropped.
    awk -F '\t' -v mdir="$mdir" -v last="$last" '
        /^\[Files/ { s = 1; next }
        /^$/       { s = 0; next }
        s && $1 >= last + 0 &&
            ($2 == "f" || $2 == "f-" || $2 == "f*" ||
             $2 == "l" || $2 == "l-" || $2 == "l*") {
            print mdir "/" $4 "\t" $3
        }
    ' "$inf" > "$outf"
    retcheck $? awk
}
|
|
|
|
awk_extract_newer_files_no_restricted () {
    # List the files and links in the [Files] section of a file list,
    # keeping only entries whose type field is exactly "f" or "l".
    # Takes:
    #   file list to read
    #   output file; each line is "<module dir>/<path>", a tab, and field 3
    #       of the entry
    #   directory of the current module
    #   timestamp (optional, default 0); entries whose first field is smaller
    #       are left out
    local inf=$1
    local outf=$2
    local mdir=$3

    local last=0
    [[ -n $4 ]] && last=$4

    # Values go in with -v instead of being pasted into the program text.
    # The section header is anchored to the start of the line (as in
    # awk_extract_file_list) so that a path which merely contains "[Files"
    # is not mistaken for the header and dropped.
    awk -F '\t' -v mdir="$mdir" -v last="$last" '
        /^\[Files/ { s = 1; next }
        /^$/       { s = 0; next }
        s && $1 >= last + 0 && ($2 == "f" || $2 == "l") {
            print mdir "/" $4 "\t" $3
        }
    ' "$inf" > "$outf"
    retcheck $? awk
}
|
|
|
|
process_file_list_diff () {
    # Compare a module's file list with the copy saved as <file list>.old.
    # Takes:
    #   current file list
    #   module name
    #   directory of the module
    # Leaves changedpaths-$mod in the current directory, along with the
    # intermediate flist-old-$mod, flist-new-$mod, their .sorted versions
    # and changes-$mod.
    local fl=$1
    local mod=$2
    local mdir=$3

    local oldflist=flist-old-$mod
    local newflist=flist-new-$mod
    local extractor=awk_extract_paths_from_file_list_norestricted

    logit l "Generating database diff start: $mod"

    # Pull the [Files] section out of both versions of the list...
    awk_extract_file_list $fl.old $oldflist
    awk_extract_file_list $fl $newflist

    # ...order each on the path field...
    sort -t$'\t' -k4 $oldflist > $oldflist.sorted
    sort -t$'\t' -k4 $newflist > $newflist.sorted

    # ...and keep the new file's side of each changed group.
    diff --changed-group-format='%>' --unchanged-group-format='' \
        $oldflist.sorted $newflist.sorted > changes-$mod

    # Reduce the changed entries to paths; restricted entries are kept only
    # when PREBITFLIP is set.
    [[ -n $PREBITFLIP ]] && extractor=awk_extract_paths_from_file_list_restricted
    $extractor changes-$mod changedpaths-$mod $mdir

    # Filtering has to happen here, or files we don't want to transfer would
    # look as though they had changed.
    filter changedpaths-$mod

    logit l "Generating database diff end: $mod"
}
|
|
|
|
compute_file_list_stats () {
    # Calculate and log counts of the various generated lists.
    # Takes the name of the module.
    #
    # For each list name below, the number of lines in <name>-$mod is stored
    # in counts[<name>] (0 if the file does not exist).
    # NOTE(review): "counts" is not declared in this function; it is assumed
    # to be an associative array set up by the caller -- confirm.
    local mod=$1
    local i
    local -a stats
    stats=(allfiles alldirs newfiles newdirs changedpaths localfiles \
        localdirs deletefiles deletedirs missingfiles missingdirs \
        updatedfiles updatetimestamps checksumfailed)

    # Iterate over the array itself, not the literal word "stats".
    for i in $stats; do
        counts[$i]=0
        [[ -f $i-$mod ]] && counts[$i]=$(wc -l < $i-$mod)
    done

    # Read from stdin so that wc prints only the number, not the file name.
    counts[totaltransfer]=0
    [[ -f transferlist-$mod ]] && counts[totaltransfer]=$(wc -l < transferlist-$mod)

    # Until the rest of the code is fixed up
    counts[extrafiles]=$counts[deletefiles]
    counts[extradirs]=$counts[deletedirs]
    counts[sizechanged]=$counts[updatedfiles]
    counts[allserverfiles]=$counts[allfiles]
    counts[allserverdirs]=$counts[alldirs]
    counts[newserverfiles]=$counts[newfiles]
    counts[newserverdirs]=$counts[newdirs]

    # Previously these two were printed before generating the local file lists
    db2f "Total on server: %7d files, %4d dirs.\n" $counts[allserverfiles] $counts[allserverdirs]
    db2f "New on server: %7d files, %4d dirs.\n" $counts[newserverfiles] $counts[newserverdirs]

    db2f "Total on client: %7d files, %4d dirs.\n" $counts[localfiles] $counts[localdirs]
    db2f "Not present on server: %7d files, %4d dirs.\n" $counts[extrafiles] $counts[extradirs]
    db2f "Missing on client: %7d files, %4d dirs.\n" $counts[missingfiles] $counts[missingdirs]
    db2f "Size Changed: %7d files.\n" $counts[sizechanged]
    db2f "Timestamps to restore: %7d files.\n" $counts[updatetimestamps]
    db2f "Checksum Failed: %7d files.\n" $counts[checksumfailed]
    db2f "Filelist changes: %7d paths.\n" $counts[changedpaths]
    db2f "Total to transfer: %7d paths.\n" $counts[totaltransfer]

    logit L "Counts for $mod: Svr:$counts[allserverfiles]/$counts[allserverdirs] Loc:$counts[localfiles]/$counts[localdirs] Diff:$counts[changedpaths] New:$counts[newserverfiles]/$counts[newserverdirs] Xtra:$counts[extrafiles]/$counts[extradirs] Miss:$counts[missingfiles]/$counts[missingdirs] Size:$counts[sizechanged] Csum:$counts[checksumfailed] Dtim:$counts[updatetimestamps]"
}
|
|
|
|
generate_local_file_list () {
    # Generate lists of what the client has.
    # Takes:
    #   module name
    #   directory of the module, relative to $DESTD
    # Writes, using $tempd for the full list and the current directory for
    # the rest (the main program changes into $tempd before modules are
    # processed):
    #   localfulllist-$mod   type, path and size of everything below the dir
    #   localdirs-$mod       paths of directories
    #   localfiles-$mod      paths of files and symlinks
    #   localfilesizes-$mod  path and size of files and symlinks
    #   staletmpdirs-$mod, staletmpfiles-$mod
    #                        entries containing ".~tmp~" (unless
    #                        NORSYNCRECOVERY is set)
    local mod=$1
    local mdir=$2

    db3 Generating local file/dir list
    logit l "Generating local file list start: $mod"

    # Traverse the filesystem only once.
    # The directory itself is the starting point instead of "$mdir/*": with
    # empty globs enabled, an empty module directory made the glob vanish
    # and find then walked all of $DESTD.  -mindepth 1 leaves the directory
    # itself out, and -H keeps things working when it is a symlink.  Running
    # in a subshell means there is no directory stack to restore (or print).
    ( cd "$DESTD" && find -H "$mdir" -mindepth 1 -printf '%y\t%p\t%s\n' ) \
        > "$tempd/localfulllist-$mod"

    # Now extract file and dir lists from that
    awk -F '\t' '{if ($1 == "d") {print $2}}' < localfulllist-$mod > localdirs-$mod
    awk -F '\t' '{if ($1 == "f" || $1 == "l") {print $2}}' < localfulllist-$mod > localfiles-$mod
    awk -F '\t' '{if ($1 == "f" || $1 == "l") {print $2 "\t" $3}}' < localfulllist-$mod > localfilesizes-$mod

    # Look for stray .~tmp~ dirs
    if [[ -z $NORSYNCRECOVERY ]]; then
        grep '\.~tmp~' localdirs-$mod > staletmpdirs-$mod
        grep '\.~tmp~' localfiles-$mod > staletmpfiles-$mod
    fi

    logit l "Generating local file list end: $mod"
}
|
|
|
|
process_local_file_list () {
    # Set the client's lists against the server's and derive further lists
    # from the differences.
    # Generates the following file lists:
    #   deletefiles-$module
    #   deletedirs-$module
    #   updatetimestamps-$module
    #   missingfiles-$module
    #   missingdirs-$module
    #   updatedfiles-$module
    #   checksumfailed-$module
    #
    # Takes the module name and the directory of the module.  The file list
    # itself is read from $fl, which is not set here and has to be visible
    # from the caller's scope.
    #
    # XXX Don't do any master transferlist manipulation here.
    local mod=$1
    local mdir=$2

    # Files only the client has.  The server list is fed in twice, so that
    # nothing the server has can come out of "uniq -u".
    sort allfiles-$mod allfiles-$mod localfiles-$mod \
        | uniq -u > deletefiles-$mod
    remove_filelists_from_file deletefiles-$mod $mdir

    # Dirs only the client has, by the same method.
    sort alldirs-$mod alldirs-$mod localdirs-$mod \
        | uniq -u > deletedirs-$mod

    # Dirs whose timestamps need restoring: either every dir of the module,
    # or the dirname of each entry in the delete lists plus all its parents.
    if [[ -z $updatealldirtimes ]]; then
        awk '{dn($0)} function dn(p) { while (sub(/\/[^\/]*\]?$/, "", p)) print p }' \
            deletefiles-$mod deletedirs-$mod \
            | sort -u > updatetimestamps-$mod
    else
        echo $mdir > updatetimestamps-$mod
        cat alldirs-$mod >> updatetimestamps-$mod
    fi

    # Files only the server has; this time the client list is doubled.
    sort localfiles-$mod localfiles-$mod allfiles-$mod \
        | uniq -u > missingfiles-$mod

    # Dirs only the server has.
    sort localdirs-$mod localdirs-$mod alldirs-$mod \
        | uniq -u > missingdirs-$mod

    # Files whose size differs: drop the path/size lines both sides agree
    # on, then keep the paths which are still there twice.
    sort allfilesizes-$mod localfilesizes-$mod \
        | uniq -u \
        | awk -F '\t' '{print $1}' \
        | uniq -d > updatedfiles-$mod

    # Pull the [Checksums section out of the file list, prefix each path
    # with the module dir, and have sha1sum check the result from $DESTD.
    awk -F '\t' "/^\[Checksums/ {s=1; next} /^$/ {s=0; next} {if (s) print \$1 \" $mdir/\" \$2}" $fl > checksums-$mod
    pushd $DESTD > /dev/null 2>&1
    sha1sum --check --quiet $tempd/checksums-$mod 2> /dev/null \
        | grep -i 'failed$' \
        | awk -F: '{print $1}' > $tempd/checksumfailed-$mod
    popd > /dev/null 2>&1
}
|
|
|
|
process_remote_file_list () {
    # Split the master file list of a module into file and directory lists.
    #
    # Restricted (pre-bitflip) content is included only when PREBITFLIP is
    # set.
    #
    # Takes:
    #   file list
    #   module name
    #   directory of the module
    #
    # Will create the following files:
    #   allfilesizes-$module
    #   allfiles-$module
    #   alldirs-$module
    #   newdirs-$module
    #   newfilesizes-$module
    #   newfiles-$module
    local fl=$1
    local module=$2
    local moduledir=$3

    # Suffix of the awk_extract_newer_* helpers to call, and the matching
    # remark for the debug output.
    local variant=no_restricted
    local note='pre-bitflip excluded'
    local list

    db3 Extracting file and directory lists for $module.

    if [[ -n $PREBITFLIP ]]; then
        variant=restricted
        note='pre-bitflip included'
    fi

    db4 "Directories ($note)"
    awk_extract_newer_dirs_$variant $fl alldirs-$module $moduledir

    db4 "New dirs ($note)"
    awk_extract_newer_dirs_$variant $fl newdirs-$module $moduledir $LASTTIME

    db4 "Files ($note)"
    awk_extract_newer_files_$variant $fl allfilesizes-$module $moduledir

    db4 "New files ($note)"
    awk_extract_newer_files_$variant $fl newfilesizes-$module $moduledir $LASTTIME

    # Filter the lists if needed
    for list in alldirs newdirs allfilesizes newfilesizes; do
        filter $list-$module
    done

    # The plain file lists are the first column of the lists with sizes.
    awk -F '\t' '{print $1}' allfilesizes-$module > allfiles-$module
    retcheck $? awk
    awk -F '\t' '{print $1}' newfilesizes-$module > newfiles-$module
    retcheck $? awk
}
|
|
|
|
update_master_file_lists () {
    # Append the per-module lists to the lists used for the whole run.
    # Takes the name of the module.
    #
    # The delete and timestamp lists go onto master-deletefiles,
    # master-deletedirs and master-updatetimestamps; the missing, updated
    # and checksum-failed lists go onto the module's own transferlist.
    #
    # The argument used to be ignored in favour of whatever $module happened
    # to be in the caller's scope.  That is still the fallback when no
    # argument is given.
    local mod=$1
    local kind
    [[ -n $mod ]] || mod=$module

    for kind in deletefiles deletedirs updatetimestamps; do
        cat "$kind-$mod" >> "master-$kind"
    done

    for kind in missingfiles missingdirs updatedfiles checksumfailed; do
        cat "$kind-$mod" >> "transferlist-$mod"
    done
}
|
|
|
|
remove_filelists_from_file () {
    # Remove the file from $FILELIST and anything given by $EXTRAFILES.
    # Takes:
    #   file to modify
    #   directory of current module (for substituting $mdir)
    # Modifies the file directly
    # Calls grep -E -v in a loop.  Generally this is called on files of no
    # more than a few thousand lines, so performance shouldn't be an issue.
    #
    # A line is dropped when its second path component starts with one of
    # the names; each name is used as an extended regular expression.

    local f=$1
    local moduledir=$2
    local tmp=$f.rfff
    local fl

    for fl in $FILELIST $EXTRAFILES; do
        # A literal "$mdir" in the name stands for the module directory.
        fl=${fl/'$mdir'/$moduledir}
        # grep -E rather than the obsolescent egrep; -e keeps the pattern
        # from ever being read as an option.
        grep -E -v -e "^[^/]*/$fl" "$f" > "$tmp"
        mv "$tmp" "$f"
    done

    # Only does anything if the last mv did not happen.
    rm -f "$tmp"
}
|
|
|
|
process_module () {
    # Determine what needs to be transferred and removed from a single module.
    #
    # Takes the name of the module to process, returns nothing.
    #
    # Sets the following globals:
    #   changed_modules
    #
    # Will leave the following lists in the temporary dir for use by other
    # functions: (all of them; currently deletes nothing)
    #
    # May leave other files, but don't depend on them.
    #
    # The various status variables, for logging:
    #   cntallserverfiles/cntallserverdirs - total files/dirs on server.
    #   cntnewserverfiles/cntnewserverdirs - new files/dirs on server (since last mirror time)
    #   cntlocalfiles/cntlocaldirs - total files/dirs on client.
    #   cntextrafiles/cntextradirs - files/dirs on client but not server.
    #   cntmissingfiles/cntmissingdirs - files/dirs on server but not client.
    #   cntsizechanged - files where size differs between server/client.
    #   cntupdatetimestamps - dir timestamps to restore
    #   cntchecksumfailed - files where checksum differs between server/client.
    #   cntchangedpaths - count of all differences between file lists.

    local module=$1
    # ZSHISM? (associative array indexing)
    local moduledir=$MODULEMAPPING[$module]

    local fl=${FILELIST/'$mdir'/$moduledir}
    local cntallserverfiles cntallserverdirs cntnewserverfiles cntnewserverdirs
    local cntchangedpaths cntlocalfiles cntlocaldirs cntextrafiles cntextradirs
    local cntmissingfiles cntmissingdirs cntsizechanged cntupdatetimestamps cntchecksumfailed
    local cnttotaltransfer
    local extra i

    # Nothing to do if the file list has the checksum recorded for it in
    # $checksums, unless we were told to check regardless.
    if [[ -z $alwayscheck && \
          -n $checksums[$module] && \
          $(sha1sum $fl | cut -d' ' -f1) == $checksums[$module] ]]; then
        logit N No change in file list for $module
        db2 No change in file list checksum. Skipping $module.
        # return, not continue: this is a function body, and leaning on
        # continue to reach the caller's loop is deprecated zsh behavior.
        return
    fi

    sep
    logit P Processing start: $module
    db2 Processing $module
    # NOTE(review): the module is recorded as changed before the file list
    # has been validated below -- confirm that a module skipped for an
    # invalid list should still be checked in later.
    changed_modules+=$module

    # Make sure the list is complete.
    if ! tail -2 $fl | grep -q '^\[End\]$'; then
        logit e "Invalid file list; skipping $module"
        # Both lines of the complaint belong on stderr.
        {
            echo "No end marker. Corrupted file list?"
            echo Skipping $module.
        } >&2
        return
    fi

    process_remote_file_list $fl $module $moduledir

    cntallserverfiles=$(wc -l < allfiles-$module)
    cntallserverdirs=$(wc -l < alldirs-$module)
    db2f "Total on server: %7d files, %4d dirs.\n" $cntallserverfiles $cntallserverdirs

    cntnewserverfiles=$(wc -l < newfiles-$module)
    cntnewserverdirs=$(wc -l < newdirs-$module)
    db2f "New on server: %7d files, %4d dirs.\n" $cntnewserverfiles $cntnewserverdirs

    # Add extra files to the transfer list
    echo $moduledir/$fl >> newfiles-$module
    for extra in $EXTRAFILES; do
        extra=${extra/'$mdir'/$moduledir}
        echo $moduledir/$extra >> newfiles-$module
    done
    cat newfiles-$module >> transferlist-$module
    cat newdirs-$module >> transferlist-$module

    # Comparing against the local copy only makes sense if there is one.
    if [[ -d $DESTD/$moduledir ]]; then
        db3 Finding file list changes since last run
        process_file_list_diff $fl $module $moduledir
        cat changedpaths-$module >> transferlist-$module

        generate_local_file_list $module $moduledir

        if [[ -s staletmpdirs-$module ]]; then
            clean_stale_rsync_temps $module
        fi

        # Find files on the client which don't exist on the server
        process_local_file_list $module $moduledir
        update_master_file_lists $module

        # Count some things we want to use for stats later.
        cntchangedpaths=$(wc -l < changedpaths-$module)
        cntlocalfiles=$(wc -l < localfiles-$module)
        cntlocaldirs=$(wc -l < localdirs-$module)
        cntextrafiles=$(wc -l < deletefiles-$module)
        cntextradirs=$(wc -l < deletedirs-$module)
        cntmissingfiles=$(wc -l < missingfiles-$module)
        cntmissingdirs=$(wc -l < missingdirs-$module)
        cntsizechanged=$(wc -l < updatedfiles-$module)
        cntupdatetimestamps=$(wc -l < updatetimestamps-$module)
        cntchecksumfailed=$(wc -l < checksumfailed-$module)

        db2f "Total on client: %7d files, %4d dirs.\n" $cntlocalfiles $cntlocaldirs
        db2f "Not present on server: %7d files, %4d dirs.\n" $cntextrafiles $cntextradirs
        db2f "Missing on client: %7d files, %4d dirs.\n" $cntmissingfiles $cntmissingdirs
        db2f "Size Changed: %7d files.\n" $cntsizechanged
        db2f "Timestamps to restore: %7d files.\n" $cntupdatetimestamps
        db2f "Checksum Failed: %7d files.\n" $cntchecksumfailed
        db2f "Filelist changes: %7d paths.\n" $cntchangedpaths
    fi

    sort -u transferlist-$module >> transferlist-sorted-$module
    cat transferlist-sorted-$module >> master-transferlist
    cnttotaltransfer=$(wc -l < transferlist-sorted-$module)
    db2f "Total to transfer: %7d paths.\n" $cnttotaltransfer

    logit L "Counts for $module: Svr:$cntallserverfiles/$cntallserverdirs Loc:$cntlocalfiles/$cntlocaldirs Diff:$cntchangedpaths New:$cntnewserverfiles/$cntnewserverdirs Xtra:$cntextrafiles/$cntextradirs Miss:$cntmissingfiles/$cntmissingdirs Size:$cntsizechanged Csum:$cntchecksumfailed Dtim:$cntupdatetimestamps"
    logit P Processing end: $module
    db2 Finished processing $module.

    # Some basic info about the transfer.  Short lists are shown in full.
    db1 Changes in $module: $cnttotaltransfer files/dirs
    if (( cnttotaltransfer <= 5 )); then
        for i in $(cat transferlist-sorted-$module); do
            db1 " $i"
        done
    fi

    # XXX We should clean some things up at this point, but we also need some
    # files for the checkin later.
    # Should be able to delete all *-$module, except for the dirlists, to give
    # the current mirrormanager versions the things it needs.
    #if (( VERBOSE <= 4 )); then
    #    rm *-$module
    #fi
}
|
|
|
|
|
|
# Main program execution
# ======================
parse_args "$@"
set_default_vars
read_config

# XXX check_dependencies

# Paranoia; move the recorded start time back by five seconds.
if [[ -z $noparanoia ]]; then
    starttime=$(( starttime - 5 ))
fi

# The previous mirror time is zero unless sourcing the time file changes it
# (presumably by setting LASTTIME), and a backdate value overrides both.
LASTTIME=0
[[ -r $TIMEFILE ]] && source $TIMEFILE
[[ -n $backdate ]] && LASTTIME=$backdate

# Make a temp dir, below $TMPDIR if that is set.
if [[ -n $TMPDIR ]]; then
    tempd=$(mktemp -d -p $TMPDIR -t quick-mirror.XXXXXXXXXX)
else
    tempd=$(mktemp -d -t quick-mirror.XXXXXXXXXX)
fi

if (( ? != 0 )); then
    echo "Creating temporary directory failed?" >&2
    exit 1
fi

# Clean it up on exit unless we're doing a lot of debugging.
(( VERBOSE <= 8 )) && trap "rm -rf $tempd" EXIT

# Set up a FIFO for logging, read by a single systemd-cat with fd 3 held
# open on the writing end.  Calling systemd-cat repeatedly just gives us a
# different PID every time, which is annoying.
if [[ -n $LOGJOURNAL ]]; then
    logfifo=$tempd/journal.fifo
    mkfifo $logfifo
    systemd-cat -t quick-fedora-mirror < $logfifo &
    exec 3>$logfifo
fi

# Files for the saved-up output and the session log, plus a marker for the
# start of the run.
outfile=$tempd/output
sessionlog=$tempd/sessionlog
touch $outfile $sessionlog $tempd/started-run

cd $tempd

# At this point we can acquire the lock
if ! lock $TIMEFILE; then
    db4 Could not acquire lock.
    logit k lock contention

    # Maybe we haven't been able to mirror for some time....  The time
    # since the last run only counts if that time is known and was not
    # backdated.
    delay=0
    if [[ -z $backdate && $LASTTIME -ne 0 ]]; then
        delay=$(( starttime - LASTTIME ))
    fi

    if (( delay > WARNDELAY )); then
        echo No completed run since $(date -d @$LASTTIME ). >&2
        logit E No completed run since $(date -d @$LASTTIME ).
    fi
    exit 1
fi
|
|
|
|
db1 "Mirror starting: $(date)"
logit r Run start: cfg $cfgfile, tmp $tempd

# With MIRRORBUFFET set we want to mirror everything, so save the admin from
# listing the individual modules and take every key of the module mapping.
if [[ -n $MIRRORBUFFET ]]; then
    # ZSHISM (get keys from an associative array with (k))
    MODULES=(${(k)MODULEMAPPING})
    # BASHEQ MODULES=${!MODULEMAPPING[@]}
    # bash3 equivalent is terrible
fi

# Dump the settings in effect when debugging heavily.
if (( VERBOSE >= 6 )); then
    echo Times:
    echo LASTTIME=$LASTTIME
    echo starttime=$starttime
    echo TIMEFILE=$TIMEFILE

    echo Dirs:
    echo tempd=$tempd
    echo DESTD=$DESTD

    echo Rsync:
    echo REMOTE=$REMOTE
    echo MASTERMODULE=$MASTERMODULE
    echo RSYNC=$RSYNC
    echo RSYNCOPTS=$RSYNCOPTS

    echo Modules:
    echo MODULES=$MODULES
    echo MODULEMAPPING=$MODULEMAPPING

    echo Misc:
    echo VERBOSE=$VERBOSE
fi

if (( VERBOSE >= 8 )); then
    set -x
fi

fetch_file_lists

# Work through the modules one at a time; process_module adds to
# changed_modules as it goes.
logit p Processing start
changed_modules=()
for module in $MODULES; do
    process_module $module
done

# No master transfer list means no module had anything for us.
if [[ ! -e master-transferlist ]]; then
    logit n No changes to synchronize
    db2 No changed files.
    finish 0
fi
|
|
|
|
if [[ -n $MIRRORBUFFET ]]; then
    # When mirroring everything, fetch the top-level size summary as well.
    echo DIRECTORY_SIZES.txt >> master-transferlist

    # If there's an rsync temp directory in the top level, delete it to work
    # around a potential rsync bug.
    if [[ -n $RSYNC_PARTIAL_DIR_BUG ]]; then
        rm -rf $DESTD/.~tmp~
    fi
fi

# The actual transfer
# ===================
sort -u master-transferlist > master-transferlist.sorted
linecount=$(wc -l < master-transferlist.sorted)
sep; sep
db2 Transferring $linecount files.
# XXX send total count to log as well

touch $tempd/started-transfer

# Now we have a list of everything which has changed recently in every module
# we want, pass that to rsync (non recursive mode!) and it should transfer just
# the changed files without having to pull the entire huge file list.
extra=()
if [[ -n $rsyncdryrun ]]; then
    extra+=(-n)
fi
do_rsync $REMOTE/$MASTERMODULE/ $DESTD master-transferlist.sorted extra
if (( ? != 0 )); then
    (>&2 echo "rsync failed; aborting run.\nWill not check in or delete anything.")
    # The level has to be an argument of its own; logit compares its first
    # argument against 'E' to decide whether to tag the entry as an error.
    logit E "Skipping further operations due to rsync failure."
    finish 1
fi

# Total downloaded file count, bytes received, transfer speed
logit s "stat: downloaded $rsfilestransferred files"
logit s "stat: received $(hr_b $rstotalbytesreceived)"
logit s "stat: transfer speed $(hr_b $rstransferspeed)/s"

# Everything we can extract from rsync
logit S "stat: sent $(hr_b $rstotalbytessent)"
logit S "stat: speedup: $rsspeedup"
logit S "stat: total size of transferred files: $(hr_b $rsfilesize)"
logit S "stat: file list gen time $(hr_s $rsfilelistgentime)"
logit S "stat: file list transfer time $(hr_s $rsfilelisttransfertime)"

# The same numbers again, for the run output.
db1 "========================="
db1 "Main transfer statistics:"
db1 " Downloaded files: $rsfilestransferred"
db1 " Total size of those files: $(hr_b $rsfilesize)"
db1 " Received: $(hr_b $rstotalbytesreceived)"
db1 " Sent: $(hr_b $rstotalbytessent)"
db1 " Speedup: $rsspeedup"
db1 " Trasfer speed: $(hr_b $rstransferspeed)/s"
db1 " File list generation time: $(hr_s $rsfilelistgentime)"
db1 " File list transfer time: $(hr_s $rsfilelisttransfertime)"
|
|
|
|
# Local dir/file deletion
# =======================
if [[ -s master-deletedirs ]]; then
    linecount=$(wc -l < master-deletedirs)

    # skipdelete alone decides whether anything gets deleted.  The verbosity
    # level used to be part of this test, which meant that a quiet run
    # deleted the directories even though it had been asked not to.
    if [[ -n $skipdelete ]]; then
        logit d Directory deletion skipped
        if (( VERBOSE >= 2 )); then
            echo "Not deleting $linecount directories. Delete list is:"
            cat master-deletedirs
            echo
        fi
    else
        logit d Directory deletion start: $linecount directories
        db2 Removing $linecount stale directories.
        for nuke in $(cat master-deletedirs); do
            # A directory may already be gone if a parent was removed first.
            if [[ -d "$DESTD/$nuke" ]]; then
                logit D Deleting directory $nuke
                db4 Removing $nuke
                rm -rf "$DESTD/$nuke"
                deletedsomething=1
            fi
        done
        logit d Directory deletion end
    fi
else
    db2 No stale directories to delete.
fi

if [[ -s master-deletefiles ]]; then
    linecount=$(wc -l < master-deletefiles)

    if [[ -n $skipdelete ]]; then
        logit d File deletion skipped
        echo Not deleting $linecount stale files. Delete list is:
        cat master-deletefiles
        echo
    else
        logit d File deletion begin: $linecount files
        db2 Removing $linecount stale files.
        # xopts=()
        # (( VERBOSE >= 4 )) && xopts=(-t)
        # The list holds paths relative to $DESTD, one per line; hand them
        # to rm NUL-separated.
        tr '\n' '\0' < master-deletefiles \
            | (pushd $DESTD; xargs $xopts -0 rm -f ; popd)
        # for nuke in $(cat master-deletefiles); do
        #     logit D Deleting file $nuke
        #     rm -f "$DESTD/$nuke"
        # done
        deletedsomething=1
        logit d File deletion end
    fi
else
    db2 No stale files to delete.
fi
|
|
|
|
# Restore directory timestamps with a second rsync pass, if that was asked
# for and there is a non-empty list of directories to do it for.
if [[ -s master-updatetimestamps ]] \
    && [[ -n $KEEPDIRTIMES || -n $updatealldirtimes ]]; then
    extra=()
    [[ -n $rsyncdryrun ]] && extra+=(-n)
    logit d "Updating timestamps on $(wc -l < master-updatetimestamps) dirs"
    do_rsync $REMOTE/$MASTERMODULE/ $DESTD master-updatetimestamps extra
fi

# We've completed a run, so save the timestamp
save_state

# Mirrormanager Checkin and Callout
# =================================
# At this point we know that we had a clean run with no complaints from rsync,
# and as far as we're concerned the run is now complete and recorded.
#
# So for each module we mirrored, the filtered file list is correct.  This
# means that the alldirs-$module file is accurate and we can simply report its
# contents to mirrormanager.
#
# The checkin runs unless it was switched off, and always when its payload
# is to be dumped.
if [[ -n $dumpmmcheckin || -z $skipcheckin ]]; then
    db2 Performing mirrormanager checkin
    logit m "mirrormanager checkin start"

    # Check in just the changed modules
    for module in $changed_modules; do
        checkin_module $module
    done

    logit m "mirrormanager checkin end"
fi

finish 0 yes
|