mirror/quick-fedora-mirror

#!/bin/zsh
# Simple script to grab the file list from Fedora and rsync everything that's
# changed since the last time we pulled.
#
# Originally written by Jason Tibbitts <tibbs@math.uh.edu> in 2016.
# Donated to the public domain.  If you require a statement of license, please
# consider this work to be licensed as "CC0 Universal", any version you choose.

# Variables in upper case are user configurables.

# ZSHISM: single-letter zsh options.  -G is NULL_GLOB (a glob with no matches
# expands to nothing) and -4 is GLOB_DOTS (globs also match names starting
# with a dot).
set -G -4
# Export LANG=C so that this script and the programs it runs default to the C
# locale.
export LANG=C
# ZSHISM? Use only a newline for IFS, so that splitting command output yields
# one word per line.
IFS=$'\n'

# Declare globals
typeset -A tcounts   # Transfer counts

# Do this very early.  Seconds since the epoch; save_state writes this value
# to the timestamp file as LASTTIME.
starttime=$(date +%s)

# Debug output:
# Level 0: nothing except errors.
# Level 1: same as level 0 unless there is a transfer, and then basic info and
#          times.  Output goes to a file which may be spit out at the end of
#          the run.
# Level >= 2: Always some info, output to the terminal.
db1 () {
    # Level-1 debug output.  At VERBOSE >= 2 the arguments are echoed to
    # stdout; at VERBOSE == 1 they are appended to $outfile; below that
    # nothing is printed.
    (( VERBOSE < 1 )) && return 0

    if (( VERBOSE < 2 )); then
        echo $* >> $outfile
    else
        echo $*
    fi
}
db1f () {
    # printf-style variant of db1: the arguments are handed to printf (the
    # first one being the format) and the result is passed on to db1.
    db1 $(printf $*)
}

db2 () {
    # Echo the arguments when VERBOSE is at least 2.
    (( VERBOSE >= 2 )) && echo $*
}
db2f () {
    # printf the arguments (first one is the format) when VERBOSE is at least 2.
    (( VERBOSE >= 2 )) && printf $*
}
db3 () {
    # Echo the arguments, prefixed with '>>', when VERBOSE is at least 3.
    (( VERBOSE >= 3 )) && echo '>>' $*
}
db4 () {
    # Echo the arguments, prefixed with '>>>>', when VERBOSE is at least 4.
    (( VERBOSE >= 4 )) && echo '>>>>' $*
}
sep () {
    # Print a separator line when VERBOSE is at least 2.
    (( VERBOSE >= 2 )) && echo '============================================================'
}

logwrite () {
    # Deliver one log line.  When LOGJOURNAL is set the line goes to file
    # descriptor 3; otherwise, if LOGFILE names a writable file, the line is
    # appended there with a date prefix.  With neither, the line is dropped.
    if [[ -n $LOGJOURNAL ]]; then
        echo $* >&3
        return
    fi

    if [[ -n $LOGFILE && -w $LOGFILE ]]; then
        echo $(date '+%b %d %T') $* >> $LOGFILE
    fi
}

logit () {
    # Basic logging function.
    #
    # The first argument is a one-character item type; the remaining
    # arguments form the message.  Every call is recorded in $sessionlog.  The
    # message is also passed to logwrite when the item type matches LOGITEMS
    # (or LOGITEMS contains '@'), and to db3 when VERBOSE >= 3.  Item types
    # 'E' and 'e' get an 'ERR:' / 'Err:' prefix.
    local item=$1
    shift

    local err=''
    case $item in
        E) err='ERR:' ;;
        e) err='Err:' ;;
    esac

    echo "$item  $err $@" >> $sessionlog

    if [[ $LOGITEMS =~ $item || $LOGITEMS =~ '@' ]]; then
        logwrite $err $*
    fi

    # XXX Consider also sending errors to the terminal:
    #   [[ -n $err ]] && (>&2 echo $*)

    if (( VERBOSE >= 3 )); then
        db3 Log: $err $*
    fi
}

retcheck () {
    # Report a failed command.
    #
    # $1 is the return code to check; $2 is an optional program name used in
    # the message.  A nonzero code is reported through db1 and logged as an
    # error, together with the first entry of $functrace (ZSHISM).
    local ret=$1
    local prg=${2:+$2 }

    [[ $ret -ne 0 ]] || return 0

    db1 "${prg}failed at $functrace[1]: with return $ret"
    logit E "${prg}call failed at $functrace[1]: with return $ret"
}

lock () {
    # Take a non-blocking lock on the file named by $1.
    #
    # The file is opened for appending on file descriptor 9 (and created if
    # necessary), and flock is asked for the lock without waiting.
    # Returns 0 if the lock was obtained and 1 otherwise.
    #
    # The eval string is single-quoted so that "$1" is expanded, properly
    # quoted, only when the exec itself runs.  Interpolating the file name
    # into the string would word-split names containing whitespace and would
    # execute any shell metacharacters in the name.
    eval 'exec 9>>"$1"'

    if flock -n 9; then
        return 0
    fi
    return 1
}

save_state () {
    # Record the start time of this run in $TIMEFILE, keeping the previous
    # file as $TIMEFILE.prev.  Does nothing but note the fact when
    # $skiptimestamp is set.  Exits the script if the file cannot be written.
    #
    # Moving the old file aside with mv actually undoes the locking (cp could
    # be used instead).  Currently the unlocking is a good thing because it
    # allows the checkin to proceed without the next run waiting, but this
    # should be audited.
    if [[ -n $skiptimestamp ]]; then
        db2 Skipping timestamp save.
        return
    fi

    db2 Saving mirror time to $TIMEFILE
    [[ -e $TIMEFILE ]] && mv $TIMEFILE $TIMEFILE.prev

    if ! echo LASTTIME=$starttime > $TIMEFILE; then
        (>&2 echo Problem saving timestamp file $TIMEFILE)
        logit E "Failed to update timestamp file"
        exit 1
    fi
}

append_state () {
    # Placeholder: this function currently has no body and does nothing.  The
    # notes below are design ideas only.
    #
    # Think about how to save extra state in the timestamp file or some
    # associated file.  Should we even do this?
    # Should this be saved to a separate status file instead?


    # Cannot rewrite the file or else the locking breaks.  Updating it should
    # be OK.
    # Save things in a format that can be sourced (VAR=value).
    # Repeated uses (VAR=value2) are OK and overwrite the previous value when
    # the file is sourced.

    # What would use this?  A separate status program or some other monitor?
    #
    # Data about the current transfer that could be saved:
    #   The current point in the process
    #   Counts
    #   The current tempdir
    #   Important transfer list files
    #   The current rsync output file (for tailing and counting), since its
    #   name is random.

}

cat_or_email () {
    # Deliver the contents of the file named by $1.  When stdin is a terminal
    # or no EMAILDEST is configured the file is written to stdout; otherwise
    # it is mailed to $EMAILDEST with the subject $EMAILSUBJECT.
    local file=$1

    if [[ -t 0 || -z "$EMAILDEST" ]]; then
        cat $file
        return
    fi

    mail -E -s "$EMAILSUBJECT" "$EMAILDEST" < $file
}

finish () {
    # End the run and exit the script.
    #
    # Arguments (both optional):
    #   1) The exit status.  A value above zero also causes the collected
    #      output file to be delivered.
    #   2) If nonempty, the output file is delivered regardless of the status.
    #
    # Delivery goes through cat_or_email, so the output is either dumped to
    # stdout or mailed.
    local ret=$1 out=$2

    db1 "========================="
    db1 "Mirror finished: $(date) ($ret)"
    logit R "Run end; exiting $ret."

    if [[ -n $out || $ret -gt 0 ]]; then
        cat_or_email $outfile
    fi

    exit $ret
}

filter () {
    # Client-side file list filtering.
    #
    # When FILTEREXP is set, every line of the file named by $1 that matches
    # that extended regular expression is deleted in place.  sed keeps the
    # unfiltered original next to it with a "-prefilter" suffix.
    [[ -z $FILTEREXP ]] && return

    db4 filtering $1
    sed -i-prefilter -r -e "\,$FILTEREXP,d" $1
}

hr_b () {
    # Produce human-readable byte counts.
    #
    # Takes a byte count and prints it with a unit suffix: values below 1024
    # are printed unchanged with "B"; larger values are scaled by powers of
    # 1024 and printed with two decimals (ZSHISM: floating point arithmetic).
    # A missing or empty argument is treated as 0.
    local bytes=${1:-0}
    local unit   # keep the loop variable out of the global scope

    if [[ $bytes -lt 1024 ]]; then
        echo ${bytes}B
        return
    fi

    typeset -F2 out
    out=$(( $bytes / 1024. ))
    for unit in KB MB GB TB PB EB; do
        (( out < 1024 )) && break
        # EB is the largest unit we know; never scale past it, otherwise the
        # value and the suffix would disagree.
        [[ $unit == EB ]] && break
        out=$(( out / 1024. ))
    done

    echo ${out}${unit}
}

hr_s () {
    # Produce human-readable second counts.
    #
    # Values below 60 are printed unchanged with an "s" suffix; otherwise the
    # value is converted to minutes ("m") or, from 60 minutes up, to hours
    # ("h"), each with two decimals (ZSHISM: floating point arithmetic).
    typeset -F2 val=$1

    if [[ $1 -lt 60 ]]; then
        echo ${1}s
    else
        val=$(( $1 / 60. ))
        if [[ $val -lt 60 ]]; then
            echo ${val}m
        else
            val=$(( $val / 60. ))
            echo ${val}h
        fi
    fi
}

parse_rsync_stats () {
    # Pull a few statistics out of an rsync --stats report.
    #
    # Takes the name of a file holding rsync's stdout.  Has no return value of
    # its own; instead it (re)sets these globals, each from the first matching
    # line of the log:
    #   rsfilestransferred      "Number of regular files transferred: 1,182"
    #   rsfilesize              "Total file size: 165,405,056,029 bytes"
    #   rstotalbytesreceived    "Total bytes received: 3,231,373,895"
    #   rstotalbytessent        "Total bytes sent: 1,249,286"
    #   rsfilelistgentime       "File list generation time: 0.217 seconds"
    #   rsfilelisttransfertime  "File list transfer time: 0.000 seconds"
    #   rstransferspeed         "sent 1,249,286 bytes  received 3,231,373,895 bytes  81,838,561.54 bytes/sec"
    #   rsspeedup               "total size is 165,405,056,029  speedup is 51.17"
    #
    # A value whose line is missing from the log ends up empty.  Thousands
    # separators are stripped from the byte counts and the transfer speed;
    # rsfilestransferred is stored exactly as rsync printed it.
    local log=$1

    # Start from a clean slate so nothing survives from an earlier call.
    unset rsfilestransferred rsfilesize rstotalbytesreceived rstotalbytessent
    unset rsfilelistgentime rsfilelisttransfertime rstransferspeed rsspeedup

    rsfilestransferred=$(awk '/^Number of regular files transferred:/ {print $6; exit}' $log)

    rsfilesize=$(awk '/^Total file size: (.*) bytes/ {print $4; exit}' $log)
    rsfilesize=${rsfilesize//,/}

    rstotalbytesreceived=$(awk '/^Total bytes received: (.*)/ {print $4; exit}' $log)
    rstotalbytesreceived=${rstotalbytesreceived//,/}

    rstotalbytessent=$(awk '/^Total bytes sent: (.*)/ {print $4; exit}' $log)
    rstotalbytessent=${rstotalbytessent//,/}

    rsfilelistgentime=$(awk '/^File list generation time: (.*) seconds/ {print $5; exit}' $log)

    rsfilelisttransfertime=$(awk '/^File list transfer time: (.*) seconds/ {print $5; exit}' $log)

    rstransferspeed=$(awk '/^sent .* bytes .* received .* bytes (.*) bytes\/sec$/ {print $7; exit}' $log)
    rstransferspeed=${rstransferspeed//,/}

    rsspeedup=$(awk '/^total size is .* speedup is (.*)$/ {print $7; exit}' $log)
}

do_rsync () {
    # The main function to do a transfer.
    #
    # Arguments:
    #   1) The source repository
    #   2) The destination directory
    #   3) The file holding the list of files to transfer (--files-from)
    #   4) The name of an array containing additional rsync options
    #
    # Globals read: RSYNC, RSYNCOPTS, RSYNCTIMEOUT, MAXRETRIES, REMOTE, VERBOSE.
    # On success parse_rsync_stats is run on the rsync output, which sets the
    # rs* statistics globals.
    #
    # This may sleep and retry when receiving specific errors.
    # Returns the rsync return code (where 0 indicates full success, but other
    # values may indicate a finished copy).

    local src=$1 dest=$2 files=$3 opts=$4
    local runcount=0
    local log=$(mktemp -p . rsync-out-XXXXXX.log)
    local errlog=$(mktemp -p . rsync-err-XXXXXX.log)
    local sleep rr
    local rsyncto="--timeout=$RSYNCTIMEOUT"

    local -a verboseopts flopts allopts

    # These add to the default rsync verbosity
    (( VERBOSE >= 7 )) && verboseopts+=(--progress)
    (( VERBOSE >= 5 )) && verboseopts+=(-v)
    (( VERBOSE >= 4 )) && verboseopts+=(-v)

    # Usually we won't want to see this.
    (( VERBOSE <= 3 )) && verboseopts+=(--no-motd)

    # NOTE(review): --progress is added unconditionally here, which makes the
    # VERBOSE >= 7 case above redundant.  This looks like a leftover local
    # tweak; confirm whether it is wanted.
    verboseopts+=(--progress)
    # verboseopts+=(--info=progress2)

    flopts=("--files-from=$files")
    # ZSHISM:  (P) flag to act on a variable by name.  Sadly, bash has broken
    # array handling.   bash 4.3 has local -n for this.  Older bash needs
    # hacks, or eval.  More info:
    # https://stackoverflow.com/questions/1063347/passing-arrays-as-parameters-in-bash
    # Or just use a freaking global.
    allopts=($rsyncto $RSYNCOPTS $verboseopts $flopts ${(P)opts} $src $dest)

    while true; do
        runcount=$(( runcount+1 ))

        # We have to do this separately because you can't redirect to /dev/stderr when running under sudo.
        # ZSHISM Teeing both stderr and stdout while keeping the return code is
        # easy in zsh with multios but seems to be terribly difficult under bash.
        db3 Calling $RSYNC $allopts
        logit c calling $RSYNC $allopts
        # XXX background, then save $!, write it to the session log and wait on it.
        if (( VERBOSE >= 5 )); then
            # Both streams go to the terminal as well as to the log files.
            $RSYNC $allopts 1>&1 2>&2 >> $log 2>> $errlog
        elif (( VERBOSE >= 2 )); then
            # Only stderr is additionally shown on the terminal.
            $RSYNC $allopts >> $log 2>&2 2>> $errlog
        else
            $RSYNC $allopts >> $log 2>> $errlog
        fi
        rr=$?

        # Check return values
        if (( rr == 0 )); then
            logit C rsync call completed succesfully with return $rr
            parse_rsync_stats $log
            return 0

        elif (( rr == 24 )); then
            # 24: Partial transfer due to vanished source files
            logit e "rsync says source files vanished."
            return $rr

        elif (( rr == 5 || rr == 10 || rr == 23 || rr == 30 || rr == 35 )); then
            # Most of these are retryable network issues
            #  5: Error starting client-server protocol
            # 10: Error in socket I/O
            # 30: Timeout in data send/receive
            # 35: Timeout waiting for daemon connection
            # 23: Partial transfer due to error
            #     (could be a file list problem)
            if [[ $rr -eq 23 && -f $errlog ]] ; then
                # See if we tried to transfer files that don't exist
                if grep -q '^rsync: link_stat .* failed: No such file or directory (2)$' $errlog; then
                    logit e "Looks like the file list is outdated."
                    (>&2 echo "Looks like the file list is outdated.")
                    [[ -f $errlog ]] && (>&2 cat $errlog)
                    return $rr
                fi
            fi

            # It's not one of those special 23 errors, so we may retry.  First
            # see if we've already tried too many times.
            if (( runcount >= MAXRETRIES )); then
                logit E rsync from $REMOTE failed
                (>&2 echo "Could not sync from $REMOTE")
                [[ -f $errlog ]] && (>&2 cat $errlog)
                return $rr
            fi

            # Then sleep for a bit; the delay doubles with every attempt.
            sleep=$(( 2 ** runcount ))
            logit e "rsync returned $rr (retryable), sleeping for $sleep"
            db2 rsync failed: sleeping for $sleep
            sleep $sleep
            continue
        fi

        # We only get here if we got a return we didn't expect.  Like the
        # other failure paths, send the rsync error log to stderr (it used to
        # go to stdout here because the redirection covered only the echo).
        logit E "rsync returned $rr, which was not expected."
        (>&2 echo "rsync returned $rr, which was not expected.")
        [[ -f $errlog ]] && (>&2 cat $errlog)
        return $rr
    done
}

usage () {
    # Print the command-line help text to stdout.
    cat <<END
Usage: quick-fedora-mirror [OPTION]

Update a local mirror of Fedora content via rsync and perform a mirrormanager
checkin.

Requires a configuration file; will search for this file in the following
locations:

  The path provided by -c/--config.
  /etc/quick-fedora-mirror/quick-fedora-mirror.conf
  /etc/quick-fedora-mirror.conf
  ~/.config/quick-fedora-mirror.conf
  quick-fedora-mirror.conf in the same directory as this script.
  quick-fedora-mirror.conf in the current directory.

Options:
  -a, --alwayscheck     Always compare local content with file lists, even if
                        file lists have not changed.
  -c, --config PATH     Specify configuration file instead of searching.
  -d LEVEL              Specify debugging level (0-9).
  -h, --help            This message.
  -n, --dry-run         Show what would be transferred, but do not actually
                        transfer, delete or check in.
  -N, --transfer-only   Download, but do not delete or check in.
  -t TIMESTAMP          Use TIMESTAMP (in seconds since epoch) as the last
                        mirror time.
  -T, --backdate TIME   Use TIME (a human readable date) as the last mirror
                        time.
END
  # Options accepted by parse_args but deliberately left out of the help text:
  #--checkin-only        Force a mirrormanager checkin for all modules, but do
  #                      not transfer, delete or update the timestamp.
  #--dir-times           Update all directory times. (Not implemented.)
  #--refresh REGEX       Re-transfer all paths matching REGEX.  (Not implemented.)
}

parse_args () {
    # Process the command-line arguments, setting all sorts of globals:
    # alwayscheck, cfgfile, verboseopt, rsyncdryrun, skipdelete,
    # skiptimestamp, skiptransfer, skipcheckin, forcecheckin, backdate,
    # updatealldirtimes, refreshpattern, dumpmmcheckin and noparanoia.
    #
    # Exits with status 1 on -h/--help (after printing the usage), on an
    # unreadable configuration file, on an option that lacks its required
    # argument, on an unparseable --backdate time and on an unrecognized
    # argument.
    local opt

    while (( $# > 0 )); do
        opt=$1

        # Options that consume the following word must actually have one.
        case $opt in
            -c | --config | -d | -t | -T | --backdate | --refresh | --dump-mm-checkin)
                if (( $# < 2 )); then
                    (>&2 echo "Option $opt requires an argument.")
                    exit 1
                fi
                ;;
        esac

        case $opt in
            -a | --alwayscheck)
                alwayscheck=1
                ;;
            -c | --config)
                cfgfile=$2
                shift
                if [[ ! -r $cfgfile ]]; then
                    (>&2 echo Cannot read $cfgfile)
                    exit 1
                fi
                ;;
            -d) # Debugging
                verboseopt=$2
                shift
                ;;
            -h | --help)
                usage
                exit 1
                ;;
            -n | --dry-run)
                rsyncdryrun=1
                skipdelete=1
                skiptimestamp=1
                ;;
            -N | --transfer-only)
                skipdelete=1
                skiptimestamp=1
                ;;
            -t )
                backdate=$2
                alwayscheck=1
                shift
                ;;
            -T | --backdate)
                # Convert the human readable time to seconds since the epoch.
                if ! backdate=$(date -d "$2" +%s); then
                    (>&2 echo "Cannot parse the time \"$2\".")
                    exit 1
                fi
                alwayscheck=1
                shift
                ;;
            --checkin-only)
                skiptransfer=1
                skipdelete=1
                skiptimestamp=1
                forcecheckin=1
                ;;
            --dir-times)
                updatealldirtimes=1
                alwayscheck=1
                ;;
            --refresh)
                skipdelete=1
                skiptimestamp=1
                skipcheckin=1
                refreshpattern=$2
                shift
                ;;
            --dump-mm-checkin)
                # Just for the test suite; dump the raw payload to the given
                # filename with the module name appended.
                dumpmmcheckin=$2
                shift
                ;;
            --no-paranoia)
                # Don't backdate the last mirrortime
                noparanoia=1
                ;;
            *)
                (>&2 echo "Unrecognized argument: $opt")
                exit 1
                ;;
        esac
        shift
    done
}

read_config () {
    # Locate and source the configuration file, then apply command-line
    # overrides and derive a few settings.
    #
    # The first readable file from this list is used and remembered in
    # $cfgfile:
    #   $cfgfile (from -c/--config), the two /etc locations, ~/.config, the
    #   directory holding this script, and the current directory.
    #
    # Exits with status 1 if DESTD or TIMEFILE is still undefined afterwards.

    # As a convenience, make sure $HOSTNAME is set
    if [[ -z "$HOSTNAME" ]]; then
        HOSTNAME=$(hostname)
    fi

    # ZSHISM: inside a function $0 is normally the function name, not the
    # script, so prefer ZSH_ARGZERO (the script name) when the shell has it.
    local scriptdir=$(dirname "${ZSH_ARGZERO:-$0}")

    # Load up the configuration file from any of a number of locations
    local file
    for file in \
        $cfgfile \
        /etc/quick-fedora-mirror/quick-fedora-mirror.conf \
        /etc/quick-fedora-mirror.conf \
        ~/.config/quick-fedora-mirror.conf \
        $scriptdir/quick-fedora-mirror.conf \
        ./quick-fedora-mirror.conf; \
    do
        if [[ -r $file ]]; then
            source $file
            cfgfile=$file
            break
        fi
    done

    # Override some settings with previously parsed command-line options
    [[ -n $verboseopt ]] && VERBOSE=$verboseopt

    # Check that the required parameters were provided.  Report everything
    # that is missing before giving up; nothing useful can be done without
    # them.
    local missing=0
    if [[ -z $DESTD ]]; then
        (>&2 echo "You must define DESTD in your configuration file ($cfgfile).")
        missing=1
    fi
    if [[ -z $TIMEFILE ]]; then
        (>&2 echo "You must define TIMEFILE in your configuration file ($cfgfile).")
        missing=1
    fi
    (( missing )) && exit 1

    # Set some other general variables based on the value of provided
    # configuration settings
    [[ -z $CHECKIN_SITE ]] && skipcheckin=1
    [[ -z $MAXCHECKINRETRIES ]] && MAXCHECKINRETRIES=$MAXRETRIES
    return 0
}

set_default_vars () {
    # Establish the built-in defaults.  Meant to run before the configuration
    # file is loaded, so that the file can override any of these.

    # ZSHISM (global associative arrays)
    typeset -g -A MODULEMAPPING MIRRORMANAGERMAPPING MIRRORMANAGERMODULEMAPPING

    # rsync module name -> directory under fedora-buffet
    MODULEMAPPING=(
        fedora-alt          alt
        fedora-archive      archive
        fedora-enchilada    fedora
        fedora-epel         epel
        fedora-secondary    fedora-secondary
    )

    # rsync module name -> key used in the mirrormanager checkin payload
    MIRRORMANAGERMAPPING=(
        fedora-alt          'fedora other'
        fedora-archive      'fedora archive'
        fedora-enchilada    'fedora linux'
        fedora-epel         'fedora epel'
        fedora-secondary    'fedora secondary arches'
    )

    # Mirrormanager uses the same directory prefixes as MODULEMAPPING, except
    # for a weird one for "fedora-enchilada": start from a copy and patch
    # that single entry.
    MIRRORMANAGERMODULEMAPPING=(${(kv)MODULEMAPPING})
    MIRRORMANAGERMODULEMAPPING[fedora-enchilada]="fedora/linux"

    # Default arguments; override in quick-fedora-mirror.conf
    VERBOSE=0
    LOGITEMS=aeElrR

    # Required settings; the configuration file must supply these.
    DESTD=
    TIMEFILE=

    CHECKIN_HOST=$(hostname)
    CURL=/usr/bin/curl
    FILELIST='fullfiletimelist-$mdir'
    EXTRAFILES=(fullfilelist imagelist-\$mdir)
    MIRRORMANAGER=https://admin.fedoraproject.org/mirrormanager/xmlrpc
    REMOTE=rsync://dl.fedoraproject.org
    RSYNC=/usr/bin/rsync
    RSYNCTIMEOUT=$((60 * 10))
    WARNDELAY=$((60 * 60 * 24))
    MAXRETRIES=10

    # --preallocate is only used with the 3.1 series, and not with 3.1.3,
    # which has broken support for --preallocate and -S (--sparse) together.
    rsyncver=$(rsync --version | head -1 | awk '{print $3}')
    RSYNCOPTS=(-aSH -f 'R .~tmp~' --stats)
    if [[ $rsyncver == 3.1* && $rsyncver != 3.1.3 ]]; then
        RSYNCOPTS+=(--preallocate)
    fi
    RSYNCOPTS+=(--delay-updates --out-format='@ %i %10l  %n%L')

    MASTERMODULE=fedora-buffet
    MODULES=(fedora-enchilada fedora-epel)
}

check_file_list_version () {
    # Look at the file list to see if we can handle it.
    #
    # Takes the file list name.
    # Returns 0 if we can handle it; otherwise prints a message to stderr and
    # exits the script with status 1.  That covers a missing file, a version
    # newer than we support, and a version that cannot be parsed.
    local max_fl_version=3
    local fl=$1
    # Kept in a variable so the pattern is treated as a regex by =~.
    local numeric='^[0-9]+$'
    local flversion

    if [[ ! -f $fl ]]; then
        (>&2 echo "Cannot check file list \"$fl\".  Exiting.")
        exit 1
    fi

    # The version is whatever follows the [Version] header, up to the first
    # empty line.
    flversion=$(awk -F '\t' '/^\[Version/ {s=1; next} /^$/ {exit} {if (s) print $0}' < $fl)

    # Insist on a plain number: an empty or non-numeric string would evaluate
    # to 0 in the arithmetic comparison and slip through as "supported".
    if [[ $flversion =~ $numeric && $flversion -le $max_fl_version ]]; then
        return 0
    fi

    # Either it is too new or we just can't parse it, so quit.
    (>&2 echo "File list from the mirror cannot be processed by this script.  Exiting.")
    exit 1
}

clean_all_transfer_temps () {
    # Delete temporary transfer files, but not any log files.
    #
    # Removes *.old and every file named PREFIX-* in the current directory,
    # where PREFIX is one of the module directory names from MODULEMAPPING or
    # one of the fixed names below.
    # Be sure to add any extra generated temporaries here.
    # XXX Is it OK that this doesn't delete the file lists?  They will just get
    # copied over.
    local prefix   # keep the loop variable out of the global scope

    rm -f *.old
    # ZSHISM: (v) expands to the values of the associative array.
    for prefix in ${(v)MODULEMAPPING} alldirs allfiles allfilesizes changedpaths \
        changes checksumfailed checksums deletedirs deletefiles flist \
        localdirs localfiles localfilesizes localfulllist master missingdirs \
        missingfiles newdirs newfiles staletmpdirs staletmpfiles \
        transferlist updatedfiles updatetimestamps; do
        rm -f $prefix-*
    done
}

clean_single_rsync_temp () {
    # Deal with a single leftover rsync temporary file in the current
    # directory, normally by moving it one directory up in the hierarchy.
    #
    # Background: rsync (at least version 3.2.3) appears to have some sort of
    # bug which causes it to fail to sync some files.  The working theory is
    # that this happens for small files which need timestamp changes; it has
    # been observed for various .treeinfo (max 1550b), .discinfo (46b max) and
    # a README.html (479b) file.  When this manifests, a run never completes:
    # rsync fails to transfer the file and leaves it in the .~tmp~ directory,
    # we move it back, and it fails to transfer again.
    #
    # Workaround, active when RSYNC_PARTIAL_DIR_BUG is set: "small" files
    # (under 2kb) are deleted rather than moved.  The number of problem files
    # appears to be small and small files transfer quickly, so this should
    # have little effect on the overall transfer.
    #
    # Decision table:
    #   workaround on and file < 2048 bytes  -> delete it
    #   no file of that name one level up    -> move it up
    #   workaround on (name exists above)    -> delete it
    #   otherwise                            -> leave it alone
    local file=$1
    local size=$(stat -c '%s' $1)

    db3 "XXXXXX $file $size"

    if [[ -n $RSYNC_PARTIAL_DIR_BUG && "$size" -lt 2048 ]]; then
        logit A Deleting small previous download $file
        db3 Deleting small previous download: $file
        rm -f $file
        return
    fi

    if [[ ! -f ../$file ]]; then
        logit A Saving previous download $file
        db3 Saving previous download: $file
        mv $file ..
        return
    fi

    [[ -n $RSYNC_PARTIAL_DIR_BUG ]] || return 0

    logit A Deleting partial download $file
    db3 Deleting partial download: $file
    rm -f $file
}

clean_stale_rsync_temps () {
    # Clean up temporaries left over from a previous aborted rsync run.
    #
    # Takes the module name.  Reads the list of stale temporary directories
    # (relative to $DESTD) from staletmpdirs-MODULE in the current directory
    # and hands every file found in them to clean_single_rsync_temp.
    local mod=$1
    local dir file   # keep the loop variables out of the global scope

    db2 Possibly aborted rsync run.  Cleaning up.
    logit a "cleaning up previous aborted run: $(wc -l < staletmpfiles-$mod) file(s)."

    # Move the files in those tmpdirs a level up if a file with the same name
    # doesn't exist.  We don't update the file lists because we want rsync to
    # re-check those files and possibly fix up the permissions.  The dirs will
    # be cleaned up later.
    #
    # Note that this _may_ leave a few files around which should not be there,
    # and of course the content (and even partial content) will be visible
    # before it technically should be.  But that's better than nothing getting
    # done because aborted runs cause an endless buildup of partial transfers.
    # Extra files, if present, will of course be cleaned up at the next run and
    # rsync will handle completing any partial downloads.
    #
    # XXX We could do better by comparing the stale files against the
    # to-be-transferred list and only move things which are going to be
    # downloaded in the run, but it's probably not worth it.
    for dir in $(cat staletmpdirs-$mod); do
        # If we cannot enter the directory, skip it.  Carrying on would run
        # the cleanup on the files of whatever directory we are in right now
        # and then pop a directory that was never pushed.
        if ! pushd "$DESTD/$dir"; then
            logit e "Cannot enter stale temporary directory $DESTD/$dir; skipping."
            continue
        fi
        for file in *; do
            clean_single_rsync_temp $file
        done
        popd
        # It may be useful to clean up the temp directory, but in many cases
        # rsync will just recreate it and in any case it really should now be
        # empty.
        # rmdir $DESTD/$dir
    done
}

fetch_file_lists () {
    # Download the file list for each configured module into the current
    # directory.
    #
    # Sets the global associative array "checksums", which holds the sha1
    # checksum of the file list of each module that exists on the client at
    # the beginning of the transfer.  Modules without a local file list get no
    # entry.
    #
    # Calls finish (and so exits the script) if the download fails; an
    # unsupported file list makes check_file_list_version exit.

    local extra flname module rsyncreturn

    sep
    logit o Remote file list download start
    db2 Downloading file lists
    # ZSHISM (declare associative array)
    typeset -g -A checksums
    checksums=()
    for module in $MODULES; do
        # ZSHISM? (associative array indexing)
        moduledir=$MODULEMAPPING[$module]
        mkdir $moduledir
        # FILELIST contains a literal '$mdir' placeholder for the module
        # directory name.
        flname=${FILELIST/'$mdir'/$moduledir}
        if [[ -f $DESTD/$moduledir/$flname ]]; then
            # Start from the list we already have and keep a hard link to it
            # under a .old name (presumably so the pre-download version stays
            # available after rsync replaces the file -- confirm against the
            # code that reads *.old).
            cp -p $DESTD/$moduledir/$flname $moduledir
            ln $moduledir/$flname $moduledir/$flname.old
            # ZSHISM (assign assoc. array value)
            checksums[$module]=$(sha1sum $DESTD/$moduledir/$flname | cut -d' ' -f1)
        fi

        # Queue the list for the single rsync call below.
        echo $moduledir/$flname >> filelist-transferlist
    done

    # The name of this array is passed to do_rsync, which expands it itself.
    extra=(--no-dirs --relative --compress)
    do_rsync $REMOTE/$MASTERMODULE/ . filelist-transferlist extra
    rsyncreturn=$?
    if [[ $rsyncreturn -ne 0 ]]; then
        (>&2 echo "rsync finished with nonzero exit status.\nCould not retrieve file lists.")
        logit E Aborting due to rsync failure while retrieving file lists
        finish 1
    fi

    # Log very basic stats
    logit s "File list download: $(hr_b $rstotalbytesreceived) received, $(hr_b $rstransferspeed)/s"

    # Check that we can handle the downloaded lists
    for module in $MODULES; do
        moduledir=$MODULEMAPPING[$module]
        flname=${FILELIST/'$mdir'/$moduledir}
        check_file_list_version $moduledir/$flname
    done

    # rsync won't transfer those files to the current directory, so move them and
    # clean up.
    mv */* .
    rmdir * 2> /dev/null
    logit o Remote file list download: end
}

checkin_build_inner_payload () {
    # Build the inner json payload for a mirrormanager checkin.
    #
    # Arguments:
    #   1) The module name
    #   2) The name of the output file (overwritten)
    #   3) The host name to check in as
    #
    # The directories are read from alldirs-MODULE in the current directory.
    local module=$1
    local mm=$2
    local checkinhost=$3

    local moduledir=$MIRRORMANAGERMODULEMAPPING[$module]
    local mmcheckin=$MIRRORMANAGERMAPPING[$module]

    {
        cat <<EOF
{
    "$mmcheckin": {
        "dirtree": {
EOF

        # One entry per directory.  MM doesn't want the module directory
        # name, so it is stripped from each path.
        for l in $(cat alldirs-$module); do
            echo "            \"${l/$moduledir\/}\": {},"
        done

        # The data sent by report_mirror always includes a blank directory;
        # adding it by hand here conveniently means we don't have to deal
        # with the trailing comma.  After that come the various parameters
        # mirrormanager wants.
        cat <<EOF
            "": {}
        },
        "enabled": "1"
    },
    "global": {
        "enabled": "1",
        "server": "$MIRRORMANAGER"
    },
    "host": {
        "enabled": "1",
        "name": "$checkinhost"
    },
    "site": {
        "enabled": "1",
        "name": "$CHECKIN_SITE",
        "password": "$CHECKIN_PASSWORD"
    },
    "stats": {},
    "version": 0
}
EOF
    } >$mm
}

checkin_encode_inner_payload () {
    # Compress and encode the inner payload.
    #
    # Arguments:
    #   1) The input file name.  bzip2 replaces it with its compressed form,
    #      which is removed again at the end, so the input file is consumed.
    #   2) The output file name.
    #
    # The output is the bzip2-compressed input, base64 encoded on a single
    # line, with '+' replaced by '-' and '/' replaced by '_'.

    local in=$1
    local out=$2

    # The xmlrpc endpoint requires that the payload be bzip2 compressed.
    # Compress our own input file; this used to rely on the caller happening
    # to keep the same name in a variable called "mm".
    bzip2 "$in"

    # base64 encode
    base64 --wrap=0 "$in.bz2" > "$in.bz2.b64"

    # change '+' to '-'  and '/' to '_'
    tr '+/' '-_' < "$in.bz2.b64" > "$out"

    rm "$in.bz2" "$in.bz2.b64"
}

checkin_build_outer_payload () {
    # Wrap the encoded payload in just the right xml.
    #
    # Arguments:
    #   1) The input file holding the encoded payload
    #   2) The output file; the xmlrpc "checkin" method call is appended to it

    local in=$1
    local out=$2

    {
        cat <<EOF
<?xml version='1.0'?>
<methodCall>
<methodName>checkin</methodName>
<params>
<param>
EOF
        # The payload goes directly between the tags, with no newline in
        # front of it.
        printf '%s' "<value><string>"
        cat <$in
        cat <<EOF
</string></value>
</param>
</params>
</methodCall>
EOF
    } >>$out
}

checkin_upload_payload () {
    # Now actually upload the payload.
    #
    # Arguments:
    #   1) The file holding the complete xmlrpc request
    #   2) The module name (used in messages only)
    #
    # Returns 0 if mirrormanager reported success, 1 if the reply did not look
    # successful and 2 if curl itself failed.  The raw reply is left in
    # curl.out and a tag-stripped copy in curl.noxml in the current directory.
    local payload=$1
    local module=$2
    local -a curlopts
    local curlret

    logit M "Making xmlrpc call for $module"

    # Be chatty only at high debug levels.
    if (( VERBOSE >= 4 )); then
        curlopts=(--verbose)
    else
        curlopts=(--silent)
    fi

    # Use --http1.1 when the configured curl supports it, at every verbosity
    # level.  Ask for "--help all": newer curl versions list only the most
    # important options for a plain --help.
    if $CURL --help all 2>/dev/null | grep -q -e 'http1\.1'; then
        curlopts+=(--http1.1)
    fi

    # We have to remove the Expect: header that curl sends but which
    # mirrormanager cannot handle.  The file to send is the one we were
    # given, not whatever the caller keeps in a variable of its own.
    db3 "$CURL ${curlopts[*]} -H \"Expect:\" -H \"Content-Type: text/xml\" --data @$payload $MIRRORMANAGER"
    $CURL "${curlopts[@]}" -H "Expect:" -H "Content-Type: text/xml" --data "@$payload" $MIRRORMANAGER > curl.out
    curlret=$?
    if [[ $curlret -ne 0 ]]; then
        logit e "Checkin failure: curl returned $curlret"
        (>&2 echo "Checkin failure: curl returned $curlret")
        return 2
    fi

    # Parse the output to see if we got any useful return
    # The sed call attempts to strip xml tags.  Easily fooled but we don't expect
    # any complicated return from mirrormanager.
    sed -e 's/<[^>]*>//g' curl.out > curl.noxml

    if ! grep -q -i successful curl.noxml; then
        db1 "Mirrormanager checkin for $module did not appear to succeed."
        logit e "Doesn't look like we got a good return from mirrormanager."
        logit e $(cat curl.noxml)
        return 1
    fi
    return 0
}

checkin_module () {
    # Perform the mirrormanager checkin for a particular module.
    #
    # Takes the module name.  Needs alldirs-MODULE in the current directory;
    # builds the payload files mirrormanager-payload-MODULE(.enc) and
    # mirrormanager-xmlrpc-MODULE there and uploads the latter, retrying up to
    # MAXCHECKINRETRIES times (falling back to MAXRETRIES if that is unset).
    # When $dumpmmcheckin is set the request is copied to
    # $dumpmmcheckin-MODULE instead of being uploaded.
    local module=$1

    local mm=mirrormanager-payload-$module
    local mx=mirrormanager-xmlrpc-$module
    local moduledir=$MODULEMAPPING[$module]

    if [[ ! -f alldirs-$module ]]; then
        # We were asked to check in a module that we hadn't previously
        # processed, which should not happen.
        logit E "Cannot perform checkin for $module; no directory list exists."
        return
    fi

    # Determine the "mirrormanager hostname" to use for this checkin.
    # Different modules can be set up under different "hosts" in mirrormanager,
    # even though these might all be on the same machine.  This works around
    # problems mirrormanager has when crawling machines which mirror
    # everything.  A variable named CHECKIN_HOST_<module>, with dashes in the
    # module name turned into underscores, overrides CHECKIN_HOST.
    # ZSHISM: This uses "(P)"; the equivalent in bash is "!".
    local checkinhost=$CHECKIN_HOST
    local hostspecificvar=CHECKIN_HOST_${module//-/_}
    if [[ -n ${(P)hostspecificvar} ]]; then
        checkinhost=${(P)hostspecificvar}
    fi

    db3 "Performing mirrormanager checkin for $module (in $moduledir) as $checkinhost"
    logit M "Processing $module (in $moduledir) as $checkinhost"

    # Construct the checkin payload
    checkin_build_inner_payload $module $mm $checkinhost
    checkin_encode_inner_payload $mm $mm.enc
    checkin_build_outer_payload $mm.enc $mx

    # For the test suite, just dump the checkin info and bail
    if [[ -n $dumpmmcheckin ]]; then
        cat $mx > $dumpmmcheckin-$module
        return
    fi

    # Try to check in until we've retried too often.  The limit is the
    # checkin-specific one, which is also what the failure message reports.
    local maxtries=${MAXCHECKINRETRIES:-${MAXRETRIES:-1}}
    local retries=1
    while true; do
        if checkin_upload_payload $mx $module; then
            break
        fi

        if (( retries >= maxtries )); then
            logit E "Could not complete checkin after $maxtries tries."
            break
        fi

        logit e "Checkin attempt $retries failed.  Will retry."
        retries=$(( retries +1 ))
        sleep $(( 2*retries ))
    done

    logit M "Processing $module: end"
}

awk_extract_file_list () {
    # Copy the entries of the [Files] section of a file list to another file.
    # Takes:
    #   file list to read
    #   output file (optional; defaults to the input name with .flist added)
    # Output starts after the line beginning with "[Files" and stops at the
    # first empty line which follows it.
    local src=$1
    local dest=${2:-$src.flist}

    awk '
        /^\[Files/      { infiles = 1; next }
        infiles && /^$/ { exit }
        infiles         { print }
    ' < $src > $dest
    retcheck $? awk
}

awk_extract_paths_from_file_list_restricted () {
    # Print "<module dir>/<path>" for every entry of a tab-separated list,
    # whatever the type of the entry.
    # Takes:
    #   input file (tab separated; the path is the fourth field)
    #   output file
    #   directory of the current module
    local inf=$1
    local outf=$2
    local mdir=$3

    # We can just ignore the type and permissions completely.
    # The module directory is handed to awk as a variable rather than pasted
    # into the program text, so a quote character in it cannot break the
    # program.
    awk -F '\t' -v mdir="$mdir" '{ print mdir "/" $4 }' < "$inf" > "$outf"
    retcheck $? awk
}

awk_extract_paths_from_file_list_norestricted () {
    # Print "<module dir>/<path>" for the entries of a tab-separated list
    # whose type (second field) is exactly "d", "f" or "l".  Entries with any
    # other type, such as the flagged "d-" or "f*" forms, are left out.
    # Takes:
    #   input file (tab separated; the path is the fourth field)
    #   output file
    #   directory of the current module
    local inf=$1
    local outf=$2
    local mdir=$3

    # The module directory is handed to awk as a variable rather than pasted
    # into the program text, so a quote character in it cannot break the
    # program.
    awk -F '\t' -v mdir="$mdir" '
        $2 == "d" || $2 == "f" || $2 == "l" { print mdir "/" $4 }
    ' < "$inf" > "$outf"
    retcheck $? awk
}

awk_extract_newer_dirs_restricted () {
    # List the directories in the [Files] section of a file list, including
    # those flagged "d-" and "d*", as "<module dir>/<path>".
    # Takes:
    #   file list to read
    #   output file
    #   directory of the current module
    #   timestamp (optional, default 0); only entries whose first field is
    #     at least this value are listed
    local inf=$1
    local outf=$2
    local mdir=$3

    local last=0
    [[ -n $4 ]] && last=$4

    # Shell values are handed to awk with -v instead of being pasted into the
    # program text.  The section header is matched at the start of the line
    # only, so an entry whose path happens to contain "[Files" is not
    # mistaken for a header and dropped.
    awk -F '\t' -v mdir="$mdir" -v last="$last" '
        /^\[Files/ { s = 1; next }
        /^$/       { s = 0; next }
        s && $1 >= last && ($2 == "d" || $2 == "d-" || $2 == "d*") {
            print mdir "/" $4
        }
    ' < "$inf" > "$outf"
    retcheck $? awk
}

awk_extract_newer_dirs_no_restricted () {
    # List the directories in the [Files] section of a file list as
    # "<module dir>/<path>".  Only entries of type exactly "d" are listed;
    # the flagged "d-" and "d*" forms are left out.
    # Takes:
    #   file list to read
    #   output file
    #   directory of the current module
    #   timestamp (optional, default 0); only entries whose first field is
    #     at least this value are listed
    local inf=$1
    local outf=$2
    local mdir=$3

    local last=0
    [[ -n $4 ]] && last=$4

    # Shell values are handed to awk with -v instead of being pasted into the
    # program text.  The section header is matched at the start of the line
    # only, so an entry whose path happens to contain "[Files" is not
    # mistaken for a header and dropped.
    awk -F '\t' -v mdir="$mdir" -v last="$last" '
        /^\[Files/ { s = 1; next }
        /^$/       { s = 0; next }
        s && $1 >= last && $2 == "d" {
            print mdir "/" $4
        }
    ' < "$inf" > "$outf"
    retcheck $? awk
}

awk_extract_newer_files_restricted () {
    # List the files and symlinks in the [Files] section of a file list,
    # including those flagged with "-" or "*", one per line as
    # "<module dir>/<path><TAB><third field>".
    # Takes:
    #   file list to read
    #   output file
    #   directory of the current module
    #   timestamp (optional, default 0); only entries whose first field is
    #     at least this value are listed
    local inf=$1
    local outf=$2
    local mdir=$3

    local last=0
    [[ -n $4 ]] && last=$4

    # Shell values are handed to awk with -v instead of being pasted into the
    # program text.  The section header is matched at the start of the line
    # only, so an entry whose path happens to contain "[Files" is not
    # mistaken for a header and dropped.
    awk -F '\t' -v mdir="$mdir" -v last="$last" '
        /^\[Files/ { s = 1; next }
        /^$/       { s = 0; next }
        s && $1 >= last && ($2 == "f" || $2 == "f-" || $2 == "f*" \
                         || $2 == "l" || $2 == "l-" || $2 == "l*") {
            print mdir "/" $4 "\t" $3
        }
    ' "$inf" > "$outf"
    retcheck $? awk
}

awk_extract_newer_files_no_restricted () {
    # List the files and symlinks in the [Files] section of a file list, one
    # per line as "<module dir>/<path><TAB><third field>".  Only entries of
    # type exactly "f" or "l" are listed; the forms flagged with "-" or "*"
    # are left out.
    # Takes:
    #   file list to read
    #   output file
    #   directory of the current module
    #   timestamp (optional, default 0); only entries whose first field is
    #     at least this value are listed
    local inf=$1
    local outf=$2
    local mdir=$3

    local last=0
    [[ -n $4 ]] && last=$4

    # Shell values are handed to awk with -v instead of being pasted into the
    # program text.  The section header is matched at the start of the line
    # only, so an entry whose path happens to contain "[Files" is not
    # mistaken for a header and dropped.
    awk -F '\t' -v mdir="$mdir" -v last="$last" '
        /^\[Files/ { s = 1; next }
        /^$/       { s = 0; next }
        s && $1 >= last && ($2 == "f" || $2 == "l") {
            print mdir "/" $4 "\t" $3
        }
    ' "$inf" > "$outf"
    retcheck $? awk
}

process_file_list_diff () {
    # Work out which entries of a module's file list differ from the previous
    # copy of that list ($fl.old).
    # Takes:
    #   file list to read
    #   module name
    #   directory of the current module
    # Leaves the filtered result in changedpaths-$mod; the intermediate
    # flist-old-$mod, flist-new-$mod (plus their .sorted versions) and
    # changes-$mod files are left in place as well.

    local fl=$1 mod=$2 mdir=$3
    local side
    local extractor=awk_extract_paths_from_file_list_norestricted

    logit l "Generating database diff start: $mod"

    # Pull the [Files] section out of both generations of the list...
    awk_extract_file_list $fl.old flist-old-$mod
    awk_extract_file_list $fl flist-new-$mod

    # ...and order both by path so that they can be compared.
    for side in old new; do
        sort -t$'\t' -k4 flist-$side-$mod > flist-$side-$mod.sorted
    done

    # Keep only the new-side lines of every group which differs.
    diff --changed-group-format='%>' --unchanged-group-format='' \
        flist-old-$mod.sorted flist-new-$mod.sorted > changes-$mod

    # Reduce the changed entries to paths; which entry types count depends
    # on PREBITFLIP.
    [[ -n $PREBITFLIP ]] && extractor=awk_extract_paths_from_file_list_restricted
    $extractor changes-$mod changedpaths-$mod $mdir

    # Filtering has to happen here, otherwise files we don't want to
    # transfer would appear to have changed.
    filter changedpaths-$mod

    logit l "Generating database diff end: $mod"
}

compute_file_list_stats () {
    # Calculate and log counts of the various generated lists
    # Takes:
    #   module name
    # Each count is the number of lines in the matching <list>-$mod file in
    # the current directory; a list which does not exist counts as 0.
    local mod=$1
    local i
    local -A counts
    local -a stats
    stats=(allfiles alldirs newfiles newdirs changedpaths localfiles \
        localdirs deletefiles deletedirs missingfiles missingdirs \
        updatedfiles updatetimestamps checksumfailed)

    # transferlist is counted along with the rest so that a missing file is
    # handled the same way.  Reading through a redirect keeps wc from adding
    # the file name to its output.
    for i in "${stats[@]}" transferlist; do
        counts[$i]=0
        [[ -f $i-$mod ]] && counts[$i]=$(wc -l < "$i-$mod")
    done

    counts[totaltransfer]=${counts[transferlist]}

    # Until the rest of the code is fixed up
    counts[extrafiles]=${counts[deletefiles]}
    counts[extradirs]=${counts[deletedirs]}
    counts[sizechanged]=${counts[updatedfiles]}
    counts[allserverfiles]=${counts[allfiles]}
    counts[allserverdirs]=${counts[alldirs]}
    counts[newserverfiles]=${counts[newfiles]}
    counts[newserverdirs]=${counts[newdirs]}

    # Previously these two were printed before generating the local file lists
    db2f "Total on server:       %7d files, %4d dirs.\n" ${counts[allserverfiles]} ${counts[allserverdirs]}
    db2f "New on server:         %7d files, %4d dirs.\n" ${counts[newserverfiles]} ${counts[newserverdirs]}

    db2f "Total on client:       %7d files, %4d dirs.\n" ${counts[localfiles]} ${counts[localdirs]}
    db2f "Not present on server: %7d files, %4d dirs.\n" ${counts[extrafiles]} ${counts[extradirs]}
    db2f "Missing on client:     %7d files, %4d dirs.\n" ${counts[missingfiles]} ${counts[missingdirs]}
    db2f "Size Changed:          %7d files.\n" ${counts[sizechanged]}
    db2f "Timestamps to restore: %7d files.\n" ${counts[updatetimestamps]}
    db2f "Checksum Failed:       %7d files.\n" ${counts[checksumfailed]}
    db2f "Filelist changes:      %7d paths.\n" ${counts[changedpaths]}
    db2f "Total to transfer:     %7d paths.\n" ${counts[totaltransfer]}

    logit L "Counts for $mod: Svr:${counts[allserverfiles]}/${counts[allserverdirs]} Loc:${counts[localfiles]}/${counts[localdirs]} Diff:${counts[changedpaths]} New:${counts[newserverfiles]}/${counts[newserverdirs]} Xtra:${counts[extrafiles]}/${counts[extradirs]} Miss:${counts[missingfiles]}/${counts[missingdirs]} Size:${counts[sizechanged]} Csum:${counts[checksumfailed]} Dtim:${counts[updatetimestamps]}"

}

generate_local_file_list () {
    # Generate lists of what the client has.
    # Takes:
    #   module name
    #   directory of the current module (relative to $DESTD)
    # Writes, in the current directory (expected to be $tempd):
    #   localfulllist-$mod    type, path and size of everything found
    #   localdirs-$mod        paths of directories
    #   localfiles-$mod       paths of files and symlinks
    #   localfilesizes-$mod   path and size of files and symlinks
    #   staletmpdirs-$mod, staletmpfiles-$mod
    #                         entries containing ".~tmp~" (not written when
    #                         NORSYNCRECOVERY is set)
    local mod=$1
    local mdir=$2

    db3 Generating local file/dir list
    logit l "Generating local file list start: $mod"

    # Traverse the filesystem only once.
    # The module directory itself is the starting point (-mindepth 1 keeps it
    # out of the output) rather than the glob $mdir/*: with null globbing on,
    # that glob vanishes for an empty directory and find would then list the
    # whole of $DESTD.  -H keeps a symlinked module directory working.
    # The subshell means the working directory is untouched, and nothing is
    # listed at all if $DESTD cannot be entered.
    ( cd "$DESTD" && find -H "$mdir" -mindepth 1 -printf '%y\t%p\t%s\n' ) \
        > "$tempd/localfulllist-$mod"

    # Now extract file and dir lists from that
    awk -F '\t' '{if ($1 == "d") {print $2}}' < localfulllist-$mod > localdirs-$mod
    awk -F '\t' '{if ($1 == "f" || $1 == "l") {print $2}}' < localfulllist-$mod > localfiles-$mod
    awk -F '\t' '{if ($1 == "f" || $1 == "l") {print $2 "\t" $3}}' < localfulllist-$mod > localfilesizes-$mod

    # Look for stray .~tmp~ dirs
    if [[ -z $NORSYNCRECOVERY ]]; then
        grep '\.~tmp~' localdirs-$mod > staletmpdirs-$mod
        grep '\.~tmp~' localfiles-$mod > staletmpfiles-$mod
    fi

    logit l "Generating local file list end: $mod"
}

process_local_file_list () {
    # Compare what the client has to what the server has, and generate more
    # lists based on that.
    # Takes:
    #   module name
    #   directory of the current module
    # Generates the following file lists:
    #     deletefiles-$module
    #     deletedirs-$module
    #     updatetimestamps-$module
    #     missingfiles-$module
    #     missingdirs-$module
    #     updatedfiles-$module
    #     checksumfailed-$module
    # (checksums-$module is written as well, as input for the checksum run.)
    #
    # NOTE: $fl (the module's file list) is not an argument and is not set
    # here; it is read from whatever the caller has in scope.

    # XXX Don't do any master transferlist manipulation here.
    local mod=$1
    local mdir=$2

    # Find files on the client which don't exist on the server
    # Feeding the server list to sort twice means every server entry shows up
    # at least twice, so uniq -u leaves only entries found on the client alone.
    sort allfiles-$mod allfiles-$mod localfiles-$mod \
        | uniq -u  > deletefiles-$mod
    # The file lists themselves must never be considered for deletion
    remove_filelists_from_file deletefiles-$mod $mdir

    # Find dirs on the client which don't exist on the server
    sort alldirs-$mod alldirs-$mod localdirs-$mod \
        | uniq -u > deletedirs-$mod

    # Extract dirnames of every file and dir in the delete lists, and all of their parents.
    if [[ -n $updatealldirtimes ]]; then
        # Asked to restore every directory time: the module directory plus
        # every directory the server has.
        echo $mdir > updatetimestamps-$mod
        cat alldirs-$mod >> updatetimestamps-$mod
    else
        # dn() strips the last "/component" from the path over and over,
        # printing what is left each time, until no slash remains.
        awk '{dn($0)} function dn(p) { while (sub(/\/[^\/]*\]?$/, "", p)) print p }' \
            deletefiles-$mod deletedirs-$mod \
            | sort -u > updatetimestamps-$mod
    fi

    # Find files on the server which are missing on the client
    # (same trick as above, with the roles of the two lists swapped)
    sort localfiles-$mod localfiles-$mod allfiles-$mod \
        | uniq -u > missingfiles-$mod

    # Find dirs on the server which are missing on the client
    sort localdirs-$mod localdirs-$mod alldirs-$mod \
        | uniq -u > missingdirs-$mod

    # Find files which have changed size
    # Identical "path<TAB>size" lines on both sides are dropped by uniq -u.  A
    # path which is left twice is present on both sides with different sizes,
    # and uniq -d picks exactly those paths.
    sort allfilesizes-$mod localfilesizes-$mod \
        | uniq -u | awk -F '\t' '{print $1}' \
        | uniq -d > updatedfiles-$mod

    # Extract and verify checksums
    # Lines from the [Checksums section of $fl are rewritten as
    # "<checksum>  <module dir>/<path>" for sha1sum --check.
    awk -F '\t' "/^\[Checksums/ {s=1; next} /^$/ {s=0; next} {if (s) print \$1 \"  $mdir/\" \$2}" $fl > checksums-$mod
    pushd $DESTD > /dev/null 2>&1
    # Keep the part before the first colon of each output line ending in
    # "failed" (in any case).
    sha1sum --check --quiet $tempd/checksums-$mod 2> /dev/null \
        | grep -i 'failed$' \
        | awk -F: '{print $1}' > $tempd/checksumfailed-$mod
    popd > /dev/null 2>&1
}

process_remote_file_list () {
    # Build the server-side lists for one module from its master file list.
    #
    # Takes:
    #   file list to read
    #   module name
    #   directory of the module
    #
    # When PREBITFLIP is set the "restricted" extractors are used, otherwise
    # the "no_restricted" ones are.  The "new" lists are limited to entries
    # at least as recent as $LASTTIME.
    #
    # Will create the following files:
    #   alldirs-$module        newdirs-$module
    #   allfilesizes-$module   newfilesizes-$module
    #   allfiles-$module       newfiles-$module

    local fl=$1
    local module=$2
    local moduledir=$3

    local variant=no_restricted
    local note=excluded
    local list

    db3 Extracting file and directory lists for $module.

    # Choose the family of extractors once, instead of writing every call out
    # twice.
    if [[ -n $PREBITFLIP ]]; then
        variant=restricted
        note=included
    fi

    db4 "Directories (pre-bitflip $note)"
    awk_extract_newer_dirs_$variant $fl alldirs-$module $moduledir

    db4 "New dirs (pre-bitflip $note)"
    awk_extract_newer_dirs_$variant $fl newdirs-$module $moduledir $LASTTIME

    db4 "Files (pre-bitflip $note)"
    awk_extract_newer_files_$variant $fl allfilesizes-$module $moduledir

    db4 "New files (pre-bitflip $note)"
    awk_extract_newer_files_$variant $fl newfilesizes-$module $moduledir $LASTTIME

    # Filter the lists if needed
    for list in alldirs newdirs allfilesizes newfilesizes; do
        filter $list-$module
    done

    # Produce the file lists without sizes.
    for list in all new; do
        awk -F '\t' '{print $1}' ${list}filesizes-$module > ${list}files-$module
        retcheck $? awk
    done
}

update_master_file_lists () {
    # Simply append various per-module lists to the master lists
    # Takes:
    #   module name (if not given, the $module visible from the caller is
    #   used)
    # The delete and timestamp lists go to the master-* files; everything
    # which needs to be fetched goes onto the module's own transfer list.
    local module=${1:-$module}

    cat deletefiles-$module >> master-deletefiles
    cat deletedirs-$module >> master-deletedirs
    cat updatetimestamps-$module >> master-updatetimestamps
    cat missingfiles-$module >> transferlist-$module
    cat missingdirs-$module >> transferlist-$module
    cat updatedfiles-$module >> transferlist-$module
    cat checksumfailed-$module >> transferlist-$module
}

remove_filelists_from_file () {
    # Remove the file from $FILELIST and anything given by $EXTRAFILES.
    # Takes:
    #   file to modify
    #   directory of current module (for substituting $mdir)
    # Modifies the file directly
    # Calls grep -E -v in a loop.  Generally this is called on files of no
    # more than a few thousand lines, so performance shouldn't be an issue.
    #
    # A line is dropped when its first path component is directly followed by
    # one of those names; each name is used as part of an extended regular
    # expression.

    local f=$1
    local moduledir=$2
    local tmp=$f.rfff
    local fl

    for fl in $FILELIST $EXTRAFILES; do
        # Substitute the literal text $mdir in the configured name
        fl=${fl/'$mdir'/$moduledir}
        # grep -E rather than egrep, which is obsolescent and makes current
        # GNU grep print a warning on every call.
        grep -E -v -e "^[^/]*/$fl" -- "$f" > "$tmp"
        mv "$tmp" "$f"
    done

    rm -f "$tmp"
}

process_module () {
    # Determine what needs to be transferred and removed from a single module.
    #
    # Takes the name of the module to process, returns nothing.
    #
    # A module whose file list checksum matches the one recorded in
    # $checksums is skipped, unless $alwayscheck is set.  A module whose file
    # list has no [End] marker is skipped with an error.
    #
    # Sets the following globals:
    # changed_modules
    #
    # Will leave the following lists in the temporary dir for use by other
    # functions: (all of them; currently deletes nothing)
    #
    # May leave other files, but don't depend on them.
    #
    # The various status variables, for logging:
    #   cntallserverfiles/cntallserverdirs - total files/dirs on server.
    #   cntnewserverfiles/cntnewserverdirs - new files/dirs on server (since last mirror time)
    #   cntlocalfiles/cntlocaldirs - total files/dirs on client.
    #   cntextrafiles/cntextradirs - files/dirs on client but not server.
    #   cntmissingfiles/cntmissingdirs - files/dirs on server but not client.
    #   cntsizechanged - files where size differs between server/client.
    #   cntupdatetimestamps - dir timestamps to restore
    #   cntchecksumfailed - files where checksum differs between server/client.
    #   cntchangedpaths - count of all differences between file lists.

    local module=$1
    # ZSHISM? (associative array indexing)
    local moduledir=$MODULEMAPPING[$module]

    local fl=${FILELIST/'$mdir'/$moduledir}
    local cntallserverfiles cntallserverdirs cntnewserverfiles cntnewserverdirs
    local cntchangedpaths cntlocalfiles cntlocaldirs cntextrafiles cntextradirs
    local cntmissingfiles cntmissingdirs cntsizechanged cntupdatetimestamps cntchecksumfailed
    local extra i

    if [[ -z $alwayscheck && \
            -n $checksums[$module] && \
            $(sha1sum $fl | cut -d' ' -f1) == $checksums[$module] ]]; then
        logit N No change in file list for $module
        db2 No change in file list checksum.  Skipping $module.
        # Leave the function explicitly; there is no loop here to continue,
        # and the caller simply moves on to its next module.
        return
    fi

    sep
    logit P Processing start: $module
    db2 Processing $module
    changed_modules+=$module

    # Make sure the list is complete.
    tail -2 $fl | grep -q '^\[End\]$'
    if (( ? != 0 )); then
        logit e "Invalid file list; skipping $module"
        # Both lines of the complaint belong on stderr
        {
            echo "No end marker.  Corrupted file list?"
            echo Skipping $module.
        } >&2
        return
    fi

    process_remote_file_list $fl $module $moduledir

    cntallserverfiles=$(wc -l < allfiles-$module)
    cntallserverdirs=$(wc -l < alldirs-$module)
    db2f "Total on server:       %7d files, %4d dirs.\n" $cntallserverfiles $cntallserverdirs

    cntnewserverfiles=$(wc -l < newfiles-$module)
    cntnewserverdirs=$(wc -l < newdirs-$module)
    db2f "New on server:         %7d files, %4d dirs.\n" $cntnewserverfiles $cntnewserverdirs

    # Add extra files to the transfer list
    # (the counts above were taken first, so these are not included in them)
    echo $moduledir/$fl >> newfiles-$module
    for extra in $EXTRAFILES; do
        extra=${extra/'$mdir'/$moduledir}
        echo $moduledir/$extra >> newfiles-$module
    done
    cat newfiles-$module >> transferlist-$module
    cat newdirs-$module >> transferlist-$module

    # Comparing against local content only makes sense once the module
    # directory exists on the client.
    if [[ -d $DESTD/$moduledir ]]; then
        db3 Finding file list changes since last run
        process_file_list_diff $fl $module $moduledir
        cat changedpaths-$module >> transferlist-$module

        generate_local_file_list $module $moduledir

        if [[ -s staletmpdirs-$module ]]; then
            clean_stale_rsync_temps $module
        fi

        # Find files on the client which don't exist on the server
        process_local_file_list $module $moduledir
        update_master_file_lists $module

        # Count some things we want to use for stats later.
        cntchangedpaths=$(wc -l < changedpaths-$module)
        cntlocalfiles=$(wc -l < localfiles-$module)
        cntlocaldirs=$(wc -l < localdirs-$module)
        cntextrafiles=$(wc -l < deletefiles-$module)
        cntextradirs=$(wc -l < deletedirs-$module)
        cntmissingfiles=$(wc -l < missingfiles-$module)
        cntmissingdirs=$(wc -l < missingdirs-$module)
        cntsizechanged=$(wc -l < updatedfiles-$module)
        cntupdatetimestamps=$(wc -l < updatetimestamps-$module)
        cntchecksumfailed=$(wc -l < checksumfailed-$module)

        db2f "Total on client:       %7d files, %4d dirs.\n" $cntlocalfiles $cntlocaldirs
        db2f "Not present on server: %7d files, %4d dirs.\n" $cntextrafiles $cntextradirs
        db2f "Missing on client:     %7d files, %4d dirs.\n" $cntmissingfiles $cntmissingdirs
        db2f "Size Changed:          %7d files.\n" $cntsizechanged
        db2f "Timestamps to restore: %7d files.\n" $cntupdatetimestamps
        db2f "Checksum Failed:       %7d files.\n" $cntchecksumfailed
        db2f "Filelist changes:      %7d paths.\n" $cntchangedpaths
    fi

    # The same path can have been added for several reasons; keep one copy.
    sort -u transferlist-$module >> transferlist-sorted-$module
    cat transferlist-sorted-$module >> master-transferlist
    local cnttotaltransfer=$(wc -l < transferlist-sorted-$module)
    db2f "Total to transfer:     %7d paths.\n" $cnttotaltransfer

    logit L "Counts for $module: Svr:$cntallserverfiles/$cntallserverdirs Loc:$cntlocalfiles/$cntlocaldirs Diff:$cntchangedpaths New:$cntnewserverfiles/$cntnewserverdirs Xtra:$cntextrafiles/$cntextradirs Miss:$cntmissingfiles/$cntmissingdirs Size:$cntsizechanged Csum:$cntchecksumfailed Dtim:$cntupdatetimestamps"
    logit P Processing end: $module
    db2 Finished processing $module.

    # Some basic info about the transfer.
    db1 Changes in $module: $cnttotaltransfer files/dirs
    if (( cnttotaltransfer <= 5 )); then
        for i in $(cat transferlist-sorted-$module); do
            db1 "    $i"
        done
    fi

    # XXX We should clean some things up at this point, but we also need some
    # files for the checkin later.
    # Should be able to delete all *-$module, except for the dirlists, to give
    # the current mirrormanager versions the things it needs.
    #if (( VERBOSE <= 4 )); then
    #    rm *-$module
    #fi
}


# Main program execution
# ======================
# Configuration is gathered first; everything below relies on the variables
# these three calls set up.
parse_args "$@"
set_default_vars
read_config

# XXX check_dependencies

# Paranoia; give us a few extra seconds.
# (The recorded start time is moved five seconds into the past unless
# noparanoia is set.)
[[ -z $noparanoia ]] && starttime=$(($starttime-5))

# Find the previous mirror time, and backdate if necessary
# The time file is sourced as shell code; presumably it assigns LASTTIME.
# A backdate value takes precedence over whatever the file provided.
LASTTIME=0
if [[ -r $TIMEFILE ]]; then
    source $TIMEFILE
fi
if [[ -n $backdate ]]; then
    LASTTIME=$backdate
fi

# Make a temp dir and clean it up unless we're doing a lot of debugging
# (TMPDIR, when set, is passed to mktemp as the parent directory.)
if [[ -z $TMPDIR ]]; then
    tempd=$(mktemp -d -t quick-mirror.XXXXXXXXXX)
else
    tempd=$(mktemp -d -p $TMPDIR -t quick-mirror.XXXXXXXXXX)
fi

# $? is still the status of whichever mktemp call ran above.
if [[ $? -ne 0 ]]; then
    (>&2 echo "Creating temporary directory failed?")
    exit 1
fi
# The directory is kept for inspection only when VERBOSE is above 8.  The
# double quotes expand $tempd now, when the trap is set.
if (( VERBOSE <= 8 )); then
    trap "rm -rf $tempd" EXIT
fi

# Set up a FIFO for logging.  Just calling systemd-cat repeatedly just gives us
# a different PID every time, which is annoying.
# File descriptor 3 is what logwrite sends to when LOGJOURNAL is set.
if [[ -n $LOGJOURNAL ]]; then
    logfifo=$tempd/journal.fifo
    mkfifo $logfifo
    systemd-cat -t quick-fedora-mirror < $logfifo &
    exec 3>$logfifo
fi

# db1 appends to this file when VERBOSE is 1
outfile=$tempd/output
touch $outfile

# logit appends every item to this file
sessionlog=$tempd/sessionlog
touch $sessionlog

touch $tempd/started-run

# Every relative file name used from here on lives in the temp dir
cd $tempd

# Everything needed for logging exists now, so try to take the lock.
if ! lock $TIMEFILE; then
    db4 Could not acquire lock.
    logit k lock contention

    # Maybe we haven't been able to mirror for some time....
    # A backdated run, or one with no recorded previous run, has no
    # meaningful delay to complain about.
    delay=0
    if [[ -z $backdate && $LASTTIME -ne 0 ]]; then
        delay=$(( starttime - LASTTIME ))
    fi

    if (( delay > WARNDELAY )); then
        (>&2 echo No completed run since $(date -d @$LASTTIME ).)
        logit E No completed run since $(date -d @$LASTTIME ).
    fi
    exit 1
fi

# The lock is held; announce the run.
db1 "Mirror starting: $(date)"
logit r Run start: cfg $cfgfile, tmp $tempd

if [[ -n $MIRRORBUFFET ]]; then
    # We want to mirror everything, so save the admin from listing the
    # individual modules.
    # ZSHISM (get keys from an associative array with (k))
    MODULES=(${(k)MODULEMAPPING})
    # BASHEQ MODULES=${!MODULEMAPPING[@]}
    # bash3 equivalent is terrible
fi

# Dump the main settings when running at high verbosity
if (( VERBOSE >= 6 )); then
    echo Times:
    echo LASTTIME=$LASTTIME
    echo starttime=$starttime
    echo TIMEFILE=$TIMEFILE
    echo Dirs:
    echo tempd=$tempd
    echo DESTD=$DESTD
    echo Rsync:
    echo REMOTE=$REMOTE
    echo MASTERMODULE=$MASTERMODULE
    echo RSYNC=$RSYNC
    echo RSYNCOPTS=$RSYNCOPTS
    echo Modules:
    echo MODULES=$MODULES
    echo MODULEMAPPING=$MODULEMAPPING
    echo Misc:
    echo VERBOSE=$VERBOSE
fi

# Command tracing from here on at the highest verbosity levels
(( VERBOSE >= 8 )) && set -x

fetch_file_lists

logit p Processing start
# process_module adds each module it actually processes to this array; the
# checkin at the end of the run iterates over it.
changed_modules=()
for module in $MODULES; do
    process_module $module
done

# process_module appends to master-transferlist; if the file was never
# created there is nothing to transfer.
if [[ ! -e master-transferlist ]]; then
    logit n No changes to synchronize
    db2 No changed files.
    finish 0
fi

if [[ -n $MIRRORBUFFET ]]; then
    # When mirroring everything, also fetch the top-level size listing
    echo DIRECTORY_SIZES.txt >> master-transferlist

    # If there's an rsync temp directory in the top level, delete it to work
    # around a potential rsync bug.
    if [[ -n $RSYNC_PARTIAL_DIR_BUG ]]; then
        rm -rf $DESTD/.~tmp~
    fi
fi

# The actual transfer
# ===================
# Modules can contribute the same path more than once; transfer each once.
sort -u master-transferlist > master-transferlist.sorted
linecount=$(wc -l < master-transferlist.sorted)
sep; sep
db2 Transferring $linecount files.
# XXX send total count to log as well

touch $tempd/started-transfer

# Now we have a list of everything which has changed recently in every module
# we want, pass that to rsync (non recursive mode!) and it should transfer just
# the changed files without having to pull the entire huge file list.
# Note that do_rsync is given the NAME of the array of extra options.
extra=()
if [[ -n $rsyncdryrun ]]; then
    extra+=(-n)
fi
do_rsync $REMOTE/$MASTERMODULE/ $DESTD master-transferlist.sorted extra
if (( ? != 0 )); then
    (>&2 echo "rsync failed; aborting run.\nWill not check in or delete anything.")
    # The item code has to be an argument of its own: logit compares its
    # first argument against 'E' to tag errors and matches it against
    # LOGITEMS to decide whether to log the message at all.
    logit E "Skipping further operations due to rsync failure."
    finish 1
fi

# Total downloaded file count, bytes received, transfer speed
logit s "stat: downloaded $rsfilestransferred files"
logit s "stat: received $(hr_b $rstotalbytesreceived)"
logit s "stat: transfer speed $(hr_b $rstransferspeed)/s"

# Everything we can extract from rsync
logit S "stat: sent $(hr_b $rstotalbytessent)"
logit S "stat: speedup: $rsspeedup"
logit S "stat: total size of transferred files: $(hr_b $rsfilesize)"
logit S "stat: file list gen time $(hr_s $rsfilelistgentime)"
logit S "stat: file list transfer time $(hr_s $rsfilelisttransfertime)"

db1 "========================="
db1 "Main transfer statistics:"
db1 "    Downloaded files: $rsfilestransferred"
db1 "    Total size of those files: $(hr_b $rsfilesize)"
db1 "    Received: $(hr_b $rstotalbytesreceived)"
db1 "    Sent: $(hr_b $rstotalbytessent)"
db1 "    Speedup: $rsspeedup"
db1 "    Trasfer speed: $(hr_b $rstransferspeed)/s"
db1 "    File list generation time: $(hr_s $rsfilelistgentime)"
db1 "    File list transfer time: $(hr_s $rsfilelisttransfertime)"

# Local dir/file deletion
# =======================
# Directories first, then files.  Nothing is ever removed when skipdelete is
# set.
if [[ -s master-deletedirs ]]; then
    linecount=$(wc -l < master-deletedirs)

    if [[ -n $skipdelete ]]; then
        # Whether to delete must not depend on the verbosity; only the
        # printing of the list does.
        logit d Directory deletion skipped
        if (( VERBOSE >= 2 )); then
            echo "Not deleting  $linecount directories.  Delete list is:"
            cat master-deletedirs
            echo
        fi
    else
        logit d Directory deletion start: $linecount directories
        db2 Removing $linecount stale directories.
        for nuke in $(cat master-deletedirs); do
            # An entry may already be gone, removed along with its parent
            if [[ -d "$DESTD/$nuke" ]]; then
                logit D Deleting directory $nuke
                db4 Removing $nuke
                rm -rf "$DESTD/$nuke"
                deletedsomething=1
            fi
        done
        logit d Directory deletion end
    fi
else
    db2 No stale directories to delete.
fi

if [[ -s master-deletefiles ]]; then
    linecount=$(wc -l < master-deletefiles)

    if [[ -n $skipdelete ]]; then
        logit d File deletion skipped
        echo Not deleting $linecount stale files.  Delete list is:
        cat master-deletefiles
        echo
    else
        logit d File deletion begin: $linecount files
        db2 Removing $linecount stale files.
        # xopts=()
        # (( VERBOSE >= 4 )) && xopts=(-t)
        # The paths are relative to $DESTD.  rm only runs if the directory
        # change succeeded, so nothing is removed relative to some other
        # directory by mistake.
        tr '\n' '\0' < master-deletefiles \
            | (cd $DESTD && xargs $xopts -0 rm -f)
        # for nuke in $(cat master-deletefiles); do
        #     logit D Deleting file $nuke
        #     rm -f "$DESTD/$nuke"
        # done
        deletedsomething=1
        logit d File deletion end
    fi
else
    db2 No stale files to delete.
fi

# Restore directory timestamps, if that was asked for and there is anything
# in the list.
if [[ -s master-updatetimestamps && ( -n $KEEPDIRTIMES || -n $updatealldirtimes ) ]]; then
    extra=()
    [[ -n $rsyncdryrun ]] && extra+=(-n)
    logit d "Updating timestamps on $(wc -l < master-updatetimestamps) dirs"
    do_rsync $REMOTE/$MASTERMODULE/ $DESTD master-updatetimestamps extra
fi

# The run itself is complete at this point; record its timestamp.
save_state

# Mirrormanager Checkin and Callout
# =================================
# rsync had no complaints, and as far as we're concerned the run is finished
# and recorded.
#
# The filtered file list of every module we mirrored is therefore correct, so
# alldirs-$module is accurate and its contents can simply be reported to
# mirrormanager.
#
# The checkin code also runs when a payload dump was requested, even if
# checkins are otherwise being skipped.
if [[ -n $dumpmmcheckin || -z $skipcheckin ]]; then
    db2 Performing mirrormanager checkin
    logit m "mirrormanager checkin start"

    # Only modules which were actually processed get checked in
    for module in $changed_modules; do
        checkin_module $module
    done

    logit m "mirrormanager checkin end"
fi
finish 0 yes