weed.sh 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. #!/usr/bin/env bash
  2. # Weed old backups. See HELP for details.
  3. # Tests for this script can be found in:
  4. # bookwyrm/postgres-docker/tests/testing-entrypoint.sh
  5. set -euo pipefail
  6. DAILY_THRESHOLD=14
  7. WEEKLY_THRESHOLD=4
  8. MONTHLY_THRESHOLD=-1
  9. HELP="\
  10. NAME
  11. weed -- remove old backups from the backups directory
  12. SYNOPSIS
  13. weed.sh [-d threshold] [-w threshold] [-m threshold] [-l] backup_directory
  14. DESCRIPTION
  15. Reduce the number of backups by only keeping a certain number of daily backups before \
  16. reducing the frequency to weekly, monthly, and then finaly annually.
  17. For each threshold, setting it to 0 will skip that frequency (e.g., setting weekly to \
  18. 0 will mean backups go directly from daily to monthly), and setting it to -1 will \
  19. never reduce backups to a lower frequency (e.g., setting weekly to -1 will mean \
  20. backups never are reduced to monthly backups).
  21. -d threshold: Store this many daily backups before switching to weekly \
  22. (default $DAILY_THRESHOLD)
  23. -w threshold: Store this many weekly backups before switching to monthly \
  24. (default $WEEKLY_THRESHOLD)
  25. -m threshold: Store this many monthly backups before switching to annual \
  26. (default $MONTHLY_THRESHOLD)
  27. -l: Dry run. List the files that would be deleted.
  28. "
  29. # fail <message>
  30. # Write a message to stderr then exit
  31. function fail {
  32. echo -e "weed: $1" >&2
  33. exit 1
  34. }
  35. # parse_threshold <hopefully-a-number>
  36. # Thresholds should be a non-negative number (or -1 for no threshold)
  37. function parse_threshold {
  38. if [[ ! $1 =~ ^-?[0-9]+$ || $1 -lt -1 ]]; then
  39. fail "Invalid threshold: $1"
  40. fi
  41. echo "$1"
  42. }
  43. # weed_directory <directory> <daily_threshold> <weekly_threshold> <monthly_threshold>
  44. # List files to be deleted
  45. function weed_directory {
  46. local directory=$1
  47. local daily_threshold=$2
  48. local weekly_threshold=$3
  49. local monthly_threshold=$4
  50. local count=0
  51. local thresholds=("$daily_threshold" "$weekly_threshold" "$monthly_threshold" -1)
  52. local date_formats=("%Y %m %d" "%Y %W" "%Y %m" "%Y")
  53. local index=0
  54. local last_date=""
  55. local last_format=""
  56. local date=""
  57. # We would like to loop through all the backup files in the backup directory in
  58. # reverse-chronological order. Bookwyrm backup files are named such that
  59. # chronological and lexical order match. So we should be safe to find all backup
  60. # files and reverse sort them. We should be terrified of deleting a backup an
  61. # instance maintainer wants to keep, so we will be extra cautious. We're ignoring
  62. # any subdirectories in case someone moves an important backup into a meaningfully
  63. # named folder. We are also prepending the date to the path before sorting so that
  64. # the ordering would be correct even if we were allowed to find backup files in
  65. # subdirectories where chronological and lexical order don't match.
  66. for date_file in $(
  67. find "$directory" \
  68. -maxdepth 1 \
  69. -name 'backup_[a-z]*_[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]\.sql' \
  70. | sed 's/\(^.*backup_[a-z]*_\([0-9-]*\)\.sql$\)/\2\1/' \
  71. | sort --reverse
  72. ); do
  73. date="${date_file:0:10}"
  74. file="${date_file:10}"
  75. date="${date_file:0:10}"
  76. file="${date_file:10}"
  77. # We can't fall off the end because we set annual backups to unlimited. It seems
  78. # unlikely that instance maintainers would have enough concern about the space
  79. # one backup/year takes to warrant supporting a cutoff.
  80. while [[ ${thresholds[index]} -ne -1 && $count -ge ${thresholds[index]} ]]; do
  81. index=$((index + 1))
  82. last_format=""
  83. count=0
  84. done
  85. if [[ -z "$last_date" ]]; then
  86. count=$((count + 1))
  87. last_date=$date
  88. last_format=""
  89. else
  90. if [[ -z "$last_format" ]]; then
  91. last_format=$(date --date="$last_date" +"${date_formats[index]}")
  92. fi
  93. format=$(date --date="$date" +"${date_formats[index]}")
  94. if [[ "$format" == "$last_format" ]]; then
  95. echo "$file"
  96. else
  97. count=$((count + 1))
  98. last_date="$date"
  99. last_format="$format"
  100. fi
  101. fi
  102. done
  103. }
  104. function main(){
  105. local daily_threshold=$DAILY_THRESHOLD
  106. local weekly_threshold=$WEEKLY_THRESHOLD
  107. local monthly_threshold=$MONTHLY_THRESHOLD
  108. local dry_run=""
  109. while getopts "hd:w:m:l" OPTION; do
  110. case "$OPTION" in
  111. h)
  112. echo "$HELP";
  113. exit
  114. ;;
  115. d)
  116. daily_threshold=$(parse_threshold "$OPTARG")
  117. ;;
  118. w)
  119. weekly_threshold=$(parse_threshold "$OPTARG")
  120. ;;
  121. m)
  122. monthly_threshold=$(parse_threshold "$OPTARG")
  123. ;;
  124. l)
  125. dry_run="true"
  126. ;;
  127. :)
  128. fail "Missing argument for '$OPTARG'. To see help run: weed.sh -h"
  129. ;;
  130. ?)
  131. fail "Unknown option '$OPTION'. To see help run: weed.sh -h"
  132. esac
  133. done
  134. shift "$((OPTIND - 1))"
  135. if [[ $# -ne 1 ]]; then
  136. fail "expected a single argument, directory"
  137. fi
  138. local count=0
  139. for file in $(weed_directory "$1" "$daily_threshold" "$weekly_threshold" "$monthly_threshold"); do
  140. count=$((count + 1))
  141. if [[ -n "$dry_run" ]]; then
  142. echo "$file"
  143. else
  144. echo "deleting $file" >&2
  145. rm "$file"
  146. fi
  147. done
  148. if [[ -n "$dry_run" ]]; then
  149. optional_words="would be "
  150. else
  151. optional_words=""
  152. fi
  153. echo -e "$count files ${optional_words}deleted" >&2
  154. }
  155. if [ "${BASH_SOURCE[0]}" -ef "$0" ]; then
  156. main "$@"
  157. fi