repmgr+PostgreSQL故障自动转移


创建dockerfile

# 使用 CentOS 7 作为基础镜像
FROM centos:7

# 设置环境变量
ENV PG_MODE=primary \
 PG_USER=postgres \
 PG_PASSWORD=postgres \
 NODE_ROLE=master \
 MASTER_NAME=master \
 MASTER_PORT=5432 \
 RE_USER=repmgr \
 NODE_ID=1 \
 #NET_SEGMENT=192.168.0 \
 NODE_NAME=master1 \
 PG_DATADIR=/home/postgres/pgdata \
 PG_REPMGR_CONF=/home/postgres/repmgr.conf \
 PG_BINDIR=/usr/pgsql-12/bin \
 PG_CONFIGDIR=/home/postgres/pgdata/postgresql.conf \
 PRIORITY=1 \
 CONNINFO_HOST=master \
 CONNINFO_PORT=5432 \
 PATH="/usr/pgsql-12/bin:${PATH}"

# 创建postgres用户和组
RUN groupadd -r postgres \
 && useradd -r -g postgres postgres

# 安装所需的软件和工具
RUN yum -y install epel-release \
 && yum -y install https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm \
 && yum -y install postgresql12 postgresql12-server postgresql12-contrib repmgr12 \
 && yum -y install which sudo iproute hostname \
 && yum -y clean all

# 创建脚本存放目录
RUN mkdir -p /home/runtime/ \
 && chown -R postgres:postgres /home/runtime/

# 将 functions 文件和 entrypoint.sh 添加到镜像中
#COPY functions /home/postgres/runtime/functions
COPY functions /home/runtime/functions
#COPY entrypoint.sh /home/postgres/runtime/entrypoint.sh
COPY entrypoint.sh /home/runtime/entrypoint.sh

# 修改文件权限与属性
RUN mkdir -p /home/postgres/pgdata \
 && chown -R postgres:postgres /home/postgres \
 #&& chown -R postgres:postgres /home/postgres/pgdata \
 #&& chown 777 /home/postgres/pgdata \
 #&& chmod +x /home/postgres/runtime/entrypoint.sh \
 && chmod +x /home/runtime/entrypoint.sh \
 #&& chmod +x /home/postgres/runtime/functions
 && chmod +x /home/runtime/functions 
 #&& usermod -a -G root postgres \
 #&& chmod 770 /home/postgres

RUN echo 'postgres ALL=(ALL) NOPASSWD: /bin/chown' >> /etc/sudoers

# 切换到 postgres 用户
USER postgres

# 初始化 PostgreSQL 数据目录
#RUN initdb -D ${PG_DATADIR} -U${PG_USER}

# 暴露 PostgreSQL 端口
EXPOSE 5432

# 设置启动脚本
#ENTRYPOINT ["/home/postgres/runtime/entrypoint.sh"]
ENTRYPOINT ["/home/runtime/entrypoint.sh"]

打包成镜像

docker run build -t centos7-pgsql-repmgr:v1.0

docker-compose部署
master节点的dockercompose.yaml

version: "3"

services:
 pg-master:
 image: centos7-pgsql-repmgr:v1.9.27
 hostname: master1
 container_name: pg-master
 environment:
 PG_MODE: primary
 PG_USER: postgres
 PG_PASSWORD: postgres
 NODE_ROLE: master
 NODE_ID: 1
 NODE_NAME: master1
 #MASTER_NAME: 172.18.41.8
 #MASTER_PORT: 25432
 CONNINFO_HOST: 172.18.41.8
 CONNINFO_PORT: 25432
 PRIORITY: 10
 POSTGRES_DB: repmgr
 ports:
 - "25432:5432"
 volumes:
 - ./pg-data:/home/postgres

slave节点的dockercompose.yaml
version: "3"

services:
 pg-slave:
 image: centos7-pgsql-repmgr:v1.9.27
 container_name: pg-salve
 hostname: salve1
 #network_mode: host
 environment:
 PG_MODE: salve
 PG_USER: postgres
 PG_PASSWORD: postgres
 PGPASSWORD: postgres
 RE_USER: repmgr
 NODE_ROLE: slave
 MASTER_NAME: 172.18.41.8
 MASTER_PORT: 25432
 NODE_ID: 2
 NODE_NAME: salve1
 CONNINFO_HOST: 172.18.41.2
 CONNINFO_PORT: 25432
 PRIORITY: 15
 POSTGRES_DB: repmgr
 ports:
 - "25432:5432"
 volumes:
 - ./pg-data:/home/postgres
~ 

master故障修复后已salve启动的dockercompose.yaml
version: "3"

services:
 pg-slave:
 image: centos7-pgsql-repmgr:v1.9.27
 hostname: salve1
 container_name: pg-salve
 environment:
 MASTER_NAME: 172.18.41.8
 PGPASSWORD: postgres
 MASTER_PORT: 25432
 CONNINFO_HOST: 172.18.41.2
 CONNINFO_PORT: 25432
 NODE_ID: 1
 PRIORITY: 15
 POSTGRES_DB: repmgr
 ports:
 - "25432:5432"
 volumes:
 - ./pg-data:/home/postgres

注：恢复前请做好数据备份，防止数据丢失
附：functions脚本函数
#!/bin/bash

# 容器调用入口函数，根据传入命令不同，执行注册或启动主库、注册或启动备库，从新加入集群的操作。
configure_repmgr()
{
 case ${NODE_ROLE} in
 master)
 echo 'master'
 initialize_master
 ;;
 slave)
 echo 'slave'
 initialize_slave
 ;;
 # rejoin)
 # echo 'rejoin'
 # rejoin_node
 # ;;
 master_slave)
 echo 'master_slave'
 master_slave
 ;;
 esac
}

# 用于设置 PostgreSQL 配置文件 'postgresql.conf' 中的参数。
# 该函数接受两个参数：参数名 (param_name) 和参数值 (param_value)，
# 并使用 'sed' 命令在 'postgresql.conf' 文件中查找以参数名开头的行
# （行前可能有一个 # 注释符号），然后用新的参数值替换该行。

set_postgresql_param() {
 param_name="$1"
 param_value="$2"
 sed -i "/^#${param_name} =/c ${param_name} = ${param_value}" ${PG_DATADIR}/postgresql.conf
}

# 这个函数用来向 PostgreSQL 的 pg_hba.conf 文件中添加一行新的配置
# ${1}：你想要添加的配置行
# ${PG_DATADIR}：pg_hba.conf 文件的位置
set_hba_param() {
 # 判断是否传入了配置行
 if [ -z "${1}" ]; then
 echo "No configuration line provided."
 return 1
 fi

 # 检查 pg_hba.conf 文件是否存在
 if [ ! -e "${PG_DATADIR}/pg_hba.conf" ]; then
 echo "${PG_DATADIR}/pg_hba.conf does not exist."
 return 1
 fi

 # 检查是否已经拥有写权限，如果没有则尝试获取
 if [ ! -w "${PG_DATADIR}/pg_hba.conf" ]; then
 chmod u+w ${PG_DATADIR}/pg_hba.conf || {
 echo "Could not set write permissions on ${PG_DATADIR}/pg_hba.conf"
 return 1
 }
 fi

 # 添加配置行到 pg_hba.conf 文件
 echo "${1}" >> ${PG_DATADIR}/pg_hba.conf || {
 echo "Could not write to ${PG_DATADIR}/pg_hba.conf"
 return 1
 }

 return 0
}

master_slave() {
 # 检查pgdata-bak是否存在，如果存在则警告并直接启动pgsql
 if [ -d "${PG_DATADIR}-bak" ]; then
 echo "Warning: ${PG_DATADIR}-bak already exists. Starting pgsql directly."
 pg_ctl -D $PG_DATADIR start
 else
 #export PGPASSFILE=~/.pgpass
 #write_pgpass
 IP=`ping ${MASTER_NAME} -c 1 -w 1 | sed '1{s/[^(]*(//;s/).*//;q}'`
 # 1. 备份pgsql数据
 mv $PG_DATADIR ${PG_DATADIR}-bak

 # 2. 创建pgsql数据目录
 mkdir $PG_DATADIR

 # 3. 从现在的主库获取备份数据
 #repmgr -f $PG_REPMGR_CONF -h ${MASTER_NAME} -U repmgr -d repmgr -D $PG_DATADIR standby clone
 echo "repmgr -h ${IP} -p ${MASTER_PORT} -U repmgr -d repmgr -f ${PG_REPMGR_CONF} standby clone --dry-run"
 repmgr -h ${IP} -p ${MASTER_PORT} -U repmgr -d repmgr -f ${PG_REPMGR_CONF} standby clone --dry-run
 echo "repmgr -h ${IP} -p ${MASTER_PORT} -U repmgr -d repmgr -f ${PG_REPMGR_CONF} standby clone"
 repmgr -h ${IP} -p ${MASTER_PORT} -U repmgr -d repmgr -f ${PG_REPMGR_CONF} standby clone

 # 4. 启动pgsql
 echo "pg_ctl -D $PG_DATADIR start"
 pg_ctl -D $PG_DATADIR start

 # 5. 重新注册节点
 echo "repmgr -f $PG_REPMGR_CONF standby register --force"
 repmgr -f $PG_REPMGR_CONF standby register --force
 fi
 echo "repmgrd -f ${PG_REPMGR_CONF} --pid-file /tmp/repmgrd.pid --daemonize=false"
 repmgrd -f ${PG_REPMGR_CONF} --pid-file /tmp/repmgrd.pid --daemonize=false
}

# 注册和执行主库操作，包括修改配置文件、创建用户、数据库及插件等，并设置守护进程用于自动故障转移。
initialize_master()
{
 if [[ ! -f ${PG_DATADIR}/PG_VERSION ]]; then
 initdb -D /home/${PG_USER}/pgdata -U${PG_USER}
 write_postgresql_config
 write_pg_hba_conf
 write_pgpass
 pg_ctl -D /home/${PG_USER}/pgdata -w start >/dev/null
 psql -U ${PG_USER} -d postgres -h localhost -c "ALTER USER ${PG_USER} WITH PASSWORD '${PG_PASSWORD}';" >/dev/null
 psql -U ${PG_USER} -d postgres -h localhost -c "create database repmgr;" >/dev/null
 psql -U ${PG_USER} -d postgres -h localhost -c "create extension repmgr;" >/dev/null
 psql -U ${PG_USER} -d repmgr -h localhost -c "create user repmgr with superuser;" >/dev/null
 psql -U ${PG_USER} -d repmgr -h localhost -c "alter user repmgr password '${PG_PASSWORD}';" >/dev/null
 write_repmgr_conf
 repmgr -f ${PG_REPMGR_CONF} primary register
 else
 pg_ctl -D /home/${PG_USER}/pgdata -w start >/dev/null
 fi
 repmgrd -f ${PG_REPMGR_CONF} --pid-file /tmp/repmgrd.pid --daemonize=false
}

# 注册和执行备库操作，克隆主库，加入repmgr集群，并设置守护进程用于自动故障转移。
initialize_slave()
{
 if [[ ! -f ${PG_DATADIR}/PG_VERSION ]]; then
 write_repmgr_conf
 write_pgpass
 IP=`ping ${MASTER_NAME} -c 1 -w 1 | sed '1{s/[^(]*(//;s/).*//;q}'`
 repmgr -h ${IP} -p ${MASTER_PORT} -U repmgr -d repmgr -f ${PG_REPMGR_CONF} standby clone --dry-run
 repmgr -h ${IP} -p ${MASTER_PORT} -U repmgr -d repmgr -f ${PG_REPMGR_CONF} standby clone
 pg_ctl -D ${PG_DATADIR} -w start >/dev/null
 repmgr -f ${PG_REPMGR_CONF} standby register 
 repmgrd -f ${PG_REPMGR_CONF} --pid-file /tmp/repmgrd.pid --daemonize=false
 else
 pg_ctl -D ${PG_DATADIR} -w start >/dev/null
 repmgrd -f ${PG_REPMGR_CONF} --pid-file /tmp/repmgrd.pid --daemonize=false
 fi
}

# 将已有节点重新加入到集群，启动守护进行用于自动切换。
rejoin_node()
{
 if [[ -f ${PG_DATADIR}/PG_VERSION ]]; then
 IP=`ping ${MASTER_NAME} -c 1 -w 1 | sed '1{s/[^(]*(//;s/).*//;q}'`
 if [[ -d /home/${PG_USER}/pgdata-bak ]];then
 rm -fr /home/${PG_USER}/pgdata-bak
 fi
 cp -a /home/${PG_USER}/pgdata /home/${PG_USER}/pgdata-bak
 rm -fr /home/postgres/pgdata/postmaster.pid
 # pg_resetwal -f /home/${PG_USER}/pgdata
 repmgr node rejoin -d "host=${IP} dbname=repmgr user=repmgr" --force-rewind --config-files="postgresql.conf,postgresql.auto.conf" -f ${PG_REPMGR_CONF} --verbose --dry-run
 repmgr node rejoin -d "host=${IP} dbname=repmgr user=repmgr" --force-rewind --config-files="postgresql.conf,postgresql.auto.conf" -f ${PG_REPMGR_CONF} --verbose
 fi
}

# 修改postgresql.conf文件
write_postgresql_config()
{
 set_postgresql_param "wal_log_hints" "on"
 set_postgresql_param "archive_mode" "on"
 set_postgresql_param "archive_command" "\'test ! -f /home/${PG_USER}/pgarch/%f && cp %p /home/${PG_USER}/pgarch/%f\'"
 set_postgresql_param "wal_level" "hot_standby"
 set_postgresql_param "listen_addresses" "\'*\'"
 set_postgresql_param "hot_standby" "on"
 set_postgresql_param "max_wal_senders" "10"
 set_postgresql_param "wal_keep_segments" "10"
 set_postgresql_param "port" "${PG_PORT:-5432}"
 set_postgresql_param "max_connections " "100"
 set_postgresql_param "superuser_reserved_connections" "10"
 set_postgresql_param "full_page_writes" "on"
 set_postgresql_param "max_replication_slots" "10"
 set_postgresql_param "synchronous_commit" "on"
 set_postgresql_param "shared_preload_libraries" "repmgr"
 set_postgresql_param "log_destination" "csvlog"
 set_postgresql_param "logging_collector" "on"
 set_postgresql_param "log_directory" "on"
 set_postgresql_param "log_filename" "postgresql-%Y-%m-%d_%H%M%S"
 set_postgresql_param "log_rotation_age" "1d"
 set_postgresql_param "log_rotation_size" "10MB"
 set_postgresql_param "log_statement" "mod"
 #set_postgresql_param "data_directory" "/home/pgsqlData"
}

# 修改repmgr.conf文件
write_repmgr_conf()
{
 echo "node_id=${NODE_ID}" > ${PG_REPMGR_CONF}
 echo "node_name='${NODE_NAME}'" >> ${PG_REPMGR_CONF}
 #echo "conninfo='host=${CONNINFO_HOST} user=repmgr dbname=repmgr connect_timeout=2'" >> ${PG_REPMGR_CONF}
 echo "conninfo='host=${CONNINFO_HOST} port=${CONNINFO_PORT} user=repmgr dbname=repmgr connect_timeout=2'" >> ${PG_REPMGR_CONF}
 echo "data_directory='${PG_DATADIR}'" >> ${PG_REPMGR_CONF}
 echo "config_directory='${PG_CONFIGDIR}'" >> ${PG_REPMGR_CONF}
 echo "use_replication_slots=true" >> ${PG_REPMGR_CONF}
 echo "reconnect_attempts=4" >> ${PG_REPMGR_CONF}
 echo "reconnect_interval=5" >> ${PG_REPMGR_CONF}
 echo "monitor_interval_secs=2" >> ${PG_REPMGR_CONF}
 echo "retry_promote_interval_secs=300" >> ${PG_REPMGR_CONF}
 echo "pg_bindir='${PG_BINDIR}'" >> ${PG_REPMGR_CONF}
 echo "log_level='INFO'" >> ${PG_REPMGR_CONF}
 echo "log_status_interval=300" >> ${PG_REPMGR_CONF}
 echo "log_facility='STDERR'" >> ${PG_REPMGR_CONF}
 #echo "event_notification_command='${PG_EVENT_NOTIFICATION_SCRIPT}'" >> ${PG_REPMGR_CONF}
 echo "promote_command='repmgr standby promote -f ${PG_REPMGR_CONF}'" >> ${PG_REPMGR_CONF}
 echo "follow_command='repmgr standby follow -f ${PG_REPMGR_CONF} -W --log-to-file'" >> ${PG_REPMGR_CONF}
 echo "failover='automatic'" >> ${PG_REPMGR_CONF}
 echo "priority=${PRIORITY}" >> ${PG_REPMGR_CONF}
 echo "degraded_monitoring_timeout=-1" >> ${PG_REPMGR_CONF}
}

# 修改pg_hba.conf文件
write_pg_hba_conf()
{
 set_hba_param " local replication ${PG_USER} trust "
 set_hba_param " host replication ${PG_USER} 127.0.0.1/32 trust "
 set_hba_param " local repmgr ${PG_USER} trust "
 set_hba_param " host repmgr ${PG_USER} 127.0.0.1/32 trust "
 #set_hba_param " host replication ${PG_USER} ${NET_SEGMENT}/24 md5 "
 #set_hba_param " host repmgr ${PG_USER} ${NET_SEGMENT}/24 md5 "
 #set_hba_param " host repmgr repmgr ${NET_SEGMENT}/24 md5 "
 set_hba_param " host replication ${RE_USER} 0.0.0.0/0 md5 "
 set_hba_param " host repmgr ${PG_USER} 0.0.0.0/0 md5 "
 set_hba_param " host repmgr repmgr 0.0.0.0/0 md5 "
 set_hba_param " host all all 0.0.0.0/0 md5 "
}

# 修改.pgpass文件
write_pgpass()
{
 if [ -f ~/.pgpass ]
 then
 rm -f ~/.pgpass
 fi
 echo "*:*:*:${PG_USER}:${PG_PASSWORD}" >> ~/.pgpass
 echo "*:*:repmgr:repmgr:${PG_PASSWORD}" >> ~/.pgpass
 chmod 600 ~/.pgpass
}

附2：entrypoint.sh
#!/bin/bash
set -e

# shellcheck source=runtime/functions
#source "/home/postgres/runtime/functions"
source "/home/runtime/functions"
sudo chown -R postgres:postgres /home/postgres
if [ ! -d /home/postgres/pgarch/ ];then
 mkdir -p /home/postgres/pgarch/
fi

if [ -f /tmp/repmgrd.pid ];then
 rm -fr /tmp/repmgrd.pid
fi
#运行repmgr
configure_repmgr

