PG fast模式停库在归档过慢或WAL发送有延迟时会被阻塞

最新推荐文章于 2025-01-04 13:11:45 发布

转载最新推荐文章于 2025-01-04 13:11:45 发布 · 2.2k 阅读

0 ·

本内容遵循CC 4.0 BY-SA版权协议

原文链接：http://mysql.taobao.org/monthly/2019/09/10/

标签

#postgresql #源码学习 #归档 #主从延迟 #停库

性能同时被 3 个专栏收录

149 篇文章

订阅专栏

PostgreSQL

148 篇文章

订阅专栏

源码学习

72 篇文章

订阅专栏

尝试停库时遇到的两个现象：

当archiver process仍有.ready文件待处理时，以fast模式执行stop，必须等其处理完
当walsender进程仍有wal日志未发送至从库时，必须等其发送完

本来打算自己研究一下，发现阿里云早就有文章研究过了，学习记录一下。

原文链接：数据库内核月报

如果数据库开启了归档，smart, fast 停库时会怎么处理pgarch进程

发起最后一次archive周期，将所有.ready的wal进行归档，除非中间archive_command遇到错误，否则要等所有的.ready文件都触发并执行完成archive_command。

如果有walsender进程在，smart, fast 停库时会怎么处理walsender进程

如果有walsender进程存在（例如有standby，有pg_basebackup，有pg_receivewal等利用流复制协议的客户端就有walsender进程），那么要等这个walsender将所有未发送完的wal日志都发送给下游。

src/backend/postmaster/postmaster.c

/*  
 * Reaper -- signal handler to cleanup after a child process dies.  
 */  
static void  
reaper(SIGNAL_ARGS)  
{  
  
.....................  
        while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)  
        {  
  
.......................  
  
                /*  
                 * Was it the checkpointer?  
                 */  
                if (pid == CheckpointerPID)  
                {  
                        CheckpointerPID = 0;  
                        if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)  
                        {  
                                /*  
                                 * OK, we saw normal exit of the checkpointer after it's been  
                                 * told to shut down.  We expect that it wrote a shutdown  
                                 * checkpoint.  (If for some reason it didn't, recovery will  
                                 * occur on next postmaster start.)  
                                 *  
                                 * At this point we should have no normal backend children  
                                 * left (else we'd not be in PM_SHUTDOWN state) but we might  
                                 * have dead_end children to wait for.  
                                 *  
                                 * If we have an archiver subprocess, tell it to do a last  
                                 * archive cycle and quit. Likewise, if we have walsender  
                                 * processes, tell them to send any remaining WAL and quit.  
                                 */  
                                Assert(Shutdown > NoShutdown);  
  
                                /* 唤醒归档进程 进行一轮归档 */  
                                /* Waken archiver for the last time */  
                                if (PgArchPID != 0)  
                                        signal_child(PgArchPID, SIGUSR2);  
  
                                /* wal sender，发送完所有未发送的redo */  
                                /*  
                                 * Waken walsenders for the last time. No regular backends  
                                 * should be around anymore.  
                                 */  
                                SignalChildren(SIGUSR2);  
  
                                pmState = PM_SHUTDOWN_2;  
  
                                /*  
                                 * We can also shut down the stats collector now; there's  
                                 * nothing left for it to do.  
                                 */  
                                if (PgStatPID != 0)  
                                        signal_child(PgStatPID, SIGQUIT);  
                        }

唤醒归档

src/backend/postmaster/pgarch.c

/*
 * pgarch_waken_stop
 *
 * SIGUSR2 signal handler for the archiver process.  Flags that a final
 * archive cycle followed by shutdown has been requested, then sets the
 * process latch.
 */
static void
pgarch_waken_stop(SIGNAL_ARGS)
{
    /* keep the interrupted code's errno intact across this handler */
    const int   errno_on_entry = errno;

    /* shutdown in progress: ask for one last archive cycle, then stop */
    ready_to_stop = true;
    SetLatch(MyLatch);

    errno = errno_on_entry;
}

/*
 * pgarch_MainLoop
 *
 * Main loop for the archiver: alternates between running an archive cycle
 * and waiting on the process latch.  Returns once a stop has been requested
 * via SIGUSR2 and the final cycle has run, when postmaster death is
 * detected, or when a SIGTERM has been pending for 60 seconds or more.
 */
static void
pgarch_MainLoop(void)
{
    pg_time_t   last_cycle_at = 0;
    bool        final_pass;

    /*
     * Run a copy cycle straight away on entry: a previous database run (or
     * an archiver that died unexpectedly) may have left unarchived files
     * behind.  Later cycles are driven by a signal or a timeout.
     */
    wakened = true;

    /*
     * Normally there is nothing to do but wait for a signal; still, the
     * archiver exists to protect data, so it also wakes up periodically on
     * its own initiative.
     */
    for (;;)
    {
        ResetLatch(MyLatch);

        /*
         * Once SIGUSR2 has been seen (database shutdown), this iteration is
         * the last one: one more archive cycle, then exit.
         */
        final_pass = ready_to_stop;

        /* Pick up configuration changes */
        if (got_SIGHUP)
        {
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);
        }

        /*
         * After SIGTERM we normally idle until SIGUSR2 shows up.  A stray
         * SIGTERM would then disable archiving indefinitely, so once 60
         * seconds or more have passed since SIGTERM was first noticed, leave
         * the loop anyway; the postmaster can start a new archiver if needed.
         */
        if (got_SIGTERM)
        {
            time_t      now = time(NULL);

            if (last_sigterm_time == 0)
                last_sigterm_time = now;
            else if ((unsigned int) (now - last_sigterm_time) >=
                     (unsigned int) 60)
                break;
        }

        /* The actual work: archive whatever is pending */
        if (wakened || final_pass)
        {
            wakened = false;
            pgarch_ArchiverCopyLoop();  /* on the final pass, the last cycle */
            last_cycle_at = time(NULL);
        }

        /*
         * Unless this is the final pass, sleep until the latch is set, until
         * PGARCH_AUTOWAKE_INTERVAL has elapsed since the last cycle, or until
         * the postmaster dies.
         */
        if (!final_pass)
        {
            pg_time_t   now = (pg_time_t) time(NULL);
            int         sleep_secs = PGARCH_AUTOWAKE_INTERVAL - (now - last_cycle_at);

            if (sleep_secs <= 0)
            {
                /* interval already used up: go straight into another cycle */
                wakened = true;
            }
            else
            {
                int         events;

                events = WaitLatch(MyLatch,
                                   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                                   sleep_secs * 1000L,
                                   WAIT_EVENT_ARCHIVER_MAIN);
                if (events & WL_TIMEOUT)
                    wakened = true;
                if (events & WL_POSTMASTER_DEATH)
                    final_pass = true;
            }
        }

        /*
         * Quit either because the postmaster died (not expected) or because
         * the extra archive cycle requested by SIGUSR2 has been completed.
         */
        if (final_pass)
            break;
    }
}

归档所有未归档日志，直到全部.ready文件对应的wal都归档完成，或者因出错（如同一文件归档连续失败达到NUM_ARCHIVE_RETRIES次）而放弃本轮归档

/*
 * pgarch_ArchiverCopyLoop
 *
 * Archives every outstanding xlog, then returns.  Also returns early,
 * leaving work for a later call, when SIGTERM has been received, the
 * postmaster is gone, archive_command is not set, or the retry limit for
 * archiving (or for orphan status-file cleanup) has been reached.
 */
static void
pgarch_ArchiverCopyLoop(void)
{
    char        xlog[MAX_XFN_CHARS + 1];

    /*
     * Walk over all xlogs whose archive_status is .ready and archive each
     * one.  Usually that is a single file, but backends may add more files
     * to the list while earlier ones are still being copied.
     */
    while (pgarch_readyXlog(xlog))
    {
        int         archive_failures = 0;
        int         orphan_failures = 0;

        /* retry loop for the current file */
        for (;;)
        {
            struct stat st;
            char        wal_path[MAXPGPATH];

            /*
             * Start no further archive commands after SIGTERM, nor after an
             * unexpected postmaster death.  The former tries to keep init
             * from SIGKILLing the command; the latter avoids conflicts with
             * an archiver spawned by a newer postmaster.
             */
            if (got_SIGTERM || !PostmasterIsAlive())
                return;

            /*
             * Re-read the configuration if asked to, so that a new
             * archive_command takes effect as soon as possible even while a
             * backlog of files is waiting.
             */
            if (got_SIGHUP)
            {
                got_SIGHUP = false;
                ProcessConfigFile(PGC_SIGHUP);
            }

            /* without a command there is nothing we can do */
            if (!XLogArchiveCommandSet())
            {
                ereport(WARNING,
                        (errmsg("archive_mode enabled, yet archive_command is not set")));
                return;
            }

            /*
             * Archive status files are not removed durably, so a system
             * crash can leave a .ready file behind for a WAL segment that
             * was already recycled or removed.  Such an orphan status file
             * is simply removed.  Plain unlink() is enough: after another
             * crash the same orphan would just be removed again, so
             * durability is not a concern.
             */
            snprintf(wal_path, MAXPGPATH, XLOGDIR "/%s", xlog);
            if (stat(wal_path, &st) != 0 && errno == ENOENT)
            {
                char        ready_path[MAXPGPATH];

                StatusFilePath(ready_path, xlog, ".ready");
                if (unlink(ready_path) == 0)
                {
                    ereport(WARNING,
                            (errmsg("removed orphan archive status file \"%s\"",
                                    ready_path)));

                    /* done with this one; go on to the next status file */
                    break;
                }

                if (++orphan_failures >= NUM_ORPHAN_CLEANUP_RETRIES)
                {
                    ereport(WARNING,
                            (errmsg("removal of orphan archive status file \"%s\" failed too many times, will try again later",
                                    ready_path)));

                    /* stop trying to clean up orphan status files */
                    return;
                }

                /* pause for a second, then try again */
                pg_usleep(1000000L);
                continue;
            }

            if (!pgarch_archiveXlog(xlog))
            {
                /* report the failed archive attempt to the collector */
                pgstat_send_archiver(xlog, true);

                if (++archive_failures >= NUM_ARCHIVE_RETRIES)
                {
                    ereport(WARNING,
                            (errmsg("archiving write-ahead log file \"%s\" failed too many times, will try again later",
                                    xlog)));
                    return;     /* abandon archiving for now */
                }

                /* pause for a second, then retry the same file */
                pg_usleep(1000000L);
                continue;
            }

            /* archived successfully */
            pgarch_archiveDone(xlog);

            /* report the successfully archived WAL file to the collector */
            pgstat_send_archiver(xlog, false);

            break;              /* leave the retry loop */
        }
    }
}