Memcache源码阅读（6）---数据存储

最新推荐文章于 2026-04-16 02:59:56 发布

原创最新推荐文章于 2026-04-16 02:59:56 发布 · 1k 阅读

0 ·

本内容遵循CC 4.0 BY-SA版权协议

标签

#memcached #源码

计算机同时被 3 个专栏收录

20 篇文章

订阅专栏

C++语言

19 篇文章

订阅专栏

网络编程

10 篇文章

订阅专栏

本文深入探讨了Memcached如何管理内存，包括数据的存储结构、LRU算法的应用、内存分配流程及item对象的管理等核心内容。

我看的源码版本是1.2.4

前面第4篇讲到了memcached怎么去管理内存的，memcached将内存分为不同大小的chunk，不同大小的数据就放到能装下那个数据的最小chunk中。现在我来讲讲数据是以怎样的形式存储在内存中的。

item的结构体

/*
 * Accessors for the variable-length payload that follows the fixed item
 * header. The payload starts at end[] and is laid out as: key region
 * (nkey + 1 bytes), then suffix (nsuffix bytes), then data (nbytes bytes).
 * Each macro expands its argument more than once, so pass an expression
 * without side effects.
 */
/* Pointer to the first byte of the key. */
#define ITEM_key(item) ((char*)&((item)->end[0]))
/* Pointer to the suffix: skips the nkey key bytes plus one extra byte. */
#define ITEM_suffix(item) ((char*) &((item)->end[0]) + (item)->nkey + 1)
/* Pointer to the data: skips the key region and the nsuffix suffix bytes. */
#define ITEM_data(item) ((char*) &((item)->end[0]) + (item)->nkey + 1 + (item)->nsuffix)
/* Total size in bytes: fixed header + key region + suffix + data. */
#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 + (item)->nsuffix + (item)->nbytes)


/*
 * Header of one stored item. The variable-length payload (key, suffix,
 * data) follows the header in the same allocation, starting at end[].
 */
typedef struct _stritem {
    struct _stritem *next;    /* next item in the LRU queue (towards the tail) */
    struct _stritem *prev;  /* previous item in the LRU queue (towards the head) */
    struct _stritem *h_next;    /* hash chain next */ /* link used to chain colliding entries in the hash table */
    rel_time_t      time;       /* least recent access */
    rel_time_t      exptime;    /* expire time; 0 is treated as "never expires" by the expiry checks */
    int             nbytes;     /* size of data */
    unsigned short  refcount;   /* reference count; eviction in do_item_alloc only picks items with refcount == 0 */
    uint8_t         nsuffix;    /* length of flags-and-length string */
    uint8_t         it_flags;   /* ITEM_* above */
    uint8_t         slabs_clsid;/* which slab class we're in */
    uint8_t         nkey;       /* key length; the ITEM_* macros add 1 to it when sizing the key region */
    uint64_t        cas_id;     /* the CAS identifier */
    void * end[];               /* zero-size marker for the start of the payload */
    /* then null-terminal key */
    /* then " flags length\r\n" (no terminating null) */
    /* then data with terminating \r\n (no terminating null; it's binary!) */
} item;

下图是item的存储结构，memcached为数据寻找适配的chunk是根据ntotal的大小来找的。
memcached_item_store

结构体最后那个void* end[]叫柔性数组成员（flexible array member，也就是结构体末尾的空数组），这是一个很常用的技术。它标记了结构体头部之后那段变长数据的起始位置，这段数据依次存放key、suffix（flags和binary data的长度）以及binary data。

它的优点是：空数组本身不占用结构体的空间（sizeof不把它算在内），变长数据和结构体头部放在同一块连续内存里，只需要一次分配，也不需要额外的指针去指向这些数据。

item的初始化

memcached默认使用最近最少使用算法（LRU）来管理内存。使用这种方法管理内存就必须维护一个按最近使用情况排序的队列，memcached也维护着这样的队列：它为每一种chunk大小（即每个slab class）都维护了一个对应的队列。

/* Per-class LRU queues; every array is indexed by item->slabs_clsid. */
/* Head of each queue: item_link_q() inserts new items here. */
static item *heads[LARGEST_ID];
/* Tail of each queue: do_item_alloc() starts its eviction search here. */
static item *tails[LARGEST_ID];
/* Number of items currently linked into each queue. */
static unsigned int sizes[LARGEST_ID];
/* Reset every per-class LRU queue to the empty state (no head, no tail, size 0). */
void item_init(void) {
    int cls = 0;

    while (cls < LARGEST_ID) {
        heads[cls] = NULL;
        tails[cls] = NULL;
        sizes[cls] = 0;
        cls++;
    }
}

LRU队列管理

队列头部是最近访问过的item，对item有操作就会将item放到LRU的队头。

/*
 * Insert `it` at the head of the LRU queue of its slab class
 * (it->slabs_clsid) and bump that queue's size counter. The item must not
 * carry the ITEM_SLABBED flag and must not already be the queue head.
 */
static void item_link_q(item *it) { /* item is the new head */
    item **headp;
    item **tailp;
    item *old_head;

    /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */
    assert(!(it->it_flags & ITEM_SLABBED));

    headp = &heads[it->slabs_clsid];
    tailp = &tails[it->slabs_clsid];
    old_head = *headp;

    assert(it != old_head);
    /* head and tail are either both set or both empty */
    assert((old_head != NULL) == (*tailp != NULL));

    it->prev = NULL;
    it->next = old_head;
    if (old_head != NULL) {
        old_head->prev = it;
    }
    *headp = it;

    /* first item of an empty queue is also its tail */
    if (*tailp == NULL) {
        *tailp = it;
    }

    sizes[it->slabs_clsid] += 1;
}

/*
 * Detach `it` from the LRU queue of its slab class: fix up the queue head
 * and tail if `it` was at either end, splice its neighbours together and
 * decrement the queue's size counter. The item's own next/prev pointers
 * are left untouched.
 */
static void item_unlink_q(item *it) {
    const uint8_t cls = it->slabs_clsid;
    item *after = it->next;
    item *before = it->prev;

    /* always true, warns: assert(it->slabs_clsid <= LARGEST_ID); */
    if (heads[cls] == it) {
        assert(before == NULL);
        heads[cls] = after;
    }
    if (tails[cls] == it) {
        assert(after == NULL);
        tails[cls] = before;
    }

    /* an item must never be linked to itself */
    assert(after != it);
    assert(before != it);

    if (after != NULL) {
        after->prev = before;
    }
    if (before != NULL) {
        before->next = after;
    }

    sizes[cls]--;
}

为item分配chunk空间

为item分配chunk的流程大致为：

有新连接到memcached，为连接创建一个item，这个时候就会调用do_item_alloc来获得一个chunk。
从网络中读到的数据都是写到上面创建的item里
将item移交到hash表管理，使用的是引用计数技术：交给hash表时（调用store_item，下一节讲）引用计数加1，然后连接自己释放对item的管理权，引用计数减1（调用do_item_remove）。

item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes) {
    uint8_t nsuffix;
    item *it;
    char suffix[40];
    //构造item的结构
    size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);

    unsigned int id = slabs_clsid(ntotal);

    it = slabs_alloc(ntotal);
    //如果想slabs请求内存返回失败，memcached就会从LRU队列中找出最少使用的item，
    //并且该item没有被引用中拿出一个chunk来存放数据
    //不会将refcount > 0的置换出来吗？？ 怎么才回使它减？
    //是的，refcount>0就证明有人在使用它。
    //如果它超时，并且每人访问它，那么它就不会被清除？？好像是这样的。。
    if (it == 0) {
        int tries = 50;
        item *search;

        if (tails[id] == 0) return NULL;
        //尝试找50次
        for (search = tails[id]; tries > 0 && search != NULL; tries--, search=search->prev) {
                //如果一个数据项一直没有被访问到，那么它也不会减少它的refcount，就一直没有被删掉，这是个bug？
            if (search->refcount == 0) {
               if (search->exptime == 0 || search->exptime > current_time) {
                       STATS_LOCK();
                       stats.evictions++;
                       STATS_UNLOCK();
                }
                do_item_unlink(search);
                break;
            }
        }
        //再次尝试向slab申请内存
        it = slabs_alloc(ntotal);
        if (it == 0) return NULL;
    }
    it->slabs_clsid = id;
    it->next = it->prev = it->h_next = 0;
    it->refcount = 1;     /* the caller will have a reference */
    it->it_flags = 0;
    it->nkey = nkey;
    it->nbytes = nbytes;
    strcpy(ITEM_key(it), key);
    it->exptime = exptime;
    memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
    it->nsuffix = nsuffix;
    return it;
}

store_item

store_item根据命令分为ADD、SET、REPLACE、APPEND、PREPEND、CAS这几种。

memcached先在hash表中查找有没有这个key对应的数据，再根据不同的命令进行不同的操作，下面就是具体的代码。

/*
 * Store `it` according to the storage command `comm`.
 *
 * The current item for the same key is looked up first (this takes a
 * reference on it). The branch order below matters and is easy to misread,
 * so the cases are spelled out:
 *   - NREAD_ADD and the key exists: nothing is stored; the existing item is
 *     only passed to do_item_update().
 *   - NREAD_REPLACE / NREAD_APPEND / NREAD_PREPEND and the key is absent:
 *     nothing is stored.
 *   - delete_locked is set and the command is REPLACE / ADD / APPEND /
 *     PREPEND: nothing is stored.
 *   - NREAD_CAS: the new item replaces the old one only when both cas_id
 *     values match.
 *   - everything else (e.g. SET, ADD of a new key, REPLACE of an existing
 *     key, APPEND / PREPEND): APPEND / PREPEND first build a combined item
 *     in a freshly allocated chunk; the result then replaces the old item,
 *     or is linked as a new item when there is no old one.
 *
 * Returns:
 *   0 - not stored (including allocation failure during APPEND / PREPEND)
 *   1 - stored
 *   2 - CAS: an item exists but its cas_id differs
 *   3 - CAS: no item found for the key
 */
int do_store_item(item *it, int comm) {
    char *key = ITEM_key(it);
    bool delete_locked = false;
    item *old_it = do_item_get_notedeleted(key, it->nkey, &delete_locked);
    int stored = 0;

    item *new_it = NULL;
    int flags;

    if (old_it != NULL && comm == NREAD_ADD) {
        /* add only adds a nonexistent item, but promote to head of LRU */
        do_item_update(old_it);
    } else if (!old_it && (comm == NREAD_REPLACE
        || comm == NREAD_APPEND || comm == NREAD_PREPEND))
    {
        /* replace only replaces an existing value; don't store */
    } else if (delete_locked && (comm == NREAD_REPLACE || comm == NREAD_ADD
        || comm == NREAD_APPEND || comm == NREAD_PREPEND))
    {
        /* replace and add can't override delete locks; don't store */
    } else if (comm == NREAD_CAS) {
        /* validate cas operation */
        if (delete_locked)
            old_it = do_item_get_nocheck(key, it->nkey);

        if (old_it == NULL) {
            /* LRU expired */
            stored = 3;
        } else if (it->cas_id == old_it->cas_id) {
            /* cas validates */
            do_item_replace(old_it, it);
            stored = 1;
        } else {
            stored = 2;
        }
    } else {
        /*
         * Append - combine new and old record into single one. Here it's
         * atomic and thread-safe.
         */

        if (comm == NREAD_APPEND || comm == NREAD_PREPEND) {

            /* we have it and old_it here - alloc memory to hold both */
            /* flags are not carried separately - recover them by parsing
             * the leading number of ITEM_suffix(old_it) */

            flags = (int) strtol(ITEM_suffix(old_it), (char **) NULL, 10);

            /* Both data areas end in CRLF; the combined item keeps only one. */
            new_it = do_item_alloc(key, it->nkey, flags, old_it->exptime, it->nbytes + old_it->nbytes - 2 /* CRLF */);

            if (new_it == NULL) {
                /* SERVER_ERROR out of memory */
                /* Release the reference taken by the lookup above before
                 * bailing out; otherwise old_it keeps a non-zero refcount
                 * forever and can never be picked for eviction. */
                if (old_it != NULL)
                    do_item_remove(old_it);
                return 0;
            }

            /* copy data from it and old_it to new_it */

            if (comm == NREAD_APPEND) {
                memcpy(ITEM_data(new_it), ITEM_data(old_it), old_it->nbytes);
                /* overwrite the old item's trailing CRLF with the new data */
                memcpy(ITEM_data(new_it) + old_it->nbytes - 2 /* CRLF */, ITEM_data(it), it->nbytes);
            } else {
                /* NREAD_PREPEND */
                memcpy(ITEM_data(new_it), ITEM_data(it), it->nbytes);
                /* overwrite the new data's trailing CRLF with the old data */
                memcpy(ITEM_data(new_it) + it->nbytes - 2 /* CRLF */, ITEM_data(old_it), old_it->nbytes);
            }

            /* From here on the combined item is the one being stored. */
            it = new_it;
        }

        if (delete_locked)
            old_it = do_item_get_nocheck(key, it->nkey);

        if (old_it != NULL)
            do_item_replace(old_it, it);
        else
            do_item_link(it);

        stored = 1;
    }

    if (old_it != NULL)
        do_item_remove(old_it);         /* release our reference */
    if (new_it != NULL)
        do_item_remove(new_it);         /* release the reference from do_item_alloc */

    return stored;
}

/*
 * Look up `key` in the hash table on behalf of the store path.
 *
 * An item that is found but is no longer valid is unlinked and treated as
 * missing. That is the case when:
 *   - settings.oldest_live is set, is not in the future, and the item's
 *     time is not newer than it; or
 *   - the item has a non-zero exptime that is not in the future.
 *
 * On success the item's refcount is incremented and the item is returned;
 * the caller must release that reference. Returns NULL when there is no
 * valid item.
 *
 * delete_locked - optional out-parameter; set to false whenever it is
 *                 non-NULL. NOTE(review): nothing in this function ever
 *                 reports true - confirm whether a delete-lock check is
 *                 expected here.
 */
item *do_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked) {
    item *it = assoc_find(key, nkey);

    /* Give the caller a defined value even if it did not pre-initialise it. */
    if (delete_locked != NULL) {
        *delete_locked = false;
    }

    if (it != NULL) {
        const bool too_old = settings.oldest_live != 0 &&
                             settings.oldest_live <= current_time &&
                             it->time <= settings.oldest_live;
        const bool expired = it->exptime != 0 && it->exptime <= current_time;

        if (too_old || expired) {
            do_item_unlink(it);           /* MTSAFE - cache_lock held */
            it = NULL;
        }
    }

    if (it != NULL) {
        it->refcount++;
        DEBUG_REFCNT(it, '+');
    }
    return it;
}