/**
 * @brief Find the value associated with the given key in the bucket.
 */
auto Find(const K &key, V &value) -> bool;

/**
 * @brief Given the key, remove the corresponding key-value pair in the bucket.
 */
auto Remove(const K &key) -> bool;

/**
 * @brief Insert the given key-value pair into the bucket.
 *        1. If a key already exists, the value should be updated.
 *        2. If the bucket is full, do nothing and return false.
 */
auto Insert(const K &key, const V &value) -> bool;
template <typename K, typename V>
auto ExtendibleHashTable<K, V>::Bucket::Find(const K &key, V &value) -> bool {
  for (auto it = list_.begin(); it != list_.end(); ++it) {
    if (it->first == key) {
      value = it->second;
      return true;
    }
  }
  return false;
}

template <typename K, typename V>
auto ExtendibleHashTable<K, V>::Bucket::Remove(const K &key) -> bool {
  for (auto it = list_.begin(); it != list_.end(); ++it) {
    if (it->first == key) {
      list_.erase(it);
      return true;
    }
  }
  return false;
}

template <typename K, typename V>
auto ExtendibleHashTable<K, V>::Bucket::Insert(const K &key, const V &value) -> bool {
  for (auto it = list_.begin(); it != list_.end(); ++it) {
    if (it->first == key) {
      it->second = value;
      return true;
    }
  }
  if (!IsFull()) {
    list_.emplace_back(key, value);
    return true;
  }
  return false;
}
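These three methods are all that needs to be written for the bucket itself; the remaining accessors that the table-level code relies on later (IsFull(), GetDepth(), GetItems()) are already provided by the starter header. Assuming the handout's member names (a capacity member such as size_ and a local-depth member such as depth_), they amount to roughly the following sketch; the exact bodies in your handout may differ slightly:

// Rough sketch of the provided accessors, shown only for reference.
auto IsFull() -> bool { return list_.size() == size_; }            // at capacity: caller must split
auto GetDepth() -> int { return depth_; }                          // local depth of this bucket
auto GetItems() -> std::list<std::pair<K, V>> & { return list_; }  // raw access, used when redistributing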
The ExtendibleHashTable class
int global_depth_;    // The global depth of the directory
size_t bucket_size_;  // The size of a bucket
int num_buckets_;     // The number of buckets in the hash table
mutable std::mutex latch_;
std::vector<std::shared_ptr<Bucket>> dir_;  // The directory of the hash table
template <typename K, typename V>
void ExtendibleHashTable<K, V>::Insert(const K &key, const V &value) {
  std::scoped_lock<std::mutex> lock(latch_);
  size_t index = IndexOf(key);

  // If the key already exists, only the value needs to be updated.
  V val;  // unused; only needed to call Bucket::Find()
  if (dir_[index]->Find(key, val)) {
    dir_[index]->Insert(key, value);
    return;
  }

  // If the bucket is full, keep splitting until the key fits.
  while (dir_[index]->IsFull()) {
    int local_depth = dir_[index]->GetDepth();

    // local_depth == global_depth_: double the directory size first.
    if (local_depth == global_depth_) {
      size_t dir_size = dir_.size();
      dir_.reserve(2 * dir_size);
      // Copy the shared_ptrs from the first half directly into the second half.
      std::copy_n(dir_.begin(), dir_size, std::back_inserter(dir_));
      ++global_depth_;  // this is why index = IndexOf(key) must be refreshed at the end of the loop
    }

    // Make two new buckets.
    auto b0 = NewBucket(local_depth + 1);
    auto b1 = NewBucket(local_depth + 1);
    ++num_buckets_;  // only +1, because the old bucket is replaced by the two new ones

    // All items in the old bucket share the same low local_depth bits of their hash,
    // so only the bit at position local_depth decides which new bucket each item goes to.
    int local_mask = 1 << local_depth;

    // Redistribute the items from the old bucket.
    for (auto &[k, v] : dir_[index]->GetItems()) {
      size_t item_hash = std::hash<K>()(k);  // ExtendibleHashTable::IndexOf() uses std::hash<K>() as well
      if (static_cast<bool>(item_hash & local_mask)) {
        b1->Insert(k, v);
      } else {
        b0->Insert(k, v);
      }
    }

    // Rewire every directory slot that pointed to the old bucket.
    for (size_t i = (std::hash<K>()(key) & (local_mask - 1)); i < dir_.size(); i += local_mask) {
      if (static_cast<bool>(i & local_mask)) {
        dir_[i] = b1;
      } else {
        dir_[i] = b0;
      }
    }

    // Update the index of the incoming item after the split.
    index = IndexOf(key);
  }

  dir_[index]->Insert(key, value);
}
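For reference, IndexOf() is provided by the starter code and is essentially the following: the low global_depth_ bits of std::hash<K> select the directory slot, which is exactly what the split logic above relies on. A small worked example of one split follows in the comments.

template <typename K, typename V>
auto ExtendibleHashTable<K, V>::IndexOf(const K &key) -> size_t {
  int mask = (1 << global_depth_) - 1;
  return std::hash<K>()(key) & mask;
}

// Worked example of one split (global_depth_ = 1, the bucket behind dir_[1] is full, local depth 1):
//   - Doubling copies the pointers, so dir_ = {B0, B1, B0, B1} and global_depth_ becomes 2.
//   - local_mask = 1 << 1 = 0b10; items whose hash has that bit set move to b1, the rest to b0.
//   - The rewiring loop visits i = 1, 3 (start = hash & (local_mask - 1) = 1, step = local_mask = 2),
//     setting dir_[1] = b0 (1 & 0b10 == 0) and dir_[3] = b1 (3 & 0b10 != 0).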
Testing
The blog post linked earlier includes a few extra test cases written by its author; it is worth adding them and running them as well.
make extendible_hash_table_test -j2
./test/extendible_hash_table_test
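If you want a quick sanity check of your own before wiring in those extra cases, the public API can be exercised directly. This is a minimal sketch, not one of the blog's tests, and it assumes the table's constructor takes the bucket size as in the handout:

#include <cassert>
#include <string>
#include "container/hash/extendible_hash_table.h"

int main() {
  bustub::ExtendibleHashTable<int, std::string> table(2);  // bucket size 2
  table.Insert(1, "a");
  table.Insert(1, "b");  // duplicate key: the value is overwritten in place
  std::string v;
  assert(table.Find(1, v) && v == "b");
  for (int k = 2; k <= 8; ++k) {  // force a few splits and directory doublings
    table.Insert(k, std::to_string(k));
  }
  assert(table.Find(7, v) && v == "7");
  assert(table.Remove(1) && !table.Find(1, v));
  return 0;
}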
To address this shortcoming of plain LRU, the LRU-K algorithm was introduced. On top of LRU, LRU-K also considers how long an entry has been in the cache, i.e. the K value; in this project we track the number of accesses to a cached page (hit_count) instead of its residence time. We implement LRU-K with two queues: history_list_ holds pages with hit_count < k, and cache_list_ holds pages with hit_count >= k. When a page must be evicted, we first evict the oldest page in history_list_ using FIFO; only if history_list_ has no evictable page do we fall back to evicting from cache_list_ using LRU. To achieve O(1) time, we use an unordered_map to store the mapping from each page to its iterator in whichever queue it lives in. Finally, during eviction we must respect evictable_, i.e. whether a frame may be evicted at all.
Putting this together, the members of the LRUKReplacer class are as follows:
// Added struct to record the status of each frame.
struct FrameEntry {
  size_t hit_count_{0};
  bool evictable_{false};
  std::list<frame_id_t>::iterator pos_;
};

std::list<frame_id_t> history_list_;  // hit_count_ < k: evict with FIFO
std::list<frame_id_t> cache_list_;    // hit_count_ >= k: evict with LRU, only when history_list_ is empty
std::unordered_map<frame_id_t, FrameEntry> entries_;

size_t curr_size_{0};   // number of evictable frames
size_t replacer_size_;  // frame_id needs to be smaller than replacer_size_
size_t k_;
std::mutex latch_;
/**
 * @brief Find the frame with the largest backward k-distance and evict that frame. Only frames
 *        that are marked as evictable are candidates for eviction.
 */
auto Evict(frame_id_t *frame_id) -> bool;

/**
 * @brief Record the event that the given frame id is accessed at the current timestamp.
 *        Create a new entry for the access history if the frame id has not been seen before.
 */
void RecordAccess(frame_id_t frame_id);

/**
 * @brief Toggle whether a frame is evictable or non-evictable. This function also
 *        controls the replacer's size. Note that the size equals the number of evictable entries.
 */
void SetEvictable(frame_id_t frame_id, bool set_evictable);

/**
 * @brief Remove an evictable frame from the replacer, along with its access history.
 *        This function should also decrement the replacer's size if removal is successful.
 */
void Remove(frame_id_t frame_id);
auto LRUKReplacer::Evict(frame_id_t *frame_id) -> bool {
  std::scoped_lock<std::mutex> lock(latch_);
  if (curr_size_ == 0) {
    return false;
  }
  if (!history_list_.empty()) {
    // First in, first out: the oldest entry sits at the back, because new entries are pushed to the front.
    for (auto rit = history_list_.rbegin(); rit != history_list_.rend(); ++rit) {
      if (entries_[*rit].evictable_) {
        *frame_id = *rit;
        history_list_.erase(std::next(rit).base());  // list::erase only accepts a forward iterator
        entries_.erase(*frame_id);
        --curr_size_;
        return true;
      }
    }
  }
  if (!cache_list_.empty()) {
    // RecordAccess() keeps the most recently used frame at the front, so scan from the back (LRU).
    for (auto rit = cache_list_.rbegin(); rit != cache_list_.rend(); ++rit) {
      if (entries_[*rit].evictable_) {
        *frame_id = *rit;
        cache_list_.erase(std::next(rit).base());
        entries_.erase(*frame_id);
        --curr_size_;
        return true;
      }
    }
  }
  return false;
}
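The only tricky line above is std::next(rit).base(): list::erase wants a forward iterator, and for a reverse iterator rit, rit.base() refers to the element just after (in forward order) the one rit denotes, so you advance the reverse iterator first and then convert. A tiny standalone illustration, independent of the replacer:

#include <cassert>
#include <list>

int main() {
  std::list<int> l{10, 20, 30};
  auto rit = l.rbegin();           // refers to 30 (the back of the list)
  ++rit;                           // now refers to 20
  l.erase(std::next(rit).base());  // base() of the *next* reverse iterator is the forward iterator to 20
  assert((l == std::list<int>{10, 30}));
  return 0;
}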
void LRUKReplacer::RecordAccess(frame_id_t frame_id) {
  std::scoped_lock<std::mutex> lock(latch_);
  if (frame_id > static_cast<frame_id_t>(replacer_size_)) {
    throw std::invalid_argument(std::string("Invalid frame_id: " + std::to_string(frame_id)));
  }

  // For new frames the default hit count is 0; a new frame is inserted into entries_ right here.
  size_t count = ++entries_[frame_id].hit_count_;
  if (count == 1) {  // new frame
    // LOG_INFO("%d NEW FRAME", frame_id);
    // ++curr_size_;  // bug 1: must NOT increment here, because evictable_ defaults to false
    history_list_.emplace_front(frame_id);
    entries_[frame_id].pos_ = history_list_.begin();
  } else if (count == k_) {
    // Reached k hits: move from history_list_ to the front of cache_list_.
    // LOG_INFO("No.%d FRAME MOVE FROM HIST TO CACHE", frame_id);
    history_list_.erase(entries_[frame_id].pos_);
    cache_list_.emplace_front(frame_id);
    entries_[frame_id].pos_ = cache_list_.begin();
  } else if (count > k_) {
    // Move to the front of cache_list_ to maintain the LRU order.
    cache_list_.erase(entries_[frame_id].pos_);
    cache_list_.emplace_front(frame_id);
    entries_[frame_id].pos_ = cache_list_.begin();
  }
  // count < k: stay put in history_list_, because that list is evicted in FIFO order.
  // LOG_INFO("RECORD: %d", frame_id);
}
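With Evict and RecordAccess in place, the two-queue behaviour described above can be checked on a short trace. A minimal sketch, assuming the handout's constructor LRUKReplacer(num_frames, k) and the public methods declared earlier:

#include <cassert>
#include "buffer/lru_k_replacer.h"

int main() {
  bustub::LRUKReplacer replacer(7, 2);  // 7 frames, k = 2

  replacer.RecordAccess(1);
  replacer.RecordAccess(2);
  replacer.RecordAccess(1);  // frame 1 reaches k hits and moves to cache_list_
  replacer.SetEvictable(1, true);
  replacer.SetEvictable(2, true);

  bustub::frame_id_t victim;
  assert(replacer.Evict(&victim) && victim == 2);  // history_list_ (fewer than k hits) is drained first, FIFO
  assert(replacer.Evict(&victim) && victim == 1);  // only then does cache_list_ give up frames, LRU
  return 0;
}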
/**
 * @brief Creates a new BufferPoolManagerInstance.
 * @param pool_size the size of the buffer pool
 * @param disk_manager the disk manager
 * @param replacer_k the lookback constant k for the LRU-K replacer
 * @param log_manager the log manager (for testing only: nullptr = disable logging). Please ignore this for P1.
 */
BufferPoolManagerInstance(size_t pool_size, DiskManager *disk_manager, size_t replacer_k = LRUK_REPLACER_K,
                          LogManager *log_manager = nullptr);
/** Number of pages in the buffer pool. */
const size_t pool_size_;
/** The next page id to be allocated. */
std::atomic<page_id_t> next_page_id_ = 0;
/** Bucket size for the extendible hash table. */
const size_t bucket_size_ = 4;

/** Array of buffer pool pages. A frame_id is simply an index into this array;
 *  each slot can be thought of as a page frame in memory. */
Page *pages_;
/** Pointer to the disk manager. */
DiskManager *disk_manager_ __attribute__((__unused__));
/** Pointer to the log manager. Please ignore this for P1. */
LogManager *log_manager_ __attribute__((__unused__));
/** Page table for keeping track of buffer pool pages. */
ExtendibleHashTable<page_id_t, frame_id_t> *page_table_;
/** Replacer to find unpinned pages for replacement. */
LRUKReplacer *replacer_;
/** List of free frames that don't have any pages on them. */
std::list<frame_id_t> free_list_;
/** This latch protects shared data structures. We recommend updating this comment to describe what it protects. */
std::mutex latch_;
/**
 * @brief Create a new page in the buffer pool. Set page_id to the new page's id, or return nullptr if all
 *        frames are currently in use and not evictable (in other words, pinned).
 */
auto NewPgImp(page_id_t *page_id) -> Page * override;

/**
 * @brief Fetch the requested page from the buffer pool. Return nullptr if page_id needs to be fetched from
 *        the disk but all frames are currently in use and not evictable (in other words, pinned).
 */
auto FetchPgImp(page_id_t page_id) -> Page * override;

/**
 * @brief Unpin the target page from the buffer pool. If page_id is not in the buffer pool or its pin count
 *        is already 0, return false.
 */
auto UnpinPgImp(page_id_t page_id, bool is_dirty) -> bool override;

/**
 * @brief Flush the target page to disk.
 *        Use the DiskManager::WritePage() method to flush a page to disk, REGARDLESS of the dirty flag.
 *        Unset the dirty flag of the page after flushing.
 */
auto FlushPgImp(page_id_t page_id) -> bool override;

/**
 * @brief Flush all the pages in the buffer pool to disk.
 */
void FlushAllPgsImp() override;

/**
 * @brief Delete a page from the buffer pool. If page_id is not in the buffer pool, do nothing and return
 *        true. If the page is pinned and cannot be deleted, return false immediately.
 */
auto DeletePgImp(page_id_t page_id) -> bool override;
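NewPgImp and FetchPgImp share the same first step: take a frame from free_list_ if one is available, otherwise ask the replacer for a victim and write it back if it is dirty. Below is a sketch of that step as a hypothetical private helper; the name FindVictimFrame is mine and not part of the handout, but it only touches the members listed above, plus the Page accessors (GetData(), GetPageId(), IsDirty()) that ship with the handout.

// Hypothetical helper: returns true and sets *frame_id if a usable frame was found,
// false if every frame is currently pinned (not evictable).
auto BufferPoolManagerInstance::FindVictimFrame(frame_id_t *frame_id) -> bool {
  if (!free_list_.empty()) {  // prefer a frame that has never held a page
    *frame_id = free_list_.front();
    free_list_.pop_front();
    return true;
  }
  if (!replacer_->Evict(frame_id)) {  // no free frame and nothing evictable
    return false;
  }
  Page *victim = &pages_[*frame_id];
  if (victim->IsDirty()) {  // write the old contents back before reusing the frame
    disk_manager_->WritePage(victim->GetPageId(), victim->GetData());
    victim->is_dirty_ = false;
  }
  page_table_->Remove(victim->GetPageId());  // the evicted page no longer lives in the pool
  return true;
}

NewPgImp would then reset the frame, assign a fresh id via AllocatePage(), pin it, record it in page_table_ and call RecordAccess/SetEvictable on the replacer; FetchPgImp does the same but reads the requested page from disk into the frame instead of allocating a new id.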
auto BufferPoolManagerInstance::UnpinPgImp(page_id_t page_id, bool is_dirty) -> bool {
  std::scoped_lock<std::mutex> lock(latch_);
  frame_id_t frame_id;
  // If page_id is not in the buffer pool or its pin count is already 0, return false.
  if (!page_table_->Find(page_id, frame_id)) {
    return false;
  }
  if (pages_[frame_id].pin_count_ <= 0) {
    return false;
  }
  // Once the pin count drops to 0, the frame becomes a candidate for eviction.
  if (--pages_[frame_id].pin_count_ == 0) {
    replacer_->SetEvictable(frame_id, true);
  }
  // Only ever set the dirty flag here; never clear one that another caller already set.
  if (is_dirty) {
    pages_[frame_id].is_dirty_ = is_dirty;
  }
  return true;
}
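The flush path is mechanical by comparison. Following the @brief above (write regardless of the dirty flag, then clear it), a sketch of FlushPgImp might look like this:

auto BufferPoolManagerInstance::FlushPgImp(page_id_t page_id) -> bool {
  std::scoped_lock<std::mutex> lock(latch_);
  frame_id_t frame_id;
  if (!page_table_->Find(page_id, frame_id)) {
    return false;  // the page is not in the buffer pool
  }
  // Write unconditionally, as required above, then clear the dirty flag.
  disk_manager_->WritePage(page_id, pages_[frame_id].GetData());
  pages_[frame_id].is_dirty_ = false;
  return true;
}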