Example: Implementing std::vector in C++

This is my learning notes from the course - Mastering C++ Standard Library Features.

Example: Implementing `std::vector` in C++

An example implementation of std::vector, covering:
- Rule of three
- Rule of five
- Rule of zero
- Move-awareness

In the following snippet, we’ll look at a barebones std::vector implementation, designed as if move semantics did not exist. This will provide a real-world example of the “rule of three“ in use.

Please be aware that the code below:

Does NOT deal with exception safety
Does NOT deal with deferred construction of objects
Does NOT provide the entire std::vector interface

This is just an example focused on resource management and should not be seen as a valid std::vector implementation.

Rule of Three

Let’s follow the rule of three for our example.

#include <algorithm>
#include <cassert>
#include <cstddef>

template <typename T>
class vector
{
public:
    vector() = default;
    ~vector() { delete[] _data; }

    vector(const vector& rhs) : _size{rhs._size}, _capacity{rhs._capacity}
    {
        _data = new T[_capacity];
        std::copy(rhs._data, rhs._data + _size, _data);
    }

    vector& operator=(const vector& rhs)
    {
        _size = rhs._size;
        _capacity = rhs._capacity;

        _data = new T[_capacity];
        std::copy(rhs._data, rhs._data + _size, _data);

        return *this;
    }

    void push_back(const T& x)
    {
        if(_capacity == _size)
        {
            const auto new_capacity = _capacity + 10;
            T* tmp = new T[new_capacity];
            std::copy(_data, _data + _capacity, tmp);
            std::swap(tmp, _data);
            delete[] tmp;

            _capacity = new_capacity;
        }

        _data[_size] = x;
        ++_size;
    }

    const auto& at(std::size_t i) const
    {
        assert(i < _size);
        return _data[i];
    }

    auto size() const { return _size; }
    auto capacity() const { return _capacity; }

private:
    T* _data{nullptr};
    std::size_t _size{0}, _capacity{0};
};

Rule of Five

In the next code snippet, we’ll add move operations to our barebones std::vector implementation. We will follow the “rule of five“.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>

template <typename T>
class vector
{
public:
    vector() = default;
    ~vector() { delete[] _data; }

    vector(vector&& rhs) : _data{std::exchange(rhs._data, nullptr)},
                           _size{rhs._size},
                           _capacity{rhs._capacity}
    {
    }

    vector& operator=(vector&& rhs)
    {
        _data = std::exchange(rhs._data, nullptr);
        _size = rhs._size;
        _capacity = rhs._capacity;

        return *this;
    }

    vector(const vector& rhs) : _size{rhs._size}, _capacity{rhs._capacity}
    {
        _data = new T[_capacity];
        std::copy(rhs._data, rhs._data + _size, _data);
    }

    vector& operator=(const vector& rhs)
    {
        _size = rhs._size;
        _capacity = rhs._capacity;

        _data = new T[_capacity];
        std::copy(rhs._data, rhs._data + _size, _data);

        return *this;
    }

    void push_back(const T& x)
    {
        if(_capacity == _size)
        {
            const auto new_capacity = _capacity + 10;
            T* tmp = new T[new_capacity];
            std::copy(_data, _data + _capacity, tmp);
            std::swap(tmp, _data);
            delete[] tmp;

            _capacity = new_capacity;
        }

        _data[_size] = x;
        ++_size;
    }

    const auto& at(std::size_t i) const
    {
        assert(i < _size);
        return _data[i];
    }

    auto size() const { return _size; }
    auto capacity() const { return _capacity; }

private:
    T* _data{nullptr};
    std::size_t _size{0}, _capacity{0};
};

int main()
{
    vector<int> v0;
    assert(v0.size() == 0);
    assert(v0.capacity() == 0);

    v0.push_back(5);
    assert(v0.size() == 1);
    assert(v0.capacity() == 10);
    assert(v0.at(0) == 5);

    auto v1 = v0;
    assert(v1.size() == 1);
    assert(v1.capacity() == 10);
    assert(v1.at(0) == 5);

    auto v2 = std::move(v0);
    assert(v2.size() == 1);
    assert(v2.capacity() == 10);
    assert(v2.at(0) == 5);
}

Rule of Zero

Let’s think about the design of our vector clone. The vector class is both responsible for the management of a dynamically-allocated buffer and a _size that keeps track of how many elements are in the container.

Let’s try to apply the rule of zero and move as much resource management code as possible in a separate class. Luckily, std::unique_ptr is a great fit here.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <memory>
#include <utility>

template <typename T>
auto copy_uptr_array(const std::unique_ptr<T[]>& src,
                     std::size_t capacity,
                     std::size_t size)
{
    auto result = std::make_unique<T[]>(capacity);
    std::copy(src.get(), src.get() + size, result.get());
    return result;
}

template <typename T>
class vector
{
public:
    vector() = default;

    // using `= default` here forces the compiler to automatically generate
    // the move operations for us, even though implicitly generation was suppressed
    // due to the presence of copy operations.

    vector(vector&& rhs) = default;
    vector& operator=(vector&& rhs) = default;

    vector(const vector& rhs) : _size{rhs._size}, _capacity{rhs._capacity}
    {
        _data = copy_uptr_array(rhs._data, _capacity, _size);
    }

    vector& operator=(const vector& rhs)
    {
        _size = rhs._size;
        _capacity = rhs._capacity;
        _data = copy_uptr_array(rhs._data, _capacity, _size);

        return *this;
    }

    void push_back(const T& x)
    {
        if(_capacity == _size)
        {
            const auto new_capacity = _capacity + 10;
            _data = copy_uptr_array(_data, new_capacity, _size);
            _capacity = new_capacity;
        }

        _data[_size] = x;
        ++_size;
    }

    const auto& at(std::size_t i) const
    {
        assert(i < _size);
        return _data[i];
    }

    auto size() const { return _size; }
    auto capacity() const { return _capacity; }

private:
    std::unique_ptr<T[]> _data;
    std::size_t _size{0}, _capacity{0};
};

Due to the fact that our copy operations have to take the _size member variable into account, the “rule of zero“ cannot be completely applied in this situation, at least not without significant refactoring.

Being able to = default the move operations however prevents possible mistakes and increases maintainability and readability of the code. Getting rid of the manual memory management prevents potential security-critical pitfalls that we’ve seen in the previous lecture.

Move-awareness

At a last major improvement, let’s make our push_back function move-aware: it will move T instances inside the container instead of moving whenever possible.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <memory>
#include <utility>

template <typename T>
auto copy_uptr_array(const std::unique_ptr<T[]>& src,
                     std::size_t capacity,
                     std::size_t size)
{
    auto result = std::make_unique<T[]>(capacity);
    std::copy(src.get(), src.get() + size, result.get());
    return result;
}

template <typename T>
class vector
{
public:
    vector() = default;

    // using `= default` here forces the compiler to automatically generate
    // the move operations for us, even though implicitly generation was suppressed
    // due to the presence of copy operations.

    vector(vector&& rhs) = default;
    vector& operator=(vector&& rhs) = default;

    vector(const vector& rhs) : _size{rhs._size}, _capacity{rhs._capacity}
    {
        _data = copy_uptr_array(rhs._data, _capacity, _size);
    }

    vector& operator=(const vector& rhs)
    {
        _size = rhs._size;
        _capacity = rhs._capacity;
        _data = copy_uptr_array(rhs._data, _capacity, _size);

        return *this;
    }

    template <typename U>
    void push_back(U&& x)
    {
        if(_capacity == _size)
        {
            const auto new_capacity = _capacity + 10;
            _data = copy_uptr_array(_data, new_capacity, _size);
            _capacity = new_capacity;
        }

        _data[_size] = std::forward<U>(x);
        ++_size;
    }

    const auto& at(std::size_t i) const
    {
        assert(i < _size);
        return _data[i];
    }

    auto size() const { return _size; }
    auto capacity() const { return _capacity; }

private:
    std::unique_ptr<T[]> _data;
    std::size_t _size{0}, _capacity{0};
};

By using perfect-forwarding, we can avoid code repetition and provide a member function that works for both lvalues and rvalues, moving where possible in order to increase performance and provide support for move-only types.

Note that, a real vector implementation would have used “placement new“ to conditionally construct objects in the buffer instead of invoking the copy/move constructors.

Summary

Learned that the Standard Library provides full move semantics support for most of its containers.
Discussed that it also provides utilities that are defined in terms of move semantics, like std::exchange.
Understand that you should make your classes movable: prefer the rule of zero where possible, otherwise follow the rule of five.

Reference Link

Mastering C++ Standard Library Features