0%

Example: Implementing std::vector in C++

This is my learning notes from the course - Mastering C++ Standard Library Features.

Example: Implementing std::vector in C++

In the following snippet, we’ll look at a barebones std::vector implementation, designed as if move semantics did not exist. This will provide a real-world example of the “rule of three“ in use.

Please be aware that the code below:

  • Does NOT deal with exception safety
  • Does NOT deal with deferred construction of objects
  • Does NOT provide the entire std::vector interface

This is just an example focused on resource management and should not be seen as a valid std::vector implementation.

Rule of Three

Let’s follow the rule of three for our example.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <algorithm>
#include <cassert>
#include <cstddef>

template <typename T>
class vector
{
public:
vector() = default;
~vector() { delete[] _data; }

vector(const vector& rhs) : _size{rhs._size}, _capacity{rhs._capacity}
{
_data = new T[_capacity];
std::copy(rhs._data, rhs._data + _size, _data);
}

vector& operator=(const vector& rhs)
{
_size = rhs._size;
_capacity = rhs._capacity;

_data = new T[_capacity];
std::copy(rhs._data, rhs._data + _size, _data);

return *this;
}

void push_back(const T& x)
{
if(_capacity == _size)
{
const auto new_capacity = _capacity + 10;
T* tmp = new T[new_capacity];
std::copy(_data, _data + _capacity, tmp);
std::swap(tmp, _data);
delete[] tmp;

_capacity = new_capacity;
}

_data[_size] = x;
++_size;
}

const auto& at(std::size_t i) const
{
assert(i < _size);
return _data[i];
}

auto size() const { return _size; }
auto capacity() const { return _capacity; }

private:
T* _data{nullptr};
std::size_t _size{0}, _capacity{0};
};

Rule of Five

In the next code snippet, we’ll add move operations to our barebones std::vector implementation. We will follow the “rule of five“.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>

template <typename T>
class vector
{
public:
vector() = default;
~vector() { delete[] _data; }

vector(vector&& rhs) : _data{std::exchange(rhs._data, nullptr)},
_size{rhs._size},
_capacity{rhs._capacity}
{
}

vector& operator=(vector&& rhs)
{
_data = std::exchange(rhs._data, nullptr);
_size = rhs._size;
_capacity = rhs._capacity;

return *this;
}

vector(const vector& rhs) : _size{rhs._size}, _capacity{rhs._capacity}
{
_data = new T[_capacity];
std::copy(rhs._data, rhs._data + _size, _data);
}

vector& operator=(const vector& rhs)
{
_size = rhs._size;
_capacity = rhs._capacity;

_data = new T[_capacity];
std::copy(rhs._data, rhs._data + _size, _data);

return *this;
}

void push_back(const T& x)
{
if(_capacity == _size)
{
const auto new_capacity = _capacity + 10;
T* tmp = new T[new_capacity];
std::copy(_data, _data + _capacity, tmp);
std::swap(tmp, _data);
delete[] tmp;

_capacity = new_capacity;
}

_data[_size] = x;
++_size;
}

const auto& at(std::size_t i) const
{
assert(i < _size);
return _data[i];
}

auto size() const { return _size; }
auto capacity() const { return _capacity; }

private:
T* _data{nullptr};
std::size_t _size{0}, _capacity{0};
};

int main()
{
vector<int> v0;
assert(v0.size() == 0);
assert(v0.capacity() == 0);

v0.push_back(5);
assert(v0.size() == 1);
assert(v0.capacity() == 10);
assert(v0.at(0) == 5);

auto v1 = v0;
assert(v1.size() == 1);
assert(v1.capacity() == 10);
assert(v1.at(0) == 5);

auto v2 = std::move(v0);
assert(v2.size() == 1);
assert(v2.capacity() == 10);
assert(v2.at(0) == 5);
}

Rule of Zero

Let’s think about the design of our vector clone. The vector class is both responsible for the management of a dynamically-allocated buffer and a _size that keeps track of how many elements are in the container.

Let’s try to apply the rule of zero and move as much resource management code as possible in a separate class. Luckily, std::unique_ptr is a great fit here.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <memory>
#include <utility>

template <typename T>
auto copy_uptr_array(const std::unique_ptr<T[]>& src,
std::size_t capacity,
std::size_t size)
{
auto result = std::make_unique<T[]>(capacity);
std::copy(src.get(), src.get() + size, result.get());
return result;
}

template <typename T>
class vector
{
public:
vector() = default;

// using `= default` here forces the compiler to automatically generate
// the move operations for us, even though implicitly generation was suppressed
// due to the presence of copy operations.

vector(vector&& rhs) = default;
vector& operator=(vector&& rhs) = default;

vector(const vector& rhs) : _size{rhs._size}, _capacity{rhs._capacity}
{
_data = copy_uptr_array(rhs._data, _capacity, _size);
}

vector& operator=(const vector& rhs)
{
_size = rhs._size;
_capacity = rhs._capacity;
_data = copy_uptr_array(rhs._data, _capacity, _size);

return *this;
}

void push_back(const T& x)
{
if(_capacity == _size)
{
const auto new_capacity = _capacity + 10;
_data = copy_uptr_array(_data, new_capacity, _size);
_capacity = new_capacity;
}

_data[_size] = x;
++_size;
}

const auto& at(std::size_t i) const
{
assert(i < _size);
return _data[i];
}

auto size() const { return _size; }
auto capacity() const { return _capacity; }

private:
std::unique_ptr<T[]> _data;
std::size_t _size{0}, _capacity{0};
};

Due to the fact that our copy operations have to take the _size member variable into account, the “rule of zero“ cannot be completely applied in this situation, at least not without significant refactoring.

Being able to = default the move operations however prevents possible mistakes and increases maintainability and readability of the code. Getting rid of the manual memory management prevents potential security-critical pitfalls that we’ve seen in the previous lecture.

Move-awareness

At a last major improvement, let’s make our push_back function move-aware: it will move T instances inside the container instead of moving whenever possible.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <memory>
#include <utility>

template <typename T>
auto copy_uptr_array(const std::unique_ptr<T[]>& src,
std::size_t capacity,
std::size_t size)
{
auto result = std::make_unique<T[]>(capacity);
std::copy(src.get(), src.get() + size, result.get());
return result;
}

template <typename T>
class vector
{
public:
vector() = default;

// using `= default` here forces the compiler to automatically generate
// the move operations for us, even though implicitly generation was suppressed
// due to the presence of copy operations.

vector(vector&& rhs) = default;
vector& operator=(vector&& rhs) = default;

vector(const vector& rhs) : _size{rhs._size}, _capacity{rhs._capacity}
{
_data = copy_uptr_array(rhs._data, _capacity, _size);
}

vector& operator=(const vector& rhs)
{
_size = rhs._size;
_capacity = rhs._capacity;
_data = copy_uptr_array(rhs._data, _capacity, _size);

return *this;
}

template <typename U>
void push_back(U&& x)
{
if(_capacity == _size)
{
const auto new_capacity = _capacity + 10;
_data = copy_uptr_array(_data, new_capacity, _size);
_capacity = new_capacity;
}

_data[_size] = std::forward<U>(x);
++_size;
}

const auto& at(std::size_t i) const
{
assert(i < _size);
return _data[i];
}

auto size() const { return _size; }
auto capacity() const { return _capacity; }

private:
std::unique_ptr<T[]> _data;
std::size_t _size{0}, _capacity{0};
};

By using perfect-forwarding, we can avoid code repetition and provide a member function that works for both lvalues and rvalues, moving where possible in order to increase performance and provide support for move-only types.

Note that, a real vector implementation would have used “placement new“ to conditionally construct objects in the buffer instead of invoking the copy/move constructors.

Summary

  • Learned that the Standard Library provides full move semantics support for most of its containers.
  • Discussed that it also provides utilities that are defined in terms of move semantics, like std::exchange.
  • Understand that you should make your classes movable: prefer the rule of zero where possible, otherwise follow the rule of five.