前缀和后缀++的性能差异

benchmark测试迭代器前缀和后缀++的性能

结论

经测试:

  • Release版本下并无差异
  • vector结果稳定
  • list测试波动大,由于内存不连续,影响因素多,但前缀和后缀写法区别不大

测试1(vector)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <benchmark/benchmark.h>
#include <vector>
#include <string>

// Element type used by the benchmark: each object carries a 100-character
// string payload.
struct MyClass {
    std::string data;

    // Fills the payload with 100 'x' characters.
    MyClass() : data(100, 'x') {}

    // Copy operations duplicate the payload member-wise. They stay explicitly
    // declared, so no implicit move operations are generated for this type.
    MyClass(const MyClass& other) = default;
    MyClass& operator=(const MyClass& other) = default;
};

// Uses prefix ++it.
// Benchmark: walks a 1000-element std::vector<MyClass> once per benchmark
// iteration, advancing the iterator with the prefix form. The vector is built
// once, before the `for (auto _ : state)` loop.
static void BM_IteratorPrefixIncrement(benchmark::State& state) {
std::vector<MyClass> vec(1000);
for (auto _ : state) {
for (auto it = vec.begin(); it != vec.end(); ++it) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
BENCHMARK(BM_IteratorPrefixIncrement);

// Uses postfix it++.
// Benchmark: same traversal as the prefix variant, but the iterator is
// advanced with the postfix form and the value returned by it++ is discarded.
static void BM_IteratorPostfixIncrement(benchmark::State& state) {
std::vector<MyClass> vec(1000);
for (auto _ : state) {
for (auto it = vec.begin(); it != vec.end(); it++) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
BENCHMARK(BM_IteratorPostfixIncrement);

// Google Benchmark's entry-point macro for this program.
BENCHMARK_MAIN();
1
2
3
4
5
6
7
8
[main] 正在生成文件夹: d:/Vscode/code/test/build 
[build] 正在启动生成
[proc] 执行命令: D:\make_tools\cmake\bin\cmake.EXE --build d:/Vscode/code/test/build --config Release --target test_pow -j 18 --
[build] 适用于 .NET Framework MSBuild 版本 17.9.8+b34f75857
[build]
[build] test_pow.vcxproj -> D:\Vscode\code\test\build\Release\test_pow.exe
[driver] 生成完毕: 00:00:00.741
[build] 生成已完成,退出代码为 0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
PS D:\Vscode\code\test\build\Release> ."D:/Vscode/code/test/build/Release/test_pow.exe"
2025-07-03T23:54:07+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
----------------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------------
BM_IteratorPrefixIncrement 1450 ns 1444 ns 497778
BM_IteratorPostfixIncrement 1442 ns 1444 ns 497778
PS D:\Vscode\code\test\build\Release> ."D:/Vscode/code/test/build/Release/test_pow.exe"
2025-07-03T23:54:11+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
----------------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------------
BM_IteratorPrefixIncrement 1442 ns 1444 ns 497778
BM_IteratorPostfixIncrement 1447 ns 1444 ns 497778
PS D:\Vscode\code\test\build\Release>

测试2(HeavyContainer):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#include <benchmark/benchmark.h>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

// Simulates a structure that is expensive to copy: every object carries a
// 100-character string.
struct Heavy {
    std::string data;

    // Fills the payload with 100 'x' characters.
    Heavy() : data(100, 'x') {}

    // Copy construction duplicates the payload.
    Heavy(const Heavy& rhs) : data(rhs.data) {}

    // Copy assignment replaces the payload with a copy of rhs's payload.
    Heavy& operator=(const Heavy& rhs) {
        this->data = rhs.data;
        return *this;
    }
};

// Iterator for the custom container: a thin wrapper around a Heavy*.
//
// The class advertises std::forward_iterator_tag, so it also provides the
// operations a forward iterator is expected to have (default construction,
// operator== and operator->) in addition to the ones the benchmarks use.
class HeavyIterator {
public:
    using iterator_category = std::forward_iterator_tag;
    using value_type = Heavy;
    using difference_type = std::ptrdiff_t;
    using pointer = Heavy*;
    using reference = Heavy&;

    // A default-constructed iterator holds a null pointer and must not be
    // dereferenced or incremented.
    HeavyIterator() = default;

    // Wraps a raw element pointer. Left implicit so existing code that relies
    // on the Heavy* -> HeavyIterator conversion keeps compiling.
    HeavyIterator(pointer ptr) : ptr_(ptr) {}

    // The copy constructor stays user-provided, as in the original benchmark,
    // so the copy made by postfix ++ goes through the same code as before.
    HeavyIterator(const HeavyIterator& other) : ptr_(other.ptr_) {}

    // Declared explicitly: relying on the implicit copy assignment of a class
    // with a user-provided copy constructor is deprecated.
    HeavyIterator& operator=(const HeavyIterator& other) = default;

    reference operator*() const { return *ptr_; }
    pointer operator->() const { return ptr_; }

    // Prefix ++: advance in place and return *this.
    HeavyIterator& operator++() {
        ++ptr_;
        return *this;
    }

    // Postfix ++: advance, but return a copy of the previous position
    // (mirrors what STL iterators do).
    HeavyIterator operator++(int) {
        HeavyIterator tmp = *this;
        ++ptr_;
        return tmp;
    }

    bool operator==(const HeavyIterator& other) const {
        return ptr_ == other.ptr_;
    }

    bool operator!=(const HeavyIterator& other) const {
        return ptr_ != other.ptr_;
    }

private:
    pointer ptr_ = nullptr;
};

// Custom container (wraps a std::vector) that hands out HeavyIterator objects
// pointing into the vector's storage.
class HeavyContainer {
public:
    // Creates a container holding n default-constructed Heavy elements.
    HeavyContainer(size_t n) : data_(n) {}

    // Iterator to the first element.
    HeavyIterator begin() {
        Heavy* const first = data_.data();
        return HeavyIterator(first);
    }

    // Iterator one past the last element.
    HeavyIterator end() {
        Heavy* const last = data_.data() + data_.size();
        return HeavyIterator(last);
    }

private:
    std::vector<Heavy> data_;
};


// Benchmark: walks a 1000-element HeavyContainer once per benchmark iteration
// using HeavyIterator's prefix operator++. The container is built once, before
// the `for (auto _ : state)` loop.
static void BM_CustomIteratorPrefix(benchmark::State& state) {
HeavyContainer container(1000);
for (auto _ : state) {
for (auto it = container.begin(); it != container.end(); ++it) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
BENCHMARK(BM_CustomIteratorPrefix);

// Benchmark: same traversal as BM_CustomIteratorPrefix, but advancing with
// HeavyIterator's postfix operator++(int); the returned copy is discarded.
static void BM_CustomIteratorPostfix(benchmark::State& state) {
HeavyContainer container(1000);
for (auto _ : state) {
for (auto it = container.begin(); it != container.end(); it++) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
BENCHMARK(BM_CustomIteratorPostfix);

// Google Benchmark's entry-point macro for this program.
BENCHMARK_MAIN();
1
2
3
4
5
6
7
8
[main] 正在生成文件夹: d:/Vscode/code/test/build 
[build] 正在启动生成
[proc] 执行命令: D:\make_tools\cmake\bin\cmake.EXE --build d:/Vscode/code/test/build --config Release --target test_pow -j 18 --
[build] 适用于 .NET Framework MSBuild 版本 17.9.8+b34f75857
[build]
[build] test_pow.vcxproj -> D:\Vscode\code\test\build\Release\test_pow.exe
[driver] 生成完毕: 00:00:00.704
[build] 生成已完成,退出代码为 0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
PS D:\Vscode\code\test\build\Release> ."D:/Vscode/code/test/build/Release/test_pow.exe"
2025-07-03T23:55:35+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
-------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------
BM_CustomIteratorPrefix 1903 ns 1883 ns 373333
BM_CustomIteratorPostfix 1885 ns 1842 ns 373333
PS D:\Vscode\code\test\build\Release> ."D:/Vscode/code/test/build/Release/test_pow.exe"
2025-07-03T23:56:03+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
-------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------
BM_CustomIteratorPrefix 1879 ns 1883 ns 373333
BM_CustomIteratorPostfix 1887 ns 1859 ns 344615
PS D:\Vscode\code\test\build\Release>

测试3(Vector、List、GenericVector):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#include <benchmark/benchmark.h>
#include <vector>
#include <list>
#include <string>

// ----------------------------
// Heavy struct (simulates copy cost): each object carries a 100-character
// string payload.
struct Heavy {
    std::string data;

    // Fills the payload with 100 'x' characters.
    Heavy() : data(100, 'x') {}

    // Copy operations duplicate the payload member-wise. They stay explicitly
    // declared, so no implicit move operations are generated for this type.
    Heavy(const Heavy& other) = default;
    Heavy& operator=(const Heavy& other) = default;
};

// ----------------------------
// Scenario 1: std::vector iterator comparison.
// Prefix variant: walks a 1000-element vector once per benchmark iteration
// using ++it. The vector is built once, before the `for (auto _ : state)` loop.
static void BM_VectorPrefix(benchmark::State& state) {
std::vector<Heavy> vec(1000);
for (auto _ : state) {
for (auto it = vec.begin(); it != vec.end(); ++it) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
BENCHMARK(BM_VectorPrefix);

// Postfix variant of the vector scenario: same traversal, advanced with it++;
// the value returned by it++ is discarded.
static void BM_VectorPostfix(benchmark::State& state) {
std::vector<Heavy> vec(1000);
for (auto _ : state) {
for (auto it = vec.begin(); it != vec.end(); it++) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
BENCHMARK(BM_VectorPostfix);

// ----------------------------
// Scenario 2: std::list iterator comparison (more realistic construction cost).
// Prefix variant: walks a 1000-element list once per benchmark iteration using
// ++it. The list is built once, before the `for (auto _ : state)` loop.
static void BM_ListPrefix(benchmark::State& state) {
std::list<Heavy> lst(1000);
for (auto _ : state) {
for (auto it = lst.begin(); it != lst.end(); ++it) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
BENCHMARK(BM_ListPrefix);

// Postfix variant of the list scenario: same traversal, advanced with it++;
// the value returned by it++ is discarded.
static void BM_ListPostfix(benchmark::State& state) {
std::list<Heavy> lst(1000);
for (auto _ : state) {
for (auto it = lst.begin(); it != lst.end(); it++) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
BENCHMARK(BM_ListPostfix);

// ----------------------------
// Scenario 3: actually use the value returned by postfix ++ (to block the
// optimization that would drop the unused copy).
// The loop header has no increment expression; the iterator is advanced inside
// the body and the element is read through the pre-increment copy.
static void BM_UseReturnOfPostfix(benchmark::State& state) {
std::vector<Heavy> vec(1000);
for (auto _ : state) {
for (auto it = vec.begin(); it != vec.end(); ) {
auto prev = it++; // keep the copy returned by postfix ++ alive by using it below
benchmark::DoNotOptimize(*prev);
}
}
}
BENCHMARK(BM_UseReturnOfPostfix);

// ----------------------------
// Scenario 4: function templates, simulating calls from generic code.
// Walks [begin, end) once per benchmark iteration using prefix ++it.
// The iterators are taken by value; each pass starts from a fresh copy of
// `begin`.
template<typename Iterator>
static void IteratePrefix(benchmark::State& state, Iterator begin, Iterator end) {
for (auto _ : state) {
for (auto it = begin; it != end; ++it)
benchmark::DoNotOptimize(*it);
}
}

// Postfix counterpart of IteratePrefix: walks [begin, end) once per benchmark
// iteration using it++; the value returned by it++ is discarded.
template<typename Iterator>
static void IteratePostfix(benchmark::State& state, Iterator begin, Iterator end) {
for (auto _ : state) {
for (auto it = begin; it != end; it++)
benchmark::DoNotOptimize(*it);
}
}

// Runs the generic prefix-increment traversal over a 1000-element vector.
static void BM_GenericVectorPrefix(benchmark::State& state) {
    std::vector<Heavy> vec(1000);
    const auto first = vec.begin();
    const auto last = vec.end();
    IteratePrefix(state, first, last);
}
BENCHMARK(BM_GenericVectorPrefix);

// Runs the generic postfix-increment traversal over a 1000-element vector.
static void BM_GenericVectorPostfix(benchmark::State& state) {
    std::vector<Heavy> vec(1000);
    const auto first = vec.begin();
    const auto last = vec.end();
    IteratePostfix(state, first, last);
}
BENCHMARK(BM_GenericVectorPostfix);

// ----------------------------
// Google Benchmark's entry-point macro for this program.
BENCHMARK_MAIN();
1
2
3
4
5
6
7
8
[main] 正在生成文件夹: d:/Vscode/code/test/build 
[build] 正在启动生成
[proc] 执行命令: D:\make_tools\cmake\bin\cmake.EXE --build d:/Vscode/code/test/build --config Release --target test_pow -j 18 --
[build] 适用于 .NET Framework MSBuild 版本 17.9.8+b34f75857
[build]
[build] test_pow.vcxproj -> D:\Vscode\code\test\build\Release\test_pow.exe
[driver] 生成完毕: 00:00:00.708
[build] 生成已完成,退出代码为 0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
PS D:\Vscode\code\test\build\Release> ."D:/Vscode/code/test/build/Release/test_pow.exe"
2025-07-04T00:01:29+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------
BM_VectorPrefix 1690 ns 1688 ns 407273
BM_VectorPostfix 1678 ns 1674 ns 448000
BM_ListPrefix 2047 ns 2051 ns 320000
BM_ListPostfix 2001 ns 2002 ns 320000
BM_UseReturnOfPostfix 1442 ns 1444 ns 497778
BM_GenericVectorPrefix 1484 ns 1475 ns 497778
BM_GenericVectorPostfix 1496 ns 1507 ns 497778
PS D:\Vscode\code\test\build\Release> ."D:/Vscode/code/test/build/Release/test_pow.exe"
2025-07-04T00:01:36+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------
BM_VectorPrefix 1689 ns 1688 ns 407273
BM_VectorPostfix 1685 ns 1688 ns 407273
BM_ListPrefix 2001 ns 2002 ns 320000
BM_ListPostfix 2107 ns 2086 ns 344615
BM_UseReturnOfPostfix 1435 ns 1413 ns 497778
BM_GenericVectorPrefix 1483 ns 1507 ns 497778
BM_GenericVectorPostfix 1497 ns 1507 ns 497778
PS D:\Vscode\code\test\build\Release> ."D:/Vscode/code/test/build/Release/test_pow.exe"
2025-07-04T00:01:43+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------
BM_VectorPrefix 1698 ns 1709 ns 448000
BM_VectorPostfix 1684 ns 1688 ns 407273
BM_ListPrefix 2046 ns 2040 ns 344615
BM_ListPostfix 1989 ns 1995 ns 344615
BM_UseReturnOfPostfix 1443 ns 1430 ns 448000
BM_GenericVectorPrefix 1472 ns 1465 ns 448000
BM_GenericVectorPostfix 1475 ns 1465 ns 448000
PS D:\Vscode\code\test\build\Release>

测试4(List):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#include <benchmark/benchmark.h>
#include <list>
#include <string>

// Element type for the list benchmark: each object carries a 100-character
// string payload.
struct Heavy {
    std::string data;

    // Fills the payload with 100 'x' characters.
    Heavy() : data(100, 'x') {}

    // Copy construction duplicates the source's payload.
    Heavy(const Heavy& source) : data(source.data) {}

    // Copy assignment overwrites the payload with a copy of the source's.
    Heavy& operator=(const Heavy& source) {
        data.assign(source.data);
        return *this;
    }
};

// Benchmark: walks a 1000-element std::list<Heavy> once per benchmark
// iteration using prefix ++it. The list is built once, before the
// `for (auto _ : state)` loop.
static void BM_ListPrefix(benchmark::State& state) {
std::list<Heavy> lst(1000);
for (auto _ : state) {
for (auto it = lst.begin(); it != lst.end(); ++it) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
// Registered with 10 repetitions, reporting only the aggregate rows.
BENCHMARK(BM_ListPrefix)->Repetitions(10)->ReportAggregatesOnly(true);

// Benchmark: same list traversal as BM_ListPrefix, advanced with postfix it++;
// the value returned by it++ is discarded.
static void BM_ListPostfix(benchmark::State& state) {
std::list<Heavy> lst(1000);
for (auto _ : state) {
for (auto it = lst.begin(); it != lst.end(); it++) {
// Hand the dereferenced element to DoNotOptimize so the traversal is not
// discarded as dead code.
benchmark::DoNotOptimize(*it);
}
}
}
// Registered with 10 repetitions, reporting only the aggregate rows.
BENCHMARK(BM_ListPostfix)->Repetitions(10)->ReportAggregatesOnly(true);

// Google Benchmark's entry-point macro for this program.
BENCHMARK_MAIN();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
PS D:\Vscode\code\test\build\Release> ./test_pow.exe --benchmark_repetitions=10 --benchmark_report_aggregates_only=true
2025-07-04T00:07:06+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
---------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------
BM_ListPrefix/repeats:10_mean 2107 ns 2104 ns 10
BM_ListPrefix/repeats:10_median 2122 ns 2131 ns 10
BM_ListPrefix/repeats:10_stddev 71.9 ns 86.0 ns 10
BM_ListPostfix/repeats:10_mean 2065 ns 2058 ns 10
BM_ListPostfix/repeats:10_median 2067 ns 2063 ns 10
BM_ListPostfix/repeats:10_stddev 67.5 ns 74.7 ns 10
PS D:\Vscode\code\test\build\Release> ./test_pow.exe --benchmark_repetitions=10 --benchmark_report_aggregates_only=true
2025-07-04T00:07:23+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
---------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------
BM_ListPrefix/repeats:10_mean 2095 ns 2095 ns 10
BM_ListPrefix/repeats:10_median 2110 ns 2100 ns 10
BM_ListPrefix/repeats:10_stddev 112 ns 109 ns 10
BM_ListPostfix/repeats:10_mean 2138 ns 2131 ns 10
BM_ListPostfix/repeats:10_median 2120 ns 2131 ns 10
BM_ListPostfix/repeats:10_stddev 119 ns 115 ns 10
PS D:\Vscode\code\test\build\Release> ./test_pow.exe --benchmark_repetitions=10 --benchmark_report_aggregates_only=true
2025-07-04T00:07:40+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
---------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------
BM_ListPrefix/repeats:10_mean 2041 ns 2036 ns 10
BM_ListPrefix/repeats:10_median 2039 ns 2040 ns 10
BM_ListPrefix/repeats:10_stddev 60.7 ns 65.7 ns 10
BM_ListPostfix/repeats:10_mean 2103 ns 2104 ns 10
BM_ListPostfix/repeats:10_median 2082 ns 2086 ns 10
BM_ListPostfix/repeats:10_stddev 95.2 ns 91.2 ns 10
PS D:\Vscode\code\test\build\Release> ./test_pow.exe --benchmark_repetitions=10 --benchmark_report_aggregates_only=true
2025-07-04T00:08:05+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
---------------------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------------------
BM_ListPrefix/repeats:10_mean 2029 ns 2031 ns 10
BM_ListPrefix/repeats:10_median 2006 ns 2018 ns 10
BM_ListPrefix/repeats:10_stddev 100 ns 99.8 ns 10
BM_ListPostfix/repeats:10_mean 2071 ns 2072 ns 10
BM_ListPostfix/repeats:10_median 2060 ns 2063 ns 10
BM_ListPostfix/repeats:10_stddev 59.6 ns 52.6 ns 10
PS D:\Vscode\code\test\build\Release>

测试5(List):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#include <benchmark/benchmark.h>
#include <list>
#include <string>
#include <atomic>
#include <iostream>

// Element type that counts how many times it has been copy-constructed.
struct Heavy {
    std::string data;

    // Number of copy constructions since the counter was last reset.
    static std::atomic<int> copy_count;

    // Fills the payload with 100 'x' characters.
    Heavy() : data(100, 'x') {}

    // Copy construction duplicates the payload and bumps the counter.
    Heavy(const Heavy& src) : data(src.data) {
        copy_count.fetch_add(1);
    }

    // Copy assignment duplicates the payload; it is not counted.
    Heavy& operator=(const Heavy& src) {
        this->data = src.data;
        return *this;
    }
};

std::atomic<int> Heavy::copy_count{0};

// Benchmark: builds a 1000-element list and walks it with prefix ++it, then
// publishes Heavy::copy_count as the "CopyCount" user counter.
// Note that the list is constructed (and destroyed at the end of the loop
// body) inside the `for (auto _ : state)` loop, so every measured iteration
// includes building and tearing down 1000 elements, not just the traversal.
// NOTE(review): copy_count is only incremented by Heavy's copy constructor;
// it++ copies the iterator rather than a Heavy element, so this counter
// presumably cannot tell the two increment forms apart — confirm.
static void BM_ListPrefix(benchmark::State& state) {
for (auto _ : state) {
Heavy::copy_count = 0;
std::list<Heavy> lst(1000);
for (auto it = lst.begin(); it != lst.end(); ++it) {
benchmark::DoNotOptimize(*it);
}
benchmark::ClobberMemory();
// Store the result in state so that it shows up in the benchmark output.
state.counters["CopyCount"] = Heavy::copy_count.load();
}
}
// Registered with 5 repetitions, reporting only the aggregate rows.
BENCHMARK(BM_ListPrefix)->Repetitions(5)->ReportAggregatesOnly(true);

// Benchmark: postfix counterpart of BM_ListPrefix — builds a 1000-element
// list, walks it with it++ (discarding the returned value), then publishes
// Heavy::copy_count as the "CopyCount" user counter.
// As in the prefix variant, the list is constructed and destroyed inside the
// `for (auto _ : state)` loop, so every measured iteration includes that work.
static void BM_ListPostfix(benchmark::State& state) {
for (auto _ : state) {
Heavy::copy_count = 0;
std::list<Heavy> lst(1000);
for (auto it = lst.begin(); it != lst.end(); it++) {
benchmark::DoNotOptimize(*it);
}
benchmark::ClobberMemory();
// Store the result in state so that it shows up in the benchmark output.
state.counters["CopyCount"] = Heavy::copy_count.load();
}
}
// Registered with 5 repetitions, reporting only the aggregate rows.
BENCHMARK(BM_ListPostfix)->Repetitions(5)->ReportAggregatesOnly(true);

// Google Benchmark's entry-point macro for this program.
BENCHMARK_MAIN();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
PS D:\Vscode\code\test\build\Release> ."D:/Vscode/code/test/build/Release/test_pow.exe"
2025-07-04T00:09:58+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
------------------------------------------------------------------------------------------
BM_ListPrefix/repeats:5_mean 74997 ns 74912 ns 5 CopyCount=0
BM_ListPrefix/repeats:5_median 74554 ns 74557 ns 5 CopyCount=0
BM_ListPrefix/repeats:5_stddev 1267 ns 1485 ns 5 CopyCount=0
BM_ListPostfix/repeats:5_mean 73618 ns 73382 ns 5 CopyCount=0
BM_ListPostfix/repeats:5_median 73442 ns 72545 ns 5 CopyCount=0
BM_ListPostfix/repeats:5_stddev 419 ns 1248 ns 5 CopyCount=0
PS D:\Vscode\code\test\build\Release> ."D:/Vscode/code/test/build/Release/test_pow.exe"
2025-07-04T00:10:17+08:00
Running D:\Vscode\code\test\build\Release\test_pow.exe
Run on (16 X 3194 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x8)
L1 Instruction 32 KiB (x8)
L2 Unified 512 KiB (x8)
L3 Unified 16384 KiB (x1)
------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations UserCounters...
------------------------------------------------------------------------------------------
BM_ListPrefix/repeats:5_mean 72748 ns 72782 ns 5 CopyCount=0
BM_ListPrefix/repeats:5_median 72344 ns 72782 ns 5 CopyCount=0
BM_ListPrefix/repeats:5_stddev 828 ns 1255 ns 5 CopyCount=0
BM_ListPostfix/repeats:5_mean 73727 ns 73661 ns 5 CopyCount=0
BM_ListPostfix/repeats:5_median 73452 ns 73940 ns 5 CopyCount=0
BM_ListPostfix/repeats:5_stddev 1375 ns 1167 ns 5 CopyCount=0
PS D:\Vscode\code\test\build\Release>

测试5的汇编分析:

Compiler Explorer

采用的编译语句:

1
-std=c++17 -mtune=core2 -march=nocona -O3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#include <list>
#include <string>
#include <atomic>
#include <iostream>

// Element type that counts how many times it has been copy-constructed.
struct Heavy {
    std::string data;

    // Number of copy constructions since the counter was last reset.
    static std::atomic<int> copy_count;

    // Fills the payload with 100 'x' characters.
    Heavy() : data(100, 'x') {}

    // Copy construction duplicates the payload and records the copy.
    Heavy(const Heavy& original) : data(original.data) {
        copy_count.fetch_add(1);
    }

    // Copy assignment duplicates the payload; it is not counted.
    Heavy& operator=(const Heavy& original) {
        data.assign(original.data);
        return *this;
    }
};

std::atomic<int> Heavy::copy_count{0};

// Resets the copy counter, builds a 1000-element list, walks it with prefix
// ++it, then prints the number of Heavy copies that were counted.
// NOTE(review): in the -O3 assembly listing that accompanies this source,
// test_prefix() goes from the node-construction loop straight to the output
// call; no traversal loop is present. The reference-to-volatile below
// therefore presumably does not force a read of each element — confirm.
void test_prefix() {
Heavy::copy_count = 0;
std::list<Heavy> lst(1000);
for (auto it = lst.begin(); it != lst.end(); ++it) {
volatile Heavy const& h = *it; // intended to prevent optimization
(void)h;
}
std::cout << "Prefix increment copy count: " << Heavy::copy_count << "\n";
}

// Resets the copy counter, builds a 1000-element list, walks it with postfix
// it++ (discarding the returned value), then prints the number of Heavy copies
// that were counted.
// NOTE(review): in the -O3 assembly listing that accompanies this source,
// test_postfix() goes from the node-construction loop straight to the output
// call; no traversal loop is present. The reference-to-volatile below
// therefore presumably does not force a read of each element — confirm.
void test_postfix() {
Heavy::copy_count = 0;
std::list<Heavy> lst(1000);
for (auto it = lst.begin(); it != lst.end(); it++) {
volatile Heavy const& h = *it; // intended to prevent optimization
(void)h;
}
std::cout << "Postfix increment copy count: " << Heavy::copy_count << "\n";
}

// Entry point: runs the prefix variant first, then the postfix variant.
// Falling off the end of main is equivalent to returning 0.
int main() {
    test_prefix();
    test_postfix();
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
std::__cxx11::_List_base<Heavy, std::allocator<Heavy>>::_M_clear():
push r12
push rbp
push rbx
mov rbp, QWORD PTR [rdi]
cmp rbp, rdi
je .L1
mov r12, rdi
.L4:
mov rbx, rbp
mov rbp, QWORD PTR [rbp+0]
mov rdi, QWORD PTR [rbx+16]
lea rax, [rbx+32]
cmp rdi, rax
je .L3
mov rax, QWORD PTR [rbx+32]
lea rsi, [rax+1]
call operator delete(void*, unsigned long)
.L3:
mov esi, 48
mov rdi, rbx
call operator delete(void*, unsigned long)
cmp rbp, r12
jne .L4
.L1:
pop rbx
pop rbp
pop r12
ret
.LC0:
.string "Prefix increment copy count: "
.LC1:
.string "\n"
test_prefix():
push r12
xor eax, eax
push rbp
push rbx
sub rsp, 32
xchg eax, DWORD PTR Heavy::copy_count[rip]
mov ebp, 1000
mov QWORD PTR [rsp+16], 0
movq xmm1, rsp
movddup xmm0, xmm1
movaps XMMWORD PTR [rsp], xmm0
.L9:
mov edi, 48
call operator new(unsigned long)
mov rbx, rax
mov edx, 120
mov esi, 100
lea rdi, [rax+16]
lea rax, [rax+32]
mov QWORD PTR [rbx+16], rax
call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct(unsigned long, char)
mov rsi, rsp
mov rdi, rbx
call std::__detail::_List_node_base::_M_hook(std::__detail::_List_node_base*)
add QWORD PTR [rsp+16], 1
sub rbp, 1
jne .L9
mov edx, 29
mov esi, OFFSET FLAT:.LC0
mov edi, OFFSET FLAT:std::cout
call std::basic_ostream<char, std::char_traits<char>>& std::__ostream_insert<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*, long)
mov edi, OFFSET FLAT:std::cout
mov esi, DWORD PTR Heavy::copy_count[rip]
call std::ostream::operator<<(int)
mov edx, 1
mov esi, OFFSET FLAT:.LC1
mov rdi, rax
call std::basic_ostream<char, std::char_traits<char>>& std::__ostream_insert<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*, long)
mov rbp, QWORD PTR [rsp]
cmp rbp, rsp
je .L8
.L15:
mov rbx, rbp
mov rbp, QWORD PTR [rbp+0]
mov rdi, QWORD PTR [rbx+16]
lea rax, [rbx+32]
cmp rdi, rax
je .L14
mov rax, QWORD PTR [rbx+32]
lea rsi, [rax+1]
call operator delete(void*, unsigned long)
.L14:
mov esi, 48
mov rdi, rbx
call operator delete(void*, unsigned long)
cmp rbp, rsp
jne .L15
.L8:
add rsp, 32
pop rbx
pop rbp
pop r12
ret
mov rbp, rax
jmp .L12
mov rbx, rax
jmp .L16
mov rbp, rax
jmp .L11
test_prefix() (.cold):
.LC3:
.string "Postfix increment copy count: "
test_postfix():
push r12
xor eax, eax
push rbp
push rbx
sub rsp, 32
xchg eax, DWORD PTR Heavy::copy_count[rip]
mov ebp, 1000
mov QWORD PTR [rsp+16], 0
movq xmm1, rsp
movddup xmm0, xmm1
movaps XMMWORD PTR [rsp], xmm0
.L25:
mov edi, 48
call operator new(unsigned long)
mov rbx, rax
mov edx, 120
mov esi, 100
lea rdi, [rax+16]
lea rax, [rax+32]
mov QWORD PTR [rbx+16], rax
call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct(unsigned long, char)
mov rsi, rsp
mov rdi, rbx
call std::__detail::_List_node_base::_M_hook(std::__detail::_List_node_base*)
add QWORD PTR [rsp+16], 1
sub rbp, 1
jne .L25
mov edx, 30
mov esi, OFFSET FLAT:.LC3
mov edi, OFFSET FLAT:std::cout
call std::basic_ostream<char, std::char_traits<char>>& std::__ostream_insert<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*, long)
mov edi, OFFSET FLAT:std::cout
mov esi, DWORD PTR Heavy::copy_count[rip]
call std::ostream::operator<<(int)
mov edx, 1
mov esi, OFFSET FLAT:.LC1
mov rdi, rax
call std::basic_ostream<char, std::char_traits<char>>& std::__ostream_insert<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*, long)
mov rbp, QWORD PTR [rsp]
cmp rbp, rsp
je .L24
.L31:
mov rbx, rbp
mov rbp, QWORD PTR [rbp+0]
mov rdi, QWORD PTR [rbx+16]
lea rax, [rbx+32]
cmp rdi, rax
je .L30
mov rax, QWORD PTR [rbx+32]
lea rsi, [rax+1]
call operator delete(void*, unsigned long)
.L30:
mov esi, 48
mov rdi, rbx
call operator delete(void*, unsigned long)
cmp rbp, rsp
jne .L31
.L24:
add rsp, 32
pop rbx
pop rbp
pop r12
ret
mov rbp, rax
jmp .L28
mov rbx, rax
jmp .L32
mov rbp, rax
jmp .L27
test_postfix() (.cold):
main:
sub rsp, 8
call test_prefix()
call test_postfix()
xor eax, eax
add rsp, 8
ret
Heavy::copy_count:
.zero 4

AI分析:

Claude

让我详细对比下两个函数的汇编代码结构:

  1. 函数入口部分完全相同:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    push    r12
    xor eax, eax
    push rbp
    push rbx
    sub rsp, 32
    xchg eax, DWORD PTR Heavy::copy_count[rip]
    mov ebp, 1000
    mov QWORD PTR [rsp+16], 0
    movq xmm1, rsp
    movddup xmm0, xmm1
    movaps XMMWORD PTR [rsp], xmm0
  2. 主循环部分完全相同:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    mov     edi, 48
    call operator new(unsigned long)
    mov rbx, rax
    mov edx, 120
    mov esi, 100
    lea rdi, [rax+16]
    lea rax, [rax+32]
    mov QWORD PTR [rbx+16], rax
    call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct(unsigned long, char)
    mov rsi, rsp
    mov rdi, rbx
    call std::__detail::_List_node_base::_M_hook(std::__detail::_List_node_base*)
    add QWORD PTR [rsp+16], 1
    sub rbp, 1
    jne .L9/.L25
  3. 输出部分除了字符串长度和内容外,结构完全相同:
    1
    2
    3
    4
    5
    // prefix用29,postfix用30
    mov edx, 29/30
    mov esi, OFFSET FLAT:.LC0/.LC3
    mov edi, OFFSET FLAT:std::cout
    call std::basic_ostream<char, std::char_traits<char>>& std::__ostream_insert<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*, long)
  4. 清理部分完全相同:
    1
    2
    3
    4
    5
    6
    7
    8
    9
            mov     rbp, QWORD PTR [rsp]
    cmp rbp, rsp
    je .L8/.L24
    ...
    add rsp, 32
    pop rbx
    pop rbp
    pop r12
    ret
  5. 错误处理部分完全相同:
    1
    2
    3
    4
    5
    6
    mov     rbp, rax
    jmp .L12/.L28
    mov rbx, rax
    jmp .L16/.L32
    mov rbp, rax
    jmp .L11/.L27

通过仔细分析,可以确认除了以下差异外:
6. 标签命名(如.L9 vs .L25)
7. 输出字符串的内容(”Prefix increment copy count: “ vs “Postfix increment copy count: “)
8. 输出字符串的长度(29 vs 30)

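这两个函数的汇编代码是完全相同的。这进一步证实了在现代编译器优化下,前缀++和后缀++在这种场景下确实没有任何性能差异。编译器生成了完全相同的机器代码来处理这两种操作。需要注意:上面列出的"主循环"(.L9/.L25)实际上是 std::list 构造 1000 个节点的循环(operator new、_M_construct、_M_hook);构造循环结束后紧接着就是输出调用,汇编中并没有对应迭代器遍历的循环,说明遍历循环本身在 -O3 下已被整体消除。因此这份汇编只能说明两种写法的遍历都被优化掉了,并不能直接对比 ++it 与 it++ 各自生成的指令。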

Qwen

从你提供的汇编代码中,test_prefix() 和 test_postfix() 的汇编逻辑几乎完全一致,除了以下几点:

1. 汇编差异点

(1) 函数名和输出字符串
• 函数名:test_prefix() vs test_postfix()(汇编中的标签不同)。
• 输出字符串:
◦ test_prefix() 输出 .LC0: “Prefix increment copy count: “
◦ test_postfix() 输出 .LC3: “Postfix increment copy count: “
(2) .cold 段的调用
• test_prefix() 和 test_postfix() 都有 .cold 段的调用,但地址不同(.L8 vs .L24),这是编译器将冷代码(如析构、异常处理)分离到 .cold 段的优化策略,与 ++i/i++ 无关。

2. 核心逻辑的汇编完全一致

关键循环体对比:
; test_prefix() 的循环体

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
.L9:
mov edi, 48
call operator new(unsigned long)
mov rbx, rax
mov edx, 120
mov esi, 100
lea rdi, [rax+16]
lea rax, [rax+32]
mov QWORD PTR [rbx+16], rax
call std::__cxx11::basic_string::_M_construct
mov rsi, rsp
mov rdi, rbx
call std::__detail::_List_node_base::_M_hook
add QWORD PTR [rsp+16], 1
sub rbp, 1
jne .L9

; test_postfix() 的循环体
.L25:
mov edi, 48
call operator new(unsigned long)
mov rbx, rax
mov edx, 120
mov esi, 100
lea rdi, [rax+16]
lea rax, [rax+32]
mov QWORD PTR [rbx+16], rax
call std::__cxx11::basic_string::_M_construct
mov rsi, rsp
mov rdi, rbx
call std::__detail::_List_node_base::_M_hook
add QWORD PTR [rsp+16], 1
sub rbp, 1
jne .L25

结论:
• 指令序列完全一致:从 operator new 到 _M_hook 的调用,再到循环计数器的更新(sub rbp, 1),两者的汇编代码 完全相同。
• 无额外的临时对象处理:后缀 i++ 通常会创建临时对象,但汇编中未看到任何额外的 mov、call 或 lea 指令(如拷贝构造或临时对象销毁)。

3. 为什么汇编中没有差异?

编译器优化(-O3)

• 内联优化:编译器将 std::list::iterator 的后缀 i++ 内联展开,消除了临时对象的开销。
• 死代码消除:Heavy::copy_count 的统计可能被优化掉,因为其结果未被实际使用(仅输出到控制台)。

std::list 迭代器的特殊性

• std::list 的迭代器通常是 双向指针结构(如 std::__detail::_List_node_base*),其后缀 i++ 只需复制指针值(rax),开销极低。

• 示例:

mov rax, QWORD PTR [rbx+16] ; 获取当前节点的 next 指针—

4. 是否存在隐藏的性能差异?

如果禁用优化(-O0)

• 后缀 i++ 会显式生成临时对象,汇编中会看到:
mov rsi, rax ; 复制当前迭代器
call Heavy::Heavy(Heavy const&) ; 拷贝构造临时对象
• 性能差异显著:临时对象的创建和销毁会引入额外开销。

如果 Heavy 类未被优化

• 如果 Heavy 的拷贝构造未被内联或优化,后缀 i++ 会触发额外的 call 指令(如 Heavy::Heavy(Heavy const&))。

测试6(vector的汇编):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#include <vector>
#include <string>
#include <atomic>
#include <iostream>

// Element type that counts how many times it has been copy-constructed.
struct MyClass {
    std::string data;

    // Number of copy constructions since the counter was last reset.
    static std::atomic<int> copy_count;

    // Fills the payload with 100 'x' characters.
    MyClass() : data(100, 'x') {}

    // Copy construction duplicates the payload and bumps the counter.
    MyClass(const MyClass& src) : data(src.data) {
        copy_count.fetch_add(1);
    }

    // Copy assignment duplicates the payload; it is not counted.
    MyClass& operator=(const MyClass& src) {
        this->data = src.data;
        return *this;
    }
};

std::atomic<int> MyClass::copy_count{0};

// Uses prefix ++it.
// Resets the copy counter, builds a 1000-element vector, walks it with prefix
// ++it, then prints the number of MyClass copies that were counted.
// NOTE(review): the reference-to-volatile below is bound but nothing is read
// through it, so it may not force the traversal loop to be kept — verify in
// the generated assembly.
void test_prefix() {
MyClass::copy_count = 0;
std::vector<MyClass> vec(1000);
for (auto it = vec.begin(); it != vec.end(); ++it) {
volatile const MyClass& ref = *it; // intended to prevent optimization
(void)ref;
}
std::cout << "Prefix increment copy count: " << MyClass::copy_count << "\n";
}

// Uses postfix it++.
// Resets the copy counter, builds a 1000-element vector, walks it with postfix
// it++ (discarding the returned value), then prints the number of MyClass
// copies that were counted.
// NOTE(review): the reference-to-volatile below is bound but nothing is read
// through it, so it may not force the traversal loop to be kept — verify in
// the generated assembly.
void test_postfix() {
MyClass::copy_count = 0;
std::vector<MyClass> vec(1000);
for (auto it = vec.begin(); it != vec.end(); it++) {
volatile const MyClass& ref = *it; // intended to prevent optimization
(void)ref;
}
std::cout << "Postfix increment copy count: " << MyClass::copy_count << "\n";
}

// Entry point: run the prefix test, then the postfix test; each prints its
// own copy count.
int main() {
test_prefix();
test_postfix();
return 0;
}
1
-std=c++17 -mtune=core2 -march=nocona -O3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
std::vector<MyClass, std::allocator<MyClass>>::~vector() [base object destructor]:
push r12
mov r12, rdi
push rbp
push rbx
mov rbp, QWORD PTR [rdi+8]
mov rbx, QWORD PTR [rdi]
cmp rbp, rbx
je .L2
.L4:
mov rdi, QWORD PTR [rbx]
lea rax, [rbx+16]
cmp rdi, rax
je .L3
mov rax, QWORD PTR [rbx+16]
lea rsi, [rax+1]
call operator delete(void*, unsigned long)
.L3:
add rbx, 32
cmp rbp, rbx
jne .L4
mov rbx, QWORD PTR [r12]
.L2:
test rbx, rbx
je .L1
mov rsi, QWORD PTR [r12+16]
mov rdi, rbx
sub rsi, rbx
pop rbx
pop rbp
pop r12
jmp operator delete(void*, unsigned long)
.L1:
pop rbx
pop rbp
pop r12
ret
.LC0:
.string "Prefix increment copy count: "
.LC1:
.string "\n"
test_prefix():
push r14
xor eax, eax
push r12
push rbp
push rbx
sub rsp, 40
xchg eax, DWORD PTR MyClass::copy_count[rip]
mov edi, 32000
call operator new(unsigned long)
lea rbp, [rax+32000]
mov r12, rax
mov rbx, rax
.L13:
lea rax, [rbx+16]
mov edx, 120
mov esi, 100
mov QWORD PTR [rbx], rax
mov rdi, rbx
call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct(unsigned long, char)
add rbx, 32
cmp rbx, rbp
jne .L13
mov edx, 29
mov esi, OFFSET FLAT:.LC0
mov edi, OFFSET FLAT:std::cout
call std::basic_ostream<char, std::char_traits<char>>& std::__ostream_insert<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*, long)
mov edi, OFFSET FLAT:std::cout
mov esi, DWORD PTR MyClass::copy_count[rip]
call std::ostream::operator<<(int)
mov edx, 1
mov esi, OFFSET FLAT:.LC1
mov rdi, rax
call std::basic_ostream<char, std::char_traits<char>>& std::__ostream_insert<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*, long)
cmp rbx, r12
je .L16
mov rbp, r12
.L18:
mov rdi, QWORD PTR [rbp+0]
lea rax, [rbp+16]
cmp rdi, rax
je .L17
mov rax, QWORD PTR [rbp+16]
lea rsi, [rax+1]
call operator delete(void*, unsigned long)
.L17:
add rbp, 32
cmp rbx, rbp
jne .L18
.L16:
add rsp, 40
mov rdi, r12
mov esi, 32000
pop rbx
pop rbp
pop r12
pop r14
jmp operator delete(void*, unsigned long)
mov r14, rax
jmp .L11
mov r14, rax
jmp .L19
test_prefix() (.cold):
.L11:
mov rbp, r12
.L12:
cmp rbp, rbx
je .L27
mov rdi, rbp
add rbp, 32
call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_dispose()
jmp .L12
.L19:
movq xmm1, rbx
mov rdi, rsp
movq xmm0, r12
mov QWORD PTR [rsp+16], rbp
punpcklqdq xmm0, xmm1
movaps XMMWORD PTR [rsp], xmm0
call std::vector<MyClass, std::allocator<MyClass>>::~vector() [complete object destructor]
mov rdi, r14
call _Unwind_Resume
.L27:
mov rdi, r12
mov esi, 32000
call operator delete(void*, unsigned long)
mov rdi, r14
call _Unwind_Resume
.LC3:
.string "Postfix increment copy count: "
test_postfix():
push r14
xor eax, eax
push r12
push rbp
push rbx
sub rsp, 40
xchg eax, DWORD PTR MyClass::copy_count[rip]
mov edi, 32000
call operator new(unsigned long)
lea rbp, [rax+32000]
mov r12, rax
mov rbx, rax
.L32:
lea rax, [rbx+16]
mov edx, 120
mov esi, 100
mov QWORD PTR [rbx], rax
mov rdi, rbx
call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct(unsigned long, char)
add rbx, 32
cmp rbx, rbp
jne .L32
mov edx, 30
mov esi, OFFSET FLAT:.LC3
mov edi, OFFSET FLAT:std::cout
call std::basic_ostream<char, std::char_traits<char>>& std::__ostream_insert<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*, long)
mov edi, OFFSET FLAT:std::cout
mov esi, DWORD PTR MyClass::copy_count[rip]
call std::ostream::operator<<(int)
mov edx, 1
mov esi, OFFSET FLAT:.LC1
mov rdi, rax
call std::basic_ostream<char, std::char_traits<char>>& std::__ostream_insert<char, std::char_traits<char>>(std::basic_ostream<char, std::char_traits<char>>&, char const*, long)
cmp rbx, r12
je .L35
mov rbp, r12
.L37:
mov rdi, QWORD PTR [rbp+0]
lea rax, [rbp+16]
cmp rdi, rax
je .L36
mov rax, QWORD PTR [rbp+16]
lea rsi, [rax+1]
call operator delete(void*, unsigned long)
.L36:
add rbp, 32
cmp rbx, rbp
jne .L37
.L35:
add rsp, 40
mov rdi, r12
mov esi, 32000
pop rbx
pop rbp
pop r12
pop r14
jmp operator delete(void*, unsigned long)
mov r14, rax
jmp .L30
mov r14, rax
jmp .L38
test_postfix() (.cold):
.L30:
mov rbp, r12
.L31:
cmp rbp, rbx
je .L46
mov rdi, rbp
add rbp, 32
call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_dispose()
jmp .L31
.L38:
movq xmm1, rbx
mov rdi, rsp
movq xmm0, r12
mov QWORD PTR [rsp+16], rbp
punpcklqdq xmm0, xmm1
movaps XMMWORD PTR [rsp], xmm0
call std::vector<MyClass, std::allocator<MyClass>>::~vector() [complete object destructor]
mov rdi, r14
call _Unwind_Resume
.L46:
mov rdi, r12
mov esi, 32000
call operator delete(void*, unsigned long)
mov rdi, r14
call _Unwind_Resume
main:
sub rsp, 8
call test_prefix()
call test_postfix()
xor eax, eax
add rsp, 8
ret
MyClass::copy_count:
.zero 4

AI分析:

让我们详细对比 test_prefix() 和 test_postfix() 在 vector 版本中的汇编代码:

  1. 函数入口和内存分配部分完全相同:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    push    r14
    xor eax, eax
    push r12
    push rbp
    push rbx
    sub rsp, 40
    xchg eax, DWORD PTR MyClass::copy_count[rip]
    mov edi, 32000
    call operator new(unsigned long)
  2. 初始化循环部分完全相同:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    .L13/.L32:
    lea rax, [rbx+16]
    mov edx, 120
    mov esi, 100
    mov QWORD PTR [rbx], rax
    mov rdi, rbx
    call std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct(unsigned long, char)
    add rbx, 32
    cmp rbx, rbp
    jne .L13/.L32
  3. 清理部分完全相同:
    1
    2
    3
    4
    5
    6
    7
    8
    add     rsp, 40
    mov rdi, r12
    mov esi, 32000
    pop rbx
    pop rbp
    pop r12
    pop r14
    jmp operator delete(void*, unsigned long)
  4. 错误处理部分也完全相同,只是标签名不同。

唯一的区别仍然只是:

  1. 标签命名(如 .L13 vs .L32)
  2. 输出字符串内容("Prefix increment copy count: " vs "Postfix increment copy count: ")
  3. 输出字符串长度(29 vs 30)
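结论:

  1. 与 list 的情况类似,在 vector 中前缀++和后缀++也被编译器优化成了完全相同的代码
  2. 这是因为:
  • vector 的迭代器本质上是指针
  • 循环中没有使用迭代器的返回值
  • 编译器识别出这种模式后,生成了最优的代码
  • 由于是连续内存,迭代器递增就是简单的指针加法
  3. 需要注意:上面的汇编中,构造循环(.L13 / .L32)结束后紧接着就是输出语句,中间并没有遍历循环。也就是说 volatile const MyClass& 的写法并没有阻止优化,整个遍历循环(连同 ++it / it++)都被消除了。因此这份汇编只能说明两种写法"都被优化掉了",不能用来比较递增指令本身。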

所以,无论是 list 还是 vector,在现代编译器的优化下,前缀++和后缀++在这种简单遍历场景下并没有性能差异。但如果在更复杂的表达式中使用迭代器的返回值,或者是自定义的迭代器类型,可能还是会有区别。


前缀和后缀++的性能差异
http://example.com/2025/07/07/front_and_end_plus/
作者
icyyoung
发布于
2025年7月7日
许可协议