cifangyiquan 10 X 10
cifangyiquan:~/blog$ source "c++11 thread_local 用法.sh"

cifangyiquan:~/blog$ printenv
CREATED_DATE = 2019-01-10
UPDATED_DATE = 2023-04-16
TAGS = c++11 : c++
CATEGORIES = programming

cifangyiquan:~/blog$ grep -lr $TAGS post
2019-01-10 c++11 thread_local 用法
2019-03-06 C++中文字符处理(宽字符转换)
c++11 thread_local 用法

thread_local用法

这两天在使用xgboost时发现,xgboost的预测是线程不安全的。和架构的同事讨论了一下,短期方案是每个线程load一个model。辉辉提出了用thread_local来实现,代替自己构造线程队列的机制。于是我就学习了一下这个c++11的新特性。

thread_local是什么?

thread_local是一个存储期指定符(storage class specifier)。与它同是存储期指定符的还有4个,分别是:auto, register, static, extern

关键字 说明 备注
auto 自动存储期 c++11前, “auto int x; ” 在c++11起错误
register 自动存储期。指示编译器将此对象置于处理器的寄存器中。 c++17弃用
static 静态或者线程存储期的内部链接
extern 静态或者线程存储期的外部链接
thread_local 线程存储期 c++11起

下面是对thread_local的官方解释:

> thread_local 关键词只对声明于命名空间作用域的对象、声明于块作用域的对象及静态数据成员允许。它指示对象拥有线程存储期。它能与 static 或 extern 结合,以分别指定内部或外部链接(除了静态数据成员始终拥有外部链接),但附加的 static 不影响存储期。

> 线程存储期: 对象的存储在线程开始时分配,而在线程结束时解分配。每个线程拥有其自身的对象实例。唯有声明为 thread_local 的对象拥有此存储期。 thread_local 能与 static 或 extern 一同出现,以调整链接。

---

下面分别试试thread_local的运行情况:

编译命令: g++ -std=c++11 -pthread thread_local.cpp

运行环境:

g++: gcc version 4.8.5 20150623 (Red Hat 4.8.5-16) (GCC)

OS: Linux version 3.10.0-693.el7.x86_64

实际应用情况

1. 全局变量

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
//需要包含的头文件
#include <iostream>
#include <thread>
#include <mutex>
// Serializes writes to std::cout so lines printed by different threads do not interleave.
std::mutex cout_mutex;

// Namespace-scope thread_local: every thread works on its own copy, each starting at 1.
thread_local int x = 1;

// Increments the calling thread's copy of x three times and prints the value
// after every increment, tagged with thread_name.
void thread_func(const std::string& thread_name) {
    constexpr int kRounds = 3;
    for (int round = 0; round != kRounds; ++round) {
        ++x;
        // Hold the lock only while the line is being written.
        std::lock_guard<std::mutex> guard(cout_mutex);
        std::cout << "thread[" << thread_name << "]: x = " << x << std::endl;
    }
}

int main() {
std::thread t1(thread_func, "t1");
std::thread t2(thread_func, "t2");
t1.join();
t2.join();
return 0;
}

输出:

1
2
3
4
5
6
thread[t2]: x = 2
thread[t2]: x = 3
thread[t2]: x = 4
thread[t1]: x = 2
thread[t1]: x = 3
thread[t1]: x = 4
说明:可以看出x在每个线程里是分别自加互不干扰的。

2. 局部变量

我们把x改成for循环内的局部变量

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
//需要包含的头文件
#include <iostream>
#include <thread>
#include <mutex>
// Serializes writes to std::cout so lines printed by different threads do not interleave.
std::mutex cout_mutex;

// Increments a block-scope thread_local counter three times and prints it after
// every increment, tagged with thread_name.
void thread_func(const std::string& thread_name) {
    constexpr int kRounds = 3;
    for (int round = 0; round != kRounds; ++round) {
        // Initialized once per thread, not once per loop iteration, so the
        // value carries over from one iteration (and one call) to the next.
        thread_local int x = 1;
        ++x;
        std::lock_guard<std::mutex> guard(cout_mutex);
        std::cout << "thread[" << thread_name << "]: x = " << x << std::endl;
    }
}

int main() {
std::thread t1(thread_func, "t1");
std::thread t2(thread_func, "t2");
t1.join();
t2.join();
return 0;
}

输出:

1
2
3
4
5
6
thread[t2]: x = 2
thread[t2]: x = 3
thread[t2]: x = 4
thread[t1]: x = 2
thread[t1]: x = 3
thread[t1]: x = 4
如果不加thread_local:
1
2
3
4
5
6
thread[t2]: x = 2
thread[t2]: x = 2
thread[t2]: x = 2
thread[t1]: x = 2
thread[t1]: x = 2
thread[t1]: x = 2
说明:thread_local的局部变量没有因为for循环作用域而重新赋值。这是因为线程存储期的变量是和线程绑定的,所以只在每个线程第一次执行到声明处时被初始化一次。可以理解为线程的static变量。不过变量的作用域依然是它本身所在的作用域。比如:在for循环外使用x就会出现编译错误

1
2
3
4
5
6
7
8
9
10
// Deliberately ill-formed variant of thread_func: it shows that a block-scope
// thread_local still obeys ordinary scoping rules. x is declared inside the
// for loop, so it cannot be named once the loop has ended.
void thread_func(const std::string& thread_name) {
for (int i = 0; i < 3; ++i) {
thread_local int x = 1; // visible only inside this loop body
x++;
std::lock_guard<std::mutex> lock(cout_mutex);
std::cout << "thread[" << thread_name << "]: x = " << x << std::endl;
}
x++; // intentionally does not compile: error: ‘x’ was not declared in this scope
return;
}

编译出错:

1
error: ‘x’ was not declared in this scope

3. 类对象

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#include <iostream>
#include <thread>
#include <mutex>
// Serializes writes to std::cout so lines printed by different threads do not interleave.
std::mutex cout_mutex;

// Small counter class that announces its construction and destruction on
// std::cout, taking cout_mutex for each announcement.
class A {
public:
    // Value handed out by the next get_value() call.
    int counter = 0;

    A() {
        std::lock_guard<std::mutex> guard(cout_mutex);
        std::cout << "create A" << std::endl;
    }

    ~A() {
        std::lock_guard<std::mutex> guard(cout_mutex);
        std::cout << "destroy A" << std::endl;
    }

    // Returns the current counter value, then advances the counter by one.
    int get_value() {
        const int current = counter;
        ++counter;
        return current;
    }
};

// Prints the counter of the calling thread's A instance three times, tagged
// with thread_name.
void thread_func(const std::string& thread_name) {
    constexpr int kRounds = 3;
    for (int round = 0; round != kRounds; ++round) {
        // Runs `new A()` only the first time this thread reaches the line.
        // It must stay ahead of the lock below: A's constructor locks
        // cout_mutex itself, and std::mutex is not recursive.
        // The pointer is never deleted in this function.
        thread_local A* a = new A();
        std::lock_guard<std::mutex> guard(cout_mutex);
        std::cout << "thread[" << thread_name << "]: a.counter:" << a->get_value() << std::endl;
    }
}

int main() {
std::thread t1(thread_func, "t1");
std::thread t2(thread_func, "t2");
t1.join();
t2.join();
return 0;
}

输出:

1
2
3
4
5
6
7
8
create A
thread[t1]: a.counter:0
thread[t1]: a.counter:1
thread[t1]: a.counter:2
create A
thread[t2]: a.counter:0
thread[t2]: a.counter:1
thread[t2]: a.counter:2
说明:可以看出虽然在循环中创建了A的实例a,但是并没有因为循环创建了多个。这个与局部变量的情况相同,创建的实例相对于thread是static的。这种情况在函数间或通过函数返回实例也是一样的,如:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Heap-allocates a default-constructed A and returns the raw pointer to it.
A* creatA() {
    A* const fresh = new A();
    return fresh;
}

// Prints the counter of the calling thread's A instance once, tagged with
// thread_name.
void loopin_func(const std::string& thread_name) {
    // The initializer runs only on the first call made by each thread, so
    // creatA() is invoked once per thread. It must stay ahead of the lock
    // below: A's constructor locks cout_mutex itself.
    thread_local A* a = creatA();
    std::lock_guard<std::mutex> guard(cout_mutex);
    std::cout << "thread[" << thread_name << "]: a.counter:" << a->get_value() << std::endl;
}

// Calls loopin_func three times in a row, passing thread_name through.
void thread_func(const std::string& thread_name) {
    constexpr int kCalls = 3;
    for (int call = 0; call != kCalls; ++call) {
        loopin_func(thread_name);
    }
}
输出:结果相同
1
2
3
4
5
6
7
8
create A
thread[t1]: a.counter:0
thread[t1]: a.counter:1
thread[t1]: a.counter:2
create A
thread[t2]: a.counter:0
thread[t2]: a.counter:1
thread[t2]: a.counter:2
但是如果没有在声明时进行赋值,就不一样了。如:
1
2
3
4
5
6
7
// Pitfall variant of loopin_func: the thread_local pointer is declared without
// an initializer and assigned afterwards.
void loopin_func(const std::string& thread_name) {
    thread_local A* a;
    // Unlike an initializer, this plain assignment runs on every call, so each
    // call creates another A and overwrites the previous pointer without
    // deleting it. It must stay ahead of the lock below because A's
    // constructor locks cout_mutex itself.
    a = creatA();
    std::lock_guard<std::mutex> guard(cout_mutex);
    std::cout << "thread[" << thread_name << "]: a.counter:" << a->get_value() << std::endl;
}
输出:
1
2
3
4
5
6
7
8
9
10
11
12
create A
thread[t2]: a.counter:0
create A
thread[t2]: a.counter:0
create A
thread[t2]: a.counter:0
create A
thread[t1]: a.counter:0
create A
thread[t1]: a.counter:0
create A
thread[t1]: a.counter:0
很好理解,在循环执行时,每次都创建了一个A实例并对a进行赋值。 所以一般情况要求我们:thread_local对象声明时赋值

4. 类成员变量的情况

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include <iostream>
#include <thread>
#include <mutex>
// Serializes writes to std::cout so lines printed by different threads do not interleave.
std::mutex cout_mutex;

// Deliberately ill-formed version of B: b_key is declared thread_local without
// static, which the compiler rejects ("storage class specified for 'b_key'").
class B {
public:
// Announces construction on std::cout while holding cout_mutex.
B() {
std::lock_guard<std::mutex> lock(cout_mutex);
std::cout << "create B" << std::endl;
}
~B() {}
// The accepted form of the declaration below would be:
//thread_local static int b_key;
thread_local int b_key; // compile error: a thread_local data member must be static
int b_value = 24; // ordinary member: one copy per B object
static int b_static; // static member: defined once, after the class
};
int B::b_static = 36;
// Decrements three kinds of B member three times and prints them after every
// step, tagged with thread_name:
//   - b_key:    declared thread_local in B
//   - b_value:  ordinary member of the local object b
//   - b_static: static member of B
// Each step prints two lines: the first reads the members through the object
// (b.b_key, b.b_static), the second through the class name (B::b_key,
// B::b_static).
void thread_func(const std::string& thread_name) {
    B b;
    for (int i = 0; i < 3; ++i) {
        b.b_key--;
        b.b_value--;
        b.b_static--; // not thread safe: the static member is modified before the lock is taken
        std::lock_guard<std::mutex> lock(cout_mutex);
        std::cout << "thread[" << thread_name << "]: b_key:" << b.b_key << ", b_value:" << b.b_value << ", b_static:" << b.b_static << std::endl;
        std::cout << "thread[" << thread_name << "]: B::key:" << B::b_key << ", b_value:" << b.b_value << ", b_static: " << B::b_static << std::endl;
    } // end of the for loop: all three iterations run before the function returns
    return;
}

int main() {
std::thread t1(thread_func, "t1");
std::thread t2(thread_func, "t2");
t1.join();
t2.join();
return 0;
}

输出:编译时出错

1
2
error: storage class specified for ‘b_key’
thread_local int b_key;
说明:thread_local作为类成员变量时必须是static的,修改代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

// Puts three kinds of data member side by side: a static thread_local member,
// an ordinary member and a plain static member.
class B {
public:
    // Announces construction on std::cout while holding cout_mutex.
    B() {
        std::lock_guard<std::mutex> guard(cout_mutex);
        std::cout << "create B" << std::endl;
    }
    ~B() {}

    // thread_local data members have to be static; a non-static
    // `thread_local int b_key;` is rejected by the compiler.
    static thread_local int b_key;
    // Ordinary member: one copy per B object.
    int b_value = 24;
    // Plain static member: a single copy for the whole program.
    static int b_static;
};

// Out-of-class definitions of the static members; thread_local must be
// repeated on the definition of b_key.
thread_local int B::b_key = 12;
int B::b_static = 36;

// Decrements three kinds of B member three times and prints them after every
// step, tagged with thread_name:
//   - b_key:    declared thread_local in B
//   - b_value:  ordinary member of the local object b
//   - b_static: static member of B
// Each step prints two lines: the first reads the members through the object
// (b.b_key, b.b_static), the second through the class name (B::b_key,
// B::b_static).
void thread_func(const std::string& thread_name) {
    B b;
    for (int i = 0; i < 3; ++i) {
        b.b_key--;
        b.b_value--;
        b.b_static--; // not thread safe: the static member is modified before the lock is taken
        std::lock_guard<std::mutex> lock(cout_mutex);
        std::cout << "thread[" << thread_name << "]: b_key:" << b.b_key << ", b_value:" << b.b_value << ", b_static:" << b.b_static << std::endl;
        std::cout << "thread[" << thread_name << "]: B::key:" << B::b_key << ", b_value:" << b.b_value << ", b_static: " << B::b_static << std::endl;
    } // end of the for loop: all three iterations run before the function returns
    return;
}
输出:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
create B
thread[t2]: b_key:11, b_value:23, b_static:35
thread[t2]: B::key:11, b_value:23, b_static: 35
thread[t2]: b_key:10, b_value:22, b_static:34
thread[t2]: B::key:10, b_value:22, b_static: 34
thread[t2]: b_key:9, b_value:21, b_static:33
thread[t2]: B::key:9, b_value:21, b_static: 33
create B
thread[t1]: b_key:11, b_value:23, b_static:32
thread[t1]: B::key:11, b_value:23, b_static: 32
thread[t1]: b_key:10, b_value:22, b_static:31
thread[t1]: B::key:10, b_value:22, b_static: 31
thread[t1]: b_key:9, b_value:21, b_static:30
thread[t1]: B::key:9, b_value:21, b_static: 30
说明:可以看出thread_local作为类成员时也是对于每个thread分别分配了一个。而static则是全局一个。

总结

1. thread_local描述的对象在thread开始时分配,而在thread结束时解分配(释放)。

2. 一般在声明时赋值,在本thread中只执行一次。

3. 描述的对象依然只在作用域内有效。

4. 描述类成员变量时,必须是static的。

参考

https://zh.cppreference.com/w/cpp/language/storage_duration

https://stackoverflow.com/questions/11983875/what-does-the-thread-local-mean-in-c11

http://trickness.github.io/morden%20c++/2015/09/26/C++11&14-thread_local/

https://www.cnblogs.com/pop-lar/p/5123014.html