std\sys\thread_local\key/
windows.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
//! Implementation of `LazyKey` for Windows.
//!
//! Windows has no native support for running destructors so we manage our own
//! list of destructors to keep track of how to destroy keys. We then install a
//! callback later to get invoked whenever a thread exits, running all
//! appropriate destructors (see the [`guard`](guard) module documentation).
//!
//! This will likely need to be improved over time, but this module attempts a
//! "poor man's" destructor callback system. Once we've got a list of what to
//! run, we iterate over all keys, check their values, and then run destructors
//! if the values turn out to be non null (setting them to null just beforehand).
//! We do this a few times in a loop to basically match Unix semantics. If we
//! don't reach a fixed point after a short while then we just inevitably leak
//! something.
//!
//! The list is implemented as an atomic single-linked list of `LazyKey`s and
//! does not support unregistration. Unfortunately, this means that we cannot
//! use racy initialization for creating the keys in `LazyKey`, as that could
//! result in destructors being missed. Hence, we synchronize the creation of
//! keys with destructors through [`INIT_ONCE`](c::INIT_ONCE) (`std`'s
//! [`Once`](crate::sync::Once) cannot be used since it might use TLS itself).
//! For keys without destructors, racy initialization suffices.

// FIXME: investigate using a fixed-size array instead, as the maximum number
//        of keys is [limited to 1088](https://learn.microsoft.com/en-us/windows/win32/ProcThread/thread-local-storage).

use crate::cell::UnsafeCell;
use crate::ptr;
use crate::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release};
use crate::sync::atomic::{AtomicPtr, AtomicU32};
use crate::sys::c;
use crate::sys::thread_local::guard;

pub type Key = u32;
type Dtor = unsafe extern "C" fn(*mut u8);

pub struct LazyKey {
    /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == u32::MAX
    /// is not a valid key value, this allows us to use zero as sentinel value
    /// without risking overflow.
    key: AtomicU32,
    dtor: Option<Dtor>,
    next: AtomicPtr<LazyKey>,
    /// Currently, destructors cannot be unregistered, so we cannot use racy
    /// initialization for keys. Instead, we need synchronize initialization.
    /// Use the Windows-provided `Once` since it does not require TLS.
    once: UnsafeCell<c::INIT_ONCE>,
}

impl LazyKey {
    #[inline]
    pub const fn new(dtor: Option<Dtor>) -> LazyKey {
        LazyKey {
            key: AtomicU32::new(0),
            dtor,
            next: AtomicPtr::new(ptr::null_mut()),
            once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT),
        }
    }

    #[inline]
    pub fn force(&'static self) -> Key {
        match self.key.load(Acquire) {
            0 => unsafe { self.init() },
            key => key - 1,
        }
    }

    #[cold]
    unsafe fn init(&'static self) -> Key {
        if self.dtor.is_some() {
            let mut pending = c::FALSE;
            let r = unsafe {
                c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut())
            };
            assert_eq!(r, c::TRUE);

            if pending == c::FALSE {
                // Some other thread initialized the key, load it.
                self.key.load(Relaxed) - 1
            } else {
                let key = unsafe { c::TlsAlloc() };
                if key == c::TLS_OUT_OF_INDEXES {
                    // Wakeup the waiting threads before panicking to avoid deadlock.
                    unsafe {
                        c::InitOnceComplete(
                            self.once.get(),
                            c::INIT_ONCE_INIT_FAILED,
                            ptr::null_mut(),
                        );
                    }
                    panic!("out of TLS indexes");
                }

                unsafe {
                    register_dtor(self);
                }

                // Release-storing the key needs to be the last thing we do.
                // This is because in `fn key()`, other threads will do an acquire load of the key,
                // and if that sees this write then it will entirely bypass the `InitOnce`. We thus
                // need to establish synchronization through `key`. In particular that acquire load
                // must happen-after the register_dtor above, to ensure the dtor actually runs!
                self.key.store(key + 1, Release);

                let r = unsafe { c::InitOnceComplete(self.once.get(), 0, ptr::null_mut()) };
                debug_assert_eq!(r, c::TRUE);

                key
            }
        } else {
            // If there is no destructor to clean up, we can use racy initialization.

            let key = unsafe { c::TlsAlloc() };
            assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes");

            match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {
                Ok(_) => key,
                Err(new) => unsafe {
                    // Some other thread completed initialization first, so destroy
                    // our key and use theirs.
                    let r = c::TlsFree(key);
                    debug_assert_eq!(r, c::TRUE);
                    new - 1
                },
            }
        }
    }
}

unsafe impl Send for LazyKey {}
unsafe impl Sync for LazyKey {}

#[inline]
pub unsafe fn set(key: Key, val: *mut u8) {
    let r = unsafe { c::TlsSetValue(key, val.cast()) };
    debug_assert_eq!(r, c::TRUE);
}

#[inline]
pub unsafe fn get(key: Key) -> *mut u8 {
    unsafe { c::TlsGetValue(key).cast() }
}

static DTORS: AtomicPtr<LazyKey> = AtomicPtr::new(ptr::null_mut());

/// Should only be called once per key, otherwise loops or breaks may occur in
/// the linked list.
unsafe fn register_dtor(key: &'static LazyKey) {
    guard::enable();

    let this = <*const LazyKey>::cast_mut(key);
    // Use acquire ordering to pass along the changes done by the previously
    // registered keys when we store the new head with release ordering.
    let mut head = DTORS.load(Acquire);
    loop {
        key.next.store(head, Relaxed);
        match DTORS.compare_exchange_weak(head, this, Release, Acquire) {
            Ok(_) => break,
            Err(new) => head = new,
        }
    }
}

/// This will and must only be run by the destructor callback in [`guard`].
pub unsafe fn run_dtors() {
    for _ in 0..5 {
        let mut any_run = false;

        // Use acquire ordering to observe key initialization.
        let mut cur = DTORS.load(Acquire);
        while !cur.is_null() {
            let pre_key = unsafe { (*cur).key.load(Acquire) };
            let dtor = unsafe { (*cur).dtor.unwrap() };
            cur = unsafe { (*cur).next.load(Relaxed) };

            // In LazyKey::init, we register the dtor before setting `key`.
            // So if one thread's `run_dtors` races with another thread executing `init` on the same
            // `LazyKey`, we can encounter a key of 0 here. That means this key was never
            // initialized in this thread so we can safely skip it.
            if pre_key == 0 {
                continue;
            }
            // If this is non-zero, then via the `Acquire` load above we synchronized with
            // everything relevant for this key. (It's not clear that this is needed, since the
            // release-acquire pair on DTORS also establishes synchronization, but better safe than
            // sorry.)
            let key = pre_key - 1;

            let ptr = unsafe { c::TlsGetValue(key) };
            if !ptr.is_null() {
                unsafe {
                    c::TlsSetValue(key, ptr::null_mut());
                    dtor(ptr as *mut _);
                    any_run = true;
                }
            }
        }

        if !any_run {
            break;
        }
    }
}