std\sys\path/
windows.rs

1use crate::ffi::{OsStr, OsString};
2use crate::path::{Path, PathBuf, Prefix};
3use crate::sys::api::utf16;
4use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s};
5use crate::{io, ptr};
6
7#[cfg(test)]
8mod tests;
9
/// The main path separator on Windows, as a string slice.
pub const MAIN_SEP_STR: &str = r"\";
/// The main path separator on Windows, as a single character.
pub const MAIN_SEP: char = '\\';
12
/// A wide string (a sequence of `u16` units) that ends in a null.
#[repr(transparent)]
pub struct WCStr([u16]);

impl WCStr {
    /// Reinterprets a `u16` slice as a `WCStr` without validating it.
    ///
    /// Interior nulls do not affect memory safety, but the slice should not
    /// contain any because they may lead to unwanted truncation.
    ///
    /// # Safety
    ///
    /// The last element of the slice must be a null.
    pub unsafe fn from_wchars_with_null_unchecked(s: &[u16]) -> &Self {
        let raw: *const [u16] = s;
        // SAFETY: `WCStr` is `#[repr(transparent)]` over `[u16]`, so a pointer
        // to the slice is also a valid pointer to `Self`.
        unsafe { &*(raw as *const Self) }
    }

    /// Returns a raw pointer to the first `u16` of the string.
    pub fn as_ptr(&self) -> *const u16 {
        <[u16]>::as_ptr(&self.0)
    }

    /// Returns the length of the wrapped slice.
    ///
    /// Despite the name, this counts `u16` elements of the whole slice (it is
    /// simply the slice length), not bytes.
    pub fn count_bytes(&self) -> usize {
        <[u16]>::len(&self.0)
    }
}
38
39#[inline]
40pub fn with_native_path<T>(path: &Path, f: &dyn Fn(&WCStr) -> io::Result<T>) -> io::Result<T> {
41    let path = maybe_verbatim(path)?;
42    // SAFETY: maybe_verbatim returns null-terminated strings
43    let path = unsafe { WCStr::from_wchars_with_null_unchecked(&path) };
44    f(path)
45}
46
/// Returns true if `b` is a path separator byte: either `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'\\' | b'/')
}
51
/// Returns true if `b` is the separator used in verbatim paths: only `\`.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    matches!(b, b'\\')
}
56
57pub fn is_verbatim(path: &[u16]) -> bool {
58    path.starts_with(utf16!(r"\\?\")) || path.starts_with(utf16!(r"\??\"))
59}
60
61/// Returns true if `path` looks like a lone filename.
62pub(crate) fn is_file_name(path: &OsStr) -> bool {
63    !path.as_encoded_bytes().iter().copied().any(is_sep_byte)
64}
65pub(crate) fn has_trailing_slash(path: &OsStr) -> bool {
66    let is_verbatim = path.as_encoded_bytes().starts_with(br"\\?\");
67    let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte };
68    if let Some(&c) = path.as_encoded_bytes().last() { is_separator(c) } else { false }
69}
70
/// Appends `suffix` to the end of `path`.
///
/// The suffix is pushed onto the raw path string, so this can add an
/// extension without replacing one that is already there.
pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
    let mut joined: OsString = path.into_os_string();
    joined.push(suffix);
    PathBuf::from(joined)
}
79
/// A path paired with a copy of its first `LEN` bytes in which every `/` has
/// been replaced by `\`, so prefixes can be matched against one spelling.
struct PrefixParser<'a, const LEN: usize> {
    path: &'a OsStr,
    prefix: [u8; LEN],
}

impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
    /// Copies up to `LEN` leading bytes of `path`, turning `/` into `\`.
    ///
    /// If the path is shorter than `LEN`, the remaining slots stay zero.
    #[inline]
    fn get_prefix(path: &OsStr) -> [u8; LEN] {
        let mut normalized = [0; LEN];
        // Only the ASCII byte `/` is ever rewritten; all other bytes are copied as-is.
        for (slot, &byte) in normalized.iter_mut().zip(path.as_encoded_bytes()) {
            *slot = match byte {
                b'/' => b'\\',
                other => other,
            };
        }
        normalized
    }

    /// Builds a parser for `path`, computing its normalized prefix.
    fn new(path: &'a OsStr) -> Self {
        let prefix = Self::get_prefix(path);
        Self { path, prefix }
    }

    /// Returns a cursor positioned at the start of the path.
    ///
    /// The normalized prefix is cut down to the path's length so the zero
    /// padding is never matched against.
    fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
        let visible = self.path.len().min(LEN);
        PrefixParserSlice { path: self.path, prefix: &self.prefix[..visible], index: 0 }
    }
}

/// A cursor into a `PrefixParser`: the original path, the normalized prefix
/// bytes, and how many bytes have been consumed so far.
struct PrefixParserSlice<'a, 'b> {
    path: &'a OsStr,
    prefix: &'b [u8],
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    /// If the unconsumed normalized bytes start with `prefix`, returns a new
    /// cursor advanced past it; otherwise returns `None`.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let expected = prefix.as_bytes();
        if self.prefix[self.index..].starts_with(expected) {
            Some(Self { path: self.path, prefix: self.prefix, index: self.index + expected.len() })
        } else {
            None
        }
    }

    /// Returns the consumed bytes as they appear in the original path, i.e.
    /// before `/` was normalized to `\`.
    fn prefix_bytes(&self) -> &'a [u8] {
        let original = self.path.as_encoded_bytes();
        &original[..self.index]
    }

    /// Returns the part of the original path that has not been consumed.
    fn finish(self) -> &'a OsStr {
        let remaining = &self.path.as_encoded_bytes()[self.index..];
        // SAFETY: The unsafety here stems from converting between &OsStr and
        // &[u8] and back. This is safe to do because (1) we only look at ASCII
        // contents of the encoding and (2) new &OsStr values are produced only
        // from ASCII-bounded slices of existing &OsStr values.
        unsafe { OsStr::from_encoded_bytes_unchecked(remaining) }
    }
}
134
135pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
136    use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};
137
138    let parser = PrefixParser::<8>::new(path);
139    let parser = parser.as_slice();
140    if let Some(parser) = parser.strip_prefix(r"\\") {
141        // \\
142
143        // The meaning of verbatim paths can change when they use a different
144        // separator.
145        if let Some(parser) = parser.strip_prefix(r"?\")
146            && !parser.prefix_bytes().iter().any(|&x| x == b'/')
147        {
148            // \\?\
149            if let Some(parser) = parser.strip_prefix(r"UNC\") {
150                // \\?\UNC\server\share
151
152                let path = parser.finish();
153                let (server, path) = parse_next_component(path, true);
154                let (share, _) = parse_next_component(path, true);
155
156                Some(VerbatimUNC(server, share))
157            } else {
158                let path = parser.finish();
159
160                // in verbatim paths only recognize an exact drive prefix
161                if let Some(drive) = parse_drive_exact(path) {
162                    // \\?\C:
163                    Some(VerbatimDisk(drive))
164                } else {
165                    // \\?\prefix
166                    let (prefix, _) = parse_next_component(path, true);
167                    Some(Verbatim(prefix))
168                }
169            }
170        } else if let Some(parser) = parser.strip_prefix(r".\") {
171            // \\.\COM42
172            let path = parser.finish();
173            let (prefix, _) = parse_next_component(path, false);
174            Some(DeviceNS(prefix))
175        } else {
176            let path = parser.finish();
177            let (server, path) = parse_next_component(path, false);
178            let (share, _) = parse_next_component(path, false);
179
180            if !server.is_empty() && !share.is_empty() {
181                // \\server\share
182                Some(UNC(server, share))
183            } else {
184                // no valid prefix beginning with "\\" recognized
185                None
186            }
187        }
188    } else {
189        // If it has a drive like `C:` then it's a disk.
190        // Otherwise there is no prefix.
191        parse_drive(path).map(Disk)
192    }
193}
194
// Parses a drive prefix, e.g. "C:" and "C:\whatever".
//
// Returns the drive letter in upper case, or `None` if the path does not
// start with an ASCII letter followed by a colon.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let bytes = path.as_encoded_bytes();
    let (&letter, &colon) = (bytes.first()?, bytes.get(1)?);

    // In most DOS systems, it is not possible to have more than 26 drive letters.
    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
    let is_drive = colon == b':' && letter.is_ascii_alphabetic();
    is_drive.then(|| letter.to_ascii_uppercase())
}
208
209// Parses a drive prefix exactly, e.g. "C:"
210fn parse_drive_exact(path: &OsStr) -> Option<u8> {
211    // only parse two bytes: the drive letter and the drive separator
212    if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
213        parse_drive(path)
214    } else {
215        None
216    }
217}
218
219// Parse the next path component.
220//
221// Returns the next component and the rest of the path excluding the component and separator.
222// Does not recognize `/` as a separator character if `verbatim` is true.
223fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
224    let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
225
226    match path.as_encoded_bytes().iter().position(|&x| separator(x)) {
227        Some(separator_start) => {
228            let separator_end = separator_start + 1;
229
230            let component = &path.as_encoded_bytes()[..separator_start];
231
232            // Panic safe
233            // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
234            let path = &path.as_encoded_bytes()[separator_end..];
235
236            // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
237            // is encoded in a single byte, therefore `bytes[separator_start]` and
238            // `bytes[separator_end]` must be code point boundaries and thus
239            // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
240            unsafe {
241                (
242                    OsStr::from_encoded_bytes_unchecked(component),
243                    OsStr::from_encoded_bytes_unchecked(path),
244                )
245            }
246        }
247        None => (path, OsStr::new("")),
248    }
249}
250
251/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
252///
253/// This path may or may not have a verbatim prefix.
254pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
255    let path = to_u16s(path)?;
256    get_long_path(path, true)
257}
258
259/// Gets a normalized absolute path that can bypass path length limits.
260///
261/// Setting prefer_verbatim to true suggests a stronger preference for verbatim
262/// paths even when not strictly necessary. This allows the Windows API to avoid
263/// repeating our work. However, if the path may be given back to users or
264/// passed to other application then it's preferable to use non-verbatim paths
265/// when possible. Non-verbatim paths are better understood by users and handled
266/// by more software.
267pub(crate) fn get_long_path(mut path: Vec<u16>, prefer_verbatim: bool) -> io::Result<Vec<u16>> {
268    // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL).
269    // However, for APIs such as CreateDirectory[1], the limit is 248.
270    //
271    // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters
272    const LEGACY_MAX_PATH: usize = 248;
273    // UTF-16 encoded code points, used in parsing and building UTF-16 paths.
274    // All of these are in the ASCII range so they can be cast directly to `u16`.
275    const SEP: u16 = b'\\' as _;
276    const ALT_SEP: u16 = b'/' as _;
277    const QUERY: u16 = b'?' as _;
278    const COLON: u16 = b':' as _;
279    const DOT: u16 = b'.' as _;
280    const U: u16 = b'U' as _;
281    const N: u16 = b'N' as _;
282    const C: u16 = b'C' as _;
283
284    // \\?\
285    const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP];
286    // \??\
287    const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP];
288    // \\?\UNC\
289    const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP];
290
291    if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == [0] {
292        // Early return for paths that are already verbatim or empty.
293        return Ok(path);
294    } else if path.len() < LEGACY_MAX_PATH {
295        // Early return if an absolute path is less < 260 UTF-16 code units.
296        // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily.
297        match path.as_slice() {
298            // Starts with `D:`, `D:\`, `D:/`, etc.
299            // Does not match if the path starts with a `\` or `/`.
300            [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..]
301                if *drive != SEP && *drive != ALT_SEP =>
302            {
303                return Ok(path);
304            }
305            // Starts with `\\`, `//`, etc
306            [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path),
307            _ => {}
308        }
309    }
310
311    // Firstly, get the absolute path using `GetFullPathNameW`.
312    // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
313    let lpfilename = path.as_ptr();
314    fill_utf16_buf(
315        // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
316        // `lpfilename` is a pointer to a null terminated string that is not
317        // invalidated until after `GetFullPathNameW` returns successfully.
318        |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
319        |mut absolute| {
320            path.clear();
321
322            // Only prepend the prefix if needed.
323            if prefer_verbatim || absolute.len() + 1 >= LEGACY_MAX_PATH {
324                // Secondly, add the verbatim prefix. This is easier here because we know the
325                // path is now absolute and fully normalized (e.g. `/` has been changed to `\`).
326                let prefix = match absolute {
327                    // C:\ => \\?\C:\
328                    [_, COLON, SEP, ..] => VERBATIM_PREFIX,
329                    // \\.\ => \\?\
330                    [SEP, SEP, DOT, SEP, ..] => {
331                        absolute = &absolute[4..];
332                        VERBATIM_PREFIX
333                    }
334                    // Leave \\?\ and \??\ as-is.
335                    [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[],
336                    // \\ => \\?\UNC\
337                    [SEP, SEP, ..] => {
338                        absolute = &absolute[2..];
339                        UNC_PREFIX
340                    }
341                    // Anything else we leave alone.
342                    _ => &[],
343                };
344
345                path.reserve_exact(prefix.len() + absolute.len() + 1);
346                path.extend_from_slice(prefix);
347            } else {
348                path.reserve_exact(absolute.len() + 1);
349            }
350            path.extend_from_slice(absolute);
351            path.push(0);
352        },
353    )?;
354    Ok(path)
355}
356
357/// Make a Windows path absolute.
358pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
359    let path = path.as_os_str();
360    let prefix = parse_prefix(path);
361    // Verbatim paths should not be modified.
362    if prefix.map(|x| x.is_verbatim()).unwrap_or(false) {
363        // NULs in verbatim paths are rejected for consistency.
364        if path.as_encoded_bytes().contains(&0) {
365            return Err(io::const_error!(
366                io::ErrorKind::InvalidInput,
367                "strings passed to WinAPI cannot contain NULs",
368            ));
369        }
370        return Ok(path.to_owned().into());
371    }
372
373    let path = to_u16s(path)?;
374    let lpfilename = path.as_ptr();
375    fill_utf16_buf(
376        // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
377        // `lpfilename` is a pointer to a null terminated string that is not
378        // invalidated until after `GetFullPathNameW` returns successfully.
379        |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
380        os2path,
381    )
382}
383
384pub(crate) fn is_absolute(path: &Path) -> bool {
385    path.has_root() && path.prefix().is_some()
386}
387
388/// Test that the path is absolute, fully qualified and unchanged when processed by the Windows API.
389///
390/// For example:
391///
392/// - `C:\path\to\file` will return true.
393/// - `C:\path\to\nul` returns false because the Windows API will convert it to \\.\NUL
394/// - `C:\path\to\..\file` returns false because it will be resolved to `C:\path\file`.
395///
396/// This is a useful property because it means the path can be converted from and to and verbatim
397/// path just by changing the prefix.
398pub(crate) fn is_absolute_exact(path: &[u16]) -> bool {
399    // This is implemented by checking that passing the path through
400    // GetFullPathNameW does not change the path in any way.
401
402    // Windows paths are limited to i16::MAX length
403    // though the API here accepts a u32 for the length.
404    if path.is_empty() || path.len() > u32::MAX as usize || path.last() != Some(&0) {
405        return false;
406    }
407    // The path returned by `GetFullPathNameW` must be the same length as the
408    // given path, otherwise they're not equal.
409    let buffer_len = path.len();
410    let mut new_path = Vec::with_capacity(buffer_len);
411    let result = unsafe {
412        c::GetFullPathNameW(
413            path.as_ptr(),
414            new_path.capacity() as u32,
415            new_path.as_mut_ptr(),
416            crate::ptr::null_mut(),
417        )
418    };
419    // Note: if non-zero, the returned result is the length of the buffer without the null termination
420    if result == 0 || result as usize != buffer_len - 1 {
421        false
422    } else {
423        // SAFETY: `GetFullPathNameW` initialized `result` bytes and does not exceed `nBufferLength - 1` (capacity).
424        unsafe {
425            new_path.set_len((result as usize) + 1);
426        }
427        path == &new_path
428    }
429}