std\sys\stdio/
windows.rs

1#![unstable(issue = "none", feature = "windows_stdio")]
2
3use core::char::MAX_LEN_UTF8;
4use core::str::utf8_char_width;
5
6use crate::mem::MaybeUninit;
7use crate::os::windows::io::{FromRawHandle, IntoRawHandle};
8use crate::sys::handle::Handle;
9use crate::sys::pal::api::{self, WinError};
10use crate::sys::{c, cvt};
11use crate::{cmp, io, ptr, str};
12
13#[cfg(test)]
14mod tests;
15
16// Don't cache handles but get them fresh for every read/write. This allows us to track changes to
17// the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490.
18pub struct Stdin {
19    surrogate: u16,
20    incomplete_utf8: IncompleteUtf8,
21}
22
23pub struct Stdout {
24    incomplete_utf8: IncompleteUtf8,
25}
26
27pub struct Stderr {
28    incomplete_utf8: IncompleteUtf8,
29}
30
/// A small fixed-size buffer holding the bytes of at most one UTF-8 encoded character.
struct IncompleteUtf8 {
    /// Storage for the buffered bytes; only the first `len` entries are meaningful.
    bytes: [u8; 4],
    /// Number of bytes currently buffered in `bytes`.
    len: u8,
}
35
36impl IncompleteUtf8 {
37    // Implemented for use in Stdin::read.
38    fn read(&mut self, buf: &mut [u8]) -> usize {
39        // Write to buffer until the buffer is full or we run out of bytes.
40        let to_write = cmp::min(buf.len(), self.len as usize);
41        buf[..to_write].copy_from_slice(&self.bytes[..to_write]);
42
43        // Rotate the remaining bytes if not enough remaining space in buffer.
44        if usize::from(self.len) > buf.len() {
45            self.bytes.copy_within(to_write.., 0);
46            self.len -= to_write as u8;
47        } else {
48            self.len = 0;
49        }
50
51        to_write
52    }
53}
54
// Windows apparently does not cope well with large reads on stdin or large writes to
// stdout/stderr (see #13304 for details).
//
// From MSDN (2011): "The storage for this buffer is allocated from a shared heap for the
// process that is 64 KB in size. The maximum size of the buffer will depend on heap usage."
//
// The cap is set to 8 KiB because libuv does the same, and it seems to be acceptable so far.
const MAX_BUFFER_SIZE: usize = 8 * 1024;
63
64// The standard buffer size of BufReader for Stdin should be able to hold 3x more bytes than there
65// are `u16`'s in MAX_BUFFER_SIZE. This ensures the read data can always be completely decoded from
66// UTF-16 to UTF-8.
67pub const STDIN_BUF_SIZE: usize = MAX_BUFFER_SIZE / 2 * 3;
68
69pub fn get_handle(handle_id: u32) -> io::Result<c::HANDLE> {
70    let handle = unsafe { c::GetStdHandle(handle_id) };
71    if handle == c::INVALID_HANDLE_VALUE {
72        Err(io::Error::last_os_error())
73    } else if handle.is_null() {
74        Err(io::Error::from_raw_os_error(c::ERROR_INVALID_HANDLE as i32))
75    } else {
76        Ok(handle)
77    }
78}
79
80fn is_console(handle: c::HANDLE) -> bool {
81    // `GetConsoleMode` will return false (0) if this is a pipe (we don't care about the reported
82    // mode). This will only detect Windows Console, not other terminals connected to a pipe like
83    // MSYS. Which is exactly what we need, as only Windows Console needs a conversion to UTF-16.
84    let mut mode = 0;
85    unsafe { c::GetConsoleMode(handle, &mut mode) != 0 }
86}
87
88/// Returns true if the attached console's code page is currently UTF-8.
89#[cfg(not(target_vendor = "win7"))]
90fn is_utf8_console() -> bool {
91    unsafe { c::GetConsoleOutputCP() == c::CP_UTF8 }
92}
93
/// Always reports a non-UTF-8 console on Windows 7 targets.
#[cfg(target_vendor = "win7")]
fn is_utf8_console() -> bool {
    // On Windows 7, `WriteFile` on a console handle reports the number of UTF-16 code units
    // written rather than the number of bytes consumed from the input string. Claiming that
    // the console is never UTF-8 routes every console write through the WriteConsole fallback.
    false
}
101
102fn write(handle_id: u32, data: &[u8], incomplete_utf8: &mut IncompleteUtf8) -> io::Result<usize> {
103    if data.is_empty() {
104        return Ok(0);
105    }
106
107    let handle = get_handle(handle_id)?;
108    if !is_console(handle) || is_utf8_console() {
109        unsafe {
110            let handle = Handle::from_raw_handle(handle);
111            let ret = handle.write(data);
112            let _ = handle.into_raw_handle(); // Don't close the handle
113            return ret;
114        }
115    } else {
116        write_console_utf16(data, incomplete_utf8, handle)
117    }
118}
119
120fn write_console_utf16(
121    data: &[u8],
122    incomplete_utf8: &mut IncompleteUtf8,
123    handle: c::HANDLE,
124) -> io::Result<usize> {
125    if incomplete_utf8.len > 0 {
126        assert!(
127            incomplete_utf8.len < 4,
128            "Unexpected number of bytes for incomplete UTF-8 codepoint."
129        );
130        if data[0] >> 6 != 0b10 {
131            // not a continuation byte - reject
132            incomplete_utf8.len = 0;
133            return Err(io::const_error!(
134                io::ErrorKind::InvalidData,
135                "Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
136            ));
137        }
138        incomplete_utf8.bytes[incomplete_utf8.len as usize] = data[0];
139        incomplete_utf8.len += 1;
140        let char_width = utf8_char_width(incomplete_utf8.bytes[0]);
141        if (incomplete_utf8.len as usize) < char_width {
142            // more bytes needed
143            return Ok(1);
144        }
145        let s = str::from_utf8(&incomplete_utf8.bytes[0..incomplete_utf8.len as usize]);
146        incomplete_utf8.len = 0;
147        match s {
148            Ok(s) => {
149                assert_eq!(char_width, s.len());
150                let written = write_valid_utf8_to_console(handle, s)?;
151                assert_eq!(written, s.len()); // guaranteed by write_valid_utf8_to_console() for single codepoint writes
152                return Ok(1);
153            }
154            Err(_) => {
155                return Err(io::const_error!(
156                    io::ErrorKind::InvalidData,
157                    "Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
158                ));
159            }
160        }
161    }
162
163    // As the console is meant for presenting text, we assume bytes of `data` are encoded as UTF-8,
164    // which needs to be encoded as UTF-16.
165    //
166    // If the data is not valid UTF-8 we write out as many bytes as are valid.
167    // If the first byte is invalid it is either first byte of a multi-byte sequence but the
168    // provided byte slice is too short or it is the first byte of an invalid multi-byte sequence.
169    let len = cmp::min(data.len(), MAX_BUFFER_SIZE / 2);
170    let utf8 = match str::from_utf8(&data[..len]) {
171        Ok(s) => s,
172        Err(ref e) if e.valid_up_to() == 0 => {
173            let first_byte_char_width = utf8_char_width(data[0]);
174            if first_byte_char_width > 1 && data.len() < first_byte_char_width {
175                incomplete_utf8.bytes[0] = data[0];
176                incomplete_utf8.len = 1;
177                return Ok(1);
178            } else {
179                return Err(io::const_error!(
180                    io::ErrorKind::InvalidData,
181                    "Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
182                ));
183            }
184        }
185        Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
186    };
187
188    write_valid_utf8_to_console(handle, utf8)
189}
190
191fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usize> {
192    debug_assert!(!utf8.is_empty());
193
194    let mut utf16 = [MaybeUninit::<u16>::uninit(); MAX_BUFFER_SIZE / 2];
195    let utf8 = &utf8[..utf8.floor_char_boundary(utf16.len())];
196
197    let utf16: &[u16] = unsafe {
198        // Note that this theoretically checks validity twice in the (most common) case
199        // where the underlying byte sequence is valid utf-8 (given the check in `write()`).
200        let result = c::MultiByteToWideChar(
201            c::CP_UTF8,                          // CodePage
202            c::MB_ERR_INVALID_CHARS,             // dwFlags
203            utf8.as_ptr(),                       // lpMultiByteStr
204            utf8.len() as i32,                   // cbMultiByte
205            utf16.as_mut_ptr() as *mut c::WCHAR, // lpWideCharStr
206            utf16.len() as i32,                  // cchWideChar
207        );
208        assert!(result != 0, "Unexpected error in MultiByteToWideChar");
209
210        // Safety: MultiByteToWideChar initializes `result` values.
211        utf16[..result as usize].assume_init_ref()
212    };
213
214    let mut written = write_u16s(handle, utf16)?;
215
216    // Figure out how many bytes of as UTF-8 were written away as UTF-16.
217    if written == utf16.len() {
218        Ok(utf8.len())
219    } else {
220        // Make sure we didn't end up writing only half of a surrogate pair (even though the chance
221        // is tiny). Because it is not possible for user code to re-slice `data` in such a way that
222        // a missing surrogate can be produced (and also because of the UTF-8 validation above),
223        // write the missing surrogate out now.
224        // Buffering it would mean we have to lie about the number of bytes written.
225        let first_code_unit_remaining = utf16[written];
226        if matches!(first_code_unit_remaining, 0xDCEE..=0xDFFF) {
227            // low surrogate
228            // We just hope this works, and give up otherwise
229            let _ = write_u16s(handle, &utf16[written..written + 1]);
230            written += 1;
231        }
232        // Calculate the number of bytes of `utf8` that were actually written.
233        let mut count = 0;
234        for ch in utf16[..written].iter() {
235            count += match ch {
236                0x0000..=0x007F => 1,
237                0x0080..=0x07FF => 2,
238                0xDCEE..=0xDFFF => 1, // Low surrogate. We already counted 3 bytes for the other.
239                _ => 3,
240            };
241        }
242        debug_assert!(String::from_utf16(&utf16[..written]).unwrap() == utf8[..count]);
243        Ok(count)
244    }
245}
246
247fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> {
248    debug_assert!(data.len() < u32::MAX as usize);
249    let mut written = 0;
250    cvt(unsafe {
251        c::WriteConsoleW(handle, data.as_ptr(), data.len() as u32, &mut written, ptr::null_mut())
252    })?;
253    Ok(written as usize)
254}
255
256impl Stdin {
257    pub const fn new() -> Stdin {
258        Stdin { surrogate: 0, incomplete_utf8: IncompleteUtf8::new() }
259    }
260}
261
262impl io::Read for Stdin {
263    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
264        let handle = get_handle(c::STD_INPUT_HANDLE)?;
265        if !is_console(handle) {
266            unsafe {
267                let handle = Handle::from_raw_handle(handle);
268                let ret = handle.read(buf);
269                let _ = handle.into_raw_handle(); // Don't close the handle
270                return ret;
271            }
272        }
273
274        // If there are bytes in the incomplete utf-8, start with those.
275        // (No-op if there is nothing in the buffer.)
276        let mut bytes_copied = self.incomplete_utf8.read(buf);
277
278        if bytes_copied == buf.len() {
279            Ok(bytes_copied)
280        } else if buf.len() - bytes_copied < 4 {
281            // Not enough space to get a UTF-8 byte. We will use the incomplete UTF8.
282            let mut utf16_buf = [MaybeUninit::new(0); 1];
283            // Read one u16 character.
284            let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, 1, &mut self.surrogate)?;
285            // Read bytes, using the (now-empty) self.incomplete_utf8 as extra space.
286            let read_bytes = utf16_to_utf8(
287                unsafe { utf16_buf[..read].assume_init_ref() },
288                &mut self.incomplete_utf8.bytes,
289            )?;
290
291            // Read in the bytes from incomplete_utf8 until the buffer is full.
292            self.incomplete_utf8.len = read_bytes as u8;
293            // No-op if no bytes.
294            bytes_copied += self.incomplete_utf8.read(&mut buf[bytes_copied..]);
295            Ok(bytes_copied)
296        } else {
297            let mut utf16_buf = [MaybeUninit::<u16>::uninit(); MAX_BUFFER_SIZE / 2];
298
299            // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
300            // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
301            // lost.
302            let amount = cmp::min(buf.len() / 3, utf16_buf.len());
303            let read =
304                read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?;
305            // Safety `read_u16s_fixup_surrogates` returns the number of items
306            // initialized.
307            let utf16s = unsafe { utf16_buf[..read].assume_init_ref() };
308            match utf16_to_utf8(utf16s, buf) {
309                Ok(value) => return Ok(bytes_copied + value),
310                Err(e) => return Err(e),
311            }
312        }
313    }
314}
315
316// We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our
317// buffer size, and keep it around for the next read hoping to put them together.
318// This is a best effort, and might not work if we are not the only reader on Stdin.
319fn read_u16s_fixup_surrogates(
320    handle: c::HANDLE,
321    buf: &mut [MaybeUninit<u16>],
322    mut amount: usize,
323    surrogate: &mut u16,
324) -> io::Result<usize> {
325    // Insert possibly remaining unpaired surrogate from last read.
326    let mut start = 0;
327    if *surrogate != 0 {
328        buf[0] = MaybeUninit::new(*surrogate);
329        *surrogate = 0;
330        start = 1;
331        if amount == 1 {
332            // Special case: `Stdin::read` guarantees we can always read at least one new `u16`
333            // and combine it with an unpaired surrogate, because the UTF-8 buffer is at least
334            // 4 bytes.
335            amount = 2;
336        }
337    }
338    let mut amount = read_u16s(handle, &mut buf[start..amount])? + start;
339
340    if amount > 0 {
341        // Safety: The returned `amount` is the number of values initialized,
342        // and it is not 0, so we know that `buf[amount - 1]` have been
343        // initialized.
344        let last_char = unsafe { buf[amount - 1].assume_init() };
345        if matches!(last_char, 0xD800..=0xDBFF) {
346            // high surrogate
347            *surrogate = last_char;
348            amount -= 1;
349        }
350    }
351    Ok(amount)
352}
353
354// Returns `Ok(n)` if it initialized `n` values in `buf`.
355fn read_u16s(handle: c::HANDLE, buf: &mut [MaybeUninit<u16>]) -> io::Result<usize> {
356    // Configure the `pInputControl` parameter to not only return on `\r\n` but also Ctrl-Z, the
357    // traditional DOS method to indicate end of character stream / user input (SUB).
358    // See #38274 and https://stackoverflow.com/questions/43836040/win-api-readconsole.
359    const CTRL_Z: u16 = 0x1A;
360    const CTRL_Z_MASK: u32 = 1 << CTRL_Z;
361    let input_control = c::CONSOLE_READCONSOLE_CONTROL {
362        nLength: size_of::<c::CONSOLE_READCONSOLE_CONTROL>() as u32,
363        nInitialChars: 0,
364        dwCtrlWakeupMask: CTRL_Z_MASK,
365        dwControlKeyState: 0,
366    };
367
368    let mut amount = 0;
369    loop {
370        cvt(unsafe {
371            c::SetLastError(0);
372            c::ReadConsoleW(
373                handle,
374                buf.as_mut_ptr() as *mut core::ffi::c_void,
375                buf.len() as u32,
376                &mut amount,
377                &input_control,
378            )
379        })?;
380
381        // ReadConsoleW returns success with ERROR_OPERATION_ABORTED for Ctrl-C or Ctrl-Break.
382        // Explicitly check for that case here and try again.
383        if amount == 0 && api::get_last_error() == WinError::OPERATION_ABORTED {
384            continue;
385        }
386        break;
387    }
388    // Safety: if `amount > 0`, then that many bytes were written, so
389    // `buf[amount as usize - 1]` has been initialized.
390    if amount > 0 && unsafe { buf[amount as usize - 1].assume_init() } == CTRL_Z {
391        amount -= 1;
392    }
393    Ok(amount as usize)
394}
395
396fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
397    debug_assert!(utf16.len() <= i32::MAX as usize);
398    debug_assert!(utf8.len() <= i32::MAX as usize);
399
400    if utf16.is_empty() {
401        return Ok(0);
402    }
403
404    let result = unsafe {
405        c::WideCharToMultiByte(
406            c::CP_UTF8,              // CodePage
407            c::WC_ERR_INVALID_CHARS, // dwFlags
408            utf16.as_ptr(),          // lpWideCharStr
409            utf16.len() as i32,      // cchWideChar
410            utf8.as_mut_ptr(),       // lpMultiByteStr
411            utf8.len() as i32,       // cbMultiByte
412            ptr::null(),             // lpDefaultChar
413            ptr::null_mut(),         // lpUsedDefaultChar
414        )
415    };
416    if result == 0 {
417        // We can't really do any better than forget all data and return an error.
418        Err(io::const_error!(
419            io::ErrorKind::InvalidData,
420            "Windows stdin in console mode does not support non-UTF-16 input; \
421            encountered unpaired surrogate",
422        ))
423    } else {
424        Ok(result as usize)
425    }
426}
427
428impl IncompleteUtf8 {
429    pub const fn new() -> IncompleteUtf8 {
430        IncompleteUtf8 { bytes: [0; MAX_LEN_UTF8], len: 0 }
431    }
432}
433
434impl Stdout {
435    pub const fn new() -> Stdout {
436        Stdout { incomplete_utf8: IncompleteUtf8::new() }
437    }
438}
439
440impl io::Write for Stdout {
441    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
442        write(c::STD_OUTPUT_HANDLE, buf, &mut self.incomplete_utf8)
443    }
444
445    fn flush(&mut self) -> io::Result<()> {
446        Ok(())
447    }
448}
449
450impl Stderr {
451    pub const fn new() -> Stderr {
452        Stderr { incomplete_utf8: IncompleteUtf8::new() }
453    }
454}
455
456impl io::Write for Stderr {
457    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
458        write(c::STD_ERROR_HANDLE, buf, &mut self.incomplete_utf8)
459    }
460
461    fn flush(&mut self) -> io::Result<()> {
462        Ok(())
463    }
464}
465
466pub fn is_ebadf(err: &io::Error) -> bool {
467    err.raw_os_error() == Some(c::ERROR_INVALID_HANDLE as i32)
468}
469
470pub fn panic_output() -> Option<impl io::Write> {
471    Some(Stderr::new())
472}