1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
use crate::ffi::OsStr;
use crate::mem;
use crate::path::Prefix;

#[cfg(test)]
mod tests;

pub const MAIN_SEP_STR: &str = "\\";
pub const MAIN_SEP: char = '\\';

/// # Safety
///
/// `bytes` must be a valid wtf8 encoded slice
#[inline]
unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr {
    // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8,
    // which is compatible with &[u8].
    mem::transmute(bytes)
}

#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    b == b'/' || b == b'\\'
}

#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    b == b'\\'
}

pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
    use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};

    if let Some(path) = strip_prefix(path, r"\\") {
        // \\
        if let Some(path) = strip_prefix(path, r"?\") {
            // \\?\
            if let Some(path) = strip_prefix(path, r"UNC\") {
                // \\?\UNC\server\share

                let (server, path) = parse_next_component(path, true);
                let (share, _) = parse_next_component(path, true);

                Some(VerbatimUNC(server, share))
            } else {
                let (prefix, _) = parse_next_component(path, true);

                // in verbatim paths only recognize an exact drive prefix
                if let Some(drive) = parse_drive_exact(prefix) {
                    // \\?\C:
                    Some(VerbatimDisk(drive))
                } else {
                    // \\?\prefix
                    Some(Verbatim(prefix))
                }
            }
        } else if let Some(path) = strip_prefix(path, r".\") {
            // \\.\COM42
            let (prefix, _) = parse_next_component(path, false);
            Some(DeviceNS(prefix))
        } else {
            let (server, path) = parse_next_component(path, false);
            let (share, _) = parse_next_component(path, false);

            if !server.is_empty() && !share.is_empty() {
                // \\server\share
                Some(UNC(server, share))
            } else {
                // no valid prefix beginning with "\\" recognized
                None
            }
        }
    } else if let Some(drive) = parse_drive(path) {
        // C:
        Some(Disk(drive))
    } else {
        // no prefix
        None
    }
}

// Parses a drive prefix, e.g. "C:" and "C:\whatever"
fn parse_drive(prefix: &OsStr) -> Option<u8> {
    // In most DOS systems, it is not possible to have more than 26 drive letters.
    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
    fn is_valid_drive_letter(drive: &u8) -> bool {
        drive.is_ascii_alphabetic()
    }

    match prefix.bytes() {
        [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
        _ => None,
    }
}

// Parses a drive prefix exactly, e.g. "C:"
fn parse_drive_exact(prefix: &OsStr) -> Option<u8> {
    // only parse two bytes: the drive letter and the drive separator
    if prefix.len() == 2 { parse_drive(prefix) } else { None }
}

fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> {
    // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]`
    // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice.
    match path.bytes().strip_prefix(prefix.as_bytes()) {
        Some(path) => unsafe { Some(bytes_as_os_str(path)) },
        None => None,
    }
}

// Parse the next path component.
//
// Returns the next component and the rest of the path excluding the component and separator.
// Does not recognize `/` as a separator character if `verbatim` is true.
fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
    let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };

    match path.bytes().iter().position(|&x| separator(x)) {
        Some(separator_start) => {
            let mut separator_end = separator_start + 1;

            // a series of multiple separator characters is treated as a single separator,
            // except in verbatim paths
            while !verbatim && separator_end < path.len() && separator(path.bytes()[separator_end])
            {
                separator_end += 1;
            }

            let component = &path.bytes()[..separator_start];

            // Panic safe
            // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
            let path = &path.bytes()[separator_end..];

            // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
            // is encoded in a single byte, therefore `bytes[separator_start]` and
            // `bytes[separator_end]` must be code point boundaries and thus
            // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
            unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) }
        }
        None => (path, OsStr::new("")),
    }
}