silo:std.codepoint

Source src/silo:std.codepoint

1##! Codepoint operations — Unicode scalar-value methods on `:impl Codepoint`.
2##!
3##! Covers classification, case conversion, integer and string
4##! round-trips, digit-value lookup, UTF-8 / UTF-16 length queries,
5##! Rust-style escape renderings, and the conversion / parse error unions.
6
7:use
8  :open core AnyInt Bool Codepoint Option None Some Str
9:end
10
11# si[impl codepoint.ascii]
12## 7-bit ASCII code values; note this aliases the integer range `Int 0..128`, not a `Codepoint` type.
13:alias(pub) Ascii (Int 0..128)
14
15# si[impl codepoint.error.try-from]
16## Error produced by `(Codepoint .try-from)` when an integer does not represent a valid Unicode scalar
17## value. Both variants carry an `AnyInt` (presumably the rejected input; `Surrogate` presumably 0xD800..0xDFFF — confirm).
18:union(pub) CodepointTryFromError
19  | OutOfRange AnyInt
20  | Surrogate AnyInt
21:end
22
23# si[impl codepoint.error.parse]
24## Error produced when parsing a `Codepoint` from a `Str`; payload-free variants (presumably: empty input, more than one codepoint, invalid scalar — confirm).
25:union(pub) ParseCodepointError
26  | Empty
27  | TooManyCodepoints
28  | InvalidScalar
29:end
30
31:impl Codepoint
32  # si[impl codepoint.is-alpha]
33  ## True iff the codepoint is Unicode-alphabetic (presumably the Alphabetic property — confirm); delegates to `codepoint-is-alpha`.
34  .is-alpha ( Codepoint -> Bool )
35    codepoint-is-alpha ;
36
37  # si[impl codepoint.is-digit]
38  ## True iff the codepoint is a Unicode decimal digit (general category Nd); delegates to `codepoint-is-digit`.
39  .is-digit ( Codepoint -> Bool )
40    codepoint-is-digit ;
41
42  # si[impl codepoint.is-alphanumeric]
43  ## True iff the codepoint is alphabetic or a digit. NOTE(review): confirm whether "digit" means Nd (`.is-digit`) or the broader `.is-numeric`.
44  .is-alphanumeric ( Codepoint -> Bool )
45    codepoint-is-alphanumeric ;
46
47  # si[impl codepoint.is-upper]
48  ## True iff the codepoint is an uppercase letter; delegates to `codepoint-is-upper`.
49  .is-upper ( Codepoint -> Bool )
50    codepoint-is-upper ;
51
52  # si[impl codepoint.is-lower]
53  ## True iff the codepoint is a lowercase letter; delegates to `codepoint-is-lower`.
54  .is-lower ( Codepoint -> Bool )
55    codepoint-is-lower ;
56
57  # si[impl codepoint.is-whitespace]
58  ## True iff the codepoint is whitespace (presumably Unicode White_Space — confirm); delegates to `codepoint-is-whitespace`.
59  .is-whitespace ( Codepoint -> Bool )
60    codepoint-is-whitespace ;
61
62  # si[impl codepoint.is-ascii]
63  ## True iff the codepoint is in the 0..128 ASCII range, i.e. U+0000 through U+007F.
64  .is-ascii ( Codepoint -> Bool )
65    codepoint-is-ascii ;
66
67  # si[impl codepoint.is-ascii-digit]
68  ## True iff the codepoint is one of the ASCII digits `'0'` .. `'9'`.
69  .is-ascii-digit ( Codepoint -> Bool )
70    codepoint-is-ascii-digit ;
71
72  # si[impl codepoint.is-ascii-alpha]
73  ## True iff the codepoint is an ASCII letter: `'A'` .. `'Z'` or `'a'` .. `'z'`.
74  .is-ascii-alpha ( Codepoint -> Bool )
75    codepoint-is-ascii-alpha ;
76
77  # si[impl codepoint.is-control]
78  ## True iff the codepoint is a Unicode control character (general category Cc).
79  .is-control ( Codepoint -> Bool )
80    codepoint-is-control ;
81
82  # si[impl codepoint.is-numeric]
83  ## True iff the codepoint has Numeric_Type != None (broader than the Nd test in `.is-digit`).
84  .is-numeric ( Codepoint -> Bool )
85    codepoint-is-numeric ;
86
87  # si[impl codepoint.to-upper]
88  ## Unicode uppercase equivalent; only the first codepoint of the mapping is returned, so multi-codepoint mappings are truncated.
89  .to-upper ( Codepoint -> Codepoint )
90    codepoint-to-upper ;
91
92  # si[impl codepoint.to-lower]
93  ## Unicode lowercase equivalent; only the first codepoint of the mapping is returned, so multi-codepoint mappings are truncated.
94  .to-lower ( Codepoint -> Codepoint )
95    codepoint-to-lower ;
96
97  # si[impl codepoint.to-int]
98  ## Integer codepoint value (0 .. 0x10FFFF). Delegates to `char-to-int` (a `char-` word, unlike the `codepoint-` words used elsewhere in this impl).
99  .to-int ( Codepoint -> AnyInt )
100    char-to-int ;
101
102  # si[impl codepoint.to-str]
103  ## Single-codepoint `Str`. Delegates to `char-to-str` (a `char-` word, unlike the `codepoint-` words used elsewhere in this impl).
104  .to-str ( Codepoint -> Str )
105    char-to-str ;
106
107  # si[impl codepoint.len-utf8]
108  ## Number of bytes needed to encode the codepoint in UTF-8 (1, 2, 3, or 4).
109  .len-utf8 ( Codepoint -> AnyInt )
110    codepoint-len-utf8 ;
111
112  # si[impl codepoint.len-utf16]
113  ## Number of UTF-16 code units needed to encode the codepoint (1, or 2 for a surrogate pair).
114  .len-utf16 ( Codepoint -> AnyInt )
115    codepoint-len-utf16 ;
116
117  # si[impl codepoint.escape-default]
118  ## Rust-style escape rendering suitable for string literals, returned as a `Str`.
119  .escape-default ( Codepoint -> Str )
120    codepoint-escape-default ;
121
122  # si[impl codepoint.escape-unicode]
123  ## `\u{XXXX}` uppercase hex escape form. NOTE(review): Rust's `char::escape_unicode` emits lowercase hex — confirm "uppercase" is intended.
124  .escape-unicode ( Codepoint -> Str )
125    codepoint-escape-unicode ;
126
127  # si[impl codepoint.escape-debug]
128  ## Rust-style debug escape (leaves printable Unicode unescaped), returned as a `Str`.
129  .escape-debug ( Codepoint -> Str )
130    codepoint-escape-debug ;
131
132  # si[impl codepoint.to-digit]
133  ## Digit value of the codepoint in the given radix (2..=36), or `None`; the `AnyInt` input is the radix. NOTE(review): behaviour for a radix outside 2..=36 is not visible here.
134  .to-digit ( AnyInt Codepoint -> (Option AnyInt) )
135    codepoint-to-digit ;
136:end