RFC 3987: IRI syntax
IRI is a superset of the URI, adding support for Unicode characters.
- Main Article
- URI (Uniform Resource Identifier)
- Specification
- RFC 3987: Internationalized Resource Identifiers (IRIs)
- Imported
Defined rules
IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]
ihier-part = "//" iauthority ipath-abempty
/ ipath-absolute
/ ipath-rootless
/ ipath-empty
IRI-reference = IRI / irelative-ref
absolute-IRI = scheme ":" ihier-part [ "?" iquery ]
irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]
irelative-part = "//" iauthority ipath-abempty
/ ipath-absolute
/ ipath-noscheme
/ ipath-empty
iauthority = [ iuserinfo "@" ] ihost [ ":" port ]
iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" )
ihost = IP-literal / IPv4address / ireg-name
ireg-name = *( iunreserved / pct-encoded / sub-delims )
ipath = ipath-abempty ; begins with "/" or is empty
/ ipath-absolute ; begins with "/" but not "//"
/ ipath-noscheme ; begins with a non-colon segment
/ ipath-rootless ; begins with a segment
/ ipath-empty ; zero characters
ipath-abempty = *( "/" isegment )
ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ]
ipath-noscheme = isegment-nz-nc *( "/" isegment )
ipath-rootless = isegment-nz *( "/" isegment )
ipath-empty = 0<ipchar>
isegment = *ipchar
isegment-nz = 1*ipchar
isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
/ "@" )
; non-zero-length segment without any colon ":"
ipchar = iunreserved / pct-encoded / sub-delims / ":"
/ "@"
iquery = *( ipchar / iprivate / "/" / "?" )
ifragment = *( ipchar / "/" / "?" )
iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
/ %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
/ %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
/ %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
/ %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
/ %xD0000-DFFFD / %xE1000-EFFFD
iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD