aboutsummaryrefslogtreecommitdiff
path: root/uri.go
diff options
context:
space:
mode:
authorGravatar Erik Dubbelboer <erik@dubbelboer.com> 2021-10-01 13:33:42 +0200
committerGravatar Erik Dubbelboer <erik@dubbelboer.com> 2021-10-01 13:38:31 +0200
commit542a203b42a3cb722244e1ef74fda0804a926383 (patch)
tree5690b21323c3ae6a779603e4ac3a21c1bada9a85 /uri.go
parentfeat: improve TCPDialer by `sync.map` instead of `map+mutex` (#1106) (diff)
downloadfasthttp-542a203b42a3cb722244e1ef74fda0804a926383.tar.gz
fasthttp-542a203b42a3cb722244e1ef74fda0804a926383.tar.bz2
fasthttp-542a203b42a3cb722244e1ef74fda0804a926383.zip
Properly parse URI
Use URI parse code based on net/url to validate hostnames.
Diffstat (limited to 'uri.go')
-rw-r--r--uri.go221
1 files changed, 221 insertions, 0 deletions
diff --git a/uri.go b/uri.go
index b57b1f7..a2ca6a5 100644
--- a/uri.go
+++ b/uri.go
@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
+ "strconv"
"sync"
)
@@ -298,6 +299,10 @@ func (u *URI) parse(host, uri []byte, isTLS bool) error {
}
}
+ host, err := parseHost(host)
+ if err != nil {
+ return err
+ }
u.host = append(u.host, host...)
lowercaseBytes(u.host)
@@ -338,6 +343,222 @@ func (u *URI) parse(host, uri []byte, isTLS bool) error {
return nil
}
+// parseHost parses host as an authority without user
+// information. That is, as host[:port].
+//
+// Based on https://github.com/golang/go/blob/8ac5cbe05d61df0a7a7c9a38ff33305d4dcfea32/src/net/url/url.go#L619
+func parseHost(host []byte) ([]byte, error) {
+ if len(host) > 0 && host[0] == '[' {
+ // Parse an IP-Literal in RFC 3986 and RFC 6874.
+ // E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80".
+ i := bytes.LastIndexByte(host, ']')
+ if i < 0 {
+ return nil, errors.New("missing ']' in host")
+ }
+ colonPort := host[i+1:]
+ if !validOptionalPort(colonPort) {
+ return nil, fmt.Errorf("invalid port %q after host", colonPort)
+ }
+
+ // RFC 6874 defines that %25 (%-encoded percent) introduces
+ // the zone identifier, and the zone identifier can use basically
+ // any %-encoding it likes. That's different from the host, which
+ // can only %-encode non-ASCII bytes.
+ // We do impose some restrictions on the zone, to avoid stupidity
+ // like newlines.
+ zone := bytes.Index(host[:i], []byte("%25"))
+ if zone >= 0 {
+ host1, err := unescape(host[:zone], encodeHost)
+ if err != nil {
+ return nil, err
+ }
+ host2, err := unescape(host[zone:i], encodeZone)
+ if err != nil {
+ return nil, err
+ }
+ host3, err := unescape(host[i:], encodeHost)
+ if err != nil {
+ return nil, err
+ }
+ return append(host1, append(host2, host3...)...), nil
+ }
+ } else if i := bytes.LastIndexByte(host, ':'); i != -1 {
+ colonPort := host[i:]
+ if !validOptionalPort(colonPort) {
+ return nil, fmt.Errorf("invalid port %q after host", colonPort)
+ }
+ }
+
+ var err error
+ if host, err = unescape(host, encodeHost); err != nil {
+ return nil, err
+ }
+ return host, nil
+}
+
// encoding identifies which part of a URL is being processed, and thereby
// which escaping rules unescape and shouldEscape apply.
type encoding int

const (
	// encodeHost selects the rules for the host[:port] part.
	encodeHost encoding = 1
	// encodeZone selects the rules for an IPv6 zone identifier.
	encodeZone encoding = 2
)
+
// EscapeError is the error returned for an invalid %-escape; its value is
// the offending escape text.
type EscapeError string

// Error implements the error interface, quoting the offending escape.
func (e EscapeError) Error() string {
	msg := []byte("invalid URL escape ")
	msg = strconv.AppendQuote(msg, string(e))
	return string(msg)
}
+
// InvalidHostError is the error returned for a byte that is not allowed in
// a host name; its value is the offending text.
type InvalidHostError string

// Error implements the error interface, quoting the offending character.
func (e InvalidHostError) Error() string {
	msg := []byte("invalid character ")
	msg = strconv.AppendQuote(msg, string(e))
	msg = append(msg, " in host name"...)
	return string(msg)
}
+
+// unescape unescapes a string; the mode specifies
+// which section of the URL string is being unescaped.
+//
+// Based on https://github.com/golang/go/blob/8ac5cbe05d61df0a7a7c9a38ff33305d4dcfea32/src/net/url/url.go#L199
+func unescape(s []byte, mode encoding) ([]byte, error) {
+ // Count %, check that they're well-formed.
+ n := 0
+ for i := 0; i < len(s); {
+ switch s[i] {
+ case '%':
+ n++
+ if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
+ s = s[i:]
+ if len(s) > 3 {
+ s = s[:3]
+ }
+ return nil, EscapeError(s)
+ }
+ // Per https://tools.ietf.org/html/rfc3986#page-21
+ // in the host component %-encoding can only be used
+ // for non-ASCII bytes.
+ // But https://tools.ietf.org/html/rfc6874#section-2
+ // introduces %25 being allowed to escape a percent sign
+ // in IPv6 scoped-address literals. Yay.
+ if mode == encodeHost && unhex(s[i+1]) < 8 && !bytes.Equal(s[i:i+3], []byte("%25")) {
+ return nil, EscapeError(s[i : i+3])
+ }
+ if mode == encodeZone {
+ // RFC 6874 says basically "anything goes" for zone identifiers
+ // and that even non-ASCII can be redundantly escaped,
+ // but it seems prudent to restrict %-escaped bytes here to those
+ // that are valid host name bytes in their unescaped form.
+ // That is, you can use escaping in the zone identifier but not
+ // to introduce bytes you couldn't just write directly.
+ // But Windows puts spaces here! Yay.
+ v := unhex(s[i+1])<<4 | unhex(s[i+2])
+ if !bytes.Equal(s[i:i+3], []byte("%25")) && v != ' ' && shouldEscape(v, encodeHost) {
+ return nil, EscapeError(s[i : i+3])
+ }
+ }
+ i += 3
+ default:
+ if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
+ return nil, InvalidHostError(s[i : i+1])
+ }
+ i++
+ }
+ }
+
+ if n == 0 {
+ return s, nil
+ }
+
+ t := s[:0]
+ for i := 0; i < len(s); i++ {
+ switch s[i] {
+ case '%':
+ t = append(t, unhex(s[i+1])<<4|unhex(s[i+2]))
+ i += 2
+ default:
+ t = append(t, s[i])
+ }
+ }
+ return t, nil
+}
+
+// Return true if the specified character should be escaped when
+// appearing in a URL string, according to RFC 3986.
+//
+// Please be informed that for now shouldEscape does not check all
+// reserved characters correctly. See golang.org/issue/5684.
+//
+// Based on https://github.com/golang/go/blob/8ac5cbe05d61df0a7a7c9a38ff33305d4dcfea32/src/net/url/url.go#L100
+func shouldEscape(c byte, mode encoding) bool {
+ // §2.3 Unreserved characters (alphanum)
+ if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
+ return false
+ }
+
+ if mode == encodeHost || mode == encodeZone {
+ // §3.2.2 Host allows
+ // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ // as part of reg-name.
+ // We add : because we include :port as part of host.
+ // We add [ ] because we include [ipv6]:port as part of host.
+ // We add < > because they're the only characters left that
+ // we could possibly allow, and Parse will reject them if we
+ // escape them (because hosts can't use %-encoding for
+ // ASCII bytes).
+ switch c {
+ case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
+ return false
+ }
+ }
+
+ if c == '-' || c == '_' || c == '.' || c == '~' { // §2.3 Unreserved characters (mark)
+ return false
+ }
+
+ // Everything else must be escaped.
+ return true
+}
+
// ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func ishex(c byte) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	// Setting bit 0x20 maps 'A'-'F' onto 'a'-'f'; no other byte lands in
	// that range.
	lower := c | 0x20
	return lower >= 'a' && lower <= 'f'
}
+
// unhex returns the numeric value of the hexadecimal digit c.
// It returns 0 if c is not a hexadecimal digit.
func unhex(c byte) byte {
	if c >= '0' && c <= '9' {
		return c - '0'
	}
	// Fold 'A'-'F' onto 'a'-'f' so both cases share one range check.
	if lower := c | 0x20; lower >= 'a' && lower <= 'f' {
		return lower - 'a' + 10
	}
	return 0
}
+
// validOptionalPort reports whether port is empty or consists of a ':'
// followed by zero or more ASCII digits, i.e. matches /^(:\d*)?$/.
func validOptionalPort(port []byte) bool {
	switch {
	case len(port) == 0:
		return true
	case port[0] != ':':
		return false
	}
	for i := 1; i < len(port); i++ {
		if c := port[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
+
func normalizePath(dst, src []byte) []byte {
dst = dst[:0]
dst = addLeadingSlash(dst, src)