ws/internal/uri/parser.go

320 lines
6.5 KiB
Go

package uri
import (
"fmt"
"strings"
)
// === WILDCARDS ===
//
// The star '*' -> matches 0 or 1 slash-bounded string
// The multi star '**' -> matches 0 or more slash-separated strings
// The dot '.' -> matches 1 slash-bounded string
// The multi dot '..' -> matches 1 or more slash-separated strings
//
// === SCHEME POLICY ===
//
// - The last '/' is optional
// - Any '**' at the very end will match anything that starts with the given prefix
//
// === LIMITATIONS ===
//
// - A scheme must begin with '/'
// - A scheme cannot contain anything other than a STRING or WILDCARD between 2 '/' separators
// - A scheme STRING cannot contain the '/' character
// - A scheme STRING containing '*' or '.' characters will be treated as a STRING only
// - A maximum of 16 slash-separated matchers (STRING or WILDCARD) are allowed
//
// maxMatch is the component limit checked by FromString; it is also used as
// the initial capacity of the scheme slices allocated in this file.
const maxMatch = 16
// matcher represents a single URI matcher: either a literal STRING
// (pat is set) or a WILDCARD (pat is empty).
type matcher struct {
pat string // literal pattern to match, stored with its leading '/' (empty if wildcard)
req bool // whether at least one match is required
mul bool // whether multiple matches are allowed
buf []string // content captured by the last wildcard matching (reset on each matching)
}
// Scheme represents an URI scheme: the ordered list of matchers
// (literal strings and wildcards) built from a pattern.
type Scheme []*matcher
// FromString builds an URI scheme from a string pattern.
//
// The pattern must start with '/' and may hold at most maxMatch non-empty
// slash-separated components (STRING or WILDCARD). It returns the optimised
// scheme, or an error when the pattern is invalid.
func FromString(s string) (*Scheme, error) {
	// a scheme must begin with '/'
	if !strings.HasPrefix(s, "/") {
		return nil, fmt.Errorf("invalid URI; must start with '/'")
	}

	parts := strings.Split(s, "/")

	// Enforce the limit on non-empty parts only: the empty parts produced by
	// a leading, trailing or doubled '/' are skipped when the scheme is built,
	// so they must not count against the limit.
	count := 0
	for _, p := range parts {
		if len(p) > 0 {
			count++
		}
	}
	if count > maxMatch {
		return nil, fmt.Errorf("URI must not exceed %d slash-separated components, got %d", maxMatch, count)
	}

	sch, err := buildScheme(parts)
	if err != nil {
		return nil, err
	}

	opti, err := sch.optimise()
	if err != nil {
		return nil, err
	}

	return &opti, nil
}
// Match reports whether the given URI is matched by the scheme.
//
// An empty scheme matches every URI. Otherwise the literal (STRING) matchers
// are checked first, then the wildcards are checked against what is left of
// the URI.
func (s Scheme) Match(uri string) bool {
	// nothing to check against
	if len(s) == 0 {
		return true
	}

	// wildcards are only evaluated when every literal matched
	cleared, ok := s.matchString(uri)
	return ok && s.matchWildcards(cleared)
}
// GetMatch returns the content captured by the n-th wildcard matcher
// (0-indexed; STRING matchers are not counted).
//
// It returns an error when the scheme holds no wildcard at that index.
func (s Scheme) GetMatch(n uint8) ([]string, error) {
	// There cannot be more wildcards than matchers. The comparison is made on
	// ints so the scheme length is never narrowed to uint8.
	if int(n) >= len(s) {
		return nil, fmt.Errorf("index out of range")
	}

	// walk the matchers, counting wildcards only
	matches := -1
	for _, m := range s {
		if len(m.pat) > 0 {
			continue
		}

		matches++

		// expected index -> return its captured content
		if matches == int(n) {
			return m.buf, nil
		}
	}

	// the scheme holds fewer than n+1 wildcards
	return nil, fmt.Errorf("index out of range (max: %d)", matches)
}
// GetAllMatch returns the content captured by every wildcard matcher, in
// scheme order (STRING matchers are excluded).
func (s Scheme) GetAllMatch() [][]string {
	captured := make([][]string, 0, len(s))

	for i := range s {
		// keep wildcards only: they are the matchers without a pattern
		if s[i].pat == "" {
			captured = append(captured, s[i].buf)
		}
	}

	return captured
}
// buildScheme builds a 'basic' (not yet optimised) scheme from the
// slash-separated parts of a pattern string.
//
// Empty parts are skipped. Every other part becomes one matcher: a wildcard
// for "**", "..", "*" and ".", a literal otherwise. The returned error is
// always nil.
func buildScheme(ss []string) (Scheme, error) {
	built := make(Scheme, 0, maxMatch)

	for _, part := range ss {
		if part == "" {
			continue
		}

		var m *matcher
		switch part {
		case "**": // card: 0, N
			m = &matcher{req: false, mul: true}
		case "..": // card: 1, N
			m = &matcher{req: true, mul: true}
		case "*": // card: 0, 1
			m = &matcher{req: false, mul: false}
		case ".": // card: 1
			m = &matcher{req: true, mul: false}
		default: // card: 1, literal string (stored with its leading '/')
			m = &matcher{pat: fmt.Sprintf("/%s", part), req: true, mul: false}
		}

		built = append(built, m)
	}

	return built, nil
}
// optimise returns an equivalent scheme prepared for matching: consecutive
// literal (STRING) matchers are merged into a single one, e.g. "/a" followed
// by "/b" becomes "/a/b". A merged literal keeps the flags of the first
// literal of its run.
//
// The receiver is left untouched: literals are copied before being merged,
// so the original scheme stays usable. Wildcard matchers are shared between
// the receiver and the result. The returned error is always nil.
func (s Scheme) optimise() (Scheme, error) {
	// nothing to merge
	if len(s) <= 1 {
		return s, nil
	}

	merged := make(Scheme, 0, len(s))

	// whether the last matcher of merged is a literal copy that can be extended
	lastIsLiteral := false

	for _, cur := range s {
		// wildcard: kept as is, and it ends the current run of literals
		if len(cur.pat) == 0 {
			merged = append(merged, cur)
			lastIsLiteral = false
			continue
		}

		// literal following a literal: extend the previous one
		if lastIsLiteral {
			merged[len(merged)-1].pat += cur.pat
			continue
		}

		// first literal of a run: work on a copy so the receiver's matcher
		// is not modified by later merges
		lit := *cur
		merged = append(merged, &lit)
		lastIsLiteral = true
	}

	return merged, nil
}
// matchString checks the literal (STRING) matchers against an URI.
//
// Literals are looked up in scheme order; each one found is replaced in the
// URI (together with the '/' directly following it, if any) by the "\a/"
// marker, so that what remains is the data left for the wildcards.
//
// It returns:
//   - the cleared URI, without STRING data, with one trailing '/' and then
//     one trailing '\a' removed (empty when the match fails)
//   - false when a literal is missing or found too early, true otherwise
//
// An URI that ends up empty (e.g. "" or "/" with a wildcard-only scheme) is
// returned as an empty cleared URI.
//
// NOTE(review): literals are located with a substring search, so a literal
// such as "/a" also seems to be found inside a longer component ("/abc");
// confirm whether this is intended.
func (s Scheme) matchString(uri string) (string, bool) {
	var (
		clearedInput = uri
		minOffset    = 0
	)

	for _, m := range s {
		ls := len(m.pat)

		// ignore wildcards
		if ls == 0 {
			continue
		}

		// get offset in URI (else -1)
		off := strings.Index(clearedInput, m.pat)
		if off < 0 {
			return "", false
		}

		// fail when found before the end of the previous literal
		if off < minOffset {
			return "", false
		}

		// remove the current string (+ its trailing slash, if any) from the
		// URI, separating matches with a '\a' character
		beg := clearedInput[:off]
		end := strings.TrimPrefix(clearedInput[off+ls:], "/")
		clearedInput = beg + "\a/" + end

		// the next literal must start after the '\a' that was just inserted
		minOffset = len(beg) + 1
	}

	// if they exist, remove the trailing '/' then the trailing '\a';
	// TrimSuffix is a no-op on an empty string, where indexing would panic
	clearedInput = strings.TrimSuffix(clearedInput, "/")
	clearedInput = strings.TrimSuffix(clearedInput, "\a")

	return clearedInput, true
}
// matchWildcards checks the WILDCARD (non-string) matchers against a cleared
// URI, i.e. an URI whose literals were replaced by '\a' markers.
// It reports whether the cleared URI matches, and stores what each wildcard
// captured in its buffer for later extraction.
//
// NOTE(review): wildcards left without a segment are not checked here, so a
// required wildcard with no matching segment looks accepted; confirm.
func (s Scheme) matchWildcards(clear string) bool {
	// collect the wildcards (by reference), flushing their buffers on the way
	wild := make(Scheme, 0, len(s))
	for _, m := range s {
		if m.pat != "" {
			continue
		}
		m.buf = nil
		wild = append(wild, m)
	}

	// no wildcard: nothing more to check
	if len(wild) == 0 {
		return true
	}

	// break the uri by '\a' characters; what precedes the first one is dropped
	segments := strings.Split(clear, "\a")[1:]

	for i := 0; i < len(segments); i++ {
		// more segments than wildcards
		if i >= len(wild) {
			return false
		}
		w := wild[i]

		// the first element is dropped: a segment is expected to begin with '/'
		data := strings.Split(segments[i], "/")[1:]

		switch {
		case w.req && len(data) == 0:
			// missing required data
			return false
		case !w.mul && len(data) > 1:
			// got several components for a single-match wildcard
			return false
		}

		w.buf = data
	}

	return true
}