ws/internal/uri/parser.go

361 lines
7.0 KiB
Go
Raw Normal View History

2021-05-14 15:23:33 +00:00
package uri
import (
"fmt"
2018-09-29 12:39:12 +00:00
"strings"
)
2021-05-14 15:23:33 +00:00
// === WILDCARDS ===
//
// The star '*' -> matches 0 or 1 slash-bounded string
// The multi star '**' -> matches 0 or more slash-separated strings
// The dot '.' -> matches exactly 1 slash-bounded string
// The multi dot '..' -> matches 1 or more slash-separated strings
//
// === SCHEME POLICY ===
//
// - The trailing '/' is optional
// - Any '**' at the very end will match anything that starts with the given prefix
//
// === LIMITATIONS ===
//
// - A scheme must begin with '/'
// - A scheme cannot contain anything other than a STRING or a WILDCARD between 2 '/' separators
// - A scheme STRING cannot contain the character '/'
// - A scheme STRING containing '*' or '.' characters will be treated as a plain STRING
// - A maximum of 16 slash-separated matchers (STRING or WILDCARD) is allowed
const maxMatch = 16 // upper bound on the slash-separated matchers of a scheme, checked by FromString
// matcher is one slash-separated element of a URI scheme: either a literal
// STRING (pat is set) or a WILDCARD (pat is empty).
type matcher struct {
pat string // literal pattern to match, prefixed with '/' (empty if wildcard)
req bool // whether at least one match is required
mul bool // whether multiple matches are allowed
buf []string // content captured while matching (only filled for wildcards)
}
// Scheme represents a URI scheme: the ordered list of matchers (literal
// STRINGs and WILDCARDs) a URI is checked against.
type Scheme []*matcher
// FromString builds an URI scheme from a pattern string
func FromString(s string) (*Scheme, error) {
// 1. Manage '/' at the start
if len(s) < 1 || s[0] != '/' {
return nil, fmt.Errorf("invalid URI; must start with '/'")
2021-05-14 15:23:33 +00:00
}
// 2. Split by '/'
parts := strings.Split(s, "/")
// 3. Max exceeded
if len(parts)-2 > maxMatch {
for i, p := range parts {
fmt.Printf("%d: '%s'\n", i, p)
}
return nil, fmt.Errorf("URI must not exceed %d slash-separated components, got %d", maxMatch, len(parts))
}
// 4. Build for each part
sch, err := buildScheme(parts)
if err != nil {
return nil, err
}
// 5. Optimise structure
opti, err := sch.optimise()
if err != nil {
return nil, err
}
return &opti, nil
}
// Match reports whether the given URI is matched by the scheme.
//
// An empty scheme matches everything. Otherwise the literal STRING matchers
// are checked first (matchString), then the WILDCARD matchers are checked
// against the cleared URI it produced (matchWildcards).
func (s Scheme) Match(str string) bool {
	// nothing to check -> match all
	if len(s) == 0 {
		return true
	}

	// literals first; wildcards are only evaluated when literals matched
	cleared, ok := s.matchString(str)
	return ok && s.matchWildcards(cleared)
}
// GetMatch returns the indexed match (excluding string matchers)
func (s Scheme) GetMatch(n uint8) ([]string, error) {
// 1. Index out of range
if n > uint8(len(s)) {
return nil, fmt.Errorf("index out of range")
2021-05-14 15:23:33 +00:00
}
// 2. Iterate to find index (exclude strings)
ni := -1
for _, m := range s {
// ignore strings
if len(m.pat) > 0 {
continue
}
// increment match counter : ni
ni++
// if expected index -> return matches
if uint8(ni) == n {
return m.buf, nil
}
}
// 3. If nothing found -> return empty set
return nil, fmt.Errorf("index out of range (max: %d)", ni)
2021-05-14 15:23:33 +00:00
}
// GetAllMatch returns the captured content of every WILDCARD matcher, in
// scheme order. Literal STRING matchers are skipped, so the i-th entry
// belongs to the i-th wildcard. The result is never nil.
func (s Scheme) GetAllMatch() [][]string {
	all := make([][]string, 0, len(s))

	for i := range s {
		// only wildcards (empty pattern) hold captured content
		if s[i].pat == "" {
			all = append(all, s[i].buf)
		}
	}

	return all
}
// buildScheme builds a 'basic' scheme
// from a pattern string
func buildScheme(ss []string) (Scheme, error) {
2021-05-14 15:19:02 +00:00
// 1. Build scheme
sch := make(Scheme, 0, maxMatch)
for _, s := range ss {
2021-05-14 15:19:02 +00:00
// 2. ignore empty
2018-09-29 12:39:12 +00:00
if len(s) == 0 {
continue
}
2021-05-14 15:19:02 +00:00
m := &matcher{}
switch s {
2021-05-14 15:19:02 +00:00
// 3. Card: 0, N
2018-09-29 12:39:12 +00:00
case "**":
m.req = false
m.mul = true
sch = append(sch, m)
2021-05-14 15:19:02 +00:00
// 4. Card: 1, N
2018-09-29 12:39:12 +00:00
case "..":
m.req = true
m.mul = true
sch = append(sch, m)
2021-05-14 15:19:02 +00:00
// 5. Card: 0, 1
2018-09-29 12:39:12 +00:00
case "*":
m.req = false
m.mul = false
sch = append(sch, m)
2021-05-14 15:19:02 +00:00
// 6. Card: 1
2018-09-29 12:39:12 +00:00
case ".":
m.req = true
m.mul = false
sch = append(sch, m)
2021-05-14 15:19:02 +00:00
// 7. Card: 1, literal string
2018-09-29 12:39:12 +00:00
default:
m.req = true
m.mul = false
m.pat = fmt.Sprintf("/%s", s)
sch = append(sch, m)
}
}
return sch, nil
}
// optimise optimised the scheme for further parsing
func (s Scheme) optimise() (Scheme, error) {
2021-05-14 15:19:02 +00:00
// 1. Nothing to do if only 1 element
if len(s) <= 1 {
return s, nil
}
2021-05-14 15:19:02 +00:00
// 2. Init reshifted scheme
rshift := make(Scheme, 0, maxMatch)
rshift = append(rshift, s[0])
2021-05-14 15:19:02 +00:00
// 2. Iterate over matchers
2018-09-29 12:39:12 +00:00
for p, i, l := 0, 1, len(s); i < l; i++ {
pre, cur := s[p], s[i]
/* Merge: 2 following literals */
if len(pre.pat) > 0 && len(cur.pat) > 0 {
// merge strings into previous
pre.pat = fmt.Sprintf("%s%s", pre.pat, cur.pat)
// delete current
s[i] = nil
}
// increment previous (only if current is not nul)
if s[i] != nil {
rshift = append(rshift, s[i])
p = i
}
}
return rshift, nil
}
// matchString checks the STRING matchers from an URI
// it returns a boolean : false when not matching, true eitherway
// it returns a cleared uri, without STRING data
func (s Scheme) matchString(uri string) (string, bool) {
2021-05-14 15:19:02 +00:00
// 1. Initialise variables
clr := uri // contains cleared input string
minOff := 0 // minimum offset
2021-05-14 15:19:02 +00:00
// 2. Iterate over strings
for _, m := range s {
ls := len(m.pat)
// {1} If not STRING matcher -> ignore //
2018-09-29 12:39:12 +00:00
if ls == 0 {
continue
}
// {2} Get offset in URI (else -1) //
off := strings.Index(clr, m.pat)
2018-09-29 12:39:12 +00:00
if off < 0 {
return "", false
}
// {3} Fail on invalid offset range //
2018-09-29 12:39:12 +00:00
if off < minOff {
return "", false
}
// {4} Check for trailing '/' //
hasSlash := 0
if off+ls < len(clr) && clr[off+ls] == '/' {
hasSlash = 1
}
// {5} Remove the current string (+trailing slash) from the URI //
beg, end := clr[:off], clr[off+ls+hasSlash:]
clr = fmt.Sprintf("%s\a/%s", beg, end) // separate matches by '\a' character
// {6} Update offset range //
minOff = len(beg) + 2 - 1 // +2 slash separators
2018-09-29 12:39:12 +00:00
// -1 because strings begin with 1 slash already
}
2021-05-14 15:19:02 +00:00
// 3. If exists, remove trailing '/'
if clr[len(clr)-1] == '/' {
clr = clr[:len(clr)-1]
}
2021-05-14 15:19:02 +00:00
// 4. If exists, remove trailing '\a'
if clr[len(clr)-1] == '\a' {
clr = clr[:len(clr)-1]
}
return clr, true
}
// matchWildcards check the WILCARDS (non-string) matchers from
// a cleared URI. it returns if the string matches
// + it sets the matchers buffers for later extraction
func (s Scheme) matchWildcards(clear string) bool {
2021-05-14 15:19:02 +00:00
// 1. Extract wildcards (ref)
wildcards := make(Scheme, 0, maxMatch)
for _, m := range s {
if len(m.pat) == 0 {
m.buf = nil // flush buffers
wildcards = append(wildcards, m)
}
}
2021-05-14 15:19:02 +00:00
// 2. If no wildcards -> match
if len(wildcards) == 0 {
return true
}
2021-05-14 15:19:02 +00:00
// 3. Break uri by '\a' characters
matches := strings.Split(clear, "\a")[1:]
2021-05-14 15:19:02 +00:00
// 4. Iterate over matches
for n, match := range matches {
// {1} If no more matcher //
if n >= len(wildcards) {
return false
}
// {2} Split by '/' //
data := strings.Split(match, "/")[1:] // from index 1 because it begins with '/'
// {3} If required and missing //
if wildcards[n].req && len(data) < 1 {
return false
}
// {4} If not multi but got multi //
if !wildcards[n].mul && len(data) > 1 {
return false
}
// {5} Store data into matcher //
wildcards[n].buf = data
}
2021-05-14 15:19:02 +00:00
// 5. Match
return true
2018-09-29 12:39:12 +00:00
}