added [internal.uri.parser] for better real-time URI parsing

This commit is contained in:
xdrm-brackets 2018-04-26 13:03:44 +02:00
parent 5f88fede99
commit 63b5125e6c
3 changed files with 179 additions and 0 deletions

View File

@ -0,0 +1,103 @@
package parser
import (
"fmt"
)
// buildScheme turns the slash-separated components of a pattern into a
// 'basic' (not yet optimised) Scheme, producing one matcher per non-empty
// component, in order.
//
// Component to cardinality mapping:
//
//	"**" -> 0..N    ".." -> 1..N    "*" -> 0..1    "." -> exactly 1
//
// Any other component is a literal: required, single, and stored in pat with
// a leading '/'. The returned error is always nil.
func buildScheme(ss []string) (Scheme, error) {
	scheme := make(Scheme, 0, maxMatch)

	for _, part := range ss {
		// empty components (e.g. around a leading or trailing '/') yield no matcher
		if part == "" {
			continue
		}

		var m *matcher
		switch part {
		case "**": // card: 0, N
			m = &matcher{req: false, mul: true}
		case "..": // card: 1, N
			m = &matcher{req: true, mul: true}
		case "*": // card: 0, 1
			m = &matcher{req: false, mul: false}
		case ".": // card: 1
			m = &matcher{req: true, mul: false}
		default: // card: 1, literal string
			m = &matcher{req: true, mul: false, pat: fmt.Sprintf("/%s", part)}
		}

		scheme = append(scheme, m)
	}

	return scheme, nil
}
// optimise optimises the scheme for further parsing by merging every run of
// consecutive literal matchers (matchers whose pat is non-empty) into the
// first matcher of that run.
//
// A scheme of 0 or 1 matcher is returned as is. Otherwise a new Scheme is
// returned; the receiver's entries are never replaced by nil, so the caller's
// slice stays fully usable. Note that the merge itself is done in place on
// the first matcher of each run, and that matcher is shared between the
// receiver and the result. The returned error is always nil.
func (s Scheme) optimise() (Scheme, error) {
	/* (1) Nothing to do if only 1 element */
	if len(s) <= 1 {
		return s, nil
	}

	/* (2) Init reshifted scheme */
	merged := make(Scheme, 0, maxMatch)
	merged = append(merged, s[0])

	/* (3) Iterate over the remaining matchers */
	for _, cur := range s[1:] {
		// the last kept matcher is the only merge candidate
		last := merged[len(merged)-1]

		// merge: 2 following literals collapse into the previous one
		if len(last.pat) > 0 && len(cur.pat) > 0 {
			last.pat += cur.pat
			continue
		}

		merged = append(merged, cur)
	}

	return merged, nil
}

View File

@ -0,0 +1,41 @@
package parser
import (
"fmt"
"strings"
)
// Build builds a URI scheme from a pattern string.
//
// The pattern must begin with '/'. It is split on '/', each non-empty
// component is turned into a matcher by buildScheme, and the result is then
// passed through Scheme.optimise.
//
// An error is returned when the pattern does not start with '/', when it
// holds more than maxMatch non-empty components, or when building or
// optimising the scheme fails. On error the returned *Scheme is nil.
func Build(s string) (*Scheme, error) {
	/* (1) Manage '/' at the start */
	if !strings.HasPrefix(s, "/") {
		return nil, fmt.Errorf("URI must begin with '/'")
	}

	/* (2) Split by '/' */
	parts := strings.Split(s, "/")

	/* (3) Max exceeded */
	// Count only non-empty components: buildScheme skips empty ones, so they
	// are exactly what becomes a matcher, with or without a trailing '/'.
	count := 0
	for _, p := range parts {
		if len(p) > 0 {
			count++
		}
	}
	if count > maxMatch {
		return nil, fmt.Errorf("URI must not exceed %d slash-separated components, got %d", maxMatch, count)
	}

	/* (4) Build for each part */
	sch, err := buildScheme(parts)
	if err != nil {
		return nil, err
	}

	/* (5) Optimise */
	opti, err := sch.optimise()
	if err != nil {
		return nil, err
	}

	return &opti, nil
}

View File

@ -0,0 +1,35 @@
package parser
// === WILDCARDS ===
//
// The star '*' -> matches 0 or 1 slash-bounded string
// The multi star '**' -> matches 0 or more slash-separated strings
// The dot '.' -> matches 1 slash-bounded string
// The multi dot '..' -> matches 1 or more slash-separated strings
//
// === SCHEME POLICY ===
//
// - The last '/' is optional
// - Any '**' at the very end will match anything that starts with the given prefix
//
// === LIMITATIONS ===
//
// - A scheme must begin with '/'
// - A scheme cannot contain anything other than a STRING or a WILDCARD between 2 '/' separators
// - A scheme STRING cannot contain the character '/'
// - A scheme STRING containing '*' or '.' characters is treated as a STRING only
// - A maximum of 16 slash-separated matchers (STRING or WILDCARD) is allowed
//
// maxMatch is the maximum number of slash-separated matchers in a scheme.
const maxMatch = 16
// matcher represents a single URI matcher: either a literal string or a
// wildcard, together with its cardinality (req / mul).
type matcher struct {
pat string // pattern to match (empty if wildcard)
req bool // whether it is required
mul bool // whether multiple matches are allowed
buf []string // matched content (presumably filled while parsing a URI; confirm against the parsing code)
}
// Scheme represents a URI scheme as a list of matchers.
type Scheme []*matcher