// Reconstructed from patch 63b5125e6c92d032bac996e6635b44ee8cd2e44f
// (xdrm-brackets, Thu, 26 Apr 2018 13:03:44 +0200):
// "added [internal.uri.parser] for better real-time URI parsing".
//
// The patch created three files in package parser under internal/uri/parser
// (types.go, private.go, public.go). Their contents are merged below in that
// order. The code needs the package clause `package parser` and the imports
// "fmt" and "strings".

// ---- internal/uri/parser/types.go ----

// === WILDCARDS ===
//
// The star '*'       -> matches 0 or 1 slash-bounded string
// The multi star '**' -> matches 0 or more slash-separated strings
// The dot '.'        -> matches 1 slash-bounded string
// The multi dot '..' -> matches 1 or more slash-separated strings
//
// === SCHEME POLICY ===
//
// - The last '/' is optional
// - Any '**' at the very end will match anything that starts with the given prefix
//
// === LIMITATIONS ===
//
// - A scheme must begin with '/'
// - A scheme cannot contain something else than a STRING or WILDCARD between 2 '/' separators
// - A scheme STRING cannot contain the symbol '/' as a character
// - A scheme STRING containing '*' or '.' characters will be treated as STRING only
// - A maximum of 16 slash-separated matchers (STRING or WILDCARD) are allowed

// maxMatch is the maximum number of slash-separated components a pattern
// may contain.
const maxMatch = 16

// matcher represents a single URI matcher (literal string or wildcard).
type matcher struct {
	pat string   // literal pattern to match, '/'-prefixed (empty if wildcard)
	req bool     // whether it is required
	mul bool     // whether multiple matches are allowed
	buf []string // matched content
}

// Scheme represents an URI scheme: an ordered list of matchers.
type Scheme []*matcher

// ---- internal/uri/parser/private.go ----

// buildScheme builds a 'basic' (non-optimised) scheme from the
// slash-separated parts of a pattern string. Empty parts are ignored.
//
// Each non-empty part yields exactly one matcher:
//
//	"**" -> cardinality 0..N
//	".." -> cardinality 1..N
//	"*"  -> cardinality 0..1
//	"."  -> cardinality 1
//	else -> required literal, stored with a leading '/'
//
// The returned error is currently always nil.
func buildScheme(ss []string) (Scheme, error) {
	sch := make(Scheme, 0, maxMatch)

	for _, s := range ss {
		// ignore empty parts (leading/trailing '/' or '//')
		if len(s) == 0 {
			continue
		}

		// zero value is {req: false, mul: false}, i.e. the "*" wildcard
		m := &matcher{}

		switch s {
		case "**": // Card: 0, N
			m.mul = true
		case "..": // Card: 1, N
			m.req = true
			m.mul = true
		case "*": // Card: 0, 1
		case ".": // Card: 1
			m.req = true
		default: // Card: 1, literal string
			m.req = true
			m.pat = "/" + s
		}

		sch = append(sch, m)
	}

	return sch, nil
}

// optimise returns a scheme in which every run of consecutive literal
// matchers is merged into a single literal matcher (e.g. "/a" followed by
// "/b" becomes "/a/b"). Wildcard matchers are kept as they are.
//
// The receiver and its matchers are left untouched: a merged literal is
// stored in a fresh matcher, so optimise can safely be called more than once
// on the same scheme. Schemes of length 0 or 1 are returned unchanged.
//
// The returned error is currently always nil.
func (s Scheme) optimise() (Scheme, error) {
	// nothing to do if only 1 element
	if len(s) <= 1 {
		return s, nil
	}

	merged := make(Scheme, 0, len(s))

	for _, cur := range s {
		last := len(merged) - 1

		// merge: 2 following literals
		if last >= 0 && len(merged[last].pat) > 0 && len(cur.pat) > 0 {
			// copy before extending so the receiver's matcher is not modified
			lit := *merged[last]
			lit.pat += cur.pat
			merged[last] = &lit
			continue
		}

		merged = append(merged, cur)
	}

	return merged, nil
}

// ---- internal/uri/parser/public.go ----

// Build builds an URI scheme from a pattern string.
//
// The pattern must begin with '/' and may contain at most maxMatch non-empty
// slash-separated components; otherwise a nil scheme and an error are
// returned. On success the returned scheme has its consecutive literal
// components merged into single matchers.
func Build(s string) (*Scheme, error) {
	// the pattern must start with '/'
	if len(s) < 1 || s[0] != '/' {
		return nil, fmt.Errorf("URI must begin with '/'")
	}

	parts := strings.Split(s, "/")

	// Count only non-empty parts: those are exactly the parts buildScheme
	// turns into matchers, whether or not the pattern ends with '/'.
	count := 0
	for _, p := range parts {
		if len(p) > 0 {
			count++
		}
	}
	if count > maxMatch {
		return nil, fmt.Errorf("URI must not exceed %d slash-separated components, got %d", maxMatch, count)
	}

	sch, err := buildScheme(parts)
	if err != nil {
		return nil, err
	}

	opti, err := sch.optimise()
	if err != nil {
		return nil, err
	}

	return &opti, nil
}