Skip to content

Commit

Permalink
Reworked the JSON parser to emit '#' nodes for Object and Array decla…
Browse files Browse the repository at this point in the history
…rations. Added '#' as an allowed character for element names.
  • Loading branch information
ChrisTrenkamp committed Feb 24, 2024
1 parent 4761317 commit 5620cd8
Show file tree
Hide file tree
Showing 7 changed files with 1,758 additions and 852 deletions.
63 changes: 53 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* `xsel` does not implement the [id](https://www.w3.org/TR/xpath-10/#function-id) function.
* The grammar as defined in the XPath 1.0 spec doesn't explicitly allow function calls in the middle of a path expression, such as `/path/function-call()/path`. `xsel` allows function calls in the middle of path expressions.
* `xsel` allows name lookups with a wildcard for the namespace, such as `/*:path`.
* `xsel` allows the `#` character in element selections.

## Basic usage

Expand Down Expand Up @@ -213,28 +214,70 @@ func main() {
To build a custom document, implement your own [Parser](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/parser#Parser) method, and build [Element](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/node#Element)'s, [Attribute](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/node#Attribute)'s, [Character Data](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/node#CharData), [Comment](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/node#Comment)'s, [Processing Instruction](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/node#ProcInst)'s, and [Namespace](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/node#Namespace)'s.


## Caveats for HTML documents
## HTML documents

Namespaces are completely ignored for HTML documents. Keep all queries in the default namespace. Write queries such as `//svg`. Do not write queries such as `//svg:svg`.
Use the `xsel.ReadHtml` function to read HTML documents. Namespaces are completely ignored for HTML documents. Keep all queries in the default namespace. Write queries such as `//svg`. Do not write queries such as `//svg:svg`.

## Caveats for JSON documents
## JSON documents

JSON documents only build elements and character data. All element names are in the default namespace.

Elements in arrays are wrapped in element nodes, with a name based on the name of the object field, and arrays nested in arrays are flattened. For example, if you had the following JSON document:
JSON documents only build elements and character data. Object and array declarations will emit an element node with the name `#`. For example, given the following JSON file:

```
{
"states": ["AK", ["MD", "FL"] ]
}
```

The XML equivalent will be:
It would look like this in XML...

```
<states>AK</states>
<states>MD</states>
<states>FL</states>
<#>
<states>
<#>
AK
<#>
MD
FL
</#>
</#>
</states>
</#>
```

... however, `MD` and `FL` are separate text nodes, which is different from XML parsing:


```go
package main

import (
"bytes"
"fmt"

"github.com/ChrisTrenkamp/xsel"
)

// main parses a small JSON document containing a nested array and runs two
// XPath queries against it, printing each result on its own line.
func main() {
	json := `
{
"states": ["AK", ["MD", "FL"] ]
}
`

	xpath := xsel.MustBuildExpr(`/#/states/#/text()`)

	// Surface parse failures instead of querying a cursor that was never built.
	cursor, err := xsel.ReadJson(bytes.NewBufferString(json))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)

	// Notice the [2] in the text selection.
	xpath = xsel.MustBuildExpr(`/#/states/#/#/text()[2]`)
	result, err = xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
	// Output: AK
	// FL
}
```

## Commandline Utility
Expand Down
21 changes: 21 additions & 0 deletions doc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,27 @@ func ExampleWithFunction() {
// Output: This is a comment.
}

// ExampleReadJson reads a JSON document with a nested array and selects text
// nodes from it using the '#' element name in the XPath expressions.
func ExampleReadJson() {
	json := `
{
"states": ["AK", ["MD", "FL"] ]
}
`

	xpath := xsel.MustBuildExpr(`/#/states/#/text()`)

	// Panic on failure so a broken parse or query fails the example loudly
	// rather than showing up as a confusing output mismatch.
	cursor, err := xsel.ReadJson(bytes.NewBufferString(json))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)

	xpath = xsel.MustBuildExpr(`/#/states/#/#/text()[2]`)
	result, err = xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
	// Output: AK
	// FL
}

func ExampleUnmarshal() {
xml := `
<Root xmlns="http://www.adventure-works.com">
Expand Down
39 changes: 24 additions & 15 deletions exec/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,22 @@ func printTree(cursor store.Cursor, depth int) {
switch t := cursor.Node().(type) {
case node.Element:
fmt.Println(t.Local())
fmt.Println("<" + t.Local() + ">")
case node.CharData:
fmt.Println(t.CharDataValue())
}
for _, i := range cursor.Children() {
printTree(i, depth+1)
}
if t, isEnd := cursor.Node().(node.Element); isEnd {
for i := 0; i < depth; i++ {
fmt.Print(" ")
}
fmt.Println("</" + t.Local() + ">")
}
}
*/

Expand Down Expand Up @@ -1385,43 +1393,44 @@ func TestJsonNestedArray(t *testing.T) {
"nil": null
}
`
value := execJsonNodes(t, "/a[. = '0']", json)

if value.String() != "0" || value[0].Node().(node.Element).Local() != "a" {
value := execJsonNodes(t, "/#/a/#/text()[. = '0']", json)

if c := value[0].Node().(node.CharData); c.CharDataValue() != "0" {
t.Error("bad array value")
}

value = execJsonNodes(t, "/a[. = 'b']", json)
value = execJsonNodes(t, "/#/a/#/#/text()[. = 'b']", json)

if value.String() != "b" || value[0].Node().(node.Element).Local() != "a" {
if c := value[0].Node().(node.CharData); c.CharDataValue() != "b" {
t.Error("bad nested array value")
}

value = execJsonNodes(t, "/a/d[. = 2.71828]", json)
value = execJsonNodes(t, "/#/a/#/#/#/d[. = 2.71828]", json)

if value.String() != "2.71828" || value[0].Node().(node.Element).Local() != "d" {
t.Error("bad object-in-array value")
}

value = execJsonNodes(t, "/b/c", json)
value = execJsonNodes(t, "/#/b/#/c", json)

if value.String() != "3.14" || value[0].Node().(node.Element).Local() != "c" {
t.Error("bad nested object value")
}

value = execJsonNodes(t, "/b/d/e", json)
value = execJsonNodes(t, "/#/b/#/d/#/#/e", json)

if value.String() != "f" || value[0].Node().(node.Element).Local() != "e" {
t.Error("bad object-in-array-in-object value")
}

value = execJsonNodes(t, "/b/d[. = 'g']", json)
value = execJsonNodes(t, "/#/b/#/d/#/text()[. = 'g']", json)

if value.String() != "g" || value[0].Node().(node.Element).Local() != "d" {
if c := value[0].Node().(node.CharData); c.CharDataValue() != "g" {
t.Error("bad object-in-array-in-object value")
}

value = execJsonNodes(t, "/nil", json)
value = execJsonNodes(t, "/#/nil", json)

if value.String() != "null" || value[0].Node().(node.Element).Local() != "nil" {
t.Error("bad nil value")
Expand Down Expand Up @@ -1470,16 +1479,16 @@ func TestJson(t *testing.T) {
}

for i := 0; i < 4; i++ {
if pricedItems[i].Node().(node.Element).Local() != "book" {
t.Error("name not 'book'")
if pricedItems[i].Node().(node.Element).Local() != "#" {
t.Error("name not '#'")
}
}

if pricedItems[4].Node().(node.Element).Local() != "bicycle" {
if pricedItems[4].Parent().Node().(node.Element).Local() != "bicycle" {
t.Error("name not 'bicycle'")
}

nodes := execJsonNodes(t, "/store/book/author", json)
nodes := execJsonNodes(t, "/#/store/#/book/#/#/author", json)

if len(nodes) != 4 {
t.Error("result size not 4")
Expand Down
5 changes: 5 additions & 0 deletions grammar/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
```
go install github.com/goccmack/gogll/v3@latest
cd grammar
gogll xpath_grammar.txt
```
Loading

0 comments on commit 5620cd8

Please sign in to comment.