From 12122a2bda31475c1b6aca9aa63bf364a592a970 Mon Sep 17 00:00:00 2001 From: Davide Bettio Date: Wed, 15 Jul 2020 11:35:20 +0200 Subject: [PATCH 1/2] Update identifier rule --- src/jsonpath_lexer.xrl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jsonpath_lexer.xrl b/src/jsonpath_lexer.xrl index 97347ae..1429856 100644 --- a/src/jsonpath_lexer.xrl +++ b/src/jsonpath_lexer.xrl @@ -19,7 +19,7 @@ Definitions. WHITESPACE = [\s\t\n\r] -IDENTIFIER = [a-zA-Z_][a-zA-Z0-9_]* +IDENTIFIER = [^'".*0-9()$?,>=<\-\:\@\[\]\s\t\n\r][^'".*()$?,>=<\-\:\@\[\]\s\t\n\r]* INTEGER = \-?[0-9]+ STRING = \"[^"]*\" LSTRING = \'[^']*\' @@ -27,7 +27,7 @@ LSTRING = \'[^']*\' Rules. {WHITESPACE}+ : skip_token. -{IDENTIFIER} : {token, {identifier, TokenLine, list_to_binary(TokenChars)}}. +{IDENTIFIER} : {token, {identifier, TokenLine, unicode:characters_to_binary(TokenChars)}}. {INTEGER} : {token, {integer, TokenLine, list_to_integer(TokenChars)}}. {STRING} : {token, {string, TokenLine, string_to_binary(TokenChars)}}. {LSTRING} : {token, {string, TokenLine, string_to_binary(TokenChars)}}. From 42aeb05b87fbf63e9750f1ae8d49ae6d311c1f57 Mon Sep 17 00:00:00 2001 From: Davide Bettio Date: Wed, 15 Jul 2020 11:37:19 +0200 Subject: [PATCH 2/2] Test support for dot notation with non ASCII key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tests for paths such as $.ユニコード. --- test/ex_json_path_test.exs | 7 ------- test/jsonpath_lexer_test.exs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/test/ex_json_path_test.exs b/test/ex_json_path_test.exs index bcc3549..74c56e3 100644 --- a/test/ex_json_path_test.exs +++ b/test/ex_json_path_test.exs @@ -719,12 +719,5 @@ defmodule ExJSONPathTest do assert {:error, %ParsingError{message: "" <> _msg}} = ExJSONPath.eval(array, path) end - - test "with unexpected chars" do - map = %{"a" => %{"b" => 42}} - path = ~s{ùùù} - - assert {:error, %ParsingError{message: "" <> _msg}} = ExJSONPath.eval(map, path) - end end end diff --git a/test/jsonpath_lexer_test.exs b/test/jsonpath_lexer_test.exs index 07f4a7d..cb8d189 100644 --- a/test/jsonpath_lexer_test.exs +++ b/test/jsonpath_lexer_test.exs @@ -52,4 +52,34 @@ defmodule ExJSONPath.Lexer do {:identifier, 1, "value"} ], 1} end + + # https://cburgmer.github.io/json-path-comparison/results/dot_notation_with_non_ASCII_key.html + test "unicode tokenization" do + assert :jsonpath_lexer.string('$.ユニコード') == + {:ok, [{:"$", 1}, {:., 1}, {:identifier, 1, "ユニコード"}], 1} + end + + test "mixed unicode and ascii tokenization" do + assert :jsonpath_lexer.string('$.ユnikodo') == + {:ok, [{:"$", 1}, {:., 1}, {:identifier, 1, "ユnikodo"}], 1} + end + + test "mixed ascii and unicode tokenization" do + assert :jsonpath_lexer.string('$.yuニコード') == + {:ok, [{:"$", 1}, {:., 1}, {:identifier, 1, "yuニコード"}], 1} + end + + test "latin accents and ascii tokenization" do + assert :jsonpath_lexer.string('$.à.è.i') == + {:ok, + [ + {:"$", 1}, + {:., 1}, + {:identifier, 1, "à"}, + {:., 1}, + {:identifier, 1, "è"}, + {:., 1}, + {:identifier, 1, "i"} + ], 1} + end end