diff --git a/src/jsonpath_lexer.xrl b/src/jsonpath_lexer.xrl
index 97347ae..1429856 100644
--- a/src/jsonpath_lexer.xrl
+++ b/src/jsonpath_lexer.xrl
@@ -19,7 +19,7 @@ Definitions.
 
 
 WHITESPACE = [\s\t\n\r]
-IDENTIFIER = [a-zA-Z_][a-zA-Z0-9_]*
+IDENTIFIER = [^'".*0-9()$?,>=<\-\:\@\[\]\s\t\n\r][^'".*()$?,>=<\-\:\@\[\]\s\t\n\r]*
 INTEGER = \-?[0-9]+
 STRING = \"[^"]*\"
 LSTRING = \'[^']*\'
@@ -27,7 +27,7 @@ LSTRING = \'[^']*\'
 Rules.
 
 {WHITESPACE}+ : skip_token.
-{IDENTIFIER} : {token, {identifier, TokenLine, list_to_binary(TokenChars)}}.
+{IDENTIFIER} : {token, {identifier, TokenLine, unicode:characters_to_binary(TokenChars)}}.
 {INTEGER} : {token, {integer, TokenLine, list_to_integer(TokenChars)}}.
 {STRING} : {token, {string, TokenLine, string_to_binary(TokenChars)}}.
 {LSTRING} : {token, {string, TokenLine, string_to_binary(TokenChars)}}.
diff --git a/test/ex_json_path_test.exs b/test/ex_json_path_test.exs
index bcc3549..74c56e3 100644
--- a/test/ex_json_path_test.exs
+++ b/test/ex_json_path_test.exs
@@ -719,12 +719,5 @@ defmodule ExJSONPathTest do
 
       assert {:error, %ParsingError{message: "" <> _msg}} = ExJSONPath.eval(array, path)
     end
-
-    test "with unexpected chars" do
-      map = %{"a" => %{"b" => 42}}
-      path = ~s{ùùù}
-
-      assert {:error, %ParsingError{message: "" <> _msg}} = ExJSONPath.eval(map, path)
-    end
   end
 end
diff --git a/test/jsonpath_lexer_test.exs b/test/jsonpath_lexer_test.exs
index 07f4a7d..cb8d189 100644
--- a/test/jsonpath_lexer_test.exs
+++ b/test/jsonpath_lexer_test.exs
@@ -52,4 +52,34 @@ defmodule ExJSONPath.Lexer do
                 {:identifier, 1, "value"}
               ], 1}
   end
+
+  # https://cburgmer.github.io/json-path-comparison/results/dot_notation_with_non_ASCII_key.html
+  test "unicode tokenization" do
+    assert :jsonpath_lexer.string('$.ユニコード') ==
+             {:ok, [{:"$", 1}, {:., 1}, {:identifier, 1, "ユニコード"}], 1}
+  end
+
+  test "mixed unicode and ascii tokenization" do
+    assert :jsonpath_lexer.string('$.ユnikodo') ==
+             {:ok, [{:"$", 1}, {:., 1}, {:identifier, 1, "ユnikodo"}], 1}
+  end
+
+  test "mixed ascii and unicode tokenization" do
+    assert :jsonpath_lexer.string('$.yuニコード') ==
+             {:ok, [{:"$", 1}, {:., 1}, {:identifier, 1, "yuニコード"}], 1}
+  end
+
+  test "latin accents and ascii tokenization" do
+    assert :jsonpath_lexer.string('$.à.è.i') ==
+             {:ok,
+              [
+                {:"$", 1},
+                {:., 1},
+                {:identifier, 1, "à"},
+                {:., 1},
+                {:identifier, 1, "è"},
+                {:., 1},
+                {:identifier, 1, "i"}
+              ], 1}
+  end
 end
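
For reviewers: a minimal way to exercise the change from iex -S mix. The lexer call mirrors the new tests above; the sample map and the {:ok, results} success shape of ExJSONPath.eval/2 are illustrative assumptions, not part of this diff.

    # Non-ASCII path segments now tokenize as identifiers instead of
    # failing in the lexer, which is why the "with unexpected chars"
    # test (expecting a ParsingError for ~s{ùùù}) is removed above.
    {:ok, tokens, _end_line} = :jsonpath_lexer.string('$.ユニコード')
    # tokens == [{:"$", 1}, {:., 1}, {:identifier, 1, "ユニコード"}]

    # So dot notation with a non-ASCII key can evaluate end to end
    # (map contents and success shape assumed for illustration):
    map = %{"ユニコード" => 42}
    {:ok, _results} = ExJSONPath.eval(map, ~s($.ユニコード))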