Module astrapy.utils.document_paths
Expand source code
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from typing import Iterable, overload
# Character that introduces an escape sequence within a field path.
FIELD_NAME_ESCAPE_CHAR = "&"
# Character that separates consecutive segments of a field path.
FIELD_NAME_SEGMENT_SEPARATOR = "."
# The characters that must be escaped when occurring literally in a field name.
FIELD_NAME_ESCAPED_CHARS = {FIELD_NAME_ESCAPE_CHAR, FIELD_NAME_SEGMENT_SEPARATOR}
# Maps each special character to its escaped form (escape char + the character).
FIELD_NAME_ESCAPE_MAP = {
    special_char: FIELD_NAME_ESCAPE_CHAR + special_char
    for special_char in FIELD_NAME_ESCAPED_CHARS
}

# Error message for an escape char followed by a character that cannot be escaped.
ILLEGAL_ESCAPE_ERROR_MESSAGE_TEMPLATE = (
    "Illegal escape sequence found while parsing field path "
    "specification '{field_path}': '{escape_sequence}'"
)
# Error message for an escape char found as the very last character of a path.
UNTERMINATED_ESCAPE_ERROR_MESSAGE_TEMPLATE = (
    "Unterminated escape sequence found at end of path specification '{field_path}'"
)
def _escape_field_name(field_name: str | int) -> str:
    """
    Escape a single literal field-name path segment.

    Args:
        field_name: a literal field-name path segment, i.e. a single field name
            identifying a key in a JSON-like document. Example: "sub.key&friends".
            Whole numbers (expected to be non-negative, representing indexes
            in lists) are rendered as strings.

    Returns:
        a string expressing the input in escaped form, i.e. with every
        occurrence of a special character replaced by its escape sequence.

    Example:
        >>> _escape_field_name("sub.key&friends")
        'sub&.key&&friends'
        >>> _escape_field_name(12)
        '12'
    """
    # Render to text first, so that integer segments can be scanned like strings.
    rendered = f"{field_name}"
    # Special characters map to their escape sequence; all others pass through.
    return "".join(FIELD_NAME_ESCAPE_MAP.get(char, char) for char in rendered)
@overload
def escape_field_names(field_names: Iterable[str | int]) -> str: ...


@overload
def escape_field_names(*field_names: str | int) -> str: ...


# The implementation accepts more than the overloads advertise (any argument may
# itself be an iterable of segments). This mypy directive essentially hides the
# 'funny' call pattern of passing multiple iterators:
def escape_field_names(*field_names: str | int | Iterable[str | int]) -> str:  # type: ignore[misc]
    """
    Escape one or more field-name path segments into a full string expression.

    Args:
        field_names: the function accepts any number of string or integer
            (non-negative) arguments - or, equivalently, a single argument with
            an iterable of them. In either case, the input(s) is a literal,
            unescaped specification for a path in a document.

    Returns:
        a single string resulting from dot-concatenation of the input segments,
        with each path segment having been escaped.

    Example:
        >>> escape_field_names()
        ''
        >>> escape_field_names("f")
        'f'
        >>> escape_field_names(123)
        '123'
        >>> escape_field_names("f", 123, "tom&jerry")
        'f.123.tom&&jerry'
        >>> escape_field_names(["f"])
        'f'
        >>> escape_field_names([123])
        '123'
        >>> escape_field_names(["f", 123, "tom&jerry"])
        'f.123.tom&&jerry'
    """
    segments: list[str | int] = []
    for field_name in field_names:
        if isinstance(field_name, (str, int)):
            # Strings are iterables as well, hence they are recognized first
            # and kept whole instead of being split into characters.
            segments.append(field_name)
        else:
            # Any other argument is taken to be an iterable of segments and is
            # flattened by one level. This also covers the "single iterable
            # argument" call pattern without a dedicated special case.
            segments.extend(field_name)
    return FIELD_NAME_SEGMENT_SEPARATOR.join(
        _escape_field_name(segment) for segment in segments
    )
def unescape_field_path(field_path: str) -> list[str]:
    """Apply unescaping rules to a single-string field path.

    The result is a list of the individual field names, each a literal
    with nothing escaped anymore.

    Args:
        field_path: an expression denoting a field path following dot-notation
            with escaping for special characters.

    Returns:
        a list of literal field names. Even "number-looking" fields, such as "0"
        or "12", are returned as strings and it is up to the caller to interpret
        these according to the context. An empty path yields an empty list.

    Raises:
        ValueError: if the path ends with a dangling escape character, or if
            an escape character is followed by a character that is not one
            of the escapable ones.

    Example:
        >>> unescape_field_path("a.b")
        ['a', 'b']
        >>>
        >>> unescape_field_path("a&.b")
        ['a.b']
        >>>
        >>> unescape_field_path("a&&b&.c")
        ['a&b.c']
        >>>
        >>> unescape_field_path("a&.b.c&&d")
        ['a.b', 'c&d']
    """
    segments: list[str] = []
    if field_path == "":
        # An empty path means "no segments", as opposed to one empty segment.
        return segments

    # Characters of the segment being built; joined once per segment instead of
    # growing a string one character at a time.
    current: list[str] = []
    chars = iter(field_path)
    for char in chars:
        if char == FIELD_NAME_SEGMENT_SEPARATOR:
            # An unescaped separator closes the current segment.
            segments.append("".join(current))
            current = []
        elif char == FIELD_NAME_ESCAPE_CHAR:
            # Consume the character right after the escape char from the same
            # iterator, so that the loop does not visit it a second time.
            escaped = next(chars, None)
            if escaped is None:
                msg = UNTERMINATED_ESCAPE_ERROR_MESSAGE_TEMPLATE.format(
                    field_path=field_path,
                )
                raise ValueError(msg)
            if escaped not in FIELD_NAME_ESCAPED_CHARS:
                msg = ILLEGAL_ESCAPE_ERROR_MESSAGE_TEMPLATE.format(
                    field_path=field_path,
                    escape_sequence=f"{FIELD_NAME_ESCAPE_CHAR}{escaped}",
                )
                raise ValueError(msg)
            current.append(escaped)
        else:
            current.append(char)
    # Whatever was accumulated after the last separator is the final segment
    # (possibly the empty string, e.g. for a path ending with a separator).
    segments.append("".join(current))
    return segments
Functions
def escape_field_names(*field_names: str | int | Iterable[str | int]) ‑> str
-
Escape one or more field-name path segments into a full string expression.
Args
field_names
- the function accepts any number of string or integer (non-negative) arguments - or, equivalently, a single argument with an iterable of them. In either case, the input(s) is a literal, unescaped specification for a path in a document.
Returns
a single string resulting from dot-concatenation of the input segments, with each path segment having been escaped conforming to the Data API escaping rules.
Example
>>> escape_field_names()
''
>>> escape_field_names("f")
'f'
>>> escape_field_names(123)
'123'
>>> escape_field_names("f", 123, "tom&jerry")
'f.123.tom&&jerry'
>>> escape_field_names(["f"])
'f'
>>> escape_field_names([123])
'123'
>>> escape_field_names(["f", 123, "tom&jerry"])
'f.123.tom&&jerry'
Expand source code
def escape_field_names(*field_names: str | int | Iterable[str | int]) -> str:  # type: ignore[misc]
    """
    Escape one or more field-name path segments into a full string expression.

    Args:
        field_names: the function accepts any number of string or integer
            (non-negative) arguments - or, equivalently, a single argument with
            an iterable of them. In either case, the input(s) is a literal,
            unescaped specification for a path in a document.

    Returns:
        a single string resulting from dot-concatenation of the input segments,
        with each path segment having been escaped.

    Example:
        >>> escape_field_names()
        ''
        >>> escape_field_names("f")
        'f'
        >>> escape_field_names(123)
        '123'
        >>> escape_field_names("f", 123, "tom&jerry")
        'f.123.tom&&jerry'
        >>> escape_field_names(["f"])
        'f'
        >>> escape_field_names([123])
        '123'
        >>> escape_field_names(["f", 123, "tom&jerry"])
        'f.123.tom&&jerry'
    """
    segments: list[str | int] = []
    for field_name in field_names:
        if isinstance(field_name, (str, int)):
            # Strings are iterables as well, hence they are recognized first
            # and kept whole instead of being split into characters.
            segments.append(field_name)
        else:
            # Any other argument is taken to be an iterable of segments and is
            # flattened by one level. This also covers the "single iterable
            # argument" call pattern without a dedicated special case.
            segments.extend(field_name)
    return FIELD_NAME_SEGMENT_SEPARATOR.join(
        _escape_field_name(segment) for segment in segments
    )
def unescape_field_path(field_path: str) ‑> list[str]
-
Apply unescaping rules to a single-string field path.
The result is a list of the individual field names, each a literal with nothing escaped anymore.
Args
field_path
- an expression denoting a field path following dot-notation with escaping for special characters.
Returns
a list of literal field names. Even "number-looking" fields, such as "0" or "12", are returned as strings and it is up to the caller to interpret these according to the context.
Example
>>> unescape_field_path("a.b")
['a', 'b']
>>>
>>> unescape_field_path("a&.b")
['a.b']
>>>
>>> unescape_field_path("a&&b&.c")
['a&b.c']
>>>
>>> unescape_field_path("a&.b.c&&d")
['a.b', 'c&d']
Expand source code
def unescape_field_path(field_path: str) -> list[str]:
    """Apply unescaping rules to a single-string field path.

    The result is a list of the individual field names, each a literal
    with nothing escaped anymore.

    Args:
        field_path: an expression denoting a field path following dot-notation
            with escaping for special characters.

    Returns:
        a list of literal field names. Even "number-looking" fields, such as "0"
        or "12", are returned as strings and it is up to the caller to interpret
        these according to the context. An empty path yields an empty list.

    Raises:
        ValueError: if the path ends with a dangling escape character, or if
            an escape character is followed by a character that is not one
            of the escapable ones.

    Example:
        >>> unescape_field_path("a.b")
        ['a', 'b']
        >>>
        >>> unescape_field_path("a&.b")
        ['a.b']
        >>>
        >>> unescape_field_path("a&&b&.c")
        ['a&b.c']
        >>>
        >>> unescape_field_path("a&.b.c&&d")
        ['a.b', 'c&d']
    """
    segments: list[str] = []
    if field_path == "":
        # An empty path means "no segments", as opposed to one empty segment.
        return segments

    # Characters of the segment being built; joined once per segment instead of
    # growing a string one character at a time.
    current: list[str] = []
    chars = iter(field_path)
    for char in chars:
        if char == FIELD_NAME_SEGMENT_SEPARATOR:
            # An unescaped separator closes the current segment.
            segments.append("".join(current))
            current = []
        elif char == FIELD_NAME_ESCAPE_CHAR:
            # Consume the character right after the escape char from the same
            # iterator, so that the loop does not visit it a second time.
            escaped = next(chars, None)
            if escaped is None:
                msg = UNTERMINATED_ESCAPE_ERROR_MESSAGE_TEMPLATE.format(
                    field_path=field_path,
                )
                raise ValueError(msg)
            if escaped not in FIELD_NAME_ESCAPED_CHARS:
                msg = ILLEGAL_ESCAPE_ERROR_MESSAGE_TEMPLATE.format(
                    field_path=field_path,
                    escape_sequence=f"{FIELD_NAME_ESCAPE_CHAR}{escaped}",
                )
                raise ValueError(msg)
            current.append(escaped)
        else:
            current.append(char)
    # Whatever was accumulated after the last separator is the final segment
    # (possibly the empty string, e.g. for a path ending with a separator).
    segments.append("".join(current))
    return segments