"""
Using lexer dynamic_complete
============================

Demonstrates how to use ``lexer='dynamic_complete'`` and ``ambiguity='explicit'``

Sometimes you have data that is highly ambiguous or 'broken' in some sense.
When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be able
parse just about anything as long as there is a valid way to generate it from
the Grammar, including looking 'into' the Regexes.

This examples shows how to parse a json input where the quotes have been
replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}``
Notice that underscores might still appear inside strings, so a potentially
valid reading of the above is:
``{"foo_:{}, _bar": [], "baz": ""}``
"""
from pprint import pprint

from lark import Lark, Tree, Transformer, v_args
from lark.visitors import Transformer_InPlace

GRAMMAR = r"""
%import common.SIGNED_NUMBER
%import common.WS_INLINE
%import common.NEWLINE
%ignore WS_INLINE

?start: value

?value: object
      | array
      | string
      | SIGNED_NUMBER      -> number
      | "true"             -> true
      | "false"            -> false
      | "null"             -> null

array  : "[" (value ("," value)*)? "]"
object : "{" (pair ("," pair)*)? "}"
pair   : string ":" value

string: STRING
STRING : ESCAPED_STRING

ESCAPED_STRING: QUOTE_CHAR _STRING_ESC_INNER QUOTE_CHAR
QUOTE_CHAR: "_"

_STRING_INNER: /.*/
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/

"""


def score(tree: Tree):
    """
    Scores an option by how many children (and grand-children, and
    grand-grand-children, ...) it has.
    This means that the option with fewer large terminals get's selected

    Between
        object
          pair
            string	_foo_
            object
          pair
            string	_bar_: [], _baz_
            string	__

    and

        object
          pair
            string	_foo_
            object
          pair
            string	_bar_
            array
          pair
            string	_baz_
            string	__

    this will give the second a higher score. (9 vs 13)
    """
    return sum(len(t.children) for t in tree.iter_subtrees())


class RemoveAmbiguities(Transformer_InPlace):
    """
    Selects an option to resolve an ambiguity using the score function above.
    Scores each option and selects the one with the higher score, e.g. the one
    with more nodes.

    If there is a performance problem with the Tree having to many _ambig and
    being slow and to large, this can instead be written as a ForestVisitor.
    Look at the 'Custom SPPF Prioritizer' example.
    """
    def _ambig(self, options):
        return max(options, key=score)


class TreeToJson(Transformer):
    """
    This is the same Transformer as the json_parser example.
    """
    @v_args(inline=True)
    def string(self, s):
        return s[1:-1].replace('\\"', '"')

    array = list
    pair = tuple
    object = dict
    number = v_args(inline=True)(float)

    null = lambda self, _: None
    true = lambda self, _: True
    false = lambda self, _: False


parser = Lark(GRAMMAR, parser='earley', ambiguity="explicit", lexer='dynamic_complete')

EXAMPLES = [
    r'{_array_:[1,2,3]}',

    r'{_abc_: _array must be of the following format [_1_, _2_, _3_]_}',

    r'{_foo_:{}, _bar_: [], _baz_: __}',

    r'{_error_:_invalid_client_, _error_description_:_AADSTS7000215: Invalid '
    r'client secret is provided.\r\nTrace ID: '
    r'a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00\r\nCorrelation ID: '
    r'aa0aaa00-0aaa-0000-00a0-00000aaaa0aa\r\nTimestamp: 1997-10-10 00:00:00Z_, '
    r'_error_codes_:[7000215], _timestamp_:_1997-10-10 00:00:00Z_, '
    r'_trace_id_:_a0a0aaaa-a0a0-0a00-000a-00a00aaa0a00_, '
    r'_correlation_id_:_aa0aaa00-0aaa-0000-00a0-00000aaaa0aa_, '
    r'_error_uri_:_https://example.com_}',

]
for example in EXAMPLES:
    tree = parser.parse(example)
    tree = RemoveAmbiguities().transform(tree)
    result = TreeToJson().transform(tree)
    pprint(result)
