# Copyright (c) Microsoft. All rights reserved.

import logging

from semantic_kernel.exceptions import CodeBlockSyntaxError
from semantic_kernel.template_engine.blocks.block import Block
from semantic_kernel.template_engine.blocks.block_types import BlockTypes
from semantic_kernel.template_engine.blocks.function_id_block import FunctionIdBlock
from semantic_kernel.template_engine.blocks.named_arg_block import NamedArgBlock
from semantic_kernel.template_engine.blocks.symbols import Symbols
from semantic_kernel.template_engine.blocks.val_block import ValBlock
from semantic_kernel.template_engine.blocks.var_block import VarBlock

logger: logging.Logger = logging.getLogger(__name__)


# BNF parsed by CodeTokenizer:
# [template]       ::= "" | [variable] " " [template]
#                         | [value] " " [template]
#                         | [function-call] " " [template]
# [variable]       ::= "$" [valid-name]
# [value]          ::= "'" [text] "'" | '"' [text] '"'
# [function-call]  ::= [function-id] | [function-id] [parameter]
# [parameter]      ::= [variable] | [value]
class CodeTokenizer:
    """Tokenize the code text into blocks."""

    @staticmethod
    def tokenize(text: str) -> list[Block]:
        """Tokenize the code text into blocks."""
        # Remove spaces, which are ignored anyway
        text = text.strip() if text else ""
        # Render None/empty to []
        if not text:
            return []
        # 1 char only edge case, var and val blocks are invalid with one char, so it must be a function id block
        if len(text) == 1:
            return [FunctionIdBlock(content=text)]

        # Track what type of token we're reading
        current_token_type = None

        # Track the content of the current token
        current_token_content: list[str] = []

        # Other state we need to track
        text_value_delimiter = None
        space_separator_found = False
        skip_next_char = False
        next_char = ""
        blocks: list[Block] = []

        for index, current_char in enumerate(text[:-1]):
            next_char = text[index + 1]

            if skip_next_char:
                skip_next_char = False
                continue

            # First char is easy
            if index == 0:
                if current_char == Symbols.VAR_PREFIX:
                    current_token_type = BlockTypes.VARIABLE
                elif current_char in (Symbols.DBL_QUOTE, Symbols.SGL_QUOTE):
                    current_token_type = BlockTypes.VALUE
                    text_value_delimiter = current_char
                else:
                    current_token_type = BlockTypes.FUNCTION_ID

                current_token_content.append(current_char)
                continue

            # While reading values between quotes
            if current_token_type in (BlockTypes.VALUE, BlockTypes.NAMED_ARG):
                # If the current char is escaping the next special char we:
                #  - skip the current char (escape char)
                #  - add the next char (special char)
                #  - jump to the one after (to handle "\\" properly)
                if current_char == Symbols.ESCAPE_CHAR and next_char in (
                    Symbols.DBL_QUOTE,
                    Symbols.SGL_QUOTE,
                    Symbols.ESCAPE_CHAR,
                ):
                    current_token_content.append(next_char)
                    skip_next_char = True
                    continue

                current_token_content.append(current_char)

                # When we reach the end of the value, we add the block
                if current_char == text_value_delimiter:
                    blocks.append(ValBlock(content="".join(current_token_content)))
                    current_token_content.clear()
                    current_token_type = None
                    space_separator_found = False

                continue

            # If we're not between quotes, a space signals the end of the current token
            # Note: there might be multiple consecutive spaces
            if current_char in (
                Symbols.SPACE,
                Symbols.NEW_LINE,
                Symbols.CARRIAGE_RETURN,
                Symbols.TAB,
            ):
                if current_token_type == BlockTypes.VARIABLE:
                    blocks.append(VarBlock(content="".join(current_token_content)))
                    current_token_content.clear()
                elif current_token_type == BlockTypes.FUNCTION_ID:
                    if Symbols.NAMED_ARG_BLOCK_SEPARATOR.value in current_token_content:
                        blocks.append(NamedArgBlock(content="".join(current_token_content)))
                    else:
                        blocks.append(FunctionIdBlock(content="".join(current_token_content)))
                    current_token_content.clear()

                space_separator_found = True
                current_token_type = None

                continue

            # If we're not inside a quoted value, and we're not processing a space
            current_token_content.append(current_char)

            if current_token_type is None:
                if not space_separator_found:
                    raise CodeBlockSyntaxError("Tokens must be separated by one space least")

                if current_char in (Symbols.DBL_QUOTE, Symbols.SGL_QUOTE):
                    # A quoted value starts here
                    current_token_type = BlockTypes.VALUE
                    text_value_delimiter = current_char
                elif current_char == Symbols.VAR_PREFIX:
                    # A variable starts here
                    current_token_type = BlockTypes.VARIABLE
                else:
                    # A function id starts here
                    current_token_type = BlockTypes.FUNCTION_ID

        # end of main for loop

        # Capture last token
        current_token_content.append(next_char)

        if current_token_type == BlockTypes.VALUE:
            blocks.append(ValBlock(content="".join(current_token_content)))
        elif current_token_type == BlockTypes.VARIABLE:
            blocks.append(VarBlock(content="".join(current_token_content)))
        elif current_token_type == BlockTypes.FUNCTION_ID:
            if Symbols.NAMED_ARG_BLOCK_SEPARATOR.value in current_token_content:
                blocks.append(NamedArgBlock(content="".join(current_token_content)))
            else:
                blocks.append(FunctionIdBlock(content="".join(current_token_content)))
        else:
            raise CodeBlockSyntaxError("Tokens must be separated by one space least")

        return blocks