plasma-expert/spark-lessons/app/utils/variable_wrapper.py


								"""

								Variable Wrapper Utility

								Automatically wraps variables in HTML content with tooltip spans

								"""


								import re

								import html

								from typing import List, Tuple

								from .symbol_loader import get_symbol_definitions


								class VariableWrapper:
								    """Wraps known variables in HTML content with tooltip markup.

								    The symbol table is whatever ``get_symbol_definitions()`` returns; this
								    class only calls its ``get_all_symbols()`` and ``get_tooltip(symbol)``
								    methods.
								    """

								    # Single letters that commonly appear in regular text.
								    # They are only matched in specific mathematical contexts.
								    _CONTEXT_SYMBOLS = frozenset(
								        {'A', 'I', 'V', 'P', 'Q', 'R', 'L', 'C', 'E', 'B', 'G', 'X', 'Y', 'Z', 'f', 'd', 'h'}
								    )

								    # Very common English words that need extra-strict matching.
								    _STRICT_SYMBOLS = frozenset({'A', 'I'})

								    # One complete HTML tag or comment. Matching it as a unit keeps symbol
								    # patterns from ever firing inside attribute values. A stray "<" in
								    # text followed later by ">" is treated as a tag as well.
								    _TAG_PATTERN = r'<[^<>]*>'

								    def __init__(self):
								        """Initialize variable wrapper with symbol definitions"""
								        self.symbols = get_symbol_definitions()
								        self._build_patterns()

								    def _build_patterns(self) -> None:
								        """Build regex patterns for all known symbols.

								        Fills ``self.patterns`` (symbols delimited by non-word characters)
								        and ``self.context_patterns`` (single letters that must sit next to
								        an operator or whitespace). Both hold ``(regex source, symbol)``
								        tuples ordered longest symbol first.
								        """
								        # Longest first so a long symbol is wrapped before any shorter
								        # symbol that is a fragment of it.
								        symbols_list = sorted(
								            self.symbols.get_all_symbols(),
								            key=len,
								            reverse=True
								        )

								        self.patterns: List[Tuple[str, str]] = []
								        self.context_patterns: List[Tuple[str, str]] = []  # Patterns requiring context

								        for symbol in symbols_list:
								            if not symbol:
								                # An empty symbol would yield a pattern that matches at
								                # every position of the document.
								                continue

								            # Escape special regex characters
								            escaped = re.escape(symbol)

								            if symbol in self._STRICT_SYMBOLS:
								                # Extra restrictive for A, I: must directly follow one of
								                # = × + - / ( with at most one whitespace character in between.
								                pattern = f'(?<=[=×+\\-/\\(])\\s?({escaped})(?=[\\s=+\\-*/()\\[\\]])'
								                self.context_patterns.append((pattern, symbol))
								            elif symbol in self._CONTEXT_SYMBOLS:
								                # Requires "=" (optionally followed by one whitespace) or a
								                # whitespace character before the letter, and whitespace,
								                # an operator, a bracket or one of , ; < > after it.
								                pattern = f'(?<=[=])\\s?({escaped})(?=[\\s=+\\-*/()\\[\\],;<>])|(?<=\\s)({escaped})(?=[\\s=+\\-*/()\\[\\],;<>])'
								                self.context_patterns.append((pattern, symbol))
								            else:
								                # Normal pattern: the symbol must not touch a word character
								                # (letter, digit or underscore) on either side.
								                pattern = f'(?<!\\w)({escaped})(?!\\w)'
								                self.patterns.append((pattern, symbol))

								        print(f"[VariableWrapper] Built {len(self.patterns)} normal patterns + {len(self.context_patterns)} context-sensitive patterns")

								    def _wrap_symbol(self, html_content: str, pattern: str, opening_tag: str) -> Tuple[str, int]:
								        """Wrap every text occurrence of one symbol pattern.

								        Args:
								            html_content: HTML content to process
								            pattern: Regex source of the symbol pattern
								            opening_tag: Fully escaped ``<span ...>`` tag to insert

								        Returns:
								            Tuple of (processed HTML, number of occurrences wrapped)
								        """
								        # Tags are matched first and passed through untouched, so the
								        # symbol pattern only ever sees text between tags. The lookarounds
								        # additionally skip text that sits directly after an inserted
								        # opening tag or directly before a closing </span>.
								        guarded = re.compile(
								            f'(?P<tag>{self._TAG_PATTERN})'
								            f'|(?<!var-tooltip">)(?P<body>{pattern})(?!</span>)'
								        )
								        count = 0

								        def substitute(match):
								            nonlocal count
								            if match.group('tag') is not None:
								                return match.group(0)

								            # Context patterns may consume one leading whitespace
								            # character; keep it outside of the span.
								            body = match.group('body')
								            symbol_text = body.lstrip()
								            leading = body[:len(body) - len(symbol_text)]
								            count += 1
								            return f'{leading}{opening_tag}{symbol_text}</span>'

								        # A callable replacement is inserted literally, so backslashes in
								        # the tooltip are not interpreted as regex template escapes.
								        return guarded.sub(substitute, html_content), count

								    def wrap_variables(self, html_content: str) -> str:
								        """
								        Wrap known variables in HTML content with tooltip spans

								        Only text between tags is considered; tag names and attribute
								        values (including tooltips inserted for earlier symbols) are left
								        untouched. Symbols without a tooltip are skipped.

								        Args:
								            html_content: HTML content to process

								        Returns:
								            HTML content with variables wrapped in tooltip spans.
								            Empty input is returned unchanged.
								        """
								        if not html_content:
								            return html_content

								        # Track which variables were actually wrapped (for debugging)
								        wrapped_vars = set()

								        for pattern, symbol in self.patterns + self.context_patterns:
								            tooltip_text = self.symbols.get_tooltip(symbol)
								            if not tooltip_text:
								                continue

								            # Escape for HTML attribute (newlines become &#10;)
								            tooltip_escaped = html.escape(tooltip_text, quote=True).replace('\n', '&#10;')

								            opening_tag = (
								                f'<span class="var-tooltip" '
								                f'data-symbol="{html.escape(symbol, quote=True)}" '
								                f'title="{tooltip_escaped}">'
								            )

								            html_content, count = self._wrap_symbol(html_content, pattern, opening_tag)
								            if count:
								                wrapped_vars.add(symbol)

								        if wrapped_vars:
								            print(f"[VariableWrapper] Wrapped {len(wrapped_vars)} unique variables: {', '.join(sorted(wrapped_vars)[:10])}...")

								        return html_content

								    def wrap_in_context(self, html_content: str) -> str:
								        """
								        Wrap variables everywhere except inside <code> and <pre> elements

								        Code elements (with or without attributes, any letter case) are
								        swapped for placeholders, the remaining content is passed to
								        ``wrap_variables``, and the code elements are then put back
								        verbatim.

								        Args:
								            html_content: HTML content to process

								        Returns:
								            HTML content with variables wrapped (context-aware).
								            Empty input is returned unchanged.
								        """
								        # TODO: Implement HTML parsing to be more selective
								        # (e.g., skip <h1>-<h6> tags)
								        if not html_content:
								            return html_content

								        code_blocks: List[str] = []

								        def preserve_code(match):
								            """Preserve code blocks and replace with placeholder"""
								            code_blocks.append(match.group(0))
								            return f"___CODE_BLOCK_{len(code_blocks) - 1}___"

								        # Temporarily remove code blocks. The optional group allows
								        # attributes such as class="language-python" on the opening tag.
								        html_content = re.sub(
								            r'<(code|pre)(?:\s[^>]*)?>.*?</\1\s*>',
								            preserve_code,
								            html_content,
								            flags=re.DOTALL | re.IGNORECASE
								        )

								        # Wrap variables
								        html_content = self.wrap_variables(html_content)

								        def restore_code(match):
								            """Return the stored code block for a placeholder"""
								            index = int(match.group(1))
								            if index < len(code_blocks):
								                return code_blocks[index]
								            # Not one of our placeholders: leave the text as it is.
								            return match.group(0)

								        # Restore in a single pass so that restored code is never
								        # rescanned for placeholder-like text.
								        return re.sub(r'___CODE_BLOCK_(\d+)___', restore_code, html_content)