import warnings
from io import StringIO

from django.template.base import Lexer, TokenType
from django.utils.regex_helper import _lazy_re_compile

from . import TranslatorCommentWarning, trim_whitespace

TRANSLATOR_COMMENT_MARK = "Translators"

dot_re = _lazy_re_compile(r"\S")


def blankout(src, char):
    """
    Return ``src`` with every non-whitespace character replaced by ``char``.

    Whitespace, newlines included, is kept as-is. Used by templatize().
    """
    return dot_re.sub(char, src)


context_re = _lazy_re_compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""")
inline_re = _lazy_re_compile(
    # Match the trans/translate 'some text' part.
    r"""^\s*trans(?:late)?\s+((?:"[^"]*?")|(?:'[^']*?'))"""
    # Match and ignore optional filters
    r"""(?:\s*\|\s*[^\s:]+(?::(?:[^\s'":]+|(?:"[^"]*?")|(?:'[^']*?')))?)*"""
    # Match the optional context part
    r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?\s*"""
)
block_re = _lazy_re_compile(
    r"""^\s*blocktrans(?:late)?(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?(?:\s+|$)"""
)
endblock_re = _lazy_re_compile(r"""^\s*endblocktrans(?:late)?$""")
plural_re = _lazy_re_compile(r"""^\s*plural$""")
constant_re = _lazy_re_compile(r"""_\(((?:".*?")|(?:'.*?'))\)""")


def templatize(src, origin=None):
    """
    Turn Django template source into text that xgettext understands.

    The source is tokenized with the template Lexer and every token is
    written to the result either as a gettext-style call, as a ``#`` comment,
    or as filler produced by blankout():

    * ``{% trans %}`` / ``{% translate %}`` tags become ``gettext()`` calls,
      or ``pgettext()`` calls when a context is given.
    * ``{% blocktrans %}`` / ``{% blocktranslate %}`` blocks become
      ``gettext()``, ``pgettext()``, ``ngettext()`` or ``npgettext()`` calls
      once their closing tag is reached. Inside the block, text has ``%``
      doubled and each variable is turned into a ``%(name)s`` placeholder.
    * ``_("...")`` constants found in block tags and variables are written
      as ``_(...)`` calls.
    * ``{% comment %}`` blocks are written as ``#`` lines; the text is kept
      from the last line starting with TRANSLATOR_COMMENT_MARK onwards.
    * ``{# ... #}`` comments starting with TRANSLATOR_COMMENT_MARK are
      written as a ``#`` comment when the next token ends on a different
      line.

    ``src`` is the template source. ``origin``, when truthy, is only used to
    name the file in error and warning messages.

    Raise SyntaxError if a block tag other than ``plural`` or the closing
    tag appears inside a translation block. Issue a TranslatorCommentWarning
    for each translator comment followed, on its own line, by a non-comment
    token.
    """
    out = StringIO("")
    emit = out.write

    # Parser state, updated while walking the token stream.
    message_context = None
    intrans = False
    inplural = False
    trimmed = False
    singular = []
    plural = []
    incomment = False
    comment = []
    lineno_comment_map = {}
    comment_lineno_cache = None
    # Adding the u prefix allows gettext to recognize the string (#26093).
    raw_prefix = "u"

    def join_tokens(tokens, trim=False):
        # Concatenate the collected pieces, optionally through
        # trim_whitespace().
        joined = "".join(tokens)
        return trim_whitespace(joined) if trim else joined

    def unquote(literal):
        # Strip the quote character the literal starts with from both ends.
        opening = literal[0]
        if opening in ('"', "'"):
            return literal.strip(opening)
        return literal

    def file_prefix():
        # "file <origin>, " when an origin is known, otherwise nothing.
        return "file %s, " % origin if origin else ""

    for token in Lexer(src).tokenize():
        # State 1: inside a {% comment %} ... {% endcomment %} block.
        if incomment:
            closes_comment = (
                token.token_type == TokenType.BLOCK
                and token.contents == "endcomment"
            )
            if not closes_comment:
                comment.append(token.contents)
                continue
            comment_lines = "".join(comment).splitlines(True)
            marked = [
                index
                for index, line in enumerate(comment_lines)
                if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK)
            ]
            # The last marked line wins; everything before it is emitted as
            # an empty comment line.
            start = marked[-1] if marked else None
            for index, line in enumerate(comment_lines):
                if start is None or index < start:
                    emit(" #\n")
                else:
                    emit(" # %s" % line)
            incomment = False
            comment = []
            continue

        # State 2: inside a {% blocktrans %} ... {% endblocktrans %} block.
        if intrans:
            if token.token_type == TokenType.BLOCK:
                if endblock_re.match(token.contents):
                    singular_msg = join_tokens(singular, trimmed)
                    if inplural:
                        plural_msg = join_tokens(plural, trimmed)
                        if message_context:
                            emit(
                                " npgettext({p}{!r}, {p}{!r}, {p}{!r},count) ".format(
                                    message_context,
                                    singular_msg,
                                    plural_msg,
                                    p=raw_prefix,
                                )
                            )
                        else:
                            emit(
                                " ngettext({p}{!r}, {p}{!r}, count) ".format(
                                    singular_msg,
                                    plural_msg,
                                    p=raw_prefix,
                                )
                            )
                        out.writelines(blankout(piece, "S") for piece in singular)
                        out.writelines(blankout(piece, "P") for piece in plural)
                    else:
                        if message_context:
                            emit(
                                " pgettext({p}{!r}, {p}{!r}) ".format(
                                    message_context,
                                    singular_msg,
                                    p=raw_prefix,
                                )
                            )
                        else:
                            emit(
                                " gettext({p}{!r}) ".format(
                                    singular_msg,
                                    p=raw_prefix,
                                )
                            )
                        out.writelines(blankout(piece, "S") for piece in singular)
                    # The block is closed: reset the translation state.
                    message_context = None
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif plural_re.match(token.contents):
                    inplural = True
                else:
                    raise SyntaxError(
                        "Translation blocks must not include other block tags: "
                        "%s (%sline %d)"
                        % (token.contents, file_prefix(), token.lineno)
                    )
            elif token.token_type == TokenType.VAR:
                pieces = plural if inplural else singular
                pieces.append("%%(%s)s" % token.contents)
            elif token.token_type == TokenType.TEXT:
                pieces = plural if inplural else singular
                pieces.append(token.contents.replace("%", "%%"))
            continue

        # State 3: ordinary template content.
        # Handle comment tokens (`{# ... #}`) plus other constructs on
        # the same line:
        if comment_lineno_cache is not None:
            cur_lineno = token.lineno + token.contents.count("\n")
            if comment_lineno_cache != cur_lineno:
                emit("# %s" % " | ".join(lineno_comment_map[comment_lineno_cache]))
            elif token.token_type != TokenType.COMMENT:
                for ignored in lineno_comment_map[comment_lineno_cache]:
                    warn_msg = (
                        "The translator-targeted comment '%s' "
                        "(%sline %d) was ignored, because it wasn't "
                        "the last item on the line."
                    ) % (ignored, file_prefix(), comment_lineno_cache)
                    warnings.warn(warn_msg, TranslatorCommentWarning)
                lineno_comment_map[comment_lineno_cache] = []
            comment_lineno_cache = None

        if token.token_type == TokenType.BLOCK:
            inline_match = inline_re.match(token.contents)
            block_match = block_re.match(token.contents)
            constants = constant_re.findall(token.contents)
            if inline_match:
                msgid = unquote(inline_match[1]).replace("%", "%%")
                if inline_match[2]:
                    # A context is provided
                    context_match = context_re.match(inline_match[2])
                    message_context = unquote(context_match[1])
                    emit(
                        " pgettext({p}{!r}, {p}{!r}) ".format(
                            message_context, msgid, p=raw_prefix
                        )
                    )
                    message_context = None
                else:
                    emit(" gettext({p}{!r}) ".format(msgid, p=raw_prefix))
            elif block_match:
                for constant in constants:
                    emit(" _(%s) " % constant)
                if block_match[1]:
                    # A context is provided
                    context_match = context_re.match(block_match[1])
                    message_context = unquote(context_match[1])
                intrans = True
                inplural = False
                trimmed = "trimmed" in token.split_contents()
                singular = []
                plural = []
            elif constants:
                for constant in constants:
                    emit(" _(%s) " % constant)
            elif token.contents == "comment":
                incomment = True
            else:
                emit(blankout(token.contents, "B"))
        elif token.token_type == TokenType.VAR:
            expression, *filters = token.contents.split("|")
            constant_match = constant_re.match(expression)
            if constant_match:
                emit(" _(%s) " % constant_match[1])
            for filter_part in filters:
                if ":_(" in filter_part:
                    emit(" %s " % filter_part.split(":", 1)[1])
                else:
                    emit(blankout(filter_part, "F"))
        elif token.token_type == TokenType.COMMENT:
            if token.contents.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                lineno_comment_map.setdefault(token.lineno, []).append(
                    token.contents
                )
                comment_lineno_cache = token.lineno
        else:
            emit(blankout(token.contents, "X"))
    return out.getvalue()