# (c) 2013-2018 Sebastian Humenda
# This code is licenced under the terms of the LGPL-3+, see the file COPYING for
# more details.
"""This module contains functionality to parse formulas from a given Pandoc
document AST and to replace these through formatted HTML equations.

Even though this could be done in a single run, this would conflict with the
internal GleeTeX structure. Keeping extraction and replacement as two separate
steps also allows for an easy parallelisation of the formula conversion."""

import json

from .htmlhandling import ParseException


def __extract_formulas(formulas, ast):
    """Recursively extract 'Math' elements from the given AST and add them to
    `formulas (list)`.

    :param formulas List that collected formulas are appended to; each entry is
            a tuple (None, is_displaymath, formula)
    :param ast      A (sub)tree of a Pandoc AST: nested lists and dicts; any
            other value is ignored
    :raises ParseException if a 'Math' element has a math type other than
            'InlineMath' or 'DisplayMath'"""
    if isinstance(ast, list):
        for item in ast:
            __extract_formulas(formulas, item)
    elif isinstance(ast, dict):
        if ast.get('t') == 'Math':
            style, formula = ast['c']
            # The math type is a tagged node itself ({'t': 'InlineMath'}). Read
            # the tag by its key instead of taking the first dict value, so
            # that additional keys or a different key order cannot hide it.
            math_type = style.get('t')
            if math_type not in ('InlineMath', 'DisplayMath'):
                raise ParseException("[pandoc] unknown formula formatting: " +
                                     repr(ast['c']))
            # position is None (only applicable for HTML parsing)
            formulas.append((None, math_type == 'DisplayMath', formula))
        elif 'c' in ast:
            __extract_formulas(formulas, ast['c'])
        # ^ all other cases do not matter


def extract_formulas(ast):
    """Extract formulas from a given Pandoc document AST. The returned formulas
    are typed like those from the HTML parser, therefore the first element of
    each tuple is unused and hence None.

    :param ast  Structure of lists and dicts representing a Pandoc document
            AST; only its 'blocks' entry is searched
    :return a list of formulas where each formula is (None, style, formula);
            style is True for display math and False for inline math"""
    formulas = []
    __extract_formulas(formulas, ast['blocks'])
    return formulas


def replace_formulas_in_ast(formatter, ast, formulas):
    """Replace 'Math' elements from the given AST with a formatted variant.

    Each 'Math' element found in the Pandoc AST will be replaced through a
    formatted (HTML) image link. The formulas are taken from the supplied
    formulas list. The number of formulas in the document has to match the
    number of formulas from the list.

    Both arguments are modified in place: every replaced node becomes a
    'RawInline' HTML node and the formula used for it is removed from the front
    of `formulas`. Once `formulas` is empty, no further nodes are touched.

    :param formatter    A formatter offering the "format" method; it is called
            with (pos, formula, path, displaymath)
    :param ast          A (sub)tree of a Pandoc AST (nested lists and dicts)
    :param formulas     A list of dicts with the keys 'pos', 'formula', 'path'
            and 'displaymath', in document order"""
    if not formulas:
        return
    if isinstance(ast, list):
        for item in ast:
            replace_formulas_in_ast(formatter, item, formulas)
    elif isinstance(ast, dict):
        if ast.get('t') == 'Math':
            ast['t'] = 'RawInline'  # raw HTML
            eqn = formulas.pop(0)
            ast['c'] = ['html', formatter.format(eqn['pos'], eqn['formula'],
                                                 eqn['path'],
                                                 eqn['displaymath'])]
        elif 'c' in ast:
            replace_formulas_in_ast(formatter, ast['c'], formulas)
        # ^ ignore all other cases


def write_pandoc_ast(file, document, formatter):
    """Replace 'Math' elements from a Pandoc AST with 'RawInline' elements,
    containing formatted HTML image tags, and write the result as JSON.

    :param file         Object with a "write" method; it receives the
            JSON-serialised AST as a single string
    :param document     A pair (ast, formulas): the Pandoc document AST to be
            modified and the list of formulas with the information (pos,
            formula, path, displaymath); both are modified in place
    :param formatter    A formatter offering the "format" method (see
            ImageFormatter)"""
    ast, formulas = document
    replace_formulas_in_ast(formatter, ast['blocks'], formulas)
    file.write(json.dumps(ast))