Source code for docupy.markdown

"""This module contains functions for parsing markdown."""

import re
from collections import OrderedDict

# Tag-name prefixes that add_p_tags treats as block-level: a line starting
# with "<" + name or "</" + name is not wrapped in <p> tags.
BLOCKS = ["h", "code", "div", "figure", "video", "li"]

# Regex -> replacement rules. markdown_to_html applies them with re.sub in
# exactly this order, so the order of the entries is significant.
PATTERNS = OrderedDict([
    # Raw angle brackets become numeric entities before any tags are emitted.
    (r"<", "&#60;"),
    (r">", "&#62;"),

    # Headings, deepest level first so "###" is not consumed by the "#" rule.
    (r"\n### (.+)", "\n<h3>\\1</h3>"),
    (r"\n## (.+)", "\n<h2>\\1</h2>"),
    (r"\n# (.+)", "\n<h1>\\1</h1>"),

    # Inline emphasis; "**" is handled before the single "*" rule.
    (r"\~\~(.+?)\~\~", "<del>\\1</del>"),
    (r"\*\*(.+?)\*\*", "<strong>\\1</strong>"),
    (r"\*(.+?)\*", "<em>\\1</em>"),

    # List items. The trailing "u"/"o" records the list type; add_list_tags
    # reads it and strips it off again.
    (r"\n- *(.+)", "\n<li>\\1</li>u"),
    (r"\n\d+\. *(.+)", "\n<li>\\1</li>o"),

    # Media at the start of a line: captioned image, plain image, video and
    # YouTube embed.
    (
        r"\n\!\[(.+?)\]\[(.+?)\]\((.+?)\)",
        '\n<figure><img src="\\3" title="\\1">'
        '<figcaption>\\2</figcaption></figure>',
    ),
    (
        r"\n\!\[(.+?)\]\((.+?)\)",
        '\n<figure><img src="\\2" title="\\1"></figure>',
    ),
    (r"\n\!\((.+?)\)", '\n<video src="\\1" controls></video>'),
    (
        r"\n\!\{(.+?)\}",
        '\n<div class="youtube"><iframe src="//www.youtube.com/embed/\\1/"'
        ' frameborder="0" allowfullscreen></iframe></div>',
    ),

    # Links written with square brackets, without and with a quoted title.
    (r"\[(.+?)\]\(((.(?!\"))+?)\)", '<a href="\\2">\\1</a>'),
    (r"\[(.+?)\]\((.+?) \"(.+)\"\)", '<a href="\\2" title="\\3">\\1</a>'),

    # Links written with curly braces get target="_blank".
    (r"\{(.+?)\}\(((.(?!\"))+?)\)", '<a href="\\2" target="_blank">\\1</a>'),
    (
        r"\{(.+?)\}\((.+?) \"(.+)\"\)",
        '<a href="\\2" target="_blank" title="\\3">\\1</a>',
    ),
    (r"\{(.+?)\}\((.+?)\)", '<a href="\\2" target="_blank">\\1</a>'),

    # Code: fenced with a language, fenced without one, then inline.
    (
        r"```(.+?)\n([\S\s]+?)\n```",
        '<pre><code data-language="\\1">\\2</code></pre>',
    ),
    (r"```\n([\S\s]+?)\n```", "<pre><code>\\1</code></pre>"),
    (r"`([\S\s]+?)`", "<code>\\1</code>"),
])

def markdown_to_html(markdown, paths=None):
    """Takes a string in markdown, and converts it to HTML.

    Backslash-escaped characters are hidden from the regex pass behind
    placeholders and put back afterwards. Each placeholder carries the index
    of the character it stands for, so every character returns to its own
    position even when a pattern reorders its groups (the link patterns, for
    example, emit the URL before the link text).

    :param str markdown: The markdown to convert.
    :param dict paths: If given, any ``src`` attribute whose value is a key\
    of this lookup is rewritten to the matching value.
    :rtype: ``str``"""

    markdown, characters = escape_characters(markdown)

    # escape_characters marks each escaped character with a bare "\x1A".
    # Number the markers so they stay identifiable if the patterns move
    # them around. Only the first len(characters) markers are numbered.
    pieces = markdown.split("\x1A", len(characters))
    html = "\n" + pieces[0] + "".join(
        "\x1A{}\x1A".format(index) + piece
        for index, piece in enumerate(pieces[1:])
    )

    for key, value in PATTERNS.items():
        html = re.sub(key, value, html)

    if paths:
        for k, v in paths.items():
            html = html.replace('src="{}"'.format(k), 'src="{}"'.format(v))

    def restore(match):
        # A function replacement is inserted literally, so an escaped
        # backslash is not reinterpreted by re.sub.
        index = int(match.group(1))
        if index < len(characters):
            return characters[index]
        return match.group(0)

    html = re.sub("\x1A([0-9]+)\x1A", restore, html)

    lines = html.splitlines()
    lines = add_p_tags(lines)
    lines = add_list_tags(lines)
    return "\n".join(lines)
def escape_characters(markdown):
    """Replaces every backslash-escaped character in some markdown with the
    substitution character (``\\x1A``) and collects the characters that were
    escaped, in the order they appear.

    A backslash directly before a line break is dropped and the line break is
    kept. A backslash that is the very last character of the string is left
    as it is.

    :param str markdown: The string to process.
    :rtype: ``str``, ``list``"""

    escaped = []
    pieces = []
    cursor = 0
    last_index = len(markdown) - 1
    while True:
        slash = markdown.find("\\", cursor)
        # Stop when there are no more backslashes, or the next one has
        # nothing after it to escape.
        if slash == -1 or slash == last_index:
            break
        pieces.append(markdown[cursor:slash])
        following = markdown[slash + 1]
        if following == "\n":
            # Drop the backslash only; the line break is copied as normal
            cursor = slash + 1
        else:
            escaped.append(following)
            pieces.append("\x1A")
            cursor = slash + 2
    pieces.append(markdown[cursor:])
    return "".join(pieces), escaped
def add_p_tags(lines):
    """Wraps HTML lines in p tags unless they are already block-level.

    A line is left unwrapped if it starts with an opening or closing tag
    whose name begins with one of ``BLOCKS``, or if it lies inside a ``<pre>``
    block. Blank lines are discarded, except inside a ``<pre>`` block where
    they are kept.

    :param list lines: the lines to convert.
    :rtype: ``list``"""

    wrapped = []
    inside_pre = False
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # Blank lines only survive inside preformatted code
            if inside_pre:
                wrapped.append(line)
            continue
        if stripped.startswith("<pre"):
            inside_pre = True
        is_block = inside_pre or any(
            line.startswith(("<" + name, "</" + name)) for name in BLOCKS
        )
        if not is_block:
            line = "<p>{}</p>".format(line)
        # Checked after wrapping, so only an unwrapped line can close a block
        if line.strip().endswith("</pre>"):
            inside_pre = False
        wrapped.append(line)
    return wrapped
def add_list_tags(lines):
    """Takes a list of HTML lines and puts list tags around each run of
    consecutive ``<li`` lines.

    Every ``<li`` line is expected to end with a one-character list marker
    (``u`` or ``o``). The marker on the first item of a run decides whether
    the run is wrapped in ``<ul>`` or ``<ol>``, and the marker is removed
    from every item.

    A run may start on the very first line, and the list passed in is not
    modified.

    :param list lines: the lines to convert.
    :rtype: ``list``"""

    output_lines = []
    list_type = "u"
    in_list = False
    for index, line in enumerate(lines):
        if not line.startswith("<li"):
            output_lines.append(line)
            continue
        if not in_list:
            # The first item of a run fixes the list type for the whole run
            list_type = line[-1]
            output_lines.append("<{}l>".format(list_type))
            in_list = True
        output_lines.append(line[:-1])
        following = lines[index + 1] if index + 1 < len(lines) else ""
        if not following.startswith("<li"):
            output_lines.append("</{}l>".format(list_type))
            in_list = False
    return output_lines