"""This module contains for functions for parsing markdown."""
import re
from collections import OrderedDict
BLOCKS = ["h", "code", "div", "figure", "video", "li"]
PATTERNS = OrderedDict((
(r"<", "<"),
(r">", ">"),
(r"\n### (.+)", "\n<h3>\\1</h3>"),
(r"\n## (.+)", "\n<h2>\\1</h2>"),
(r"\n# (.+)", "\n<h1>\\1</h1>"),
(r"\~\~(.+?)\~\~", "<del>\\1</del>"),
(r"\*\*(.+?)\*\*", "<strong>\\1</strong>"),
(r"\*(.+?)\*", "<em>\\1</em>"),
(r"\n- *(.+)", "\n<li>\\1</li>u"),
(r"\n\d+\. *(.+)", "\n<li>\\1</li>o"),
(r"\n\!\[(.+?)\]\[(.+?)\]\((.+?)\)", "\n<figure><img src=\"\\3\" "
"title=\"\\1\"><figcaption>\\2</figcaption></figure>"),
(r"\n\!\[(.+?)\]\((.+?)\)", "\n<figure><img src=\"\\2\" title=\"\\1\"></figure>"),
(r"\n\!\((.+?)\)", "\n<video src=\"\\1\" controls></video>"),
(r"\n\!\{(.+?)\}", "\n<div class=\"youtube\">""<iframe src=\"//www.youtube.com"
"/embed/\\1/\" frameborder=\"0\" allowfullscreen></iframe></div>"),
(r"\[(.+?)\]\(((.(?!\"))+?)\)", "<a href=\"\\2\">\\1</a>"),
(r"\[(.+?)\]\((.+?) \"(.+)\"\)", "<a href=\"\\2\" title=\"\\3\">\\1</a>"),
(r"\{(.+?)\}\(((.(?!\"))+?)\)", "<a href=\"\\2\" target=\"_blank\">\\1</a>"),
(r"\{(.+?)\}\((.+?) \"(.+)\"\)", "<a href=\"\\2\" target=\"_blank\" "
"title=\"\\3\">\\1</a>"),
(r"\{(.+?)\}\((.+?)\)", "<a href=\"\\2\" target=\"_blank\">\\1</a>"),
(r"```(.+?)\n([\S\s]+?)\n```", "<pre><code data-language=\"\\1\">\\2</code></pre>"),
(r"```\n([\S\s]+?)\n```", "<pre><code>\\1</code></pre>"),
(r"`([\S\s]+?)`", "<code>\\1</code>")
))
def markdown_to_html(markdown, paths=None):
    """Takes a string in markdown, and converts it to HTML.

    Backslash-escaped characters are swapped for a placeholder before the
    substitution patterns run and are put back afterwards, so they are never
    interpreted as markdown syntax.

    :param str markdown: The markdown to convert.
    :param dict paths: If given, any ``src`` attribute whose value is a key of\
    this lookup is rewritten to the corresponding value.
    :rtype: ``str``"""

    markdown, characters = escape_characters(markdown)
    # The leading newline lets the patterns that start with "\n" match on the
    # very first line of the document too.
    html = "\n" + markdown
    for pattern, replacement in PATTERNS.items():
        html = re.sub(pattern, replacement, html)
    if paths:
        for old, new in paths.items():
            html = html.replace('src="{}"'.format(old), 'src="{}"'.format(new))
    # Put the escaped characters back: each pass replaces the first remaining
    # placeholder with the next recorded character.
    for character in characters:
        html = html.replace("\x1A", character, 1)
    lines = html.splitlines()
    lines = add_p_tags(lines)
    lines = add_list_tags(lines)
    return "\n".join(lines)
def escape_characters(markdown):
    """Takes some markdown and replaces escaped characters with the
    substitution character (``\\x1A``). This is returned along with a list of
    the escaped characters, in the order they appeared.

    You cannot escape line breaks - the backslash will be removed but not the
    line break. A backslash that is the very last character of the string has
    nothing to escape and is left untouched.

    :param str markdown: The string to break up.
    :rtype: ``str``, ``list``"""

    characters = []

    def substitute(match):
        # Called once per backslash + following character, left to right.
        character = match.group(1)
        if character == "\n":
            # Drop the backslash only; the line break itself is kept.
            return "\n"
        characters.append(character)
        return "\x1A"

    # [\s\S] matches any character including a newline; a trailing lone
    # backslash has no following character and so is never matched.
    return re.sub(r"\\([\s\S])", substitute, markdown), characters