tags.
"""
yield 0, ""
yield from inner
yield 0, ""
def _add_newline(self, inner):
# Add newlines around the inner contents so that _strict_tag_block_re matches the outer div.
yield 0, "\n"
yield from inner
yield 0, "\n"
def wrap(self, source, outfile=None):
    """Return the source with a code, pre, and div.

    The code/pre/newline wrapping is always applied; the div wrapper is only
    added when an ``outfile`` is passed (the pygments < 2.12 calling style).
    """
    framed = self._add_newline(self._wrap_pre(self._wrap_code(source)))
    if outfile is not None:
        # pygments < 2.12
        return self._wrap_div(framed)
    # pygments >= 2.12
    return framed
formatter_opts.setdefault("cssclass", "codehilite")
formatter = HtmlCodeFormatter(**formatter_opts)
return pygments.highlight(codeblock, lexer, formatter)
def _code_block_sub(self, match: re.Match) -> str:
codeblock = match.group(1)
codeblock = self._outdent(codeblock)
codeblock = self._detab(codeblock)
codeblock = codeblock.lstrip('\n') # trim leading newlines
codeblock = codeblock.rstrip() # trim trailing whitespace
pre_class_str = self._html_class_str_from_tag("pre")
code_class_str = self._html_class_str_from_tag("code")
codeblock = self._encode_code(codeblock)
return "\n{}\n
\n".format(
pre_class_str, code_class_str, codeblock)
def _html_class_str_from_tag(self, tag: str) -> str:
"""Get the appropriate ' class="..."' string (note the leading
space), if any, for the given tag.
"""
if "html-classes" not in self.extras:
return ""
try:
html_classes_from_tag = self.extras["html-classes"]
except TypeError:
return ""
else:
if isinstance(html_classes_from_tag, dict):
if tag in html_classes_from_tag:
return ' class="%s"' % html_classes_from_tag[tag]
return ""
@mark_stage(Stage.CODE_BLOCKS)
def _do_code_blocks(self, text: str) -> str:
    """Process Markdown `<pre><code>` blocks.

    Finds runs of lines indented by a tab or ``self.tab_width`` spaces and
    replaces each run with the result of ``self._code_block_sub``.
    """
    code_block_re = re.compile(r'''
        (?:\n\n|\A\n?)
        (               # $1 = the code block -- one or more lines, starting with a space/tab
          (?:
            (?:[ ]{%d} | \t)  # Lines must start with a tab or a tab-width of spaces
            .*\n+
          )+
        )
        ((?=^[ ]{0,%d}\S)|\Z)   # Lookahead for non-space at line-start, or end of doc
        # Lookahead to make sure this block isn't already in a code block.
        # Needed when syntax highlighting is being used.
        (?!([^<]|<(/?)span)*\</code\>)
        ''' % (self.tab_width, self.tab_width),
        re.M | re.X)
    return code_block_re.sub(self._code_block_sub, text)
# Rules for a code span:
# - backslash escapes are not interpreted in a code span
# - to include one backtick, or a run of several, the delimiters must
# be a longer run of backticks
# - cannot start or end a code span with a backtick; pad with a
# space and that space will be removed in the emitted HTML
# See `test/tm-cases/escapes.text` for a number of edge-case
# examples.
_code_span_re = re.compile(r'''
(? str:
c = match.group(2).strip(" \t")
c = self._encode_code(c)
return "{}".format(self._html_class_str_from_tag("code"), c)
@mark_stage(Stage.CODE_SPANS)
def _do_code_spans(self, text: str) -> str:
    """Replace every backtick-delimited code span in ``text``.

    Each match of ``self._code_span_re`` is handed to
    ``self._code_span_sub``.
    """
    #   * Backtick quotes are used for code spans.
    #
    #   * Multiple backticks may be used as the delimiters in order to
    #     include literal backticks in the span. So, this input:
    #
    #         Just type ``foo `bar` baz`` at the prompt.
    #
    #     produces a single code span whose content is:
    #
    #         foo `bar` baz
    #
    #     There's no arbitrary limit to the number of backticks you
    #     can use as delimiters. If you need three consecutive backticks
    #     in your code, use four for delimiters, etc.
    #
    #   * Spaces can be used to get literal backticks at the edges:
    #
    #         ... type `` `bar` `` ...
    #
    #     produces a code span whose content is:
    #
    #         `bar`
    span_re = self._code_span_re
    return span_re.sub(self._code_span_sub, text)
def _encode_code(self, text: str) -> str:
    """Encode/escape certain characters inside Markdown code runs.

    The point is that in code, these characters are literals,
    and lose their special Markdown meanings.

    Returns:
        The hash produced by ``_hash_text`` for the escaped text; the
        escaped text is recorded in ``self._code_table`` under that hash's
        source text so it can be swapped back later.
    """
    replacements = [
        # Encode all ampersands; HTML entities are not
        # entities within a Markdown code span.
        # Must run first so the entities added below are not re-escaped.
        ('&', '&amp;'),
        # Do the angle bracket song and dance:
        ('<', '&lt;'),
        ('>', '&gt;'),
    ]
    for before, after in replacements:
        text = text.replace(before, after)
    hashed = _hash_text(text)
    self._code_table[text] = hashed
    return hashed
_strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]?)(?<=\S)\1", re.S)
_em_re = re.compile(r"(\*|_)(?=\S)(.*?\S)\1", re.S)
@mark_stage(Stage.ITALIC_AND_BOLD)
def _do_italics_and_bold(self, text: str) -> str:
    """Convert `**bold**`/`__bold__` and `*em*`/`_em_` markers to HTML tags."""
    # <strong> must go first: otherwise the em pattern would consume one
    # marker character from each side of a strong run.
    text = self._strong_re.sub(r"<strong>\2</strong>", text)
    text = self._em_re.sub(r"<em>\2</em>", text)
    return text
_block_quote_base = r'''
( # Wrap whole match in \1
(
^[ \t]*>%s[ \t]? # '>' at the start of a line
.+\n # rest of the first line
(.+\n)* # subsequent consecutive lines
)+
)
'''
_block_quote_re = re.compile(_block_quote_base % '', re.M | re.X)
_block_quote_re_spoiler = re.compile(_block_quote_base % '[ \t]*?!?', re.M | re.X)
_bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M)
_bq_one_level_re_spoiler = re.compile('^[ \t]*>[ \t]*?![ \t]?', re.M)
_bq_all_lines_spoilers = re.compile(r'\A(?:^[ \t]*>[ \t]*?!.*[\n\r]*)+\Z', re.M)
_html_pre_block_re = re.compile(r'(\s*.+?
)', re.S)
def _dedent_two_spaces_sub(self, match: re.Match) -> str:
return re.sub(r'(?m)^ ', '', match.group(1))
def _block_quote_sub(self, match: re.Match) -> str:
bq = match.group(1)
is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq)
# trim one level of quoting
if is_spoiler:
bq = self._bq_one_level_re_spoiler.sub('', bq)
else:
bq = self._bq_one_level_re.sub('', bq)
# trim whitespace-only lines
bq = self._ws_only_line_re.sub('', bq)
bq = self._run_block_gamut(bq) # recurse
bq = re.sub('(?m)^', ' ', bq)
# These leading spaces screw with content, so we need to fix that:
bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
if is_spoiler:
return '\n%s\n
\n\n' % bq
else:
return '\n%s\n
\n\n' % bq
@mark_stage(Stage.BLOCK_QUOTES)
def _do_block_quotes(self, text: str) -> str:
    """Replace block quotes in ``text`` via ``self._block_quote_sub``.

    Text without any '>' is returned untouched. The spoiler-aware pattern
    is used when the 'spoiler' extra is enabled.
    """
    if '>' not in text:
        return text
    quote_re = (
        self._block_quote_re_spoiler
        if 'spoiler' in self.extras
        else self._block_quote_re
    )
    return quote_re.sub(self._block_quote_sub, text)
@mark_stage(Stage.PARAGRAPHS)
def _form_paragraphs(self, text: str) -> str:
    """Split ``text`` on blank lines and wrap each chunk in `<p>` tags.

    Chunks that are keys of ``self.html_blocks`` are swapped for the stored
    HTML instead of being wrapped. With the 'cuddled-lists' extra, a list
    that directly follows paragraph text is split off and emitted after
    the paragraph.
    """
    # Strip leading and trailing lines:
    text = text.strip('\n')

    # Wrap <p> tags.
    grafs = []
    for graf in re.split(r"\n{2,}", text):
        if graf in self.html_blocks:
            # Unhashify HTML blocks
            grafs.append(self.html_blocks[graf])
        else:
            cuddled_list = None
            if "cuddled-lists" in self.extras:
                # Need to put back trailing '\n' for `_list_item_re`
                # match at the end of the paragraph.
                li = self._list_item_re.search(graf + '\n')
                # Two of the same list marker in this paragraph: a likely
                # candidate for a list cuddled to preceding paragraph
                # text (issue 33). Note the `[-1]` is a quick way to
                # consider numeric bullets (e.g. "1." and "2.") to be
                # equal.
                if (li and len(li.group(2)) <= 3
                    and (
                        (li.group("next_marker") and li.group("marker")[-1] == li.group("next_marker")[-1])
                        or
                        li.group("next_marker") is None
                    )
                ):
                    start = li.start()
                    cuddled_list = self._do_lists(graf[start:]).rstrip("\n")
                    if re.match(r'^<(?:ul|ol).*?>', cuddled_list):
                        graf = graf[:start]
                    else:
                        # Not quite a cuddled list. (See not_quite_a_list_cuddled_lists test case)
                        # Store as a simple paragraph.
                        graf = cuddled_list
                        cuddled_list = None

            # Wrap <p> tags.
            graf = self._run_span_gamut(graf)
            grafs.append("<p%s>" % self._html_class_str_from_tag('p') + graf.lstrip(" \t") + "</p>")

            if cuddled_list:
                grafs.append(cuddled_list)

    return "\n\n".join(grafs)
def _add_footnotes(self, text: str) -> str:
if self.footnotes:
footer = [
'')
return text + '\n\n' + '\n'.join(footer)
else:
return text
_naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I)
_naked_gt_re = re.compile(r'''(?''', re.I)
def _encode_amps_and_angles(self, text: str) -> str:
    """Entity-encode ampersands and naked angle brackets in ``text``.

    Only characters matched by `_AMPERSAND_RE`, `_naked_lt_re` and
    `_naked_gt_re` are replaced.
    """
    # Smart processing for ampersands and angle brackets that need
    # to be encoded.
    text = _AMPERSAND_RE.sub('&amp;', text)

    # Encode naked <'s
    text = self._naked_lt_re.sub('&lt;', text)

    # Encode naked >'s
    # Note: Other markdown implementations (e.g. Markdown.pl, PHP
    # Markdown) don't do this.
    text = self._naked_gt_re.sub('&gt;', text)
    return text
_incomplete_tags_re = re.compile(r"<(!--|/?\w+?(?!\w)\s*?.+?(?:[\s/]+?|$))")
def _encode_incomplete_tags(self, text: str) -> str:
if self.safe_mode not in ("replace", "escape"):
return text
if self._is_auto_link(text):
return text # this is not an incomplete tag, this is a link in the form
def incomplete_tags_sub(match):
return match.group().replace('<', '<')
return self._incomplete_tags_re.sub(incomplete_tags_sub, text)
def _encode_backslash_escapes(self, text: str) -> str:
for ch, escape in list(self._escape_table.items()):
text = text.replace("\\"+ch, escape)
return text
_auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
def _auto_link_sub(self, match: re.Match) -> str:
g1 = match.group(1)
return '{}'.format(self._protect_url(g1), g1)
_auto_email_link_re = re.compile(r"""
<
(?:mailto:)?
(
[-.\w]+
\@
[-\w]+(\.[-\w]+)*\.[a-z]+
)
>
""", re.I | re.X | re.U)
def _auto_email_link_sub(self, match: re.Match) -> str:
return self._encode_email_address(
self._unescape_special_chars(match.group(1)))
def _do_auto_links(self, text: str) -> str:
text = self._auto_link_re.sub(self._auto_link_sub, text)
text = self._auto_email_link_re.sub(self._auto_email_link_sub, text)
return text
def _encode_email_address(self, addr: str) -> str:
    """Return ``addr`` as an obfuscated mailto anchor.

    Every character of "mailto:" + ``addr`` is passed through
    ``_xml_encode_email_char_at_random``. The href uses the whole encoded
    sequence; the visible text omits the first seven entries ("mailto:").
    """
    #   Input: an email address, e.g. "foo@example.com"
    #
    #   Output: the email address as a mailto link, with each character
    #       of the address encoded as either a decimal or hex entity, in
    #       the hopes of foiling most address harvesting spam bots.
    #
    #   Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
    #   mailing list.
    chars = [_xml_encode_email_char_at_random(ch)
             for ch in "mailto:" + addr]
    # Strip the mailto: from the visible part.
    addr = '<a href="%s">%s</a>' \
        % (''.join(chars), ''.join(chars[7:]))
    return addr
def _unescape_special_chars(self, text: str) -> str:
# Swap back in all the special characters we've hidden.
hashmap = tuple(self._escape_table.items()) + tuple(self._code_table.items())
# html_blocks table is in format {hash: item} compared to usual {item: hash}
hashmap += tuple(tuple(reversed(i)) for i in self.html_blocks.items())
while True:
orig_text = text
for ch, hash in hashmap:
text = text.replace(hash, ch)
if text == orig_text:
break
return text
def _outdent(self, text: str) -> str:
# Remove one level of line-leading tabs or spaces
return self._outdent_re.sub('', text)
def _hash_span(self, text: str) -> str:
    '''
    Hash `text` with `_hash_text` and record the original under that hash
    in `self.html_spans`.

    Returns:
        The hash key standing in for `text`
    '''
    digest = _hash_text(text)
    self.html_spans[digest] = text
    return digest
@staticmethod
def _uniform_outdent(
text: str,
min_outdent: Optional[str] = None,
max_outdent: Optional[str] = None
) -> tuple[str, str]:
'''
Removes the smallest common leading indentation from each (non empty)
line of `text` and returns said indent along with the outdented text.
Args:
min_outdent: make sure the smallest common whitespace is at least this size
max_outdent: the maximum amount a line can be outdented by
'''
# find the leading whitespace for every line
whitespace: list[Union[str, None]] = [
re.findall(r'^[ \t]*', line)[0] if line else None
for line in text.splitlines()
]
whitespace_not_empty = [i for i in whitespace if i is not None]
# if no whitespace detected (ie: no lines in code block, issue #505)
if not whitespace_not_empty:
return '', text
# get minimum common whitespace
outdent = min(whitespace_not_empty)
# adjust min common ws to be within bounds
if min_outdent is not None:
outdent = min([i for i in whitespace_not_empty if i >= min_outdent] or [min_outdent])
if max_outdent is not None:
outdent = min(outdent, max_outdent)
outdented = []
for line_ws, line in zip(whitespace, text.splitlines(True)):
if line.startswith(outdent):
# if line starts with smallest common ws, dedent it
outdented.append(line.replace(outdent, '', 1))
elif line_ws is not None and line_ws < outdent:
# if less indented than min common whitespace then outdent as much as possible
outdented.append(line.replace(line_ws, '', 1))
else:
outdented.append(line)
return outdent, ''.join(outdented)
@staticmethod
def _uniform_indent(
text: str,
indent: str,
include_empty_lines: bool = False,
indent_empty_lines: bool = False
) -> str:
'''
Uniformly indent a block of text by a fixed amount
Args:
text: the text to indent
indent: a string containing the indent to apply
include_empty_lines: don't remove whitespace only lines
indent_empty_lines: indent whitespace only lines with the rest of the text
'''
blocks = []
for line in text.splitlines(True):
if line.strip() or indent_empty_lines:
blocks.append(indent + line)
elif include_empty_lines:
blocks.append(line)
else:
blocks.append('')
return ''.join(blocks)
@staticmethod
def _match_overlaps_substr(text, match: re.Match, substr: str) -> bool:
'''
Checks if a regex match overlaps with a substring in the given text.
'''
for instance in re.finditer(re.escape(substr), text):
start, end = instance.span()
if start <= match.start() <= end:
return True
if start <= match.end() <= end:
return True
return False
class MarkdownWithExtras(Markdown):
    """A markdowner class with a default set of extras switched on.

    Enabled:
    - footnotes
    - fenced-code-blocks (only highlights code if 'pygments' Python module on path)

    Deliberately left out:
    - pyshell (specific to Python-related documenting)
    - code-friendly (because it *disables* part of the syntax)
    - link-patterns (because you need to specify some actual
      link-patterns anyway)
    """
    extras = [  # type: ignore
        "footnotes",
        "fenced-code-blocks",
    ]
# ----------------------------------------------------------
# Extras
# ----------------------------------------------------------
# Base classes
# ----------------------------------------------------------
class Extra(ABC):
    '''
    Abstract base for extras: keeps a name -> class registry and, per stage,
    the extras scheduled to run before and after it.
    '''
    _registry: dict[str, type['Extra']] = {}
    _exec_order: dict[Stage, tuple[list[type['Extra']], list[type['Extra']]]] = {}

    name: str
    '''
    An identifiable name that users can use to invoke the extra
    in the Markdown class
    '''
    order: tuple[Collection[Union[Stage, type['Extra']]], Collection[Union[Stage, type['Extra']]]]
    '''
    Tuple of two iterables containing the stages/extras this extra will run before and
    after, respectively
    '''

    def __init__(self, md: Markdown, options: Optional[dict]):
        '''
        Args:
            md: An instance of `Markdown`
            options: a dict of settings to alter the extra's behaviour
        '''
        self.md = md
        self.options = {} if options is None else options

    @classmethod
    def deregister(cls):
        '''
        Drop the class from the extras registry and from every execution-order list.
        '''
        if cls.name in cls._registry:
            del cls._registry[cls.name]

        for sections in Extra._exec_order.values():
            for section in sections:
                # remove every mention of this extra, however many there are
                for _ in range(section.count(cls)):
                    section.remove(cls)

    @classmethod
    def register(cls):
        '''
        Add the class to the registry and slot it into the execution order
        described by its `order` class attribute.
        '''
        cls._registry[cls.name] = cls

        # pair every target with whether this extra runs before (True) or after it
        placements = [(target, True) for target in cls.order[0]]
        placements += [(target, False) for target in cls.order[1]]

        for target, before in placements:
            if not isinstance(target, Stage) and issubclass(target, Extra):
                # eg: FencedCodeBlocks
                # insert this extra everywhere the other one is mentioned
                shift = 0 if before else 1
                for sections in Extra._exec_order.values():
                    for section in sections:
                        if target in section:
                            section.insert(section.index(target) + shift, cls)
                continue

            # eg: Stage.PREPROCESS
            slots = Extra._exec_order.setdefault(target, ([], []))
            slot = slots[0] if before else slots[1]
            if cls in slot:
                # already scheduled on this side of the stage; don't add it twice
                continue
            if before:
                slot.insert(0, cls)
            else:
                slot.append(cls)

    @abstractmethod
    def run(self, text: str) -> str:
        '''
        Run the extra against the given text.

        Returns:
            The new text after being modified by the extra
        '''
        ...

    def test(self, text: str) -> bool:
        '''
        Check a section of markdown to see if this extra should be run upon it.
        The default implementation always returns True; overriding it is
        recommended to improve performance.
        '''
        return True
class ItalicAndBoldProcessor(Extra):
    '''
    An ABC that provides hooks for dealing with italics and bold syntax.
    This class is set to trigger both before AND after the italics and bold stage.
    This allows any child classes to intercept instances of bold or italic syntax and
    change the output or hash it to prevent it from being processed.

    After the I&B stage any hashes in the `hash_table` instance variable are replaced.
    '''
    name = 'italic-and-bold-processor'
    order = (Stage.ITALIC_AND_BOLD,), (Stage.ITALIC_AND_BOLD,)

    strong_re = Markdown._strong_re
    em_re = Markdown._em_re

    def __init__(self, md: Markdown, options: Optional[dict]):
        super().__init__(md, options)
        # maps hash key -> original text hidden from the I&B stage
        self.hash_table = {}

    def run(self, text):
        '''
        Before the I&B stage: pass every strong/em match through `sub`.
        Afterwards: restore everything recorded in `hash_table`.
        '''
        if self.md.order < Stage.ITALIC_AND_BOLD:
            text = self.strong_re.sub(self.sub, text)
            text = self.em_re.sub(self.sub, text)
        else:
            # push any hashed values back, using a while loop to deal with recursive hashes
            orig_text = ''
            while orig_text != text:
                orig_text = text
                for key, substr in self.hash_table.items():
                    text = text.replace(key, substr)
        return text

    @abstractmethod
    def sub(self, match: re.Match) -> str:
        # do nothing. Let `Markdown._do_italics_and_bold` do its thing later
        return match.string[match.start(): match.end()]

    def sub_hash(self, match: re.Match) -> str:
        '''Replace the whole match with a hash and remember the original text.'''
        substr = match.string[match.start(): match.end()]
        key = _hash_text(substr)
        self.hash_table[key] = substr
        return key

    def test(self, text):
        '''
        Before the I&B stage: run only if the text contains `*` or `_`.
        Afterwards: run only if there is something to restore and the text
        contains something that looks like a hash.
        '''
        if self.md.order < Stage.ITALIC_AND_BOLD:
            return '*' in text or '_' in text
        # return a real bool, as `Extra.test` is declared to
        return bool(self.hash_table) and re.search(r'md5-[0-9a-z]{32}', text) is not None
class _LinkProcessorExtraOpts(TypedDict, total=False):
    '''Options for the `LinkProcessor` extra'''
    # total=False: every key below is optional
    tags: List[str]
    '''List of tags to be processed by the extra. Default is `['a', 'img']`'''
    inline: bool
    '''Whether to process inline links. Default: True'''
    ref: bool
    '''Whether to process reference links. Default: True'''
class LinkProcessor(Extra):
name = 'link-processor'
order = (Stage.ITALIC_AND_BOLD,), (Stage.ESCAPE_SPECIAL,)
options: _LinkProcessorExtraOpts
def __init__(self, md: Markdown, options: Optional[dict]):
    '''
    Args:
        md: An instance of `Markdown`
        options: settings for the extra; any falsy value is replaced by an empty dict
    '''
    if not options:
        options = {}
    super().__init__(md, options)
def parse_inline_anchor_or_image(self, text: str, _link_text: str, start_idx: int) -> Optional[Tuple[str, str, Optional[str], int]]:
    '''
    Extract the URL and optional title of an inline anchor or image.

    Args:
        text: the whole text containing the link
        _link_text: the human readable text inside the link (not used here)
        start_idx: the index of the link within `text`

    Returns:
        None if a link was not able to be parsed from `text`.
        If successful, a tuple is returned containing:

        1. the `text` param, as received
        2. the URL
        3. the title (can be None if not present)
        4. the index where the link ends within text
    '''
    md = self.md
    url_start = md._find_non_whitespace(text, start_idx + 1)
    if url_start == len(text):
        return None

    bracketed = text[url_start] == "<"
    # for `<url>` style links, skip past the closing '>' before looking for ')'
    scan_from = md._find_balanced(text, url_start + 1, "<", ">") if bracketed else url_start
    link_end = md._find_balanced(text, scan_from, "(", ")")

    title_match = md._inline_link_title.search(text, url_start, link_end)
    if not title_match:
        return None

    url = text[url_start:title_match.start()]
    if bracketed:
        url = md._strip_anglebrackets.sub(r'\1', url)
    return text, url, title_match.group("title"), link_end
def process_link_shortrefs(self, text: str, link_text: str, start_idx: int) -> Tuple[Optional[re.Match], str]:
    '''
    Detect a shortref link (`[text]` with no `[id]` tail) and rewrite it as `[text][]`.

    Args:
        text: the whole text containing the link
        link_text: the human readable text inside the link
        start_idx: the index of the link within `text`

    Returns:
        A tuple containing:

        1. A potential `re.Match` against the link reference within `text` (will be None if not found)
        2. potentially modified version of the `text` param
    '''
    # only applies when there is link text and no trailing id section
    if not link_text or not re.match(r'[ ]?(?:\n[ ]*)?(?!\[)', text[start_idx:]):
        return None, text

    # try a match with `[]` inserted into the text
    candidate = f'{text[:start_idx]}[]{text[start_idx:]}'
    match = self.md._tail_of_reference_link_re.match(candidate, start_idx)
    if not match:
        return None, text

    # Only commit to the modified text if the link id is valid. This avoids
    # getting stuck in loops, and stops `[abc]` being emitted as `[abc][]`
    # in the final HTML.
    link_id = match.group("id").lower() or link_text.lower()
    if link_id in self.md.urls:
        return match, candidate
    return None, text
def parse_ref_anchor_or_ref_image(self, text: str, link_text: str, start_idx: int) -> Optional[Tuple[str, Optional[str], Optional[str], int]]:
    '''
    Resolve a reference anchor or reference image to its URL and title.

    Args:
        text: the whole text containing the link
        link_text: the human readable text inside the link
        start_idx: the index of the link within `text`

    Returns:
        None if a link was not able to be parsed from `text`.
        If successful, a tuple is returned containing:

        1. potentially modified version of the `text` param
        2. the URL (can be None if the reference doesn't exist)
        3. the title (can be None if not present)
        4. the index where the link ends within text
    '''
    tail = None
    if 'link-shortrefs' in self.md.extras:
        tail, text = self.process_link_shortrefs(text, link_text, start_idx)
    if not tail:
        tail = self.md._tail_of_reference_link_re.match(text, start_idx)
    if not tail:
        # text isn't markup
        return None

    # an empty id means the link text doubles as the id, for links like [this][]
    link_id = tail.group("id").lower() or link_text.lower()
    return text, self.md.urls.get(link_id), self.md.titles.get(link_id), tail.end()
def process_image(self, url: str, title_attr: str, link_text: str) -> Tuple[str, int]:
'''
Takes a URL, title and link text and returns an HTML `
` tag
Args:
url: the image URL/src
title_attr: a string containing the title attribute of the tag (eg: `' title="..."'`)
link_text: the human readable text portion of the link
Returns:
A tuple containing:
1. The HTML string
2. The length of the opening HTML tag in the string. For `
` it's the whole string.
This section will be skipped by the link processor
'''
img_class_str = self.md._html_class_str_from_tag("img")
result = (
f'
Tuple[str, int]:
'''
Takes a URL, title and link text and returns an HTML `` tag
Args:
url: the URL
title_attr: a string containing the title attribute of the tag (eg: `' title="..."'`)
link_text: the human readable text portion of the link
Returns:
A tuple containing:
1. The HTML string
2. The length of the opening HTML tag in the string. This section will be skipped
by the link processor
'''
if self.md.safe_mode and not self.md._safe_href.match(url):
result_head = f''
else:
result_head = f''
return f'{result_head}{link_text}', len(result_head)
def run(self, text: str) -> str:
    '''
    Scan `text` for `[`-introduced link markup and replace each anchor,
    image or footnote reference with the markup built for it.

    Returns:
        The text with all recognised links replaced
    '''
    MAX_LINK_TEXT_SENTINEL = 3000  # markdown2 issue 24

    # `anchor_allowed_pos` is used to support img links inside
    # anchors, but not anchors inside anchors. An anchor's start
    # pos must be `>= anchor_allowed_pos`.
    anchor_allowed_pos = 0

    curr_pos = 0

    while True:
        # The next '[' is the start of:
        # - an inline anchor:   [text](url "title")
        # - a reference anchor: [text][id]
        # - an inline img:      ![text](url "title")
        # - a reference img:    ![text][id]
        # - a footnote ref:     [^id]
        #   (Only if 'footnotes' extra enabled)
        # - a footnote defn:    [^id]: ...
        #   (Only if 'footnotes' extra enabled) These have already
        #   been stripped in _strip_footnote_definitions() so no
        #   need to watch for them.
        # - a link definition:  [id]: url "title"
        #   These have already been stripped in
        #   _strip_link_definitions() so no need to watch for them.
        # - not markup:         [...anything else...
        try:
            start_idx = text.index('[', curr_pos)
        except ValueError:
            break
        text_length = len(text)

        # Find the matching closing ']'.
        # Markdown.pl allows *matching* brackets in link text so we
        # will here too. Markdown.pl *doesn't* currently allow
        # matching brackets in img alt text -- we'll differ in that
        # regard.
        bracket_depth = 0

        for p in range(
            start_idx + 1,
            min(start_idx + MAX_LINK_TEXT_SENTINEL, text_length)
        ):
            ch = text[p]
            if ch == ']':
                bracket_depth -= 1
                if bracket_depth < 0:
                    break
            elif ch == '[':
                bracket_depth += 1
        else:
            # Closing bracket not found within sentinel length.
            # This isn't markup.
            curr_pos = start_idx + 1
            continue
        link_text = text[start_idx + 1: p]

        # Fix for issue 341 - Injecting XSS into link text
        if self.md.safe_mode:
            link_text = self.md._hash_html_spans(link_text)
            link_text = self.md._unhash_html_spans(link_text)

        # Possibly a footnote ref?
        if "footnotes" in self.md.extras and link_text.startswith("^"):
            normed_id = re.sub(r'\W', '-', link_text[1:])
            if normed_id in self.md.footnotes:
                # NOTE(review): this string is empty as written, so the
                # reference is replaced by nothing -- the footnote markup
                # looks to have been lost; confirm against the intended output.
                result = (
                    f''
                )
                text = text[:start_idx] + result + text[p+1:]
            else:
                # This id isn't defined, leave the markup alone.
                curr_pos = p + 1
            continue

        # Now determine what this is by the remainder.
        p += 1

        # -- Extract the URL, title and end index from the link

        # inline anchor or inline img
        if text[p:p + 1] == '(':
            if not self.options.get('inline', True):
                curr_pos = start_idx + 1
                continue

            parsed = self.parse_inline_anchor_or_image(text, link_text, p)
            if not parsed:
                # text isn't markup
                curr_pos = start_idx + 1
                continue

            text, url, title, url_end_idx = parsed
            url = self.md._unhash_html_spans(url, code=True)
        # reference anchor or reference img
        else:
            if not self.options.get('ref', True):
                curr_pos = start_idx + 1
                continue

            parsed = self.parse_ref_anchor_or_ref_image(text, link_text, p)
            if not parsed:
                curr_pos = start_idx + 1
                continue

            text, url, title, url_end_idx = parsed
            if url is None:
                # This id isn't defined, leave the markup alone.
                # set current pos to end of link title and continue from there
                curr_pos = p
                continue

        # -- Encode and hash the URL and title to avoid conflicts with italics/bold

        url = (
            url
            .replace('*', self.md._escape_table['*'])
            .replace('_', self.md._escape_table['_'])
        )
        if title:
            title = (
                _xml_escape_attr(title)
                .replace('*', self.md._escape_table['*'])
                .replace('_', self.md._escape_table['_'])
            )
            title_str = f' title="{title}"'
        else:
            title_str = ''

        # -- Process the anchor/image

        is_img = start_idx > 0 and text[start_idx-1] == "!"
        if is_img:
            if 'img' not in self.options.get('tags', ['img']):
                curr_pos = start_idx + 1
                continue

            # the replacement must also swallow the leading '!'
            start_idx -= 1
            result, skip = self.process_image(url, title_str, link_text)
        elif start_idx >= anchor_allowed_pos:
            if 'a' not in self.options.get('tags', ['a']):
                curr_pos = start_idx + 1
                continue

            result, skip = self.process_anchor(url, title_str, link_text)
        else:
            # anchor not allowed here/invalid markup
            curr_pos = start_idx + 1
            continue

        if "smarty-pants" in self.md.extras:
            result = result.replace('"', self.md._escape_table['"'])

        # <img> allowed from curr_pos onwards, <a> allowed from anchor_allowed_pos onwards.
        # this means images can exist within `<a>` tags but anchors can only come after the
        # current anchor has been closed
        curr_pos = start_idx + skip
        anchor_allowed_pos = start_idx + len(result)
        text = text[:start_idx] + result + text[url_end_idx:]

    return text
def test(self, text):
    '''Cheap pre-check: link markup needs at least one `(` or `[`.'''
    return any(opener in text for opener in ('(', '['))
# User facing extras
# ----------------------------------------------------------
class Admonitions(Extra):
    '''
    Enable parsing of RST admonitions
    '''

    name = 'admonitions'
    order = (Stage.BLOCK_GAMUT, Stage.LINK_DEFS), ()

    admonitions = r'admonition|attention|caution|danger|error|hint|important|note|tip|warning'

    admonitions_re = re.compile(r'''
        ^(\ *)\.\.\ (%s)::\ *                # $1 leading indent, $2 the admonition
        (.*)?                                # $3 admonition title
        ((?:\s*\n\1\ {3,}.*)+?)              # $4 admonition body (required)
        (?=\s*(?:\Z|\n{4,}|\n\1?\ {0,2}\S))  # until EOF, 3 blank lines or something less indented
        ''' % admonitions,
        re.IGNORECASE | re.MULTILINE | re.VERBOSE
    )

    def test(self, text):
        '''Run only when the text contains at least one admonition.'''
        return self.admonitions_re.search(text) is not None

    def sub(self, match: re.Match) -> str:
        '''
        Render one matched admonition as an `<aside>` block holding the
        admonition name, the optional title and the processed body.
        '''
        lead_indent, admonition_name, title, body = match.groups()

        admonition_type = '<strong>%s</strong>' % admonition_name

        # figure out the class names to assign the block
        if admonition_name.lower() == 'admonition':
            admonition_class = 'admonition'
        else:
            admonition_class = 'admonition %s' % admonition_name.lower()

        # titles are generally optional
        if title:
            title = '<em>%s</em>' % title

        # process the admonition body like regular markdown
        body = self.md._run_block_gamut("\n%s\n" % self.md._uniform_outdent(body)[1])

        # indent the body before placing inside the aside block
        admonition = self.md._uniform_indent(
            '{}\n{}\n\n{}\n'.format(admonition_type, title, body),
            self.md.tab, False
        )
        # wrap it in an aside
        admonition = '<aside class="{}">\n{}</aside>\n\n'.format(admonition_class, admonition)
        # now indent the whole admonition back to where it started
        return self.md._uniform_indent(admonition, lead_indent, False)

    def run(self, text):
        '''Replace every admonition in `text` with its HTML.'''
        return self.admonitions_re.sub(self.sub, text)
class Alerts(Extra):
    '''
    Markdown Alerts as per
    https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts
    '''

    name = 'alerts'
    order = (), (Stage.BLOCK_QUOTES, )

    # Runs after the block quote stage, so it matches the rendered
    # blockquote HTML rather than the raw `> [!NOTE]` markdown.
    alert_re = re.compile(r'''
        <blockquote>\s*
        <p>
        \[!(?P<type>NOTE|TIP|IMPORTANT|WARNING|CAUTION)\]
        (?P<closing_tag></p>[ \t]*\n?)?
        (?P<contents>[\s\S]+?)
        </blockquote>
        ''', re.X
    )

    def test(self, text):
        '''Run only when the text contains a rendered block quote.'''
        return "<blockquote>" in text

    def sub(self, match: re.Match) -> str:
        '''Render one matched alert as a `<div class="alert ...">` block.'''
        typ = match["type"].lower()
        heading = f"<em>{match['type'].title()}</em>"
        contents = match["contents"].strip()
        if match["closing_tag"]:
            # the marker sat in its own paragraph; contents bring their own tags
            return f'<div class="alert {typ}">\n{heading}\n{contents}\n</div>'
        else:
            # the marker shared a paragraph with the contents, so reopen that paragraph
            return f'<div class="alert {typ}">\n{heading}\n<p>{contents}\n</div>'

    def run(self, text):
        '''Replace every alert block quote in `text`.'''
        return self.alert_re.sub(self.sub, text)
class _BreaksExtraOpts(TypedDict, total=False):
    '''Options for the `Breaks` extra'''
    on_backslash: bool
    '''Replace backslashes at the end of a line with <br>'''
    on_newline: bool
    '''Replace single new line characters with <br> when True'''
class Breaks(Extra):
    '''
    Insert `<br>` line breaks at line ends, according to the
    `on_backslash` and `on_newline` options.
    '''
    name = 'breaks'
    order = (), (Stage.ITALIC_AND_BOLD,)
    options: _BreaksExtraOpts

    def run(self, text):
        '''
        Replace qualifying line ends in `text` with a break tag.

        With neither option set, only two or more trailing spaces produce a break.
        '''
        on_backslash = self.options.get('on_backslash', False)
        on_newline = self.options.get('on_newline', False)

        if on_backslash and on_newline:
            pattern = r' *\\?'
        elif on_backslash:
            pattern = r'(?: *\\| {2,})'
        elif on_newline:
            pattern = r' *'
        else:
            pattern = r' {2,}'

        break_tag = "<br%s\n" % self.md.empty_element_suffix
        # don't put a break in front of list tags
        text = re.sub(pattern + r"\n(?!\<(?:\/?(ul|ol|li))\>)", break_tag, text)

        return text
class CodeFriendly(ItalicAndBoldProcessor):
    '''
    Disable _ and __ for em and strong.
    '''
    name = 'code-friendly'

    def sub(self, match: re.Match) -> str:
        '''
        Hide underscore-based emphasis, and underscores inside `*`-based
        emphasis, from the italics and bold stage.
        '''
        marker = match.group(1)
        whole: str = match.group(0)

        if '_' in marker:
            # if using _this_ syntax, hash the whole thing so that it doesn't get processed
            digest = _hash_text(whole)
            self.hash_table[digest] = whole
            return digest

        if '_' not in whole:
            # if no underscores are present, the text is fine and we can just leave it alone
            return super().sub(match)

        # the text within the bold/em markers contains '_': hash those
        # contents to protect them from em_re
        inner = whole[len(marker): -len(marker)]
        digest = _hash_text(inner)
        self.hash_table[digest] = inner
        return marker + digest + marker
class FencedCodeBlocks(Extra):
'''
Allows a code block to not have to be indented
by fencing it with '```' on a line before and after. Based on
with support for
syntax highlighting.
'''
name = 'fenced-code-blocks'
order = (Stage.LINK_DEFS, Stage.BLOCK_GAMUT), (Stage.PREPROCESS,)
fenced_code_block_re = re.compile(r'''
(?:\n+|\A\n?|(?<=\n))
(^[ \t]*`{3,})\s{0,99}?([\w+-]+)?\s{0,99}?\n # $1 = opening fence (captured for back-referencing), $2 = optional lang
(.*?) # $3 = code block content
\1[ \t]*\n # closing fence
''', re.M | re.X | re.S)
def test(self, text):
    '''
    Run only when the text contains a fence, and then: at PREPROCESS when
    safe mode is off, at LINK_DEFS when safe mode is on, and always at
    BLOCK_GAMUT.
    '''
    if '```' not in text:
        return False
    stage = self.md.stage
    safe_mode = self.md.safe_mode
    if stage == Stage.PREPROCESS and not safe_mode:
        return True
    if stage == Stage.LINK_DEFS and safe_mode:
        return True
    return stage == Stage.BLOCK_GAMUT
def _code_block_with_lexer_sub(
self,
codeblock: str,
leading_indent: str,
lexer
) -> str:
'''
Args:
codeblock: the codeblock to format
leading_indent: the indentation to prefix the block with
lexer (pygments.Lexer): the lexer to use
'''
formatter_opts = self.md.extras['fenced-code-blocks'] or {}
def unhash_code(codeblock):
for key, sanitized in list(self.md.html_spans.items()):
codeblock = codeblock.replace(key, sanitized)
replacements = [
("&", "&"),
("<", "<"),
(">", ">")
]
for old, new in replacements:
codeblock = codeblock.replace(old, new)
return codeblock
# remove leading indent from code block
_, codeblock = self.md._uniform_outdent(codeblock, max_outdent=leading_indent)
codeblock = unhash_code(codeblock)
colored = self.md._color_with_pygments(codeblock, lexer,
**formatter_opts)
# add back the indent to all lines
return "\n%s\n" % self.md._uniform_indent(colored, leading_indent, True)
def tags(self, lexer_name: str) -> tuple[str, str]:
    '''
    Returns the tags that the encoded code block will be wrapped in, based
    upon the lexer name.

    This function can be overridden by subclasses to piggy-back off of the
    fenced code blocks syntax (see `Mermaid` extra).

    Args:
        lexer_name: the language given after the opening fence; may be
            empty or None when no language was given

    Returns:
        The opening and closing tags, as strings within a tuple
    '''
    pre_class = self.md._html_class_str_from_tag('pre')
    if "highlightjs-lang" in self.md.extras and lexer_name:
        # highlight.js picks the language up from the class attribute
        code_class = ' class="{} language-{}"'.format(lexer_name, lexer_name)
    else:
        code_class = self.md._html_class_str_from_tag('code')
    return ('<pre{}><code{}>'.format(pre_class, code_class), '</code></pre>')
def sub(self, match: re.Match) -> str:
    '''
    Render one fenced code block.

    Args:
        match: a `fenced_code_block_re` match (group 1: opening fence with
            its leading whitespace, group 2: optional language, group 3:
            block content)

    Returns:
        The pygments-highlighted block when a lexer is found and the
        `highlightjs-lang` extra is off; otherwise the encoded code wrapped
        in the tags from `self.tags`
    '''
    fence = match.group(1)
    lexer_name = match.group(2)
    code = match.group(3)[:-1]  # drop one trailing newline

    # the fence's leading whitespace, measured in characters and rebuilt as spaces
    indent = ' ' * (len(fence) - len(fence.lstrip()))

    # Use pygments only if not using the highlightjs-lang extra
    if lexer_name and "highlightjs-lang" not in self.md.extras:
        lexer = self.md._get_pygments_lexer(lexer_name)
        if lexer:
            return self._code_block_with_lexer_sub(code, indent, lexer)

    # Fenced code blocks need to be outdented before encoding, and then reapplied
    if code:
        # only run the codeblock through the outdenter if not empty
        indent, code = self.md._uniform_outdent(code, max_outdent=indent)

    code = self.md._encode_code(code)
    wrapper = self.tags(lexer_name)
    return "\n{}{}{}\n{}{}\n".format(indent, wrapper[0], code, indent, wrapper[1])
def run(self, text):
    '''Replace every match of `fenced_code_block_re` in `text` with the output of `self.sub`.'''
    pattern = self.fenced_code_block_re
    return pattern.sub(self.sub, text)
class Latex(Extra):
'''
Convert $ and $$ to <math> and </math> tags for inline and block math.
'''
name = 'latex'
order = (Stage.CODE_BLOCKS, FencedCodeBlocks), ()
_single_dollar_re = re.compile(r'(?(.*?)
", re.DOTALL) # Wraped in
_triple_re = re.compile(r'```(.*?)```', re.DOTALL) # Wrapped in a code block ```
_single_re = re.compile(r'(?"
self.code_blocks[placeholder] = match.group(0)
return placeholder
def run(self, text):
    '''
    Convert `$...$` and `$$...$$` spans in `text` using latex2mathml.

    Spans matched by `_pre_code_block_re`, `_single_re` and `_triple_re` are
    first swapped for placeholders (via `self.code_placeholder`) so their
    contents are not converted, and are restored at the end.

    Raises:
        ImportError: if the `latex2mathml` package cannot be imported; the
            original import error is attached as the cause
    '''
    try:
        import latex2mathml.converter
    except ImportError as err:
        raise ImportError(
            'The "latex" extra requires the "latex2mathml" package to be installed.'
        ) from err
    self.converter = latex2mathml.converter

    # Escape by replacing with a code block
    text = self._pre_code_block_re.sub(self.code_placeholder, text)
    text = self._single_re.sub(self.code_placeholder, text)
    text = self._triple_re.sub(self.code_placeholder, text)

    text = self._single_dollar_re.sub(self._convert_single_match, text)
    text = self._double_dollar_re.sub(self._convert_double_match, text)

    # Convert placeholder tag back to original code
    for placeholder, code_block in self.code_blocks.items():
        text = text.replace(placeholder, code_block)

    return text
class LinkPatterns(Extra):
    '''
    Auto-link given regex patterns in text (e.g. bug number
    references, revision number references).
    '''
    name = 'link-patterns'
    order = (Stage.LINKS,), ()
    options: _link_patterns

    # anything shaped like an inline markdown link or image: [text](url) / ![alt](url)
    _basic_link_re = re.compile(r'!?\[.*?\]\(.*?\)')

    def run(self, text):
        '''
        Turn every match of the configured patterns into an HTML anchor.

        `self.options` is iterated as `(regex, repl)` pairs, where `repl` is
        either a callable taking the match and returning the href, or a
        template expanded with `match.expand`.

        Matches are left alone when they sit directly inside square
        brackets, directly after `](` or directly before `")`, or inside
        something that already looks like a link. A match wrapped in triple
        double-quotes is treated as escaped: the quotes are removed and the
        text is not linked.

        Returns:
            `text` with the accepted matches replaced by anchors
        '''
        link_from_hash = {}
        for regex, repl in self.options:
            replacements = []
            for match in regex.finditer(text):
                # skip matches overlapping a placeholder left by an earlier pattern
                if any(self.md._match_overlaps_substr(text, match, h) for h in link_from_hash):
                    continue

                if callable(repl):
                    href = repl(match)
                else:
                    href = match.expand(repl)
                replacements.append((match.span(), href))

            # splice back to front so the spans still to be processed stay valid
            for (start, end), href in reversed(replacements):

                # Do not match against links inside brackets.
                if text[start - 1:start] == '[' and text[end:end + 1] == ']':
                    continue

                # Do not match against links in the standard markdown syntax.
                if text[start - 2:start] == '](' or text[end:end + 2] == '")':
                    continue

                # Do not match against links which are escaped.
                if text[start - 3:start] == '"""' and text[end:end + 3] == '"""':
                    text = text[:start - 3] + text[start:end] + text[end + 3:]
                    continue

                # search the text for anything that looks like a link; if the
                # pattern match lies within the bounds of one (or of any of
                # its groups), don't process it
                is_inside_link = any(
                    span_start <= start and end <= span_end
                    for link_re in (self.md._auto_link_re, self._basic_link_re)
                    for found in link_re.finditer(text)
                    for span_start, span_end in found.regs
                )
                if is_inside_link:
                    continue

                escaped_href = (
                    href.replace('"', '&quot;')  # b/c of attr quote
                        # To avoid markdown <em> and <strong>:
                        .replace('*', self.md._escape_table['*'])
                        .replace('_', self.md._escape_table['_']))
                link = '<a href="{}">{}</a>'.format(escaped_href, text[start:end])
                # Swap in a placeholder for now so that later patterns cannot
                # match inside the generated markup.
                key = _hash_text(link)
                link_from_hash[key] = link
                text = text[:start] + key + text[end:]

        for key, link in link_from_hash.items():
            text = text.replace(key, link)
        return text

    def test(self, text):
        '''Always True: this extra runs on any text.'''
        return True
class MarkdownInHTML(Extra):
    '''
    Allow the use of `markdown="1"` in a block HTML tag to
    have markdown processing be done on its contents. Similar to
    the `markdown` attribute of PHP Markdown Extra, but with
    some limitations.
    '''
    name = 'markdown-in-html'
    order = (), (Stage.HASH_HTML,)

    def run(self, text):
        '''
        Hand `text` to the markdown instance's strict tag-block substitution,
        using a callback that outdents each block, passes it through
        `_hash_html_block_sub`, and restores the indent afterwards.
        '''
        md = self.md

        def process_block(html_block):
            prefix, outdented = md._uniform_outdent(html_block)
            hashed = md._hash_html_block_sub(outdented)
            return md._uniform_indent(
                hashed, prefix, include_empty_lines=True, indent_empty_lines=False
            )

        return md._strict_tag_block_sub(text, md._block_tags_a, process_block, True)

    def test(self, text):
        '''Always True: this extra runs on any text.'''
        return True
# Option schema for the `markdown-file-links` extra, extending the options of
# `_LinkProcessorExtraOpts`. `total=False` is passed to the base class;
# presumably this is a TypedDict, where it makes every key optional - confirm
# against the definition of `_LinkProcessorExtraOpts`.
class _MarkdownFileLinksExtraOpts(_LinkProcessorExtraOpts, total=False):
'''Options for the `MarkdownFileLinks` extra'''
# `link_defs` is looked up with a default of True when the key is absent
link_defs: bool
'''Whether to convert link definitions as well. Default: True'''
class MarkdownFileLinks(LinkProcessor):
    '''
    Replace links to `.md` files with `.html` links
    '''
    name = 'markdown-file-links'
    order = (Stage.LINKS,), (Stage.LINK_DEFS,)
    options: _MarkdownFileLinksExtraOpts

    def __init__(self, md: Markdown, options: Optional[dict]):
        '''
        Args:
            md: the markdown instance
            options: user options, layered over this extra's defaults
                (`tags` = `['a']`, `ref` = False)
        '''
        # override LinkProcessor defaults
        merged = {'tags': ['a'], 'ref': False}
        merged.update(options or {})
        super().__init__(md, merged)

    def parse_inline_anchor_or_image(self, text: str, _link_text: str, start_idx: int):
        '''
        Parse an inline link through the parent class and, when its URL ends
        with `.md`, return the parent's result with that suffix swapped for
        `.html`.

        Returns:
            The adjusted result tuple, or None for invalid markup and for
            links that don't end with `.md`, so that we don't touch them and
            other extras can process them freely
        '''
        parsed = super().parse_inline_anchor_or_image(text, _link_text, start_idx)
        if not parsed:
            return None

        target = parsed[1]
        if not target or not target.endswith('.md'):
            return None

        return (parsed[0], target.removesuffix('.md') + '.html', *parsed[2:])

    def run(self, text: str):
        '''
        Rewrite `.md` URLs in the stored link definitions (when running
        between the LINK_DEFS and LINKS stages and `link_defs` is enabled),
        then let the parent class process `text`.
        '''
        after_link_defs = Stage.LINKS > self.md.order > Stage.LINK_DEFS
        if after_link_defs and self.options.get('link_defs', True):
            # running just after link defs have been stripped
            urls = self.md.urls
            urls.update({
                key: url.removesuffix('.md') + '.html'
                for key, url in urls.items()
                if url.endswith('.md')
            })

        return super().run(text)

    def test(self, text):
        '''Run only when the parent class would and `text` mentions `.md`.'''
        parent_applies = super().test(text)
        return parent_applies and '.md' in text
class Mermaid(FencedCodeBlocks):
name = 'mermaid'
order = (FencedCodeBlocks,), ()
def tags(self, lexer_name):
    '''
    Return the wrapper tags for a fenced block.

    Blocks whose language is `mermaid` are wrapped in a `div.mermaid` inside
    a `pre.mermaid-pre`; every other language falls through to the parent
    implementation.

    Returns:
        The opening and closing tags, as strings within a tuple
    '''
    if lexer_name == 'mermaid':
        return ('<pre class="mermaid-pre"><div class="mermaid">', '</div></pre>')
    return super().tags(lexer_name)
class MiddleWordEm(ItalicAndBoldProcessor):
'''
Allows or disallows emphasis syntax in the middle of words,
defaulting to allow. Disabling this means that `this_text_here` will not be
converted to `this<em>text</em>here`.
'''
name = 'middle-word-em'
order = (CodeFriendly,), (Stage.ITALIC_AND_BOLD,)
def __init__(self, md: Markdown, options: Union[dict, bool, None]):
'''
Args:
md: the markdown instance
options: can be bool for backwards compatibility but will be converted to a dict
in the constructor. All options are:
- allowed (bool): whether to allow emphasis in the middle of a word.
If `options` is a bool it will be placed under this key.
'''
if isinstance(options, bool):
options = {'allowed': options}
else:
options = options or {}
options.setdefault('allowed', True)
super().__init__(md, options)
self.liberal_em_re = self.em_re
if not options['allowed']:
self.em_re = re.compile(r'(?<=\b)%s(?=\b)' % self.em_re.pattern, self.em_re.flags)
self.liberal_em_re = re.compile(
r'''
( # \1 - must be a single em char in the middle of a word
(? str:
syntax = match.group(1)
if len(syntax) != 1:
# strong syntax
return super().sub(match)
return '%s' % match.group(2)
class Numbering(Extra):
'''
Support of generic counters. Non standard extension to
allow sequential numbering of figures, tables, equations, exhibits etc.
'''
name = 'numbering'
order = (Stage.LINK_DEFS,), ()
def run(self, text):
    '''
    Number counter definitions and resolve references to them.

    A definition looks like `[#counter text @ref_id more text]` and becomes a
    `<figcaption>` carrying the next number of that counter. A reference
    looks like `[@ref_id]` and becomes a link to the matching caption.
    Definitions are handled in a first pass over the whole text so that a
    reference may appear before its definition. Unknown references are
    rendered as `?ref_id?`.

    Returns:
        `text` with definitions and references replaced by HTML
    '''
    regex_defns = re.compile(r'''
        \[\#(\w+) # the counter. Open square plus hash plus a word \1
        ([^@]*) # Some optional characters, that aren't an @. \2
        @(\w+) # the id. Should this be normed? \3
        ([^\]]*)\] # The rest of the text up to the terminating ] \4
        ''', re.VERBOSE)
    regex_subs = re.compile(r"\[@(\w+)\s*\]")  # [@ref_id]
    counters = {}
    references = {}
    definition_html = '<figcaption class="{}" id="counter-ref-{}">{}{}{}</figcaption>'
    reference_html = '<a class="{}" href="#counter-ref-{}">{}</a>'

    def define(match):
        counter, text_before, ref_id, text_after = match.groups()
        number = counters.get(counter, 1)
        counters[counter] = number + 1
        references[ref_id] = (number, counter)
        return definition_html.format(
            counter, ref_id, text_before.strip(), number, text_after)

    # First pass to define all the references
    text = regex_defns.sub(define, text)

    escape_quotes = "smarty-pants" in self.md.extras

    def refer(match):
        ref_id = match.group(1)
        if ref_id in references:
            number, counter = references[ref_id]
            repl = reference_html.format(counter, ref_id, number)
        else:
            repl = reference_html.format(ref_id, 'countererror', '?' + ref_id + '?')
        if escape_quotes:
            # swap the attribute quotes for their escape-table entry when
            # the smarty-pants extra is enabled
            repl = repl.replace('"', self.md._escape_table['"'])
        return repl

    # Second pass to replace the references with the right
    # value of the counter
    return regex_subs.sub(refer, text)
class PyShell(Extra):
    '''
    Treats unindented Python interactive shell sessions as <code>
    blocks.
    '''
    name = 'pyshell'
    order = (), (Stage.LISTS,)

    def test(self, text):
        '''Cheap pre-check: only text containing a `>>>` prompt is processed.'''
        has_prompt = ">>>" in text
        return has_prompt

    def sub(self, match: re.Match) -> str:
        '''
        Convert one matched shell session into a code block.

        With the `fenced-code-blocks` extra enabled the dedented session is
        wrapped in a `pycon` fence and rendered by that extra; otherwise each
        line is dedented and re-indented by one tab width.
        '''
        session = match.group(0)

        if "fenced-code-blocks" in self.md.extras:
            fenced = "```pycon\n" + _dedent(session) + "```\n"
            return self.md.extra_classes['fenced-code-blocks'].run(fenced)

        lines = session.splitlines(False)
        _dedentlines(lines)  # return value unused; presumably edits `lines` in place
        pad = ' ' * self.md.tab_width
        body = pad + ('\n' + pad).join(lines)
        # the leading newline separates the block from a possible cuddled paragraph
        return '\n' + body + '\n'

    def run(self, text):
        '''Find every shell session in `text` and replace it via `self.sub`.'''
        max_indent = self.md.tab_width - 1
        session_re = re.compile(r"""
            ^([ ]{0,%d})>>>[ ].*\n # first line
            ^(\1[^\S\n]*\S.*\n)* # any number of subsequent lines with at least one character
            (?=^\1?\n|\Z) # ends with a blank line or end of document
            """ % max_indent, re.M | re.X)
        return session_re.sub(self.sub, text)
class SmartyPants(Extra):
'''
Replaces ' and " with curly quotation marks or curly
apostrophes. Replaces --, ---, ..., and . . . with en dashes, em dashes,
and ellipses.
'''
name = 'smarty-pants'
order = (), (Stage.SPAN_GAMUT,)
_opening_single_quote_re = re.compile(r"(? str:
text = self._apostrophe_year_re.sub(r"’\1", text)
for c in self._contractions:
text = text.replace("'%s" % c, "’%s" % c)
text = text.replace("'%s" % c.capitalize(),
"’%s" % c.capitalize())
return text
def run(self, text):
"""Fancifies 'single quotes', "double quotes", and apostrophes.
Converts --, ---, and ... into en dashes, em dashes, and ellipses.
Inspiration is the original SmartyPants
(daringfireball.net/projects/smartypants/).
See "test/tm-cases/smarty_pants.text" for a full discussion of the
support here, and the project's issue tracker for a
discussion of some diversion from the original SmartyPants.
"""
# Contractions are handled before the generic single-quote regexes;
# presumably so their apostrophes are not taken for opening quotes -
# confirm against the regex definitions.
if "'" in text: # guard for perf
text = self.contractions(text)
text = self._opening_single_quote_re.sub("‘", text)
text = self._closing_single_quote_re.sub("’", text)
if '"' in text: # guard for perf
text = self._opening_double_quote_re.sub("“", text)
text = self._closing_double_quote_re.sub("”", text)
# Order matters: "---" is replaced before "--", otherwise every em dash
# would be consumed as an en dash followed by a stray hyphen.
text = text.replace("---", "—")
text = text.replace("--", "–")
text = text.replace("...", "…")
# The space-padded form goes first so its surrounding spaces are removed
# together with the dots.
text = text.replace(" . . . ", "…")
text = text.replace(". . .", "…")
# TODO: Temporary hack to fix https://github.com/trentm/python-markdown2/issues/150
if "footnotes" in self.md.extras and "footnote-ref" in text:
# Quotes in the footnote back ref get converted to "smart" quotes
# Change them back here to ensure they work.
text = text.replace('class="footnote-ref”', 'class="footnote-ref"')
return text
def test(self, text):
    '''Return True when `text` contains a quote, a double hyphen or an ellipsis.'''
    triggers = ("'", '"', '--', '...', '. . .')
    for trigger in triggers:
        if trigger in text:
            return True
    return False
class Strike(Extra):
'''
Text inside of double tilde is ~~strikethrough~~
'''
name = 'strike'
order = (Stage.ITALIC_AND_BOLD,), ()
_strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
def run(self, text):
    '''
    Wrap the contents of every `~~...~~` span in `<s>` tags.

    Spans are the ones matched by `_strike_re`.

    Returns:
        `text` with each matched span replaced by `<s>contents</s>`
    '''
    return self._strike_re.sub(r"<s>\1</s>", text)
def test(self, text):
    '''Cheap pre-check: only text containing `~~` can hold a strikethrough span.'''
    has_marker = '~~' in text
    return has_marker
class Tables(Extra):
'''
Tables using the same format as GFM (GitHub-Flavored Markdown)
and
PHP-Markdown Extra.
'''
name = 'tables'
order = (), (Stage.LISTS,)
def run(self, text):
    """Copying PHP-Markdown and GFM table syntax. Some regex borrowed from
    https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538

    Every table found in `text` (header row, underline row, then any number
    of data rows) is replaced with the result of `self.sub`.
    """
    max_indent = self.md.tab_width - 1
    pattern = r'''
        (?:(?<=\n)|\A\n?) # leading blank line
        ^[ ]{0,%d} # allowed whitespace
        (.*[|].*)[ ]*\n # $1: header row (at least one pipe)
        ^[ ]{0,%d} # allowed whitespace
        ( # $2: underline row
        # underline row with leading bar
        (?: \|\ *:?-+:?\ * )+ \|? \s?[ ]*\n
        |
        # or, underline row without leading bar
        (?: \ *:?-+:?\ *\| )+ (?: \ *:?-+:?\ * )? \s?[ ]*\n
        )
        ( # $3: data rows
        (?:
        ^[ ]{0,%d}(?!\ ) # ensure line begins with 0 to less_than_tab spaces
        .*\|.*[ ]*\n
        )*
        )
        ''' % ((max_indent,) * 3)
    table_re = re.compile(pattern, re.M | re.X)
    return table_re.sub(self.sub, text)
def sub(self, match: re.Match) -> str:
trim_space_re = r'^\s+|\s+$'
trim_bar_re = r'^\||\|$'
split_bar_re = r'^\||(?' % self.md._html_class_str_from_tag('table'), '' % self.md._html_class_str_from_tag('thead'), '']
cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)))]
for col_idx, col in enumerate(cols):
hlines.append(' | {} | '.format(
align_from_col_idx.get(col_idx, ''),
self.md._run_span_gamut(col)
))
hlines.append('
')
hlines.append('')
# tbody
body = body.strip('\n')
if body:
hlines.append('')
for line in body.split('\n'):
hlines.append('')
cols = [re.sub(escape_bar_re, '|', cell.strip()) for cell in re.split(split_bar_re, re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)))]
for col_idx, col in enumerate(cols):
hlines.append(' | {} | '.format(
align_from_col_idx.get(col_idx, ''),
self.md._run_span_gamut(col)
))
hlines.append('
')
hlines.append('')
hlines.append('