From 938d81b064192d2dedad59ab12593ffb7514feac Mon Sep 17 00:00:00 2001
From: Roman
Date: Fri, 7 May 2021 10:51:48 -0400
Subject: [PATCH] lists, headings, todos + tests working
---
.eggs/README.txt | 6 ++++++
htmlslacker/htmlslacker.py | 44 +++++++++++++++++++++++++++++++++-----
test_general.py | 36 ++++++++++++++++++++++++++++++-
3 files changed, 80 insertions(+), 6 deletions(-)
create mode 100644 .eggs/README.txt
diff --git a/.eggs/README.txt b/.eggs/README.txt
new file mode 100644
index 0000000..5d01668
--- /dev/null
+++ b/.eggs/README.txt
@@ -0,0 +1,6 @@
+This directory contains eggs that were downloaded by setuptools to build, test, and run plug-ins.
+
+This directory caches those eggs to prevent repeated downloads.
+
+However, it is safe to delete this directory.
+
diff --git a/htmlslacker/htmlslacker.py b/htmlslacker/htmlslacker.py
index a81c5de..c104c4e 100644
--- a/htmlslacker/htmlslacker.py
+++ b/htmlslacker/htmlslacker.py
@@ -4,6 +4,7 @@
except ImportError:
from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint
+import re
LINEBR = "::LINEBR::"
@@ -23,6 +24,9 @@ def __init__(self, html, *args, **kwargs):
except TypeError:
HTMLParser.__init__(self, *args, **kwargs)
self.skip = False
+ self.isProcessingList = False
+ self.isProcessingOrderedList = False
+ self.orderedNumber = 0
# slackified string
self.output = ''
@@ -43,9 +47,11 @@ def handle_starttag(self, tag, attrs):
if tag == 'br' or tag == 'p':
self.output += LINEBR
if tag == 'b' or tag == 'strong':
- self.output += '*'
+ self.output += ' *'
+ if re.match(r"h[1-6]$", tag):  # exactly h1-h6; a heading opens like bold text
+ self.output += ' *'
if tag == 'i' or tag == 'em':
- self.output += '_'
+ self.output += ' _'
if tag == 'code':
self.output += '`'
if tag == 'a':
@@ -55,6 +61,16 @@ def handle_starttag(self, tag, attrs):
self.output += attr[1] + '|'
if tag == 'style' or tag == 'script':
self.skip = True
+ if tag == 'ul':
+ self.isProcessingList = True
+ if tag == 'li' and self.isProcessingList:
+ self.output += '• '
+ if tag == 'ol':
+ self.orderedNumber = 1  # numbering restarts at every <ol>
+ self.isProcessingOrderedList = True
+ if tag == 'li' and self.isProcessingOrderedList:  # NOTE(review): checked independently of the <ul> flag, so an <li> inside nested ul/ol gets both markers
+ self.output += '{}. '.format(self.orderedNumber)
+ self.orderedNumber += 1
def handle_endtag(self, tag):
"""
@@ -63,15 +79,25 @@ def handle_endtag(self, tag):
:return:
"""
if tag == 'b' or tag == 'strong':
- self.output += '*'
+ self.output += '* '
+ if re.match(r"h[1-6]$", tag):  # exactly h1-h6; close the bold marker and end the line
+ self.output += '* '+LINEBR
if tag == 'i' or tag == 'em':
- self.output += '_'
+ self.output += '_ '
if tag == 'a':
self.output += '>'
if tag == 'code':
self.output += '`'
if tag == 'style' or tag == 'script':
self.skip = False
+ if tag == 'ul':
+ self.isProcessingList = False
+ if tag == 'li' and self.isProcessingList:
+ self.output += LINEBR
+ if tag == 'ol':
+ self.isProcessingOrderedList = False
+ if tag == 'li' and self.isProcessingOrderedList:
+ self.output += LINEBR
def handle_data(self, data):
"""
@@ -105,4 +131,12 @@ def get_output(self):
link: https://stackoverflow.com/questions/2077897/substitute-multiple-whitespace-with-single-whitespace-in-python
:return:
"""
- return ' '.join(self.output.split()).replace(LINEBR, "\n")
+ output = self.output
+ output = re.sub(r'\*(\s\*)+', '*', output)
+ output = re.sub(r'_( _)+', '_', output)
+ output = output.replace('[] ', '☐ ').replace('[x] ', '☑︎ ')
+ output = ' '.join(output.split())
+ output = output.replace(LINEBR, "\n")
+ output = re.sub(r' *\n *', '\n', output)
+ output = output.strip()
+ return output
diff --git a/test_general.py b/test_general.py
index 3bd2a2f..8f80fe1 100644
--- a/test_general.py
+++ b/test_general.py
@@ -11,7 +11,7 @@ def test_example_1():
link in a paragraph!
"""
- expected = "*Hello*\n There is _something_ interesting about `this doc` \n And "
+ expected = "*Hello*\nThere is _something_ interesting about `this doc`\nAnd "
output = HTMLSlacker(html).get_output()
assert(output == expected)
@@ -35,3 +35,39 @@ def test_link_with_target():
expected = "Please click "
output = HTMLSlacker(html).get_output()
assert(output == expected)
+
+def test_unordered_list():
+ html = 'Here is my cool list - The Shining
- Memento
- Blade Runner
'
+ expected = 'Here is my cool list • The Shining\n• Memento\n• Blade Runner'
+ output = HTMLSlacker(html).get_output()
+ assert(output == expected)
+
+def test_ordered_list():
+ html = 'Here is my cool list - The Shining
- Memento
- Blade Runner
'
+ expected = 'Here is my cool list 1. The Shining\n2. Memento\n3. Blade Runner'
+ output = HTMLSlacker(html).get_output()
+ assert(output == expected)
+
+def test_unordered_list_with_text_modifications():
+ html = 'Here is my cool list - The Shining
- Memento
- Blade Runner
'
+ expected = 'Here is my cool list • The Shining\n• Memento\n• Blade *Runner*'
+ output = HTMLSlacker(html).get_output()
+ assert(output == expected)
+
+def test_headers_rendered():
+ html = '''Hello
new world
'''
+ expected = "*Hello*\nnew *world*"
+ output = HTMLSlacker(html).get_output()
+ assert(output == expected)
+
+def test_headers_rendered_no_spaces():
+ html = '''Hello
newworld
'''
+ expected = "*Hello*\nnew *world*"
+ output = HTMLSlacker(html).get_output()
+ assert(output == expected)
+
+def test_task_list_rendered():
+ html = '''[] Grocery
[x] Laundary'''
+ expected = "☐ Grocery\n☑︎ Laundary"
+ output = HTMLSlacker(html).get_output()
+ assert(output == expected)