118 lines
3.2 KiB
Diff
118 lines
3.2 KiB
Diff
--- rss2email-2.71-orig/rss2email.py 2019-09-27 02:24:47.182023614 +0200
|
|
+++ rss2email-2.71/rss2email.py 2019-09-27 03:13:01.529137689 +0200
|
|
@@ -350,6 +350,106 @@
|
|
if type(s) is types.UnicodeType: return s.encode('utf-8')
|
|
else: return s
|
|
|
|
+### Extracted code from deprecated html2text.unescape(s) ###
|
|
+
|
|
+import htmlentitydefs
|
|
+import re
|
|
+
|
|
class HTML2TextUnescape:
    """Replace HTML character and entity references in a string.

    Standalone copy of the logic behind the deprecated
    ``html2text.unescape(s)``.  References listed in ``UNIFIABLE`` are
    replaced by a plain-ASCII approximation; every other known reference
    is replaced by the character it denotes; unknown or malformed
    references are left in the text unchanged.
    """

    def __init__(self):
        # Matches "&name;", "&#123;" and "&#x7B;"; group 1 is everything
        # between the "&" and the ";".
        self.RE_UNESCAPE = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
        # Entity name -> ASCII replacement text.
        self.UNIFIABLE = {
            'rsquo': "'",
            'lsquo': "'",
            'rdquo': '"',
            'ldquo': '"',
            'copy': '(C)',
            'mdash': '--',
            'nbsp': ' ',
            'rarr': '->',
            'larr': '<-',
            'middot': '*',
            'ndash': '-',
            'oelig': 'oe',
            'aelig': 'ae',
            'agrave': 'a',
            'aacute': 'a',
            'acirc': 'a',
            'atilde': 'a',
            'auml': 'a',
            'aring': 'a',
            'egrave': 'e',
            'eacute': 'e',
            'ecirc': 'e',
            'euml': 'e',
            'igrave': 'i',
            'iacute': 'i',
            'icirc': 'i',
            'iuml': 'i',
            'ograve': 'o',
            'oacute': 'o',
            'ocirc': 'o',
            'otilde': 'o',
            'ouml': 'o',
            'ugrave': 'u',
            'uacute': 'u',
            'ucirc': 'u',
            'uuml': 'u',
            'lrm': '',
            'rlm': ''
        }

        # Same table keyed by code point, so that numeric references
        # (e.g. "&#8217;") get the same replacement as the named ones.
        self.unifiable_n = {}
        for k in self.UNIFIABLE:
            self.unifiable_n[self.name2cp(k)] = self.UNIFIABLE[k]

    def name2cp(self, k):
        """Return the code point for entity name *k*.

        Raises KeyError when *k* is not a known entity name.
        """
        # 'apos' is handled explicitly instead of relying on the
        # htmlentitydefs table.
        if k == 'apos':
            return ord("'")
        return htmlentitydefs.name2codepoint[k]

    def _cp2char(self, c):
        """Return the character for code point *c*, or '' if it is invalid."""
        try:
            to_char = unichr  # Python 2: chr() only covers 0..255
        except NameError:
            to_char = chr
        try:
            return to_char(c)
        except (ValueError, OverflowError):  # invalid unicode
            return ''

    def charref(self, name):
        """Return the replacement for a numeric reference.

        *name* is the text between "&#" and ";", e.g. "65" or "x41".
        A value that cannot be parsed as a number is returned as the
        original, unmodified reference.
        """
        try:
            if name[0] in ['x', 'X']:
                c = int(name[1:], 16)
            else:
                c = int(name)
        except ValueError:
            # The pattern also lets through things like "&#xyz;"; keep them.
            return "&#" + name + ';'

        if c in self.unifiable_n:
            return self.unifiable_n[c]
        return self._cp2char(c)

    def entityref(self, c):
        """Return the replacement for the named reference *c*.

        Unknown names are returned as the original "&name;" text.
        """
        if c in self.UNIFIABLE:
            return self.UNIFIABLE[c]
        try:
            cp = self.name2cp(c)
        except KeyError:
            return "&" + c + ';'
        return self._cp2char(cp)

    def replaceEntities(self, s):
        """Substitution callback: *s* is a match object of RE_UNESCAPE."""
        s = s.group(1)
        if s[0] == "#":
            return self.charref(s[1:])
        else:
            return self.entityref(s)

    def unescape(self, s):
        """Return *s* with every character/entity reference replaced."""
        return self.RE_UNESCAPE.sub(self.replaceEntities, s)
|
|
+
|
|
def h2t_unescape(s):
    """Return *s* after passing it through a new HTML2TextUnescape."""
    unescaper = HTML2TextUnescape()
    return unescaper.unescape(s)
|
|
+
|
|
### Parsing Utilities ###
|
|
|
|
def getContent(entry, HTMLOK=0):
|
|
@@ -678,7 +778,7 @@
|
|
|
|
from_addr = getEmail(r, entry)
|
|
|
|
- name = h2t.unescape(getName(r, entry))
|
|
+ name = h2t_unescape(getName(r, entry))
|
|
fromhdr = formataddr((name, from_addr,))
|
|
tohdr = (f.to or default_to)
|
|
subjecthdr = title
|