Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
David Kaufmann
6e4e56282c fix build error, extract unescape method from html2text 2019-09-27 03:45:40 +02:00
3 changed files with 153 additions and 1 deletions

View file

@ -0,0 +1,26 @@
--- rss2email-2.71/feedparser.py 2019-09-27 02:24:47.181023634 +0200
+++ rss2email-2.71/feedparser.py 2019-09-27 02:28:29.474661430 +0200
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
"""Universal feed parser
Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
diff -ur rss2email-2.71-orig/html2text.py rss2email-2.71/html2text.py
--- rss2email-2.71-orig/html2text.py 2019-09-27 02:24:47.182023614 +0200
+++ rss2email-2.71/html2text.py 2019-09-27 02:28:33.928574026 +0200
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
"""html2text: Turn HTML into equivalent Markdown-structured text."""
__version__ = "3.01"
__author__ = "Aaron Swartz (me@aaronsw.com)"
diff -ur rss2email-2.71-orig/rss2email.py rss2email-2.71/rss2email.py
--- rss2email-2.71-orig/rss2email.py 2019-09-27 02:24:47.182023614 +0200
+++ rss2email-2.71/rss2email.py 2019-09-27 02:25:05.269668674 +0200
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python2
"""rss2email: get RSS feeds emailed to you
http://rss2email.infogami.com

View file

@ -0,0 +1,118 @@
--- rss2email-2.71-orig/rss2email.py 2019-09-27 02:24:47.182023614 +0200
+++ rss2email-2.71/rss2email.py 2019-09-27 03:13:01.529137689 +0200
@@ -350,6 +350,106 @@
if type(s) is types.UnicodeType: return s.encode('utf-8')
else: return s
+### Extracted code from deprecated html2text.unescape(s) ###
+
+import htmlentitydefs
+import re
+
+class HTML2TextUnescape:
+ def __init__(self):
+ self.RE_UNESCAPE = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")
+ self.UNIFIABLE = {
+ 'rsquo': "'",
+ 'lsquo': "'",
+ 'rdquo': '"',
+ 'ldquo': '"',
+ 'copy': '(C)',
+ 'mdash': '--',
+ 'nbsp': ' ',
+ 'rarr': '->',
+ 'larr': '<-',
+ 'middot': '*',
+ 'ndash': '-',
+ 'oelig': 'oe',
+ 'aelig': 'ae',
+ 'agrave': 'a',
+ 'aacute': 'a',
+ 'acirc': 'a',
+ 'atilde': 'a',
+ 'auml': 'a',
+ 'aring': 'a',
+ 'egrave': 'e',
+ 'eacute': 'e',
+ 'ecirc': 'e',
+ 'euml': 'e',
+ 'igrave': 'i',
+ 'iacute': 'i',
+ 'icirc': 'i',
+ 'iuml': 'i',
+ 'ograve': 'o',
+ 'oacute': 'o',
+ 'ocirc': 'o',
+ 'otilde': 'o',
+ 'ouml': 'o',
+ 'ugrave': 'u',
+ 'uacute': 'u',
+ 'ucirc': 'u',
+ 'uuml': 'u',
+ 'lrm': '',
+ 'rlm': ''
+ }
+
+ self.unifiable_n = {}
+ for k in self.UNIFIABLE:
+ self.unifiable_n[self.name2cp(k)] = self.UNIFIABLE[k]
+
+ def name2cp(self, k):
+ """Return sname to codepoint"""
+ if k == 'apos':
+ return ord("'")
+ return htmlentitydefs.name2codepoint[k]
+
+ def charref(self, name):
+ if name[0] in ['x', 'X']:
+ c = int(name[1:], 16)
+ else:
+ c = int(name)
+
+ if c in unifiable_n:
+ return unifiable_n[c]
+ else:
+ try:
+ return chr(c)
+ except ValueError: # invalid unicode
+ return ''
+
+ def entityref(self, c):
+ if c in UNIFIABLE:
+ return UNIFIABLE[c]
+ else:
+ try:
+ name2cp(c)
+ except KeyError:
+ return "&" + c + ';'
+ else:
+ if c == 'nbsp':
+ return UNIFIABLE[c]
+ else:
+ return chr(name2cp(c))
+
+ def replaceEntities(self, s):
+ s = s.group(1)
+ if s[0] == "#":
+ return self.charref(s[1:])
+ else:
+ return self.entityref(s)
+
+ def unescape(self, s):
+ return self.RE_UNESCAPE.sub(self.replaceEntities, s)
+
+ def h2t_unescape(s):  # Stand-in for the removed html2text.unescape(): decodes HTML entities in s.
+ return HTML2TextUnescape().unescape(s)
+
### Parsing Utilities ###
def getContent(entry, HTMLOK=0):
@@ -678,7 +778,7 @@
from_addr = getEmail(r, entry)
- name = h2t.unescape(getName(r, entry))
+ name = h2t_unescape(getName(r, entry))
fromhdr = formataddr((name, from_addr,))
tohdr = (f.to or default_to)
subjecthdr = title

View file

@ -1,6 +1,6 @@
Name: rss2email
Version: 2.71
Release: 14%{?dist}
Release: 16%{?dist}
Summary: Deliver news from RSS feeds to your SMTP server as text or HTML mail
Group: Applications/Internet
@ -24,6 +24,8 @@ Patch0: rss2email-2.70-config-location.patch
Patch1003: 0003-Setup-the-correct-version-number-in-rss2email.py.patch
Patch1006: 0006-Prefer-utf8-in-CHARSET_LIST.patch
Patch1008: 0008-Fix-encoding-of-From-and-To-headers.patch.diff
Patch1009: 0009-Show-python2-explicitely-in-shebang.patch
Patch1010: 0010-Extract-deprecated-html2text-unescape.patch
BuildArch: noarch
@ -65,6 +67,8 @@ cat %{SOURCE3} | sed -e 's!@datadir@!%{_datadir}!' > r2e
%patch1003 -p1 -b .correct-version-number
%patch1006 -p1 -b .prefer-utf8-in-charset-list
%patch1008 -p1 -b .fix-encoding-of-from-and-to-headers
%patch1009 -p1 -b .show-python2-explicitely-in-shebang
%patch1010 -p1 -b .extract-deprecated-html2text-unescape
sed -i -e 's/\r//' CHANGELOG rss2email.py config.py.example
@ -93,6 +97,10 @@ install -p -m 0644 config.py.example $RPM_BUILD_ROOT%{_datadir}/%{name}/
%changelog
* Fri Sep 27 2019 David Kaufmann <astra@ionic.at> - 2.71-16
- Fix build error due to missing explicit python shebang
- Extract deprecated and removed method unescape from html2text
* Sat Jul 14 2018 Fedora Release Engineering <releng@fedoraproject.org> - 2.71-14
- Rebuilt for https://fedoraproject.org/wiki/Fedora_29_Mass_Rebuild