aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ken Kellner <ken@kenkellner.com> 2018-04-17 16:33:49 -0400
committer Ken Kellner <ken@kenkellner.com> 2018-04-17 16:33:49 -0400
commit 4353161437bbf7a77ac2cca3d0e167b90da3ab77 (patch)
tree 90eea3e5a974a710c68f0b44deb454e2a0e6a405
parent a8dff81b75c46b49c72d6015d4cce680012339b8 (diff)
Fix some bugs
-rw-r--r-- article_epub/publishers/plosone.py 9
-rw-r--r-- article_epub/publishers/royalsociety.py 17
2 files changed, 17 insertions, 9 deletions
diff --git a/article_epub/publishers/plosone.py b/article_epub/publishers/plosone.py
index f096641..8638fad 100644
--- a/article_epub/publishers/plosone.py
+++ b/article_epub/publishers/plosone.py
@@ -37,8 +37,13 @@ class PLoSONE(Publisher):
for p in body_raw.find_all('p',class_='caption_object'):
p.decompose()
-
- body_raw.find('div',class_='figshare_widget').decompose()
+ try:
+ body_raw.find('div',class_='figshare_widget').decompose()
+ except:
+ pass
+ for i in body_raw.find_all('img',class_='inline-graphic'):
+ link = 'http://journals.plos.org/plosone/'+i['src']
+ i['src'] = link
body_parts = body_raw.find_all('div',class_='section toc-section',
recursive=False)
diff --git a/article_epub/publishers/royalsociety.py b/article_epub/publishers/royalsociety.py
index 2849956..0c3b2d4 100644
--- a/article_epub/publishers/royalsociety.py
+++ b/article_epub/publishers/royalsociety.py
@@ -56,13 +56,16 @@ class RoyalSociety(Publisher):
tables = self.soup.find_all('div',class_='table')
for i in tables:
- src = 'http://rstb.royalsocietypublishing.org'+ \
- i.find('a')['data-table-url']
- dat = requests.get(src, headers={'User-Agent':'Mozilla/5.0'})
- tabsoup = BeautifulSoup(dat.content,'html.parser') \
- .find('table')
- i.append(tabsoup)
- i.find('div',class_='table-callout-links').decompose()
+ try:
+ src = 'http://rstb.royalsocietypublishing.org'+ \
+ i.find('a')['data-table-url']
+ dat = requests.get(src, headers={'User-Agent':'Mozilla/5.0'})
+ tabsoup = BeautifulSoup(dat.content,'html.parser') \
+ .find('table')
+ i.append(tabsoup)
+ i.find('div',class_='table-callout-links').decompose()
+ except:
+ pass
self.body = ''
for i in body_raw: