diff options
author | Ken Kellner <ken@kenkellner.com> | 2018-04-17 16:33:49 -0400 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2018-04-17 16:33:49 -0400 |
commit | 4353161437bbf7a77ac2cca3d0e167b90da3ab77 (patch) | |
tree | 90eea3e5a974a710c68f0b44deb454e2a0e6a405 | |
parent | a8dff81b75c46b49c72d6015d4cce680012339b8 (diff) |
Fix some bugs
-rw-r--r-- | article_epub/publishers/plosone.py | 9 |
-rw-r--r-- | article_epub/publishers/royalsociety.py | 17 |
2 files changed, 17 insertions, 9 deletions
```diff
diff --git a/article_epub/publishers/plosone.py b/article_epub/publishers/plosone.py
index f096641..8638fad 100644
--- a/article_epub/publishers/plosone.py
+++ b/article_epub/publishers/plosone.py
@@ -37,8 +37,13 @@ class PLoSONE(Publisher):
 
         for p in body_raw.find_all('p',class_='caption_object'):
             p.decompose()
-
-        body_raw.find('div',class_='figshare_widget').decompose()
+        try:
+            body_raw.find('div',class_='figshare_widget').decompose()
+        except:
+            pass
+        for i in body_raw.find_all('img',class_='inline-graphic'):
+            link = 'http://journals.plos.org/plosone/'+i['src']
+            i['src'] = link
 
         body_parts = body_raw.find_all('div',class_='section toc-section',
             recursive=False)
diff --git a/article_epub/publishers/royalsociety.py b/article_epub/publishers/royalsociety.py
index 2849956..0c3b2d4 100644
--- a/article_epub/publishers/royalsociety.py
+++ b/article_epub/publishers/royalsociety.py
@@ -56,13 +56,16 @@ class RoyalSociety(Publisher):
 
         tables = self.soup.find_all('div',class_='table')
         for i in tables:
-            src = 'http://rstb.royalsocietypublishing.org'+ \
-                    i.find('a')['data-table-url']
-            dat = requests.get(src, headers={'User-Agent':'Mozilla/5.0'})
-            tabsoup = BeautifulSoup(dat.content,'html.parser') \
-                    .find('table')
-            i.append(tabsoup)
-            i.find('div',class_='table-callout-links').decompose()
+            try:
+                src = 'http://rstb.royalsocietypublishing.org'+ \
+                        i.find('a')['data-table-url']
+                dat = requests.get(src, headers={'User-Agent':'Mozilla/5.0'})
+                tabsoup = BeautifulSoup(dat.content,'html.parser') \
+                        .find('table')
+                i.append(tabsoup)
+                i.find('div',class_='table-callout-links').decompose()
+            except:
+                pass
 
         self.body = ''
         for i in body_raw:
```