diff options
author | Ken Kellner <ken@kenkellner.com> | 2018-04-06 22:37:45 -0400 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2018-04-06 22:37:45 -0400 |
commit | 50e825524c8a93be5729b174e8999c690af9a45d (patch) | |
tree | 44385d9f315a30aff0d1bc8da77b6c8d3af2de01 | |
parent | d79f3a327ee3cf605ade2e110e478830815baa6f (diff) |
Add fulltext checks for rest of publishers
-rw-r--r-- | article_epub/publishers/bioone.py | 7 | ||||
-rw-r--r-- | article_epub/publishers/nrc.py | 7 | ||||
-rw-r--r-- | article_epub/publishers/oxford.py | 7 | ||||
-rw-r--r-- | article_epub/publishers/springer.py | 7 | ||||
-rw-r--r-- | article_epub/publishers/wiley.py | 7 |
5 files changed, 35 insertions, 0 deletions
```diff
diff --git a/article_epub/publishers/bioone.py b/article_epub/publishers/bioone.py
index 73c4379..343aae8 100644
--- a/article_epub/publishers/bioone.py
+++ b/article_epub/publishers/bioone.py
@@ -6,6 +6,13 @@ class BioOne(Publisher):
     """Class for BioOne articles"""
 
     domains = ["bioone.org"]
+
+    def check_fulltext(self):
+        if self.soup.find('div',class_='hlFld-Fulltext') == None:
+            print('Error: Can\'t access fulltext of article')
+            sys.exit()
+        else:
+            return(True)
 
     def get_final_url(self):
         if '/abs/' in self.url:
diff --git a/article_epub/publishers/nrc.py b/article_epub/publishers/nrc.py
index cad59e9..6195a0e 100644
--- a/article_epub/publishers/nrc.py
+++ b/article_epub/publishers/nrc.py
@@ -6,6 +6,13 @@ class NRC(Publisher):
 
     domains = ["nrcresearchpress.com"]
 
+    def check_fulltext(self):
+        if self.soup.find('div',class_='NLM_sec_level_1') == None:
+            print('Error: Can\'t access fulltext of article')
+            sys.exit()
+        else:
+            return(True)
+
     def get_doi(self):
         if self.doi == None:
             doi_raw = self.soup.find('p',class_='citationLine').find('a') \
diff --git a/article_epub/publishers/oxford.py b/article_epub/publishers/oxford.py
index 1bca8ae..8ffea6d 100644
--- a/article_epub/publishers/oxford.py
+++ b/article_epub/publishers/oxford.py
@@ -6,6 +6,13 @@ class Oxford(Publisher):
 
     domains = ["oup.com"]
 
+    def check_fulltext(self):
+        if self.soup.find('div',{'data-widgetname':'ArticleFulltext'}) == None:
+            print('Error: Can\'t access fulltext of article')
+            sys.exit()
+        else:
+            return(True)
+
     def get_doi(self):
         if self.doi == None:
             doi_raw = self.soup.find('div',class_='ww-citation-primary') \
diff --git a/article_epub/publishers/springer.py b/article_epub/publishers/springer.py
index d53eafc..8b9458e 100644
--- a/article_epub/publishers/springer.py
+++ b/article_epub/publishers/springer.py
@@ -5,6 +5,13 @@ class Springer(Publisher):
 
     domains = ["springer.com"]
 
+    def check_fulltext(self):
+        if self.soup.find('div',{'id':'body'}) == None:
+            print('Error: Can\'t access fulltext of article')
+            sys.exit()
+        else:
+            return(True)
+
     def get_doi(self):
         if self.doi == None:
             doi_raw = self.soup.find('span',{"id":"doi-url"}).text.split('/')
diff --git a/article_epub/publishers/wiley.py b/article_epub/publishers/wiley.py
index 0133c1d..716466a 100644
--- a/article_epub/publishers/wiley.py
+++ b/article_epub/publishers/wiley.py
@@ -9,6 +9,13 @@ class Wiley(Publisher):
         if '/abs/' in self.url:
             self.url = self.url.replace('/abs/','/full/')
 
+    def check_fulltext(self):
+        if self.soup.find('div',class_='article-section__content') == None:
+            print('Error: Can\'t access fulltext of article')
+            sys.exit()
+        else:
+            return(True)
+
     def get_doi(self):
         if self.doi == None:
             doi_raw = self.soup.find('a',class_='epub-doi').text.split('/')
```