diff options
author | Ken Kellner <ken@kenkellner.com> | 2018-04-06 22:04:25 -0400 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2018-04-06 22:04:25 -0400 |
commit | d79f3a327ee3cf605ade2e110e478830815baa6f (patch) | |
tree | 9eff6f2d18931d6e1d62489d059a8927106fa2ad | |
parent | 7c884373caf733b0896cbd66f3e539a4970f084b (diff) |
Add example of check for fulltext
-rw-r--r-- | article_epub/publisher.py | 4 | ||||
-rw-r--r-- | article_epub/publishers/sciencedirect.py | 10 |
2 files changed, 13 insertions, 1 deletions
diff --git a/article_epub/publisher.py b/article_epub/publisher.py
index 4161e78..7216d7d 100644
--- a/article_epub/publisher.py
+++ b/article_epub/publisher.py
@@ -21,6 +21,9 @@ class Publisher(object):
     def get_final_url(self):
         pass
 
+    def check_fulltext(self):
+        pass
+
     def soupify(self):
         """Get HTML from article's page"""
         self.get_final_url()
@@ -107,6 +110,7 @@ class Publisher(object):
                 +self.journal+'. '+' doi: '+self.doi
 
     def extract_data(self):
+        self.check_fulltext()
         print('Extracting data from HTML...',end='',flush=True)
         self.get_doi()
         self.get_metadata()
diff --git a/article_epub/publishers/sciencedirect.py b/article_epub/publishers/sciencedirect.py
index 6ca4d8f..2078a45 100644
--- a/article_epub/publishers/sciencedirect.py
+++ b/article_epub/publishers/sciencedirect.py
@@ -1,10 +1,18 @@
 from article_epub.publisher import Publisher, register_publisher
+import sys
 
 class ScienceDirect(Publisher):
     """Class for Science Direct (Elsevier) articles"""
 
     domains = ["sciencedirect.com","elsevier.com"]
-    
+
+    def check_fulltext(self):
+        if self.soup.find('div',class_='Body') == None:
+            print('Error: Can\'t access fulltext of article')
+            sys.exit()
+        else:
+            return(True)
+
     def get_doi(self):
         if self.doi == None:
             doi_raw = self.soup.find('a',class_='doi').get('href').split('/')