about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ken Kellner <ken@kenkellner.com> 2018-04-06 22:04:25 -0400
committer Ken Kellner <ken@kenkellner.com> 2018-04-06 22:04:25 -0400
commit d79f3a327ee3cf605ade2e110e478830815baa6f (patch)
tree 9eff6f2d18931d6e1d62489d059a8927106fa2ad
parent 7c884373caf733b0896cbd66f3e539a4970f084b (diff)
Add example of check for fulltext
-rw-r--r-- article_epub/publisher.py 4
-rw-r--r-- article_epub/publishers/sciencedirect.py 10
2 files changed, 13 insertions, 1 deletion
diff --git a/article_epub/publisher.py b/article_epub/publisher.py
index 4161e78..7216d7d 100644
--- a/article_epub/publisher.py
+++ b/article_epub/publisher.py
@@ -21,6 +21,9 @@ class Publisher(object):
def get_final_url(self):
pass
+ def check_fulltext(self):
+ pass
+
def soupify(self):
"""Get HTML from article's page"""
self.get_final_url()
@@ -107,6 +110,7 @@ class Publisher(object):
+self.journal+'. '+' doi: '+self.doi
def extract_data(self):
+ self.check_fulltext()
print('Extracting data from HTML...',end='',flush=True)
self.get_doi()
self.get_metadata()
diff --git a/article_epub/publishers/sciencedirect.py b/article_epub/publishers/sciencedirect.py
index 6ca4d8f..2078a45 100644
--- a/article_epub/publishers/sciencedirect.py
+++ b/article_epub/publishers/sciencedirect.py
@@ -1,10 +1,18 @@
from article_epub.publisher import Publisher, register_publisher
+import sys
class ScienceDirect(Publisher):
"""Class for Science Direct (Elsevier) articles"""
domains = ["sciencedirect.com","elsevier.com"]
-
+
+ def check_fulltext(self):
+ if self.soup.find('div',class_='Body') == None:
+ print('Error: Can\'t access fulltext of article')
+ sys.exit()
+ else:
+ return(True)
+
def get_doi(self):
if self.doi == None:
doi_raw = self.soup.find('a',class_='doi').get('href').split('/')