aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ken Kellner <ken@kenkellner.com> 2018-04-20 14:52:58 -0400
committer Ken Kellner <ken@kenkellner.com> 2018-04-20 14:52:58 -0400
commit 8f61b92000c9a03a8d0740f268dc1befca66ad88 (patch)
tree 127374affe8140c45fd51c3ea09b06953ce9912e
parent f2eb10a42b8772c082875296f2d426ecd85e7770 (diff)
Add Nature Publishing support; links to tables only
-rw-r--r-- article_epub/publishers/__init__.py 1
-rw-r--r-- article_epub/publishers/nature.py 97
-rw-r--r-- article_epub/utilities.py 7
3 files changed, 104 insertions, 1 deletions
diff --git a/article_epub/publishers/__init__.py b/article_epub/publishers/__init__.py
index d153124..206b7bc 100644
--- a/article_epub/publishers/__init__.py
+++ b/article_epub/publishers/__init__.py
@@ -9,3 +9,4 @@ from article_epub.publishers.nrc import NRC
from article_epub.publishers.royalsociety import RoyalSociety
from article_epub.publishers.tandf import TandF
from article_epub.publishers.annualreviews import AnnualReviews
+from article_epub.publishers.nature import Nature
diff --git a/article_epub/publishers/nature.py b/article_epub/publishers/nature.py
new file mode 100644
index 0000000..77e1f9c
--- /dev/null
+++ b/article_epub/publishers/nature.py
@@ -0,0 +1,97 @@
+from article_epub.publisher import Publisher, register_publisher
+import sys
+import copy
+
+class Nature(Publisher):
+    """Publisher implementation for articles hosted on nature.com.
+
+    Each ``get_*`` method reads the parsed article page from ``self.soup``
+    (assumed to be a BeautifulSoup tree prepared by the ``Publisher`` base
+    class -- confirm against article_epub/publisher.py) and stores its
+    result on the instance.
+    """
+
+    name = "Nature Publishing"
+    domains = ["nature.com"]
+
+    # Prefix added to site-relative table and figure links.
+    _BASE_URL = 'https://www.nature.com'
+
+    # ``aria-labelledby`` values of the <section> elements that get_body()
+    # strips from the article body.
+    _UNWANTED_SECTIONS = (
+        'abstract',
+        'references',
+        'author-information',
+        'rightslink',
+        'article-comments',
+    )
+
+    @staticmethod
+    def _remove(tag):
+        """Decompose ``tag`` if it was found; do nothing for ``None``."""
+        if tag is not None:
+            tag.decompose()
+
+    def check_fulltext(self):
+        """Return True when the fulltext is reachable, otherwise exit.
+
+        Raises:
+            SystemExit: if the page contains an
+                ``<a data-track-action="subscribe">`` link.
+        """
+        paywall = self.soup.find('a', {'data-track-action': 'subscribe'})
+        if paywall is not None:
+            sys.exit('Error: Can\'t access fulltext of article')
+        return True
+
+    def get_doi(self):
+        """Set ``self.doi`` from the DOI <meta> tag when it is still None."""
+        if self.doi is not None:
+            return
+        meta = self.soup.find('meta', {'name': 'DOI'})
+        # A page without the tag (or without its content attribute) leaves
+        # self.doi as None instead of raising TypeError/KeyError.
+        if meta is not None and meta.get('content') is not None:
+            self.doi = str(meta['content'])
+
+    def get_abstract(self):
+        """Store the abstract section's HTML in ``self.abstract``.
+
+        ``self.abstract`` is set to an empty string when the page has no
+        ``abstract-section`` element.
+        """
+        abstract_raw = self.soup.find('div', {'id': 'abstract-section'})
+        if abstract_raw is None:
+            # str(None) would put the literal text 'None' into the output.
+            self.abstract = ''
+            return
+
+        # Drop the first <span> of the section before serialising it.
+        self._remove(abstract_raw.find('span'))
+        self.abstract = str(abstract_raw)
+
+    def get_keywords(self):
+        """Store the text of every subject-tag link in ``self.keywords``."""
+        self.keywords = [
+            link.text
+            for link in self.soup.find_all('a', class_='subject-tag-link')
+        ]
+
+    def get_body(self):
+        """Store the cleaned article body HTML in ``self.body``.
+
+        Works on a copy of the ``article-body`` element so ``self.soup``
+        keeps the sections that other methods read.
+
+        Raises:
+            SystemExit: if the page has no ``article-body`` element.
+        """
+        found = self.soup.find('div', class_='article-body')
+        if found is None:
+            sys.exit('Error: Can\'t find article body')
+        body_raw = copy.copy(found)
+
+        # Remove each unwanted section independently: a missing section
+        # must not prevent the remaining ones from being stripped.
+        for label in self._UNWANTED_SECTIONS:
+            self._remove(
+                body_raw.find('section', {'aria-labelledby': label}))
+
+        for title_label in body_raw.find_all(
+                'span', class_='js-section-title-label'):
+            title_label.decompose()
+
+        # Table and figure links are site-relative; make them absolute.
+        # Links that are already absolute are left untouched.
+        for action in ('view table', 'view figure'):
+            for link in body_raw.find_all(
+                    'a', {'data-track-action': action}):
+                href = link.get('href', '')
+                if href.startswith('/'):
+                    link['href'] = self._BASE_URL + href
+
+        # Reference links keep only their fragment so they resolve inside
+        # the generated document.
+        for link in body_raw.find_all(
+                'a', {'data-track-action': 'reference anchor'}):
+            parts = link.get('href', '').split('#')
+            if len(parts) > 1:
+                link['href'] = '#' + parts[1]
+
+        self.body = str(body_raw)
+
+    def get_references(self):
+        """Store the references section's HTML in ``self.references``.
+
+        ``self.references`` is set to an empty string when the page has no
+        ``references-section`` element.
+        """
+        ref_all = self.soup.find('div', {'id': 'references-section'})
+        if ref_all is None:
+            self.references = ''
+            return
+
+        self._remove(ref_all.find('span', class_='js-section-title-label'))
+
+        ref_list = ref_all.find('ol')
+        entries = []
+        if ref_list is not None:
+            entries = ref_list.find_all('li', recursive=False)
+
+        for entry in entries:
+            # Handle the two removals separately so an entry without a
+            # <span> still loses its js-ref-links list.
+            self._remove(entry.find('span'))
+            self._remove(entry.find('ul', class_='js-ref-links'))
+
+        self.references = str(ref_all)
+
+# Hand the class to register_publisher (imported from
+# article_epub.publisher); presumably this is what lets the package pick
+# Nature for nature.com URLs -- confirm against the registry code.
+register_publisher(Nature)
diff --git a/article_epub/utilities.py b/article_epub/utilities.py
index 1fa1f61..60a2d97 100644
--- a/article_epub/utilities.py
+++ b/article_epub/utilities.py
@@ -14,11 +14,16 @@ def url_from_title(title):
.find('div',class_='gs_ri').find('a')
possible_title = result.text
possible_link = result['href']
+
+ if possible_title == '':
+ print('No matching link available.')
+ sys.exit('Getting URL from title failed')
+
print('Provided title:')
print(title)
print('Found following article:')
print(possible_title)
- choice = input("Is this correct (y/n)? ")
+ choice = input("\033[0;37m"+"Is this correct (y/n)? "+"\033[00m")
if choice == 'y':
return(possible_link)
else: