about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ken Kellner <ken@kenkellner.com> 2018-04-05 16:04:03 -0400
committer Ken Kellner <ken@kenkellner.com> 2018-04-05 16:04:03 -0400
commit 26a784ccd482f4e7ac995e3147cae6185df65b7f (patch)
tree 53fbdd500caa856fa947b41aed805d56674c153a
parent 34913dd9ff573cd506e45dda5c13008f451cec11 (diff)
Add PLoS ONE support
-rw-r--r-- article_epub/publishers/__init__.py 1
-rw-r--r-- article_epub/publishers/plosone.py 56
2 files changed, 57 insertions, 0 deletions
diff --git a/article_epub/publishers/__init__.py b/article_epub/publishers/__init__.py
index f17fab8..3e975c4 100644
--- a/article_epub/publishers/__init__.py
+++ b/article_epub/publishers/__init__.py
@@ -2,3 +2,4 @@ from article_epub.publishers.sciencedirect import ScienceDirect
from article_epub.publishers.springer import Springer
from article_epub.publishers.wiley import Wiley
from article_epub.publishers.bioone import BioOne
+from article_epub.publishers.plosone import PLoSONE
diff --git a/article_epub/publishers/plosone.py b/article_epub/publishers/plosone.py
new file mode 100644
index 0000000..578826e
--- /dev/null
+++ b/article_epub/publishers/plosone.py
@@ -0,0 +1,56 @@
+from article_epub.publisher import Publisher, register_publisher
+import requests
+from bs4 import BeautifulSoup
+
+class PLoSONE(Publisher):
+    """Scraper for PLoS ONE article pages; every getter reads self.soup."""
+
+    domains = ["plos.org"]
+
+    def get_doi(self):
+        """Set self.doi from the 'artDoi' link text unless already set."""
+        if self.doi is None:
+            # The DOI is fields 3 and 4 of the link text split on '/'.
+            doi_link = self.soup.find('li', {'id': 'artDoi'}).find('a')
+            doi_raw = doi_link.text.split('/')
+            self.doi = doi_raw[3] + '/' + doi_raw[4]
+
+    def get_abstract(self):
+        """Store the 'abstract' div (None if absent) as self.abstract."""
+        self.abstract = self.soup.find('div', class_='abstract')
+
+    def get_keywords(self):
+        """Set self.keywords to the link text of each 'subjectList' item."""
+        subjects = self.soup.find('ul', {'id': 'subjectList'})
+        self.keywords = [li.find('a').text for li in subjects.find_all('li')]
+
+    def get_body(self):
+        """Set self.body to the cleaned top-level article sections as HTML."""
+        body_raw = self.soup.find('div', class_='article-text')
+        # Point each img-box image at an absolute URL built from its link.
+        for box in body_raw.find_all('div', class_='img-box'):
+            link = box.find('a')
+            new_img = 'http://journals.plos.org/plosone/' + str(link['href'])
+            link.find('img')['src'] = new_img
+
+        # Strip download links, caption objects and figshare widgets. Using
+        # find_all() for all three keeps a page without a figshare widget
+        # from raising AttributeError on a None result from find().
+        unwanted = (('div', 'figure-inline-download'),
+                    ('p', 'caption_object'),
+                    ('div', 'figshare_widget'))
+        for tag_name, css_class in unwanted:
+            for tag in body_raw.find_all(tag_name, class_=css_class):
+                tag.decompose()
+
+        # Direct children only; nested sections come along with their parent.
+        body_parts = body_raw.find_all('div', class_='section toc-section',
+                                       recursive=False)
+        self.body = ''.join(str(part) for part in body_parts)
+
+    def get_references(self):
+        """Set self.references to a heading plus the 'references' list."""
+        references_raw = self.soup.find('ol', class_='references')
+        self.references = '<h2>References</h2>\n' + str(references_raw)
+
+register_publisher(PLoSONE)  # runs at import; presumably enables lookup by `domains` - confirm