about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ken Kellner <ken@kenkellner.com> 2018-04-05 17:17:22 -0400
committer: Ken Kellner <ken@kenkellner.com> 2018-04-05 17:17:22 -0400
commit: 59de534956c5dcacdd641afd0c5399c6279445fe (patch)
tree: cbc14fdc43a4b61a75410a5c39624ec8fda4a848
parent: 26a784ccd482f4e7ac995e3147cae6185df65b7f (diff)
Add Oxford support. Still have issues with bold font and lit cited looking bad
-rw-r--r-- article_epub/publisher.py 10
-rw-r--r-- article_epub/publishers/__init__.py 1
-rw-r--r-- article_epub/publishers/oxford.py 45
-rw-r--r-- article_epub/publishers/plosone.py 2
4 files changed, 53 insertions, 5 deletions
diff --git a/article_epub/publisher.py b/article_epub/publisher.py
index 7af9a47..7465f7f 100644
--- a/article_epub/publisher.py
+++ b/article_epub/publisher.py
@@ -72,13 +72,17 @@ class Publisher(object):
self.journal = self.meta['container-title']
if 'published-print' in self.meta.keys():
- self.volume = str(self.meta['volume'])
- self.pages = str(self.meta['page'])
self.year = str(self.meta['published-print']['date-parts'][0][0])
else:
+ self.year = str(self.meta['published-online']['date-parts'][0][0])
+ try:
+ self.volume = str(self.meta['volume'])
+ except:
self.volume = ''
+ try:
+ self.pages = str(self.meta['page'])
+ except:
self.pages = ''
- self.year = str(self.meta['published-online']['date-parts'][0][0])
def get_citation(self):
diff --git a/article_epub/publishers/__init__.py b/article_epub/publishers/__init__.py
index 3e975c4..6aa2f43 100644
--- a/article_epub/publishers/__init__.py
+++ b/article_epub/publishers/__init__.py
@@ -3,3 +3,4 @@ from article_epub.publishers.springer import Springer
from article_epub.publishers.wiley import Wiley
from article_epub.publishers.bioone import BioOne
from article_epub.publishers.plosone import PLoSONE
+from article_epub.publishers.oxford import Oxford
diff --git a/article_epub/publishers/oxford.py b/article_epub/publishers/oxford.py
new file mode 100644
index 0000000..f8a95a1
--- /dev/null
+++ b/article_epub/publishers/oxford.py
@@ -0,0 +1,45 @@
+from article_epub.publisher import Publisher, register_publisher
+import copy
+
+class Oxford(Publisher):
+    """Publisher subclass for Oxford articles served from oup.com."""
+
+    domains = ["oup.com"]
+
+    def get_doi(self):
+        """Set self.doi from the primary citation link unless already set."""
+        if self.doi is None:
+            link = self.soup.find('div', class_='ww-citation-primary').find('a')
+            parts = link['href'].split('/')  # presumably https://doi.org/<a>/<b>
+            self.doi = parts[3] + '/' + parts[4]
+
+    def get_abstract(self):
+        """Store the abstract section as HTML under a closed <h2> heading."""
+        section = self.soup.find('section', class_='abstract')
+        self.abstract = '<h2>Abstract</h2>\n' + str(section)
+
+    def get_keywords(self):
+        """Collect the link text of every keyword in the 'kwd-group' block."""
+        links = self.soup.find('div', class_='kwd-group').find_all('a')
+        self.keywords = [link.text for link in links]
+
+    def get_body(self):
+        """Store a pruned copy of the full-text widget as the article body."""
+        # Prune a copy only: get_references() still reads these nodes from soup.
+        body = copy.copy(self.soup.find(
+            'div', {'data-widgetname': 'ArticleFulltext'}))
+        body.find('h2', class_='abstract-title').decompose()
+        body.find('div', class_='article-metadata-panel').decompose()
+        body.find('div', class_='ref-list').decompose()
+        body.find('span', {'id': 'UserHasAccess'}).decompose()
+        body.find('div', class_='copyright').decompose()
+        body.find('h2', class_='backreferences-title').decompose()
+        self.body = body
+
+    def get_references(self):
+        """Store the references heading followed by the reference list as HTML."""
+        heading = self.soup.find('h2', class_='backreferences-title')
+        ref_list = self.soup.find('div', class_='ref-list')
+        self.references = str(heading) + str(ref_list)
+
+register_publisher(Oxford)
diff --git a/article_epub/publishers/plosone.py b/article_epub/publishers/plosone.py
index 578826e..f096641 100644
--- a/article_epub/publishers/plosone.py
+++ b/article_epub/publishers/plosone.py
@@ -1,6 +1,4 @@
from article_epub.publisher import Publisher, register_publisher
-import requests
-from bs4 import BeautifulSoup
class PLoSONE(Publisher):
"""Class for PLoS ONE articles"""