aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ken Kellner <ken@kenkellner.com> 2018-04-24 11:45:28 -0400
committer Ken Kellner <ken@kenkellner.com> 2018-04-24 11:45:28 -0400
commit ace395fc372ead82216af906aee1c97dfea821ca (patch)
tree 3afd265c65cb6a524a55919c04740ad94ed296fd
parent 4eed12039fca7322305b550933b94856a408fed2 (diff)
Add U Chicago Press support
-rw-r--r-- article_epub/publishers/__init__.py 1
-rw-r--r-- article_epub/publishers/uchicago.py 86
2 files changed, 87 insertions, 0 deletions
diff --git a/article_epub/publishers/__init__.py b/article_epub/publishers/__init__.py
index 206b7bc..790140c 100644
--- a/article_epub/publishers/__init__.py
+++ b/article_epub/publishers/__init__.py
@@ -10,3 +10,4 @@ from article_epub.publishers.royalsociety import RoyalSociety
from article_epub.publishers.tandf import TandF
from article_epub.publishers.annualreviews import AnnualReviews
from article_epub.publishers.nature import Nature
+from article_epub.publishers.uchicago import UChicago
diff --git a/article_epub/publishers/uchicago.py b/article_epub/publishers/uchicago.py
new file mode 100644
index 0000000..9492e75
--- /dev/null
+++ b/article_epub/publishers/uchicago.py
@@ -0,0 +1,86 @@
+from article_epub.publisher import Publisher, register_publisher
+import sys
+import copy
+
class UChicago(Publisher):
    """Class for University of Chicago Press articles"""

    name = "University of Chicago Press"
    domains = ["uchicago.edu"]

    # Site root prepended to the image paths found in the article markup.
    _site_root = 'https://www.journals.uchicago.edu'

    def check_fulltext(self):
        """Return True if the fulltext div is present, otherwise exit.

        Calls sys.exit with an error message when the page contains no
        'hlFld-Fulltext' div.
        """
        if self.soup.find('div', class_='hlFld-Fulltext') is None:
            sys.exit('Error: Can\'t access fulltext of article')
        return True

    def get_doi(self):
        """Get the DOI from the page's meta tag unless one is already set"""
        if self.doi is None:
            doi_tag = self.soup.find('meta', {'scheme': 'doi'})
            self.doi = str(doi_tag['content'])

    def get_abstract(self):
        """Get article abstract

        Sets self.abstract to an 'Abstract' heading followed by the
        abstract markup, or to an empty string when the page has no
        'abstractSection' div.
        """
        abstract_raw = self.soup.find('div', class_='abstractSection')
        if abstract_raw is None:
            # str(None) would otherwise put the literal text 'None'
            # under the heading.
            self.abstract = ''
            return
        self.abstract = '<h2>Abstract</h2>\n' + str(abstract_raw)

    def get_keywords(self):
        """Get article keywords

        Sets self.keywords to a list of whitespace-trimmed keywords, or
        to an empty list when the page has no 'hlFld-KeywordText' div.
        """
        self.keywords = []
        keywords_tag = self.soup.find('div', class_='hlFld-KeywordText')
        if keywords_tag is None:
            return

        text = keywords_tag.text.strip()
        # Remove the label as a prefix. str.strip('Keywords: ') treats its
        # argument as a set of characters and would also eat leading and
        # trailing letters of the keywords themselves.
        label = 'Keywords:'
        if text.startswith(label):
            text = text[len(label):]
        text = text.strip().strip('.')

        self.keywords = [kw.strip() for kw in text.split(',') if kw.strip()]

    def get_body(self):
        """Get body of article

        Works on a copy of the 'hlFld-Fulltext' div: converts heading divs
        to h2/h3, drops jump-to navigation and HTML tables, points figure
        links at in-document anchors, makes image sources absolute, and
        stores the resulting markup in self.body.
        """
        body_raw = copy.copy(self.soup.find('div', class_='hlFld-Fulltext'))

        for heading in body_raw.find_all('div', class_='sectionHeading'):
            heading.name = 'h2'

        for jump in body_raw.find_all('div', class_='sectionJumpTo'):
            jump.decompose()

        for subheading in body_raw.find_all('div', class_='head-b'):
            subheading.name = 'h3'

        for link in body_raw.find_all('a', class_='showFiguresEEvent'):
            # Links without a data-id attribute are left untouched.
            target = link.get('data-id')
            if target is not None:
                link['href'] = '#' + target

        for img in body_raw.find_all('img', {'alt': 'figure'}):
            src = self._site_root + img['src']
            # NOTE(review): presumably 'small' -> 'medium' selects a larger
            # rendition of the figure; confirm against the site's URLs.
            img['src'] = src.replace('small', 'medium')

        for table in body_raw.find_all('div', class_='htmlTable'):
            table.decompose()

        for span in body_raw.find_all('span', class_='NLM_inline-graphic'):
            img = span.find('img')
            if img is None:
                # Nothing to rewrite in a span that holds no image.
                continue
            img['src'] = self._site_root + img['src']

        self.body = str(body_raw)

    def get_references(self):
        """Get references list

        Removes every anchor from the 'ref_layout' divs (in self.soup
        itself, not a copy) and stores them under a 'Literature Cited'
        heading in self.references.
        """
        refs_raw = self.soup.find_all('div', class_='ref_layout')

        for ref in refs_raw:
            for anchor in ref.find_all('a'):
                anchor.decompose()

        heading = '<h2>Literature Cited</h2>\n'
        self.references = heading + ''.join(str(ref) for ref in refs_raw)


register_publisher(UChicago)