1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
from article_epub.publisher import Publisher, register_publisher
import sys
class Wiley(Publisher):
    """Publisher scraper for Wiley (wiley.com) articles."""
    name = "Wiley"
    domains = ["wiley.com"]
    def get_final_url(self):
        """Rewrite an abstract-page URL into its full-text equivalent."""
        if '/abs/' in self.url:
            self.url = self.url.replace('/abs/','/full/')
    def check_fulltext(self):
        """Return True when the page exposes full text; exit otherwise.

        Wiley serves a stub full-text section whose content div holds
        only '\\n\\xa0\\n' when access is denied (paywall), so both a
        missing section and the placeholder text count as no access.
        """
        full = self.soup.find('section',class_='article-section__full')
        if full is None:
            sys.exit('Error: Can\'t access fulltext of article')
        try:
            content = full.find('div',class_='article-section__content').text
        except AttributeError:
            # Content div absent entirely -> no full text available.
            sys.exit('Error: Can\'t access fulltext of article')
        if content == '\n\xa0\n':
            # Placeholder body shown behind the paywall.
            sys.exit('Error: Can\'t access fulltext of article')
        return True
    def get_doi(self):
        """Extract the DOI (prefix/suffix) from the 'epub-doi' link."""
        if self.doi is None:
            # Link text looks like https://doi.org/<prefix>/<suffix>,
            # so splitting on '/' puts the DOI in fields 3 and 4.
            parts = self.soup.find('a',class_='epub-doi').text.split('/')
            self.doi = parts[3] + '/' + parts[4]
    def get_abstract(self):
        """Get article abstract"""
        self.abstract = self.soup.find('section',
                                       class_='article-section__abstract')
    def get_keywords(self):
        """Get article keywords"""
        self.keywords = []
        try:
            keywords_raw = self.soup.find('section',class_='keywords') \
                               .find_all('a',class_='badge-type')
        except AttributeError:
            # No keywords section on the page; leave the list empty.
            return
        for tag in keywords_raw:
            # Strip newlines and hair spaces that pad Wiley's badges.
            self.keywords.append(tag.text.replace('\n','')
                                         .replace('\u200a',''))
    def get_body(self):
        """Get body of article, skipping the first content div (abstract)."""
        sections = self.soup.find_all('div',class_='article-section__content')
        self.body = ''.join(str(section) for section in sections[1:])
    def get_references(self):
        """Get references list as HTML with a 'Literature Cited' heading."""
        references_raw = str(self.soup.find('section',
                                            {'id':'references-section'}))
        # Un-hide collapsed entries and drop the duplicate heading text;
        # rename data-bib-id so in-text citation anchors resolve.
        references_raw = references_raw.replace('"display: none;"','')
        references_raw = references_raw.replace('Literature Cited','')
        references_raw = references_raw.replace('data-bib-id','id')
        self.references = '<h2>Literature Cited</h2>\n'+references_raw
register_publisher(Wiley)
|