aboutsummaryrefslogtreecommitdiff
path: root/article_epub/utilities.py
blob: 60a2d97433b69aa4b8f73fe6b8aa6a3d10886760 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import requests
from bs4 import BeautifulSoup
import sys

def url_from_title(title):
    """Look up an article on Google Scholar by title and return its link.

    The title (with newlines removed) is searched as an exact, double-quoted
    phrase. The first result's title is shown to the user, who must confirm
    it interactively by answering ``y``.

    Args:
        title: Article title to search for.

    Returns:
        The ``href`` of the first search result, once the user confirms it.

    Raises:
        SystemExit: If the request fails, no result link can be found in the
            returned page, or the user does not answer ``y``.
    """
    failure = 'Getting URL from title failed'
    print("Getting URL from title......")

    # Let requests build the query string so characters such as '&', '#',
    # '+' or '"' inside the title are escaped instead of corrupting the URL.
    query = '"' + title.replace('\n', '') + '"'
    try:
        out = requests.get(
            'https://scholar.google.com/scholar',
            params={'hl': 'en', 'as_sdt': '0,49', 'q': query},
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=30)
    except requests.RequestException:
        sys.exit(failure)

    soup = BeautifulSoup(out.content, 'html.parser')
    try:
        # find() returns None when an element is missing, which surfaces as
        # AttributeError on the next access; a link without href is KeyError.
        result = soup.find('div', class_='gs_scl') \
            .find('div', class_='gs_ri').find('a')
        possible_title = result.text
        possible_link = result['href']
    except (AttributeError, KeyError):
        print('No matching link available.')
        sys.exit(failure)

    if not possible_title:
        print('No matching link available.')
        sys.exit(failure)

    print('Provided title:')
    print(title)
    print('Found following article:')
    print(possible_title)
    try:
        choice = input("\033[0;37m"+"Is this correct (y/n)? "+"\033[00m")
    except EOFError:
        # No interactive input available: treat as "not confirmed".
        choice = ''
    if choice == 'y':
        return possible_link
    sys.exit(failure)

def url_from_doi(doi):
    """Resolve a DOI through https://doi.org/ and return the resulting URL.

    Args:
        doi: DOI string, appended verbatim to ``https://doi.org/``.

    Returns:
        The ``url`` attribute of the response to the GET request.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    print("Getting URL from DOI........",end='',flush=True)
    try:
        # A timeout keeps an unresponsive server from hanging the program.
        response = requests.get('https://doi.org/'+doi,
                headers={'User-Agent':'Mozilla/5.0'},
                timeout=30)
    except requests.RequestException:
        # Finish the open progress line before the error propagates.
        print('failed')
        raise
    print('done')
    return response.url