blob: 1fa1f6138d828a7de913cf412e099ed74c7f84cc (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
import requests
from bs4 import BeautifulSoup
import sys
def url_from_title(title):
    """Look up an article on Google Scholar by title and return its link.

    The title is searched as an exact (double-quoted) phrase. The title of
    the first search result is printed next to the provided one and the
    user is asked on stdin to confirm that it is the intended article.

    Args:
        title: Article title to search for. Newline characters are removed
            before the search is issued.

    Returns:
        The ``href`` of the first search result, once the user confirms it.

    Raises:
        SystemExit: With the message 'Getting URL from title failed' when
            the request fails, no result link can be found in the returned
            page, or the user does not confirm the match.
    """
    failure = 'Getting URL from title failed'
    print("Getting URL from title......")
    try:
        # Let requests build and percent-encode the query string: titles
        # containing characters such as '&', '#', '+' or '"' would
        # otherwise corrupt a hand-concatenated URL.
        out = requests.get(
            'https://scholar.google.com/scholar',
            params={
                'hl': 'en',
                'as_sdt': '0,49',
                'q': '"' + title.replace('\n', '') + '"',
            },
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=30,  # never hang forever on an unresponsive server
        )
        soup = BeautifulSoup(out.content, 'html.parser')

        # Walk down to the first result's title link, checking each step
        # explicitly instead of relying on an AttributeError from None.
        entry = soup.find('div', class_='gs_scl')
        info = entry.find('div', class_='gs_ri') if entry is not None else None
        result = info.find('a') if info is not None else None
        if result is None:
            sys.exit(failure)
        possible_link = result.get('href')
        if possible_link is None:
            sys.exit(failure)
        possible_title = result.text

        print('Provided title:')
        print(title)
        print('Found following article:')
        print(possible_title)
        choice = input("Is this correct (y/n)? ")
        if choice.strip().lower() in ('y', 'yes'):
            return possible_link
        sys.exit(failure)
    except Exception as err:
        # Deliberate catch-all at the script boundary: any failure ends the
        # run with the same message as before. Unlike a bare ``except`` this
        # lets KeyboardInterrupt and the SystemExit raised above through,
        # and the underlying cause is reported instead of being swallowed.
        print('Error:', repr(err), file=sys.stderr)
        sys.exit(failure)
def url_from_doi(doi):
    """Resolve a DOI to the URL that https://doi.org/ redirects it to.

    Args:
        doi: The DOI string, without the ``https://doi.org/`` prefix.
            Leading and trailing whitespace (e.g. a trailing newline) is
            ignored.

    Returns:
        The final URL of the response after all redirects were followed.

    Raises:
        requests.RequestException: If the request fails or times out. The
            progress line is terminated with 'failed' before re-raising.
    """
    print("Getting URL from DOI........", end='', flush=True)
    try:
        # Only the final URL is needed, so stream the response and close it
        # without downloading the landing page body. The timeout keeps an
        # unresponsive server from blocking the script forever.
        with requests.get('https://doi.org/' + doi.strip(),
                          headers={'User-Agent': 'Mozilla/5.0'},
                          stream=True,
                          timeout=30) as response:
            url = response.url
    except requests.RequestException:
        # Finish the progress line started above, then let the caller see
        # the original exception.
        print('failed')
        raise
    print('done')
    return url
|