about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ken Kellner <ken@kenkellner.com> 2018-04-21 14:17:16 -0400
committer Ken Kellner <ken@kenkellner.com> 2018-04-21 14:17:16 -0400
commit 4eed12039fca7322305b550933b94856a408fed2 (patch)
tree 74ca36dbe82649004a8370bbade157d34b293501
parent 8f61b92000c9a03a8d0740f268dc1befca66ad88 (diff)
Handle Elsevier redirects better
-rw-r--r-- article_epub/utilities.py 14
1 files changed, 12 insertions, 2 deletions
diff --git a/article_epub/utilities.py b/article_epub/utilities.py
index 60a2d97..b9da761 100644
--- a/article_epub/utilities.py
+++ b/article_epub/utilities.py
@@ -1,6 +1,7 @@
import requests
from bs4 import BeautifulSoup
import sys
+from urllib.parse import unquote
def url_from_title(title):
print("Getting URL from title......")
@@ -33,7 +34,16 @@ def url_from_title(title):
def url_from_doi(doi):
print("Getting URL from DOI........",end='',flush=True)
- url = requests.get('https://doi.org/'+doi,
- headers={'User-Agent':'Mozilla/5.0'}).url
+ r = requests.get('https://doi.org/'+doi,
+ headers={'User-Agent':'Mozilla/5.0'})
+
+ #To handle Elsevier linkinghub redirects
+ soup = BeautifulSoup(r.content,'html.parser')
+ if soup.find('input',{'id':'redirectURL'}) is not None:
+ url_raw = soup.find('input',{'id':'redirectURL'})['value']
+ url = unquote(url_raw.split('_returnURL')[0])
+ else:
+ url = r.url
+
print('done')
return(url)