diff options
author | Ken Kellner <ken@kenkellner.com> | 2018-04-12 20:43:29 -0400 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2018-04-12 20:43:29 -0400 |
commit | 0124785e6616dd4fca06af50b745f14e6e07819e (patch) | |
tree | e4511c2e5c0b7f9c724f58bd8741d0e2d03ac164 | |
parent | 946396dea633572b6b83dd6f568cb4ab4e8395d4 (diff) |
Some reorg
-rwxr-xr-x | article-epub.py | 26 | ||||
-rw-r--r-- | article_epub/__init__.py | 1 | ||||
-rw-r--r-- | article_epub/utilities.py | 28 |
3 files changed, 30 insertions, 25 deletions
diff --git a/article-epub.py b/article-epub.py index 21c80cf..26f1786 100755 --- a/article-epub.py +++ b/article-epub.py @@ -3,7 +3,6 @@ import article_epub import sys import requests import argparse -from bs4 import BeautifulSoup parser = argparse.ArgumentParser() @@ -37,7 +36,7 @@ def main(): doi = args.d print('done') elif args.t != None: - url = url_from_title(args.t) + url = article_epub.url_from_title(args.t) doi = None else: url = args.u @@ -58,29 +57,6 @@ def main(): print('\nCitation: '+art.get_citation()) print('Filename: '+art.output) -def url_from_title(title): - print("Getting URL from title......") - try: - url_stem = 'https://scholar.google.com/scholar?hl=en&as_sdt=0%2C49&q="' - search = title.replace(' ','+').replace('\n','') - full_url = url_stem+search+'"' - out = requests.get(full_url,headers={'User-Agent':'Mozilla/5.0'}) - soup = BeautifulSoup(out.content,'html.parser') - result = soup.find('div',class_='gs_scl') \ - .find('div',class_='gs_ri').find('a') - possible_title = result.text - possible_link = result['href'] - print('Provided title:') - print(title) - print('Found following article:') - print(possible_title) - choice = input("Is this correct (y/n)? ") - if choice == 'y': - return(possible_link) - else: - sys.exit('Getting URL from title failed') - except: - sys.exit('Getting URL from title failed') main() diff --git a/article_epub/__init__.py b/article_epub/__init__.py index 84afc23..f090db7 100644 --- a/article_epub/__init__.py +++ b/article_epub/__init__.py @@ -1,3 +1,4 @@ #!/usr/bin/python3 #https://github.com/mozilla/geckodriver/releases import article_epub.publishers +from article_epub.utilities import * diff --git a/article_epub/utilities.py b/article_epub/utilities.py new file mode 100644 index 0000000..fb8c249 --- /dev/null +++ b/article_epub/utilities.py @@ -0,0 +1,28 @@ +import requests +from bs4 import BeautifulSoup +import sys + +def url_from_title(title): + print("Getting URL from title......") + try: + url_stem = 'https://scholar.google.com/scholar?hl=en&as_sdt=0%2C49&q="' + search = title.replace(' ','+').replace('\n','') + full_url = url_stem+search+'"' + out = requests.get(full_url,headers={'User-Agent':'Mozilla/5.0'}) + soup = BeautifulSoup(out.content,'html.parser') + result = soup.find('div',class_='gs_scl') \ + .find('div',class_='gs_ri').find('a') + possible_title = result.text + possible_link = result['href'] + print('Provided title:') + print(title) + print('Found following article:') + print(possible_title) + choice = input("Is this correct (y/n)? ") + if choice == 'y': + return(possible_link) + else: + sys.exit('Getting URL from title failed') + except: + sys.exit('Getting URL from title failed') + |