about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ken Kellner <ken@kenkellner.com> 2018-04-12 20:43:29 -0400
committer: Ken Kellner <ken@kenkellner.com> 2018-04-12 20:43:29 -0400
commit: 0124785e6616dd4fca06af50b745f14e6e07819e (patch)
tree: e4511c2e5c0b7f9c724f58bd8741d0e2d03ac164
parent: 946396dea633572b6b83dd6f568cb4ab4e8395d4 (diff)
Some reorg
-rwxr-xr-x article-epub.py 26
-rw-r--r-- article_epub/__init__.py 1
-rw-r--r-- article_epub/utilities.py 28
3 files changed, 30 insertions, 25 deletions
diff --git a/article-epub.py b/article-epub.py
index 21c80cf..26f1786 100755
--- a/article-epub.py
+++ b/article-epub.py
@@ -3,7 +3,6 @@ import article_epub
import sys
import requests
import argparse
-from bs4 import BeautifulSoup
parser = argparse.ArgumentParser()
@@ -37,7 +36,7 @@ def main():
doi = args.d
print('done')
elif args.t != None:
- url = url_from_title(args.t)
+ url = article_epub.url_from_title(args.t)
doi = None
else:
url = args.u
@@ -58,29 +57,6 @@ def main():
print('\nCitation: '+art.get_citation())
print('Filename: '+art.output)
-def url_from_title(title):
- print("Getting URL from title......")
- try:
- url_stem = 'https://scholar.google.com/scholar?hl=en&as_sdt=0%2C49&q="'
- search = title.replace(' ','+').replace('\n','')
- full_url = url_stem+search+'"'
- out = requests.get(full_url,headers={'User-Agent':'Mozilla/5.0'})
- soup = BeautifulSoup(out.content,'html.parser')
- result = soup.find('div',class_='gs_scl') \
- .find('div',class_='gs_ri').find('a')
- possible_title = result.text
- possible_link = result['href']
- print('Provided title:')
- print(title)
- print('Found following article:')
- print(possible_title)
- choice = input("Is this correct (y/n)? ")
- if choice == 'y':
- return(possible_link)
- else:
- sys.exit('Getting URL from title failed')
- except:
- sys.exit('Getting URL from title failed')
main()
diff --git a/article_epub/__init__.py b/article_epub/__init__.py
index 84afc23..f090db7 100644
--- a/article_epub/__init__.py
+++ b/article_epub/__init__.py
@@ -1,3 +1,4 @@
#!/usr/bin/python3
#https://github.com/mozilla/geckodriver/releases
import article_epub.publishers
+from article_epub.utilities import *
diff --git a/article_epub/utilities.py b/article_epub/utilities.py
new file mode 100644
index 0000000..fb8c249
--- /dev/null
+++ b/article_epub/utilities.py
@@ -0,0 +1,28 @@
+import requests
+from bs4 import BeautifulSoup
+import sys
+
+def url_from_title(title):
+ print("Getting URL from title......")
+ try:
+ url_stem = 'https://scholar.google.com/scholar?hl=en&as_sdt=0%2C49&q="'
+ search = title.replace(' ','+').replace('\n','')
+ full_url = url_stem+search+'"'
+ out = requests.get(full_url,headers={'User-Agent':'Mozilla/5.0'})
+ soup = BeautifulSoup(out.content,'html.parser')
+ result = soup.find('div',class_='gs_scl') \
+ .find('div',class_='gs_ri').find('a')
+ possible_title = result.text
+ possible_link = result['href']
+ print('Provided title:')
+ print(title)
+ print('Found following article:')
+ print(possible_title)
+ choice = input("Is this correct (y/n)? ")
+ if choice == 'y':
+ return(possible_link)
+ else:
+ sys.exit('Getting URL from title failed')
+ except:
+ sys.exit('Getting URL from title failed')
+