diff options
author | Ken Kellner <ken@kenkellner.com> | 2018-04-13 20:52:13 -0400 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2018-04-13 20:52:13 -0400 |
commit | 0f2a5d0502320721d81670e1be96d22c689f6963 (patch) | |
tree | e5c30bbfc4d7a0f5ddffe4ad50a711df85ea4e8f | |
parent | d4694660aa3df9515f7aa3a6ad9d9b906e662a24 (diff) |
Break out more of main() into functions and clean up imports
-rwxr-xr-x | article-epub.py | 38 | ||||
-rw-r--r-- | article_epub/__init__.py | 3 | ||||
-rw-r--r-- | article_epub/publisher.py | 10 | ||||
-rw-r--r-- | article_epub/utilities.py | 6 |
4 files changed, 28 insertions, 29 deletions
diff --git a/article-epub.py b/article-epub.py index 26f1786..6753f6e 100755 --- a/article-epub.py +++ b/article-epub.py @@ -5,7 +5,6 @@ import requests import argparse parser = argparse.ArgumentParser() - parser.add_argument("-u",type=str,help='URL of article', default=None,metavar='URL') parser.add_argument("-d",type=str,help='DOI of article' @@ -20,44 +19,27 @@ args = parser.parse_args() def main(): if args.p: - pubs = article_epub.publisher.list_publishers() + pubs = article_epub.list_publishers() print('Available publishers:') for i in pubs: print('• '+i.__name__) sys.exit() - - if args.d == None and args.u == None and args.t == None: - sys.exit('Must provide URL, DOI or title') - - if args.d != None: - print("Getting URL from DOI........",end='',flush=True) - url = requests.get('https://doi.org/'+args.d, - headers={'User-Agent':'Mozilla/5.0'}).url - doi = args.d - print('done') + + if args.u != None: + url = args.u + elif args.d != None: + url = article_epub.url_from_doi(args.d) elif args.t != None: url = article_epub.url_from_title(args.t) - doi = None else: - url = args.u - doi = None + sys.exit('Must provide URL, DOI or title') - domain = ".".join(url.split("//")[-1].split("/")[0] \ - .split('?')[0].split('.')[-2:]) - - try: - art = article_epub.publisher.get_publishers()[domain](url=url,doi=doi) - print('Matched URL to publisher: '+art.__class__.__name__) - except: - sys.exit('Publisher not supported.') - + art = article_epub.match_publisher(url=url,doi=args.d) art.soupify() art.extract_data() art.epubify(args.o) print('\nCitation: '+art.get_citation()) print('Filename: '+art.output) - - -main() - +if __name__ == "__main__": + main() diff --git a/article_epub/__init__.py b/article_epub/__init__.py index f090db7..82d9fa8 100644 --- a/article_epub/__init__.py +++ b/article_epub/__init__.py @@ -1,4 +1,5 @@ #!/usr/bin/python3 #https://github.com/mozilla/geckodriver/releases import article_epub.publishers -from article_epub.utilities import * +from article_epub.publisher import list_publishers, match_publisher +from article_epub.utilities import url_from_title, url_from_doi diff --git a/article_epub/publisher.py b/article_epub/publisher.py index 89bb8b3..17ce39b 100644 --- a/article_epub/publisher.py +++ b/article_epub/publisher.py @@ -177,3 +177,13 @@ def get_publishers(): def list_publishers(): return _publishers +def match_publisher(url,doi): + domain = ".".join(url.split("//")[-1].split("/")[0] \ + .split('?')[0].split('.')[-2:]) + try: + art = get_publishers()[domain](url=url,doi=doi) + print('Matched URL to publisher: '+art.__class__.__name__) + return(art) + except: + sys.exit('Publisher not supported.') + diff --git a/article_epub/utilities.py b/article_epub/utilities.py index fb8c249..1fa1f61 100644 --- a/article_epub/utilities.py +++ b/article_epub/utilities.py @@ -26,3 +26,9 @@ def url_from_title(title): except: sys.exit('Getting URL from title failed') +def url_from_doi(doi): + print("Getting URL from DOI........",end='',flush=True) + url = requests.get('https://doi.org/'+doi, + headers={'User-Agent':'Mozilla/5.0'}).url + print('done') + return(url) |