Blame: website/copy_notebooks.py - jakevdp/PythonDataScienceHandbook

jakevdp / PythonDataScienceHandbook UNCLAIMED

Python Data Science Handbook: full text in Jupyter Notebooks

0 0 0 Jupyter Notebook

add website framework 2017-08-14 13:04:58 -07:00			`"""`
			`This script copies all notebooks from the book into the website directory, and`
			`creates pages which wrap them and link together.`
			`"""`
			`import os`
			`import nbformat`
copy figures as well 2017-08-14 13:26:42 -07:00			`import shutil`
add website framework 2017-08-14 13:04:58 -07:00
			`PAGEFILE = """title: {title}`
get rid of 'pages' in urls 2017-08-14 19:22:01 -07:00			`url:`
			`save_as: {htmlfile}`
Update publish info 2017-08-14 14:02:21 -07:00			`Template: {template}`
add website framework 2017-08-14 13:04:58 -07:00
copy figures as well 2017-08-14 13:26:42 -07:00			`{{% notebook notebooks/{notebook_file} cells[{cells}] %}}`
add website framework 2017-08-14 13:04:58 -07:00			`"""`

change Index intro text 2017-08-15 07:53:23 -07:00			`INTRO_TEXT = """This website contains the full text of the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook) in the form of Jupyter notebooks.`

			`The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT).`

			`If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!`
			`"""`

add website framework 2017-08-14 13:04:58 -07:00
			`def abspath_from_here(*args):`
			`here = os.path.dirname(__file__)`
			`path = os.path.join(here, *args)`
			`return os.path.abspath(path)`

# Directory the source notebooks are read from: '../notebooks' relative to
# this script.
NB_SOURCE_DIR = abspath_from_here('..', 'notebooks')
# Directory the link-rewritten notebook copies are written to.
NB_DEST_DIR = abspath_from_here('content', 'notebooks')
# Directory the generated '.md' wrapper pages are written to.
PAGE_DEST_DIR = abspath_from_here('content', 'pages')


			`def copy_notebooks():`
Adjust website build to work with colab badges: 2018-08-28 14:11:16 -07:00			`if not os.path.exists(NB_DEST_DIR):`
			`os.makedirs(NB_DEST_DIR)`
			`if not os.path.exists(PAGE_DEST_DIR):`
			`os.makedirs(PAGE_DEST_DIR)`

copy figures as well 2017-08-14 13:26:42 -07:00			`nblist = sorted(nb for nb in os.listdir(NB_SOURCE_DIR)`
			`if nb.endswith('.ipynb'))`
get rid of 'pages' in urls 2017-08-14 19:22:01 -07:00			`name_map = {nb: nb.rsplit('.', 1)[0].lower() + '.html'`
add website framework 2017-08-14 13:04:58 -07:00			`for nb in nblist}`

copy figures as well 2017-08-14 13:26:42 -07:00			`figsource = abspath_from_here('..', 'notebooks', 'figures')`
			`figdest = abspath_from_here('content', 'figures')`

			`if os.path.exists(figdest):`
			`shutil.rmtree(figdest)`
			`shutil.copytree(figsource, figdest)`

			`figurelist = os.listdir(abspath_from_here('content', 'figures'))`
Update publish info 2017-08-14 14:02:21 -07:00			`figure_map = {os.path.join('figures', fig) : os.path.join('/PythonDataScienceHandbook/figures', fig)`
copy figures as well 2017-08-14 13:26:42 -07:00			`for fig in figurelist}`

add website framework 2017-08-14 13:04:58 -07:00			`for nb in nblist:`
			`base, ext = os.path.splitext(nb)`
			`print('-', nb)`

			`content = nbformat.read(os.path.join(NB_SOURCE_DIR, nb),`
			`as_version=4)`

copy figures as well 2017-08-14 13:26:42 -07:00			`if nb == 'Index.ipynb':`
Adjust website build to work with colab badges: 2018-08-28 14:11:16 -07:00			`# content[0] is the title`
			`# content[1] is the cover image`
			`# content[2] is the license`
copy figures as well 2017-08-14 13:26:42 -07:00			`cells = '1:'`
Update publish info 2017-08-14 14:02:21 -07:00			`template = 'page'`
copy figures as well 2017-08-14 13:26:42 -07:00			`title = 'Python Data Science Handbook'`
change Index intro text 2017-08-15 07:53:23 -07:00			`content.cells[2].source = INTRO_TEXT`
copy figures as well 2017-08-14 13:26:42 -07:00			`else:`
Adjust website build to work with colab badges: 2018-08-28 14:11:16 -07:00			`# content[0] is the book information`
			`# content[1] is the navigation bar`
			`# content[2] is the title`
copy figures as well 2017-08-14 13:26:42 -07:00			`cells = '2:'`
Update publish info 2017-08-14 14:02:21 -07:00			`template = 'booksection'`
copy figures as well 2017-08-14 13:26:42 -07:00			`title = content.cells[2].source`
			`if not title.startswith('#') or len(title.splitlines()) > 1:`
			`raise ValueError('title not found in third cell')`
			`title = title.lstrip('#').strip()`
add website framework 2017-08-14 13:04:58 -07:00
Update publish info 2017-08-14 14:02:21 -07:00			`# put nav below title`
Adjust website build to work with colab badges: 2018-08-28 14:11:16 -07:00			`content.cells.insert(0, content.cells.pop(2))`
Update publish info 2017-08-14 14:02:21 -07:00
			`# Replace internal URLs and figure links in notebook`
add website framework 2017-08-14 13:04:58 -07:00			`for cell in content.cells:`
			`if cell.cell_type == 'markdown':`
			`for nbname, htmlname in name_map.items():`
			`if nbname in cell.source:`
			`cell.source = cell.source.replace(nbname, htmlname)`
copy figures as well 2017-08-14 13:26:42 -07:00			`for figname, newfigname in figure_map.items():`
			`if figname in cell.source:`
			`cell.source = cell.source.replace(figname, newfigname)`
Adjust website build to work with colab badges: 2018-08-28 14:11:16 -07:00			`if cell.source.startswith("<!--NAVIGATION-->"):`
			`# Undo replacement of notebook link in the colab badge`
			`cell.source = nb.join(cell.source.rsplit(name_map[nb], 1))`

add website framework 2017-08-14 13:04:58 -07:00			`nbformat.write(content, os.path.join(NB_DEST_DIR, nb))`

			`pagefile = os.path.join(PAGE_DEST_DIR, base + '.md')`
get rid of 'pages' in urls 2017-08-14 19:22:01 -07:00			`htmlfile = base.lower() + '.html'`
add website framework 2017-08-14 13:04:58 -07:00			`with open(pagefile, 'w') as f:`
			`f.write(PAGEFILE.format(title=title,`
get rid of 'pages' in urls 2017-08-14 19:22:01 -07:00			`htmlfile=htmlfile,`
copy figures as well 2017-08-14 13:26:42 -07:00			`notebook_file=nb,`
Update publish info 2017-08-14 14:02:21 -07:00			`template=template,`
copy figures as well 2017-08-14 13:26:42 -07:00			`cells=cells))`
add website framework 2017-08-14 13:04:58 -07:00
			`if __name__ == '__main__':`
			`copy_notebooks()`