2017-08-14 13:04:58 -07:00
"""
This script copies all notebooks from the book into the website directory, and
creates pages which wrap them and link together.
"""
import os
import nbformat
2017-08-14 13:26:42 -07:00
import shutil
2017-08-14 13:04:58 -07:00
# Template for the Markdown page written next to each copied notebook.
# It is filled in with ``str.format``: a metadata header (title, url,
# save_as, Template), a blank separator line, then a ``notebook`` tag that
# embeds the chosen cell slice of the notebook.
# The doubled braces are ``str.format`` escapes, so the rendered tag reads
# ``{% notebook notebooks/<file> cells[<slice>] %}``.  No whitespace may sit
# between ``{`` and ``%`` or inside the path, otherwise the tag is not
# recognised (presumably by Pelican's liquid_tags plugin -- confirm against
# the site configuration).
PAGEFILE = """title: {title}
url:
save_as: {htmlfile}
Template: {template}

{{% notebook notebooks/{notebook_file} cells[{cells}] %}}
"""
2017-08-15 07:53:23 -07:00
# Markdown text that copy_notebooks() stores in the third cell of
# Index.ipynb: where the book text lives, its licences, and a link for
# buying the book.  Built from adjacent literals so each link stays on its
# own source line; the resulting value is three newline-terminated lines.
INTRO_TEXT = (
    " This website contains the full text of the "
    "[Python Data Science Handbook]"
    "(http://shop.oreilly.com/product/0636920034919.do)"
    " by Jake VanderPlas; the content is available "
    "[on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook)"
    " in the form of Jupyter notebooks.\n"
    "The text is released under the "
    "[CC-BY-NC-ND license]"
    "(https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode)"
    ", and code is released under the "
    "[MIT license](https://opensource.org/licenses/MIT).\n"
    "If you find this content useful, "
    "please consider supporting the work by "
    "[buying the book](http://shop.oreilly.com/product/0636920034919.do)!\n"
)
2017-08-14 13:04:58 -07:00
def abspath_from_here(*args):
    """Return an absolute path built relative to this script's directory.

    The given path components are joined onto the directory that contains
    this file (taken from ``__file__``) and the result is passed through
    ``os.path.abspath``, which also collapses ``..`` segments.
    """
    script_dir = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(script_dir, *args))
# Directories used by copy_notebooks(), all resolved relative to this file:
# the notebooks are read from ``../notebooks``; the copies and the generated
# wrapper pages are written under ``content/``.
NB_SOURCE_DIR = abspath_from_here('..', 'notebooks')
NB_DEST_DIR = abspath_from_here('content', 'notebooks')
PAGE_DEST_DIR = abspath_from_here('content', 'pages')
def copy_notebooks():
    """Copy the book's notebooks into the site and write a page for each.

    Steps, in order:

    1. Make sure ``NB_DEST_DIR`` and ``PAGE_DEST_DIR`` exist.
    2. Replace ``content/figures`` with a fresh copy of
       ``../notebooks/figures``.
    3. For every ``*.ipynb`` file in ``NB_SOURCE_DIR`` (sorted by name):
       read it as nbformat version 4, choose the page title, template and
       cell slice, rewrite notebook and figure links in its cells, write
       the modified notebook to ``NB_DEST_DIR`` and write a
       ``PAGEFILE``-formatted ``.md`` page to ``PAGE_DEST_DIR``.

    Raises:
        ValueError: if the third cell of a notebook other than
            ``Index.ipynb`` is not a single-line ``#`` heading.
    """
    os.makedirs(NB_DEST_DIR, exist_ok=True)
    os.makedirs(PAGE_DEST_DIR, exist_ok=True)

    nblist = sorted(nb for nb in os.listdir(NB_SOURCE_DIR)
                    if nb.endswith('.ipynb'))
    # Notebook file name -> lower-cased name of the HTML page it becomes.
    name_map = {nb: nb.rsplit('.', 1)[0].lower() + '.html'
                for nb in nblist}

    # Start from a clean figure directory so stale figures do not linger.
    figsource = abspath_from_here('..', 'notebooks', 'figures')
    figdest = abspath_from_here('content', 'figures')
    if os.path.exists(figdest):
        shutil.rmtree(figdest)
    shutil.copytree(figsource, figdest)

    # Relative figure link -> site-absolute figure URL.  Both sides are URL
    # fragments found in Markdown, so they are joined with '/' rather than
    # os.path.join, which would produce backslashes on Windows.
    figure_map = {'figures/' + fig: '/PythonDataScienceHandbook/figures/' + fig
                  for fig in os.listdir(figdest)}

    for nb in nblist:
        base = os.path.splitext(nb)[0]
        print('-', nb)

        content = nbformat.read(os.path.join(NB_SOURCE_DIR, nb),
                                as_version=4)

        if nb == 'Index.ipynb':
            # content[0] is the title
            # content[1] is the cover image
            # content[2] is the license
            cells = '1:'
            template = 'page'
            title = 'Python Data Science Handbook'
            content.cells[2].source = INTRO_TEXT
        else:
            # content[0] is the book information
            # content[1] is the navigation bar
            # content[2] is the title
            cells = '2:'
            template = 'booksection'
            title = content.cells[2].source
            if not title.startswith('#') or len(title.splitlines()) > 1:
                raise ValueError('title not found in third cell')
            title = title.lstrip('#').strip()

            # Put nav below title: moving the title cell to the front gives
            # the order title, book information, navigation bar, so the
            # '2:' slice used for the page starts at the navigation bar.
            content.cells.insert(0, content.cells.pop(2))

        # Replace internal URLs and figure links in notebook
        for cell in content.cells:
            if cell.cell_type == 'markdown':
                for nbname, htmlname in name_map.items():
                    cell.source = cell.source.replace(nbname, htmlname)
                for figname, newfigname in figure_map.items():
                    cell.source = cell.source.replace(figname, newfigname)
            if cell.source.startswith("<!--NAVIGATION-->"):
                # Undo replacement of notebook link in the colab badge:
                # only the last occurrence of this notebook's HTML name is
                # turned back into the .ipynb name.
                cell.source = nb.join(cell.source.rsplit(name_map[nb], 1))

        nbformat.write(content, os.path.join(NB_DEST_DIR, nb))

        pagefile = os.path.join(PAGE_DEST_DIR, base + '.md')
        htmlfile = base.lower() + '.html'
        # Explicit encoding: titles come from notebook text and must not
        # depend on the platform's default locale encoding.
        with open(pagefile, 'w', encoding='utf-8') as f:
            f.write(PAGEFILE.format(title=title,
                                    htmlfile=htmlfile,
                                    notebook_file=nb,
                                    template=template,
                                    cells=cells))
2017-08-14 13:04:58 -07:00
# Run the copy only when this file is executed as a script, not on import.
if __name__ == '__main__':
    copy_notebooks()