diff --git a/epubfile.py b/epubfile.py index 23c968f..ba11b2e 100644 --- a/epubfile.py +++ b/epubfile.py @@ -1024,6 +1024,10 @@ class Epub: if not nav_id and not ncx_id: return + # Note: The toc generated by the upcoming loop is in a sort of agnostic + # format, since it needs to be converted into nav.html and toc.ncx which + # have different structural requirements. The attributes that I'm using + # in this initial toc object DO NOT represent any part of the epub format. toc = new_list(root=True) current_level = None current_list = toc.ol @@ -1061,10 +1065,15 @@ class Epub: current_list = current_list.parent if current_list.name == 'li': current_list = current_list.parent - # If the file has headers in a non-ascending order, like an - # h4 and then an h1, then backstepping too far will take us - # out of the list. So at that point we can just snap - # current_level and start using the root list again. + # If the file has headers in a non-ascending order, like the + # first header is an h4 and then an h1 comes later, then + # this while loop would keep attempting to climb the .parent + # which would take us too far, off the top of the tree. + # So, if we reach `current_list == toc` then we've climbed + # past the root list (toc.ol) and should stop. At that point + # we can just snap current_level and use the root list again. + # In the resulting toc, that initial h4 would have the same + # toc depth as the later h1 since it never had parents. if current_list == toc: current_level = level current_list = toc.ol @@ -1073,12 +1082,13 @@ class Epub: current_level = level # In order to properly render nested