|
#!/usr/bin/perl -w
##---------------------------------------------------------------------------##
# Generates a nested table of contents for a file, based on the formatting
# model I initially saw in the MySQL documentation.
#
# The script looks for lines that look like the following:
#
# Some text
# =========
#
# Next heading level
# ------------------
#
# Another heading level
# ~~~~~~~~~~~~~~~~~~~~~
#
# Note the underline, which consists of repeating non-alphanumeric characters
# of the same length as the text on the line above. Levels are assigned
# automatically as each new underline is encountered.
#
# The above might be formatted in the Table of Contents as:
# 8 Some text
# 11 Next heading level
# 14 Another heading level
#
# The table of contents is inserted after the line 'Table of Contents' (and
# its underline) is encountered. Any existing table of contents is replaced.
##---------------------------------------------------------------------------##
# BUUS: This script is part of Brian's Useful Utilities Set
# The program 'setext-headings' promotes or demotes headings in an existing
# file, which can be useful at times.
use strict;
die "genTOC: Require the name of one or more text files" if ! $ARGV[0];
my @text; # Text from the file
my @text_stat; # stat($text_fn)
my @toc; # Entries are ARRAY refs: [0]=line num, [1]=level, [2]=text
use constant TOC_LINE_NUM => 0;
use constant TOC_LEVEL => 1;
use constant TOC_TEXT => 2;
my ($toc_start, $toc_end) = (0, 0);
my %level; # Key = underline character, data = level
my $code_fence; # Found in markdown files
##---------------------------------------------------------------------------##
# Open the text file, get its inode data, determine where its existing table
# of contents is, and build a new toc array. Return 0 if no line reading
# 'Table of Contents' line is found.
##---------------------------------------------------------------------------##
sub read_text_file {
my $text_fn = shift;
print STDERR "$text_fn\n";
open TEXT, $text_fn or die " Error opening TEXT file '$text_fn' for reading: $!";
@text_stat = stat($text_fn);
@toc = (); ($toc_start, $toc_end) = (0, 0); my $toc_sw = 0;
%level = ();
# Read the text file, determine where its existing table of contents is, and
# build a new table
@text = ( undef ); # [0]=undef keeps array (0-based) and $. (1-based) in sync
while (<TEXT>) {
chomp;
push @text, $_;
if ($toc_sw) { # Look for the end of the table of contents
$toc_sw = 0; # Assume this is not a TOC line
$toc_sw = 1 if /^\s*$/; # Blank lines are TOC lines
$toc_sw = 1 if /^\s*\d+\.? /; # So are ' ####. TEXT' lines
$toc_sw = 1 if /^\s*([^[:alnum:]])\1+\s*$/ && $. == $toc_start+1;
if (! $toc_sw) { # We found a non-TOC line: back up to last non-empty line
$toc_end = $. - 1;
$toc_end-- while $text[$toc_end] =~ /^\s*$/;
}
}
$toc_start = $., $toc_sw = 1 if /^\s*Table of Contents/i && ! $toc_start;
my $handled_as_ToC = 0;
if (/^(\s*)(([^[:alnum:]` ])\3+)\s*$/) { # This looks like an AUC underline
# Check if this is a closing code fence
if ($code_fence && /^ ? ? ?(([`~])\2\2+)/ && length($1) >= length($code_fence)) {
$code_fence='', $_='';
next;
}
# Check for an AUC underline
my ($ul_lead, $underline) = ($1, $2);
$text[$. - 1] =~ /^(\s*)(.*)\s*$/;
my ($h_lead, $header) = ($1, $2);
if (! $code_fence # Can't be within a code block
&& $ul_lead ne "\t" # "Spaces" before can't be a tab
&& length($ul_lead) < 4 # Must start in columns 1, 2, or 3
&& $h_lead eq $ul_lead # Header and underline must start at same column
&& length($header) - length($underline) <= 1
&& length($header) - length($underline) >= -2
) {
my $u_char = substr($underline, 0, 1);
$level{$u_char} = scalar(keys(%level)) if ! defined $level{$u_char};
if ($toc_start && lc($header) ne 'table of contents') {
push @toc, [ $.-1, $level{$u_char}, $header ];
$handled_as_ToC = 1;
}
}
}
# Handle a Markdown ATX heading
if (! $code_fence && /^ ? ? ?(#+)\s+(.*)/) {
push @toc, [ $. - 1, length($1)-1, $2 ];
$handled_as_ToC = 1;
}
# Check for an opening code fence
$code_fence = $code_fence ? '' : $1
if ! $handled_as_ToC && /^ ? ? ?(([~`])\2\2+)/;
}
close TEXT;
print STDERR " Did not find a 'Table of Contents' line\n" if ! $toc_start;
return $toc_start;
}
##---------------------------------------------------------------------------##
# Determine where the existing table of contents starts and stops
##---------------------------------------------------------------------------##
sub find_curr_toc {
# Determine where the Table of Contents actually starts within the text
$toc_start++; # Skip 'Table of Contents'; next line: skip underline
$toc_start++ if $text[$toc_start] =~ /^\s*([^[:alnum:]])\1+\s*$/;
$toc_start++ while $text[$toc_start] =~ /^\s*$/; # Skip blank lines
# If the Table of Contents ends before it starts, it means we have an empty table
if ($toc_end < $toc_start) {
$toc_start--;
$toc_end = $toc_start-1;
}
}
##---------------------------------------------------------------------------##
# Format the TOC array for insertion into the main text
##---------------------------------------------------------------------------##
sub format_new_toc {
# If we have sub-levels, go through the TOC and insert a blank line before every
# level 0 entry
if (scalar(keys(%level)) > 1) {
for (my $i = @toc-1; $i > 1; $i--) {
splice @toc, $i, 0, '' if $toc[$i][1] == 0;
}
}
my $curr_toc_length = $toc_end-$toc_start+1;
my $new_toc_length = @toc;
my $w = length($toc[-1][TOC_LINE_NUM] + $new_toc_length); # w = width of last line number in TOC
foreach (@toc) {
next if ! ref($_);
my $i = $$_[TOC_LINE_NUM];
$i += ($new_toc_length - $curr_toc_length) if $$_[TOC_LINE_NUM] > $toc_start;
my $x = sprintf(" %${w}i %s%s", $i, ' ' x $$_[TOC_LEVEL], $$_[TOC_TEXT]);
$_ = $x;
}
}
##---------------------------------------------------------------------------##
# Splice the new table of contents into the text array and rewrite the
# text file
##---------------------------------------------------------------------------##
sub update_text_file {
my $text_fn = shift;
# Splice in the table of contents
my $curr_toc_length = $toc_end-$toc_start+1;
my $new_toc_length = @toc;
print STDERR " Removing existing table of contents ($curr_toc_length lines) from text\n" if $curr_toc_length;
print STDERR " Inserting new table of contents ($new_toc_length lines) into text at line $toc_start\n";
splice @text, $toc_start, $curr_toc_length, @toc;
# Re-write the file (preserve its timestamps)
open TEXT, "> $text_fn" or die "Error opening TEXT file '$text_fn' for writing: $!";
for (my $i = 1; $i < @text; $i++) { print TEXT "$text[$i]\n" }
close TEXT;
utime $text_stat[8], $text_stat[9], $text_fn;
}
##---------------------------------------------------------------------------##
# M A I N P R O C E S S I N G
##---------------------------------------------------------------------------##
foreach my $text_fn (@ARGV) {
if (read_text_file($text_fn)) {
find_curr_toc();
format_new_toc();
update_text_file($text_fn);
}
}
# vim: tabstop=4
|