#!/usr/bin/perl -w
#
# a2h.pl
# David Rowe 16 Nov 2010
#
# Convert asciidoc format text file to pastable HTML.

use strict;
use warnings;

# Usage: a2h.pl textfile > out.html
#
# Reads the asciidoc text file named on the command line one line at a
# time and prints HTML to STDOUT.  A small state machine collects
# lines into a block and emits the block when its terminator is seen
# (or when the input ends):
#
#   start        between blocks, waiting for a non-blank line
#   para         collecting a paragraph, title or list item
#   code         collecting a ---- delimited code block
#   code_indent  collecting an indented code block

my $textFile = shift;
defined $textFile or die "usage: $0 textfile\n";

# Three-argument open so characters such as '|', '<' or '>' in the
# file name are never interpreted as an open mode or a command.
open my $fh, '<', $textFile or die "Can't open $textFile: $!\n";

my $para  = "";         # text collected so far for the current block
my $state = "start";    # current state, see list above
my $last  = "";         # previous input line, used to spot title underlines

while (my $line = <$fh>) {

    my $next_state = $state;

    # Blank out lines consisting of just + in the first character.
    # This is a hack to handle code blocks embedded in dot points.  It
    # means lists will need some manual reformatting.

    $line =~ s/^\+\s*$//;

    if ($state eq "start") {
        if ($line !~ /^\s*$/) {
            # non whitespace line gets us started

            if ($line =~ /----/) {
                # delimiter opening a code block to be rendered
                # verbatim; the delimiter itself is not kept
                $next_state = "code";
                $para = "";
            }
            elsif ($line =~ /^\s+?\S/) {
                # indented line: code block to be rendered verbatim
                $next_state = "code_indent";
                $para = $line;
            }
            else {
                # start of regular para; lines are joined with spaces
                $next_state = "para";
                chomp(my $text = $line);
                $para = $text . ' ';
            }
        }
    }
    elsif ($state eq "code") {
        if ($line =~ /----/) {
            # closing delimiter, code block finished
            $next_state = "start";
            emit_code_block($para);
        }
        else {
            $para .= $line;
        }
    }
    elsif ($state eq "code_indent") {
        if ($line =~ /^\s*$/) {
            # blank line, indent based code block finished
            $next_state = "start";
            emit_code_block($para);
        }
        else {
            $para .= $line;
        }
    }
    elsif ($state eq "para") {
        if ($line =~ /^\s*$/) {
            # blank line, terminating paragraph
            $next_state = "start";
            emit_paragraph($para, $last);
        }
        else {
            chomp(my $text = $line);
            $para .= $text . ' ';
        }
    }

    $last  = $line;
    $state = $next_state;
}

close $fh;

# The input may end without a terminating blank line (or without a
# closing ---- delimiter).  Emit whatever is still being collected so
# the final block is not silently dropped.

if ($state eq "para") {
    emit_paragraph($para, $last);
}
elsif (($state eq "code" || $state eq "code_indent") && $para ne "") {
    # last line of the file may lack a newline; keep </code> on its own line
    $para .= "\n" unless $para =~ /\n\z/;
    emit_code_block($para);
}

# Print the verbatim text $code wrapped in <code> tags.
sub emit_code_block {
    my ($code) = @_;
    print "<code>\n$code</code>\n\n";
    return;
}

# Return $text with leading and trailing white space removed.
sub trim {
    my ($text) = @_;
    $text =~ s/\s+$//;
    $text =~ s/^\s+//;
    return $text;
}

# Print the HTML for a finished "para" block.
#
#   $para  the block's lines, chomped and joined with single spaces
#   $last  the final input line of the block; an underline of ====,
#          ---- or ~~~~ here marks the block as a title
sub emit_paragraph {
    my ($para, $last) = @_;

    if ($last =~ /====/) {
        # ignore top level title, we will enter that manually
        return;
    }

    if ($last =~ /----/) {
        # h2 level title: drop the underline that was joined onto the text
        $para =~ s/--*--//;

        # an optional [[tag]] anchor becomes the id attribute
        my $id;
        if ($para =~ s/\[\[(\w+)\]\]//) {
            $id = $1;
        }

        $para = trim($para);
        if (defined $id) {
            print "<h2 id=\"$id\">$para</h2>\n\n";
        }
        else {
            print "<h2>$para</h2>\n\n";
        }
        return;
    }

    if ($last =~ /~~~~/) {
        # h3 level title
        $para =~ s/~~*~~//;
        $para = trim($para);
        print "<h3>$para</h3>\n\n";
        return;
    }

    # regular paragraph

    # convert asciidoc format links

    $para =~ s/link:(\S*)\[(.*?)\]/<a href="$1">$2<\/a>/g;
    $para =~ s/(http:\S*)\[(.*?)\]/<a href="$1">$2<\/a>/g;
    $para =~ s/(https:\S*)\[(.*?)\]/<a href="$1">$2<\/a>/g;

    # images

    $para =~ s/image::(\S*)\[(.*?)\]/<img src="$1" alt="$2" \/>/g;

    # bold formatting (single words only)

    $para =~ s/\*(\w*)\*/<strong>$1<\/strong>/g;

    # if starts with + or . or * it's a list item

    if (   $para =~ /^\+ \w/
        || $para =~ /^\. \w/
        || $para =~ /^\* \S/)
    {
        $para =~ s/^\+ //;
        $para =~ s/^\. //;
        $para =~ s/^\* //;
        print "<p><li>$para</li>\n\n";
    }
    else {
        print "$para\n\n";
    }
    return;
}


