#!/usr/bin/perl use strict; use Encode; use Getopt::Long; Getopt::Long::Configure ("bundling"); my $VERBOSE = 0; my $CLASS = "kana"; GetOptions("verbose|v" => \$VERBOSE, "class|c=s" => \$CLASS) or die "usage: $0 [-v] [-c css-class] [file ...]\n"; my $BR = $VERBOSE ? "
" : ""; my $SPACE = decode("utf-8","\343\200\200"); my $SLASH = decode("utf-8","\357\274\217"); printheader() if $VERBOSE; my @entry; while (<>) { chomp; if (/^#@/) { print qq(

$'

\n); next; } next if /^\s*#/; if (/^\s*$/) { if (@entry) { readentry(@entry); @entry = (); } next; } push(@entry,decode("utf-8",$_)); } readentry(@entry) if @entry; print "\n" if $VERBOSE; exit 0; sub readentry { my ($phrase,@rest) = @_; #if exactly one line, just print it if (@rest == 0) { print encode("utf-8",$phrase),"$BR\n"; return; } #if exactly two lines, then it's a single word if (@rest == 1) { printf qq(%s$BR\n),$CLASS, encode("utf-8",$rest[0]),encode("utf-8",$phrase); return; } #if more than two lines, the second contains all of the kana #that should be left alone, and each subsequent line contains #the reading for one or more kanji, starting at the next available #location indicated by the whitespace in the second line. multiple #adjacent kanji words are separated by ending all but the last one #with a kanji "/" character. my $kana = shift(@rest); #strip trailing whitespace (yes, it knows about $SPACE) $kana =~ s/\s*$//; my $i = 0; while ($i < length($phrase)) { if ($i >= length($kana)) { printf qq(%s), $CLASS,encode("utf-8",$rest[0]), encode("utf-8",substr($phrase,$i)); last; } #assemble word needing furigana my $j; for ($j=$i;$j $i) { my $kanji = substr($phrase,$i,$j - $i); my $furigana = shift(@rest); printf qq(%s), $CLASS,encode("utf-8",$furigana), encode("utf-8",$kanji); $i = $j; }else{ print encode("utf-8",substr($phrase,$i,1)); $i++; } } print "$BR\n"; } #hiding this function at the end keeps IE and Safari from #seeing the contents and rendering the script as HTML... # sub printheader { print < EOF }