#!/usr/bin/perl
#
# Generates a Unicode character map javascript/html page
#
# (This perl script is no big deal, just some loops to avoid having to
# produce the most repetitive bits of the HTML form by hand; most of
# the script just prints "here-documents" straight to the HTML page.)
#
# Licence notice.  It is echoed to standard output when the generator runs,
# and the same text is reused further down as // comment lines inside the
# generated page.  Nothing in it needs interpolating, so the here-document
# is a literal one.
my $copyright = <<'END_COPYRIGHT_STATEMENT';
 COPYRIGHT:

 This code consists of an HTML/javascript character map for Unicode
 and/or a perl script which generates it.  You may have received either
 or both of these.

 Both the perl and javascript code are copyright (c) Alan Iwi 2003, but you
 may copy either or both under the terms of the GNU GPL.
 See http://www.gnu.org/copyleft/gpl.html

 (For those unfamiliar with the GPL...  Note that the GPL mainly exists to
 give freedom to modify and copy code rather than to restrict it.  Your main
 obligation is to grant to others this same freedom which you yourself enjoy.)

 Note that if you use the perl code and put the output on a web site,
 you are DISTRIBUTING the javascript code and you must comply with the
 conditions of the GNU GPL (even if you are not distributing the perl code).

====
 PLEASE EMAIL ME IF YOU MAKE AN IMPROVED VERSION:  A.M.Iwi {AT} rl.ac.uk
 (NB this is only a request, not an obligation.)
====

END_COPYRIGHT_STATEMENT
print STDOUT $copyright;

# Name of this generator script; the generated page links to it for download.
my $scriptname = 'charmap.pl';

# File the generated HTML page is written to.
my $filename = 'index.shtml';

# The sixteen hexadecimal digits, indexed by their numeric value.
my @digits = (0 .. 9, 'A' .. 'F');

# Named sections offered in the "from list" drop-down.  Each key is the two
# leading hex digits of a 256-character section (so '04' covers U+0400 to
# U+04FF) and each value is the label shown for it.  Sections without an
# entry can still be reached through the two hex-digit selectors.
#
# Labels corrected against the Unicode block names: "Myanmar" (was
# "Mayanmar"), "Kangxi Radicals" (was "Kanji Radicals"), and a stray
# trailing comma removed from the 'DB' label.
my %sections = (
    '00' => 'Basic Latin, Latin 1',
    '01' => 'Latin ext A, Latin ext B',
    '02' => 'Latin ext B cont, ...',
    '03' => 'Diacrit, Greek, Coptic',
    '04' => 'Cyrillic',
    '05' => 'Cyrillic Supp., Armenian, Hebrew',
    '06' => 'Arabic',
    '07' => 'Syriac, Thaana',
    '09' => 'Devanagari, Bengali',
    '0A' => 'Gurmukhi, Gujarati',
    '0B' => 'Oriya, Tamil',
    '0C' => 'Telugu, Kannada',
    '0D' => 'Malayalam, Sinhala',
    '0E' => 'Thai, Lao',
    '0F' => 'Tibetan',
    '10' => 'Myanmar, Georgian',
    '11' => 'Hangul Jamo',
    '12' => 'Ethiopic',
    '13' => 'Ethiopic, Cherokee',
    '17' => 'Tagalog,Hanunoo,Buhid,Tagbanwa,Khmer',
    '18' => 'Mongolian',
    '19' => 'Limbu, Tai Le, Khmer Symbols',
    '1D' => 'Phonetic Extensions',
    '1E' => 'Latin Extended Additional',
    '1F' => 'Greek Extended',
    '20' => 'Punctuation,Sub/Superscripts,Currency',
    '21' => 'Letterlike Syms, Number Forms, Arrows',
    '22' => 'Mathematical Operators',
    '23' => 'Misc. Technical',
    '24' => 'Ctl Picts, OCR, Encl Alphanumerics',
    '25' => 'Box Drawing, Block Elem, Geom Shapes',
    '26' => 'Misc Symbols',
    '27' => 'Dingbats,Misc.Math-A,Supp.Arrows-A',
    '28' => 'Braille',
    '29' => 'Suppl. Arrows-B, Misc.Math.Symbols-B',
    '2A' => 'Supplemental Mathematical Operators',
    '2B' => 'Miscellaneous Symbols and Arrows',
    '2E' => 'CJK Radicals Supplement',
    '2F' => 'Kangxi Radicals,Ideographic DescChars',
    '30' => 'CJK Syms+Punct, Hiragana, Katakana',
    '31' => 'Bopomofo, Hangul Compat Jamo, ...',
    '32' => 'Enclosed CJK Letters and Months',
    '33' => 'CJK Compatibility',
    '34' => 'CJK Unified Ideographs Extension A',
    '4D' => 'Yijing Hexagram Symbols',
    '4E' => 'CJK Unified Ideographs (start)',
    '9F' => 'CJK Unified Ideographs (end)',
    'A0' => 'Yi Syllables (start)',
    'A4' => 'Yi Syllables (end), Yi Radicals',
    'AC' => 'Hangul Syllables (start)',
    'D7' => 'Hangul Syllables (end)',
    'D8' => 'High Surrogates (start)',
    'DB' => 'High Surrogates (end)',
    'DC' => 'Low Surrogates (start)',
    'DF' => 'Low Surrogates (end)',
    'E0' => 'Private Use Area (start)',
    'F8' => 'Private Use Area (end)',
    'F9' => 'CJK Compatibility Ideographs',
    'FA' => 'CJK Compatibility Ideographs (cont)',
    'FB' => 'Alpha Pres Fms,Arabic Pres Fms A(st)',
    'FD' => 'Arabic Presentation Forms-A (end)',
    'FE' => 'misc...',
    'FF' => 'Halfwidth+Fullwidth Forms, Specials',
);

# Create (or truncate) the output page.  The three-argument form of open
# keeps the mode separate from the file name, and the error message names
# the file that could not be opened.  The bareword handle HTML is kept
# because the rest of the script prints to it.
open(HTML, '>', $filename) or die "Cannot open '$filename' for writing: $!";

# Document head, up to and including the opening of the script element.
print HTML <<'EOF';
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="Description" content="Interactive map with buttons for selecting arbitrary Unicode characters for copying and pasting into other applications">
<meta name="Keywords" content="Unicode, characters, map, character map, multilingual, foreign, copy, paste, copying, pasting, online, form, input, search, Google, free, GPL, perl, javascript">
<title>Free Online Unicode Character Map</title>
<script language="Javascript" type="text/javascript">
<!--
EOF

# Embed the licence text in the page's script: every line of it, blank
# lines included, is prefixed with // so that it becomes a javascript comment.
(my $comment = $copyright) =~ s{^}{//}gm;
print HTML $comment;
print HTML <<'EOF';

var theform;    // the page's main form (document.mainform); assigned in setup()
var textfield;  // the "myinput" text field of that form; assigned in setup()
var d3,d2;      // the two leading hex digits of the section currently shown
var digits;     // the sixteen upper-case hex digit characters; assigned in setup()

//---------------------
// Insert one character into the text field at the cursor and return the
// keyboard focus to the field.
//   d1d0 - the two low hex digits of the character code, as a string
//          (e.g. "4A"); the two high digits are the current d3 and d2.
// The character is built from its numeric code directly rather than by
// eval()ing a "\uXXXX" string literal assembled at run time.
function insert(d1d0)
{
    var code = parseInt(d3 + d2 + d1d0, 16);
    insertAtCursor(textfield, String.fromCharCode(code));
    textfield.focus();
}

//---------------------
// Insert myValue into the text input myField at the cursor position,
// replacing any selected text, and leave the cursor just after the
// inserted text.
//
// This is a modified version of code from PHPMyAdmin.  PHPMyAdmin is GPLed,
// so copyright for this is fine.
//
//   myField - the text input element to modify
//   myValue - the string to insert
//
// If the field offers no selection interface at all, the text is appended
// to the end of the field; the cursor is then only moved when the field
// actually provides setSelectionRange, so this case no longer throws.
function insertAtCursor(myField, myValue)
{
    //IE support
    if (document.selection)
    {
        myField.focus();
        var sel = document.selection.createRange();
        sel.text = myValue;
        sel.select();
        return;
    }

    var newEndPos;
    //MOZILLA/NETSCAPE support
    if (typeof myField.selectionStart == "number")
    {
        var startPos = myField.selectionStart;
        var endPos = myField.selectionEnd;
        newEndPos = startPos + myValue.length;
        myField.value = myField.value.substring(0, startPos)
                        + myValue
                        + myField.value.substring(endPos);
    }
    else
    {
        newEndPos = myField.value.length + myValue.length;
        myField.value += myValue;
    }

    if (myField.setSelectionRange)
    {
        myField.setSelectionRange(newEndPos, newEndPos);
    }
}

//---------------------
// Relabel the 256 character buttons for the section chosen by d3 and d2,
// which must already be set.  The button named c<d1><d0> is given the
// character with hex code <d3><d2><d1><d0> as its label, except that in
// section 00 the buttons of rows 0 and 1 (codes 0000 to 001F) are
// labelled "[]" instead.  Focus is returned to the text field afterwards.
//
// Character codes are computed numerically, so no eval() is needed, and
// the loop variables are local rather than implicit globals.
function initSection()
{
    var base = parseInt(d3 + d2, 16) * 256;
    for (var v1 = 0; v1 < 16; v1++)
    {
        var ctrl = (base == 0 && v1 <= 1);
        for (var v0 = 0; v0 < 16; v0++)
        {
            var button = theform["c" + digits[v1] + digits[v0]];
            if (!ctrl)
            {
                button.value = String.fromCharCode(base + v1 * 16 + v0);
            }
            else
            {
                button.value = "[]";
            }
        }
    }
    textfield.focus();
}

//---------------------
// Select, in the select box obj, the first option whose value equals
// reqdvalue.  When no option matches, select the option at defaultindex
// instead, unless defaultindex is negative, in which case the current
// selection is left alone.
function selectOption(obj, reqdvalue, defaultindex) {
    for (var i = 0; i < obj.options.length; i++) {
        if (obj.options[i].value == reqdvalue) {
            obj.selectedIndex = i;
            return;
        }
    }
    if (defaultindex >= 0) {
        obj.selectedIndex = defaultindex;
    }
}

//---------------------
// Return the value of the option currently selected in the select box obj.
function valueSelected(obj) {
    var chosen = obj.options[obj.selectedIndex];
    return chosen.value;
}

//---------------------
// Handler for the two hex-digit select boxes: read the chosen digits into
// d3 and d2, bring the named-section list into step (falling back to its
// entry at index 0 when the section has no entry there), then relabel the
// buttons.
function initSectionFromNumeric() {
    d3 = valueSelected(theform.d3);
    d2 = valueSelected(theform.d2);

    var section = d3 + d2;
    selectOption(theform.list, section, 0);

    initSection();
}

//---------------------
// Handler for the named-section list: unless the entry with value 'none'
// is chosen, split the chosen two-character value into d3 and d2, bring
// the two hex-digit select boxes into step and relabel the buttons.
function initSectionFromList() {
    var code = valueSelected(theform.list);
    if (code != 'none') {
        d3 = code.charAt(0);
        d2 = code.charAt(1);
        selectOption(theform.d3, d3, -1);
        selectOption(theform.d2, d2, -1);
        initSection();
    }
}

//---------------------
// Empty the text field and the HTML field, then return focus to the text
// field.  Used instead of a form reset button, which would reset too many
// other things.
function clearInput() {
    htmlfield.value = "";
    textfield.value = "";
    textfield.focus();
}

//---------------------
// Page initialisation, run from the body's onload handler: fill in the
// hex digit tables, look up the form and its two text fields, and label
// the buttons for the section shown by the hex-digit select boxes.
function setup() {
    digits = "0123456789ABCDEF".split("");
    lcdigits = "0123456789abcdef".split("");
    theform = document.mainform;
    textfield = theform.myinput;
    htmlfield = theform.html;
    initSectionFromNumeric();
}

//---------------------
// Return "%" followed by the two lower-case hex digits of n; the digits
// are looked up in lcdigits, so n is expected to be in the range 0 to 255.
function hex(n) {
    var high = lcdigits[n >> 4];
    var low = lcdigits[n & 15];
    return "%" + high + low;
}

//---------------------
// Return text encoded as UTF-8 with percent-escapes, for use as a value
// in a URL query string.
//   text - the string to encode
//
// ASCII characters go through encodeURIComponent rather than escape():
// escape() leaves "+" alone, and a literal "+" in a query string is read
// as a space.  A high surrogate followed by a low surrogate is combined
// into the single character above uFFFF that the pair stands for, and
// encoded as four bytes.  An unpaired surrogate is still written as three
// bytes, as before.
function utf8(text)
{
    var enc = "";
    for (var pos = 0; pos < text.length; pos++)
    {
        var c = text.charCodeAt(pos);

        if (c >= 0xD800 && c <= 0xDBFF && pos + 1 < text.length)
        {
            var low = text.charCodeAt(pos + 1);
            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
                pos++;    // the low surrogate has been consumed as well
            }
        }

        if (c < 128)
        {
            enc += encodeURIComponent(text.charAt(pos));
        }
        else if (c < 2048)
        {
            enc += hex((c >> 6) | 192);
            enc += hex((c & 63) | 128);
        }
        else if (c < 65536)
        {
            enc += hex((c >> 12) | 224);
            enc += hex(((c >> 6) & 63) | 128);
            enc += hex((c & 63) | 128);
        }
        else
        {
            enc += hex((c >> 18) | 240);
            enc += hex(((c >> 12) & 63) | 128);
            enc += hex(((c >> 6) & 63) | 128);
            enc += hex((c & 63) | 128);
        }
    }
    return enc;
}

//---------------------
// Return text as plain-ASCII HTML: "<", ">" and "&" become &lt;, &gt; and
// &amp;, other characters from space to code 127 are copied unchanged,
// and everything else becomes a decimal numeric reference (&#NNN;).
//   text - the string to convert
//
// A high surrogate followed by a low surrogate is written as one reference
// to the character the pair stands for; two separate references to the
// surrogate codes themselves would not be valid HTML.  An unpaired
// surrogate is still written as its own code, as before.
function html(text)
{
    var enc = "";
    for (var pos = 0; pos < text.length; pos++)
    {
        var c = text.charCodeAt(pos);

        if (c >= 0xD800 && c <= 0xDBFF && pos + 1 < text.length)
        {
            var low = text.charCodeAt(pos + 1);
            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                c = 0x10000 + ((c - 0xD800) << 10) + (low - 0xDC00);
                pos++;    // the low surrogate has been consumed as well
            }
        }

        if (c == 60)
        {
            enc += "&lt;";
        }
        else if (c == 62)
        {
            enc += "&gt;";
        }
        else if (c == 38)
        {
            enc += "&amp;";
        }
        else if (c >= 32 && c < 128)
        {
            enc += text.charAt(pos);
        }
        else
        {
            enc += "&#" + c + ";";
        }
    }
    return enc;
}

//---------------------
// Select the whole text field and issue a "Copy" command on it.  Does
// nothing unless document.selection exists, which is the same check the
// page uses to decide whether to show the Copy button at all.
function copyToClipboard() {
    if (!document.selection) {
        return;
    }
    textfield.focus();
    textfield.select();
    var range = textfield.createTextRange();
    range.execCommand("Copy");
}

//---------------------
// Show the HTML form of the text field's contents in the HTML field, then
// return focus to the text field.
function htmlify() {
    var source = textfield.value;
    htmlfield.value = html(source);
    textfield.focus();
}

//---------------------
// Open a Google search for the text field's contents in a new window,
// passing the text as a percent-encoded UTF-8 query.
function google() {
    var query = utf8(textfield.value);
    var url = "http://www.google.com/search?q=" + query + "&ie=UTF-8&oe=UTF-8";
    window.open(url);
}

//---------------------
//-->
</script>
</head>
<body onload="setup()" bgcolor="white" text="black">
<center>
<H1>Free Online Unicode Character Map</H1>
<noscript>
This is an interactive map for selecting arbitrary Unicode characters for
copying and pasting.<br>
<strong><font size="+2" color="red">
You need to enable Javascript for this to work properly.
</font></strong>
<p>
</noscript>
Enter characters by using buttons below (and/or typing).<BR>
Then copy and paste your text as required,<BR>
or click "Google" to use your text as a Google&#8482; search term,<BR>
or click "Make HTML" to show (non-UTF8) HTML code for your text.
<P>
<form name="mainform" onSubmit="return(false);" action="">
<input type=text name="myinput" size=60>
<input type=button value="Google" onClick='google()'>
<SCRIPT language='Javascript' type="text/javascript">
<!--
// "Copy" button for use in IE
if (document.selection)
{
    document.write("<input type=button value=\"Copy\" onClick=copyToClipboard()>");
}
// -->
</SCRIPT>
<input type=button value="Clear" onClick='clearInput()'>

<P>
HTML: 
<input type=text name="html" size=40 readonly>
<input type=button value="Make HTML" onClick='htmlify()'>
EOF

# Two drop-down boxes, named d3 and d2, each offering the sixteen hex
# digits; the page's script reads them as the two leading hex digits of the
# section to show.
print HTML '<P>Select section by hex code: ';
for my $name (qw(d3 d2)) {
    my $options = join '', map { qq(<option value="$_">$_</option>) } @digits;
    print HTML qq(<select name="$name" onChange='initSectionFromNumeric()'>),
               $options,
               "</select>\n";
}
# Start of the drop-down list of named sections.  Its first entry is blank
# and carries the value "none".
print HTML <<'EOF';
or from list:
<select name="list" onChange='initSectionFromList()'>
<option value="none"></option>
EOF

# One entry per named section, in ascending numeric order of its hex key.
for my $code (sort { hex($a) <=> hex($b) } keys %sections) {
    print HTML qq(<option value="$code">$code: $sections{$code}</option>);
}
print HTML "</select>\n";

# The 16 x 16 grid of character buttons.  Rows are labelled with the third
# hex digit and columns with the fourth; the button for digits d1 and d0 is
# named "c<d1><d0>" and calls insert("<d1><d0>") when clicked.  Every fourth
# row and column uses the stronger background colour to make a grid.
my ($strong, $pale) = ('#aaaaff', '#ddddff');

print HTML qq(<P><table width="90%" border="0" cellpadding="1" cellspacing="0">\n);

# Heading row: an empty corner cell, then x0 .. xF.
print HTML qq(<tr align="center"><td> </td>);
for my $low (0 .. 15) {
    my $bg = $low % 4 ? $pale : $strong;
    print HTML qq(<td bgcolor="$bg"><font size="-1">x</font><B>$digits[$low]</B></td>);
}
print HTML "</tr>\n";

# One row per value of the third digit: a label cell, then sixteen buttons.
for my $high (0 .. 15) {
    my $row_digit = $digits[$high];
    my $row_bg    = $high % 4 ? $pale : $strong;
    print HTML qq(<tr align="center"><td bgcolor="$row_bg"><B>$row_digit</B><font size="-1">x</font></td>);

    for my $low (0 .. 15) {
        my $col_digit = $digits[$low];
        my $bg = ($high % 4 && $low % 4) ? $pale : $strong;
        print HTML qq{<td bgcolor="$bg"><input type="button" name="c$row_digit$col_digit" Style="background:#f0fff0" onClick='insert("$row_digit$col_digit")'></td>};
    }
    print HTML "</tr>\n";
}

# Close the table and form, then write the notes footer and the end of the
# document.  This here-document is an interpolating one: $scriptname is
# substituted into the download link, and "\&" is written out as a plain "&".
print HTML <<EOF;
</table>
</form>
</center>
<hr>
<font size="-1">
<B>Notes:</B>
Unicode characters up to uFFFF supported here; see also 
<A HREF="http://www.unicode.org/Public/UNIDATA/Blocks.txt">full list</A>
of character ranges. Many codes may be undefined or unsupported 
by your browser. This page uses no server-side code, so a
downloaded copy will work equally.
Or download the <A HREF="$scriptname">perl script</A> which 
made this page. Javascript and perl code are \&#169; 2003 Alan Iwi and others, 
but may be copied under 
<A HREF="http://www.gnu.org/copyleft/gpl.html">GNU GPL</a>;
see comment lines for more info.
<hr>
<center>
<i>The URL of this page has changed slightly &mdash; the best form to use is
now <tt><a
href="http://www-atm.physics.ox.ac.uk/user/iwi/charmap.html">http://www-atm.physics.ox.ac.uk/user/iwi/charmap.html</a></tt></i></center>
<hr>
</font>
</body>
</html>
EOF

# Output is buffered, so a failed write (a full disk, for example) may only
# be reported when the handle is closed.  Check the close before announcing
# success.
close(HTML) or die "Error writing '$filename': $!";

print "$filename generated.\n";

