# Generate an HTML page listing the official character encoding ("charset")
# names in alphabetic order.
#
# Input:  the plain-text IANA character-set registry, read from the files
#         named on the command line or from STDIN (via <>).  The parser
#         recognises "Name:", "Alias:", "MIBEnum:" and "Source:" lines; a
#         blank line ends a record.
# Output: an HTML document on STDOUT; diagnostics about names that occur
#         more than once go to STDERR so they do not end up inside the table.
#
# NOTE(review): the markup in this file appears to have been stripped at some
# point (here-documents, table tags and link substitutions had lost their
# tags).  The HTML tags and link targets below are a reconstruction -- confirm
# them against the original script before publishing the output.

use strict;
use warnings;

# NOTE(review): link bases are assumptions, not taken from the original.
my $RFC_BASE  = 'https://www.rfc-editor.org/rfc/rfc';
my $IANA_BASE = 'https://www.iana.org/assignments/character-set-info/';

my %ref;        # name => bracketed reference text, or (for aliases) primary name
my %mib;        # primary name => MIBenum value
my %source;     # primary name => "Source:" text (including continuation lines)
my %isalias;    # name => 1 when the name is an alias of another entry

# Escape text taken from the registry for inclusion in HTML.
# Returns the empty string for undef.
sub html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Sort comparator: case-insensitive lexicographic order.
sub byname { uc($a) cmp uc($b) }

# Print an encoding name.  Names of 30 characters or more get a break
# opportunity after each underline so they do not force a very wide column.
# NOTE(review): the break tag was lost; <br> is assumed because the notes
# say such names "have been split" -- <wbr> may have been intended.
sub pname {
    my ($n) = @_;
    $n = '' unless defined $n;
    my $is_long = length($n) >= 30;
    $n = html_escape($n);
    $n =~ s/_/_<br>/g if $is_long;
    print $n;
}

# Store one parsed record in the lookup tables and register its aliases.
sub store_record {
    my ($rec) = @_;
    my $name = $rec->{name};
    $ref{$name}    = $rec->{ref};
    $mib{$name}    = $rec->{mib};
    $source{$name} = $rec->{source};
    # Iterate over the keys only: iterating the hash itself would also
    # register the values ("1") as if they were alias names.
    for my $alias (keys %{ $rec->{aliases} }) {
        $isalias{$alias} = 1;
        $ref{$alias}     = $name;
    }
    return;
}

# Read the registry from <> and fill %ref, %mib, %source and %isalias.
# A single read loop is used so that <> is never called again after it has
# reported end of input (which could make Perl start reading STDIN).
sub parse_registry {
    my $rec;    # record being collected; undef between records
    while (defined(my $line = <>)) {
        $line =~ s/\r?\n\z//;

        if (!$rec) {
            next unless $line =~ m/^Name: (\S+)\s*(\[(.*)\])?/;
            my ($name, $reference) = ($1, $3);
            warn "Multiply defined name: $name\n"
                if exists $ref{$name} && !$isalias{$name};
            # mib/source start undefined for every record so that a missing
            # field is not inherited from the previous record.
            $rec = {
                name    => $name,
                ref     => $reference,
                mib     => undef,
                source  => undef,
                aliases => {},
            };
            next;
        }

        if ($line eq '') {          # blank line terminates the record
            store_record($rec);
            undef $rec;
        }
        elsif ($line =~ m/^Alias:\s*(\S*)(.*)$/) {
            my ($alias, $rest) = ($1, $2);
            next if $alias eq '' || $alias eq 'None';
            warn "Multiply defined: $alias\n" if exists $ref{$alias};
            if ($rest =~ m/preferred MIME name/) {
                # The preferred MIME name becomes the primary name and the
                # registered name is demoted to an alias.
                $rec->{aliases}{ $rec->{name} } = 1;
                $rec->{name} = $alias;
            }
            else {
                $rec->{aliases}{$alias} = 1;
            }
        }
        elsif ($line =~ m/^MIBEnum:\s*(.*)$/i) {
            $rec->{mib} = $1;
        }
        elsif ($line =~ m/^Source: (.*)$/) {
            $rec->{source} = $1;
        }
        else {
            # Any other line is treated as a continuation of the source text.
            $rec->{source} = '' unless defined $rec->{source};
            $rec->{source} .= $line;
        }
    }
    store_record($rec) if $rec;     # input ended without a trailing blank line
    return;
}

# Turn "RFC nnnn" mentions in a reference string into links.
sub linkify_reference {
    my ($text) = @_;
    $text = html_escape($text);
    $text =~ s{RFC\s*(\d+)}{<a href="$RFC_BASE$1">RFC$1</a>}g;
    return $text;
}

# Turn URLs, RFC mentions and registry-relative paths in a source string
# into links.  The patterns are the ones from the original script.
sub linkify_source {
    my ($text) = @_;
    $text = html_escape($text);
    $text =~ s{http://([A-Za-z0-9/\.\-_]*)}{<a href="http://$1">http://$1</a>}g;
    $text =~ s{RFC[\s*-]?(\d+)}{<a href="$RFC_BASE$1">RFC$1</a>}g;
    $text =~ s{(\.\./assignments/character-set-info/)(.*)\)}
              {<a href="$IANA_BASE$2">$1$2</a>)};
    $text =~ s{(\.\./character-set-info/windows-)(\d+)\)}
              {<a href="${IANA_BASE}windows-$2">$1$2</a>)};
    # NOTE(review): the original link target is unknown; this points at the
    # explanatory note at the end of the generated page.
    $text =~ s{(ECMA registry)}{<a href="#ecma">$1</a>}i;
    return $text;
}

my ($mday, $mon, $year) = (gmtime(time))[3, 4, 5];
my $date = sprintf "%4d-%02d-%02d", 1900 + $year, $mon + 1, $mday;

print <<"EOF";
<!DOCTYPE html>
<html lang="en">
<head>
<title>Official character encoding ("charset") names, in alphabetic order</title>
</head>
<body>
<h1>Official character encoding ("charset") names</h1>

<p>The following table is based on the official IANA registry as accessed $date. It was generated using a simple Perl script by Jukka Korpela. See <a href="#notes">footnotes</a> for details.</p>

EOF

parse_registry();

print "<table>\n",
      "<tr><th>Name of encoding</th><th>MIB</th>",
      "<th>Reference(s)</th><th>Source</th></tr>\n";

for my $enc (sort byname keys %ref) {
    print "<tr><td>";
    pname($enc);
    print "</td><td>", (html_escape($mib{$enc}) || '&nbsp;'), "</td><td>";

    if ($isalias{$enc}) {
        # For an alias, %ref holds the primary name rather than a reference.
        print "Alias for ";
        pname($ref{$enc});
    }
    else {
        print linkify_reference($ref{$enc}) || '&nbsp;';
    }

    print "</td><td>", (linkify_source($source{$enc}) || '&nbsp;'), "</td></tr>\n";
}

print "</table>\n";

print <<'EOF';
<h2 id="notes">Notes:</h2>
<ul>
<li>The ordering of names in the table is lexicographic, so e.g. ISO-8859-13 appears before ISO-8859-2.</li>
<li>Some of the names in the first column have been split over two or more lines after an underline (_) character, to avoid making the width requirements for the table excessive.</li>
<li>See the registry itself for information about the meanings of MIB enum values and about aliases beginning with "cs".</li>
<li id="ecma">The phrase ECMA registry assumably refers to a document titled ISO International Register of Coded Character Sets To Be Used With Escape Sequences (available in Microsoft Word 6 format), also called ISO 2375 (ECMA) registry of coded character sets. It is based on the ISO 2375 standard which defines the registration procedure. ECMA is the registration authority and has set up specific practices on it.</li>
</ul>
</body>
</html>
EOF