[syslinux:fsc] codepage: add a lower-case table

syslinux-bot for H. Peter Anvin hpa at zytor.com
Thu Feb 4 18:09:02 PST 2010


Commit-ID:  117a72aa41edea84a734db09c89d09ca2efb5bf7
Gitweb:     http://syslinux.zytor.com/commit/117a72aa41edea84a734db09c89d09ca2efb5bf7
Author:     H. Peter Anvin <hpa at zytor.com>
AuthorDate: Thu, 4 Feb 2010 17:56:03 -0800
Committer:  H. Peter Anvin <hpa at zytor.com>
CommitDate: Thu, 4 Feb 2010 17:56:03 -0800

codepage: add a lower-case table

Add a lower-case table; necessary for readdir on FAT in the presence
of WinNT case flags.

Signed-off-by: H. Peter Anvin <hpa at zytor.com>


---
 codepage/cptable.pl     |   40 +++++++++++++++++++++++++++++++++++-----
 core/include/codepage.h |    3 ++-
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/codepage/cptable.pl b/codepage/cptable.pl
index 05cfc3e..e29cf00 100755
--- a/codepage/cptable.pl
+++ b/codepage/cptable.pl
@@ -82,7 +82,7 @@ open(CPOUT, '>', $cpout)
 # Magic number, in anticipation of being able to load these
 # files dynamically...
 #
-print CPOUT pack("VV", 0x8fad232b, 0x9c295319);
+print CPOUT pack("VV", 0x58a8b3d4, 0x51d21eb1);
 
 # Header fields available for future use...
 print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0);
@@ -97,6 +97,7 @@ print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0);
 # ... where @ytab is console codepage -> Unicode and
 # %tabx is Unicode -> filesystem codepage.
 #
+@uctab = (undef) x 256;
 for ($i = 0; $i < 256; $i++) {
     $uuc = $ucase{$ytab[$i]};	# Unicode upper case
     if (defined($tabx{$uuc})) {
@@ -106,15 +107,44 @@ for ($i = 0; $i < 256; $i++) {
 	# Upper case equivalent stripped of accents
 	$u = $tabx{${$decomp{$uuc}}[0]};
     } else {
-	# No equivalent at all found.  Set this to zero, which should
-	# prevent shortname matching altogether (still making longname
-	# matching possible, of course.)
-	$u = 0;
+	# No equivalent at all found.  Assume it is a lower-case-only
+	# character, like greek alpha in CP437.
+	$u = $i;
     }
+    $uctab[$i] = $u;
     print CPOUT pack("C", $u);
 }
 
 #
+# Self (shortname) lowercase table.
+# This depends both on the console codepage and the filesystem codepage;
+# the logical transcoding operation is:
+#
+# $taby{$lcase{$xtab[$i]}}
+#
+# ... where @xtab is filesystem codepage -> Unicode and
+# %taby is Unicode -> console codepage.
+#
+@lctab = (undef) x 256;
+for ($i = 0; $i < 256; $i++) {
+    $llc = $lcase{$xtab[$i]};	# Unicode lower case
+    if (defined($l = $taby{$llc}) && $uctab[$l] == $i) {
+	# Straight-forward conversion
+    } elsif (defined($l = $tabx{${$decomp{$llc}}[0]}) && $uctab[$l] == $i) {
+	# Lower case equivalent stripped of accents
+    } else {
+	# No equivalent at all found.  Find *anything* that matches the
+	# bijection criterion...
+	for ($l = 0; $l < 256; $l++) {
+	    last if ($uctab[$l] == $i);
+	}
+	$l = $i if ($l == 256);	# If nothing, we're screwed anyway...
+    }
+    $lctab[$i] = $l;
+    print CPOUT pack("C", $l);
+}
+
+#
 # Unicode (longname) matching table.
 # This only depends on the console codepage.
 #
diff --git a/core/include/codepage.h b/core/include/codepage.h
index e2f78aa..a24d90f 100644
--- a/core/include/codepage.h
+++ b/core/include/codepage.h
@@ -6,13 +6,14 @@
 
 #include <stdint.h>
 
-#define CODEPAGE_MAGIC	UINT64_C(0x9c2953198fad232b)
+#define CODEPAGE_MAGIC	UINT64_C(0x51d21eb158a8b3d4)
 
 struct codepage {
     uint64_t	magic;
     uint32_t	reserved[6];
 
     uint8_t	upper[256];	/* Codepage upper case table */
+    uint8_t	lower[256];	/* Codepage lower case table */
 
     /*
      * The primary Unicode match is the same case, i.e. A -> A,



More information about the Syslinux-commits mailing list