[PATCH] msdos fs NLS support

From: Andrzej Krzysztofowicz (ankry@green.mif.pg.gda.pl)
Date: Sat May 27 2000 - 19:12:15 EDT

  • Next message: Andries Brouwer: "Re: 2.4.0 ide driver and old disks"

    Hi,
       The following patch enables NLS support for the msdos filesystem on
    2.3.99pre/2.4.0test kernels patched with Urban's nls16-2.3.99-pre9-2.patch

    ( which can be found at
      http://www.hojdpunkten.ac.se/054/nls16-2.3.99-pre9-2.patch.gz )

    This patch is NOT intended to be included in the kernel tree in its present
    form, as I'm not sure whether it breaks non-MS FAT implementations
    (especially Atari GEMDOS).

    I have tested it only with MSDOS cp852 -> iso8859-2 conversion, and it seems
    to work fine.

    Please test it also. Any comments are appreciated.

    Regards
       Andrzej
    *************************************************************************
    diff -u --recursive linux-ac4+nls/Documentation/Configure.help linux/Documentation/Configure.help
    --- linux-ac4+nls/Documentation/Configure.help Sun May 28 00:21:02 2000
    +++ linux/Documentation/Configure.help Sun May 28 00:21:41 2000
    @@ -10977,7 +10977,8 @@
       values are cp437, cp737, cp775, cp850, cp852, cp855, cp857, cp860,
       cp861, cp862, cp863, cp864, cp865, cp866, cp869, cp874, cp932, cp936,
       cp949, cp950, iso8859-1, iso8859-2, iso8859-3, iso8859-4, iso8859-5,
    - iso8859-6, iso8859-7, iso8859-8, iso8859-9, iso8859-15, koi8-r.
    + iso8859-6, iso8859-7, iso8859-8, iso8859-9, iso8859-14, iso8859-15,
    + koi8-r.
       If you specify a wrong value, it will use the build-in NLS; compatible
       with iso8859-1.
     
    diff -u --recursive linux-ac4+nls/fs/fat/dir.c linux/fs/fat/dir.c
    --- linux-ac4+nls/fs/fat/dir.c Sun May 28 00:21:02 2000
    +++ linux/fs/fat/dir.c Sun May 28 00:21:41 2000
    @@ -451,12 +451,10 @@
                             inum = iunique(sb, MSDOS_ROOT_INO);
             }
     
    - if (isvfat) {
    - memset(&bufuname[i], 0, sizeof(struct nls_unicode));
    - i = utf8 ? utf8_wcstombs(bufname, (__u16 *) &bufuname, 56)
    - : uni16_to_x8(bufname, (unsigned char *) &bufuname,
    - uni_xlate, nls_io);
    - }
    + memset(&bufuname[i], 0, sizeof(struct nls_unicode));
    + i = utf8 ? utf8_wcstombs(bufname, (__u16 *) &bufuname, 56)
    + : uni16_to_x8(bufname, (unsigned char *) &bufuname,
    + uni_xlate, nls_io);
     
             if (!long_slots||shortnames) {
                     if (both)
    diff -u --recursive linux-ac4+nls/fs/fat/inode.c linux/fs/fat/inode.c
    --- linux-ac4+nls/fs/fat/inode.c Sun May 28 00:21:02 2000
    +++ linux/fs/fat/inode.c Sun May 28 00:21:41 2000
    @@ -637,7 +637,7 @@
             }
     
             sbi->nls_io = NULL;
    - if (sbi->options.isvfat && !opts.utf8) {
    + if (!opts.utf8) {
                     p = opts.iocharset ? opts.iocharset : CONFIG_NLS_DEFAULT;
                     sbi->nls_io = load_nls(p);
                     if (! sbi->nls_io)
    diff -u --recursive linux-ac4+nls/fs/msdos/namei.c linux/fs/msdos/namei.c
    --- linux-ac4+nls/fs/msdos/namei.c Sun May 28 00:21:02 2000
    +++ linux/fs/msdos/namei.c Sun May 28 00:21:41 2000
    @@ -12,6 +12,7 @@
     
     #include <linux/sched.h>
     #include <linux/msdos_fs.h>
    +#include <linux/nls.h>
     #include <linux/errno.h>
     #include <linux/string.h>
     
    @@ -32,7 +33,7 @@
     
     
     /* Characters that are undesirable in an MS-DOS file name */
    -
    +
     static char bad_chars[] = "*?<>|\"";
     static char bad_if_strict_pc[] = "+=,; ";
     static char bad_if_strict_atari[] = " "; /* GEMDOS is less restrictive */
    @@ -44,60 +45,141 @@
             fat_put_super(sb);
     }
     
    +static inline
    +unsigned char msdos_toupper(struct nls_table *t, unsigned char c)
    +{
    + unsigned char nc = t->charset2upper[c];
    +
    + return nc ? nc : c;
    +}
    +
    +static inline
    +unsigned char msdos_tolower(struct nls_table *t, unsigned char c)
    +{
    + unsigned char nc = t->charset2lower[c];
    +
    + return nc ? nc : c;
    +}
    +
    +static inline
    +void msdos_char2uni(struct nls_table *t, unsigned char c, struct nls_unicode *uc)
    +{
    + int len;
    +
    + t->char2uni(&c, &len, &uc->uni1, &uc->uni2);
    +}
    +
    +static inline
    +unsigned char msdos_uni2char(struct nls_table *t, struct nls_unicode uc)
    +{
    + int len;
    + unsigned char c;
    + int ret;
    +
    + ret = t->uni2char(uc.uni2, uc.uni1, &c, 1, &len);
    + if (len != 1 || ret == -1)
    + return 0;
    + return c;
    +}
    +
    +static inline
    +unsigned char msdos_uni2upper(struct nls_table *t, struct nls_unicode uc)
    +{
    + int len;
    + unsigned char c;
    + int ret;
    +
    + ret = t->uni2char(uc.uni2, uc.uni1, &c, 1, &len);
    + if (len != 1 || ret == -1)
    + return 0;
    + return msdos_toupper(t, c);
    +}
    +
    +static inline
    +int msdos_isupper(struct nls_table *t, unsigned char c)
    +{
    + unsigned char l, u;
    +
    + l = msdos_tolower(t, c);
    + u = msdos_toupper(t, c);
    + return (c == u && l != u);
    +}
    +
     /***** Formats an MS-DOS file name. Rejects invalid names. */
    -static int msdos_format_name(const char *name,int len,
    - char *res,struct fat_mount_options *opts)
    - /* conv is relaxed/normal/strict, name is proposed name,
    +static int msdos_format_name(struct nls_table *nls_disk, struct nls_table *nls_io,
    + const char *name, int len, char *res, struct fat_mount_options *opts)
    + /* nls is NLS table used on disk, name is proposed name,
              * len is the length of the proposed name, res is the result name,
    - * dotsOK is if hidden files get dots.
    + * opts->dotsOK is if hidden files get dots,
    + * opts->conversion is normal/strict/relaxed.
              */
     {
             char *walk;
             const char **reserved;
             unsigned char c;
             int space;
    + struct nls_unicode uni;
     
    - if (name[0] == '.') { /* dotfile because . and .. already done */
    + msdos_char2uni(nls_io, *name, &uni);
    + c = msdos_uni2char(nls_disk, uni);
    + if (!c)
    + return -EINVAL;
    + if (c == '.') { /* dotfile because . and .. already done */
                     if (opts->dotsOK) {
                             /* Get rid of dot - test for it elsewhere */
                             name++; len--;
                     }
    - else if (!opts->atari) return -EINVAL;
    + else if (!opts->atari)
    + return -EINVAL;
             }
    - /* disallow names that _really_ start with a dot for MS-DOS, GEMDOS does
    - * not care */
    + /* disallow names that _really_ start with a dot for MS-DOS,
    + * GEMDOS does not care
    + */
             space = !opts->atari;
             c = 0;
             for (walk = res; len && walk-res < 8; walk++) {
    - c = *name++;
    + msdos_char2uni(nls_io, *name, &uni);
    + c = msdos_uni2upper(nls_disk, uni);
                     len--;
                     if (opts->conversion != 'r' && strchr(bad_chars,c))
                             return -EINVAL;
                     if (opts->conversion == 's' && strchr(bad_if_strict(opts),c))
                             return -EINVAL;
    - if (c >= 'A' && c <= 'Z' && opts->conversion == 's')
    + if (opts->conversion == 's' && msdos_isupper(nls_io, *name))
    + return -EINVAL;
    + if (c < ' ' || c == ':' || c == '\\')
                             return -EINVAL;
    - if (c < ' ' || c == ':' || c == '\\') return -EINVAL;
     /* 0xE5 is legal as a first character, but we must substitute 0x05 */
     /* because 0xE5 marks deleted files. Yes, DOS really does this. */
     /* It seems that Microsoft hacked DOS to support non-US characters */
     /* after the 0xE5 character was already in use to mark deleted files. */
    - if((res==walk) && (c==0xE5)) c=0x05;
    - if (c == '.') break;
    + name++;
    + if ((res==walk) && (c==0xE5))
    + c=0x05;
    + if (c == '.')
    + break;
                     space = (c == ' ');
    - *walk = (c >= 'a' && c <= 'z') ? c-32 : c;
    + *walk = c;
             }
    - if (space) return -EINVAL;
    + if (space)
    + return -EINVAL;
             if (opts->conversion == 's' && len && c != '.') {
    - c = *name++;
    + msdos_char2uni(nls_io, *name++, &uni);
    + c = msdos_uni2upper(nls_disk, uni);
                     len--;
    - if (c != '.') return -EINVAL;
    + if (c != '.')
    + return -EINVAL;
    + }
    + while (c != '.' && len--) {
    + msdos_char2uni(nls_io, *name++, &uni);
    + c = msdos_uni2upper(nls_disk, uni);
             }
    - while (c != '.' && len--) c = *name++;
             if (c == '.') {
    - while (walk-res < 8) *walk++ = ' ';
    + while (walk-res < 8)
    + *walk++ = ' ';
                     while (len > 0 && walk-res < MSDOS_NAME) {
    - c = *name++;
    + msdos_char2uni(nls_io, *name, &uni);
    + c = msdos_uni2upper(nls_disk, uni);
                             len--;
                             if (opts->conversion != 'r' && strchr(bad_chars,c))
                                     return -EINVAL;
    @@ -111,15 +193,20 @@
                                             return -EINVAL;
                                     break;
                             }
    - if (c >= 'A' && c <= 'Z' && opts->conversion == 's')
    + if (opts->conversion == 's' &&
    + msdos_isupper(nls_io, *name))
                                     return -EINVAL;
                             space = c == ' ';
    - *walk++ = c >= 'a' && c <= 'z' ? c-32 : c;
    + *walk++ = c;
    + name++;
                     }
    - if (space) return -EINVAL;
    - if (opts->conversion == 's' && len) return -EINVAL;
    + if (space)
    + return -EINVAL;
    + if (opts->conversion == 's' && len)
    + return -EINVAL;
             }
    - while (walk-res < MSDOS_NAME) *walk++ = ' ';
    + while (walk-res < MSDOS_NAME)
    + *walk++ = ' ';
             if (!opts->atari)
                     /* GEMDOS is less stupid and has no reserved names */
                     for (reserved = reserved_names; *reserved; reserved++)
    @@ -134,9 +221,13 @@
             int res;
             char dotsOK;
             char msdos_name[MSDOS_NAME];
    + struct nls_table *nls_io = MSDOS_SB(dir->i_sb)->nls_io,
    + *nls_disk = MSDOS_SB(dir->i_sb)->nls_disk;
     
             dotsOK = MSDOS_SB(dir->i_sb)->options.dotsOK;
    - res = msdos_format_name(name,len, msdos_name,&MSDOS_SB(dir->i_sb)->options);
    +
    + res = msdos_format_name(nls_disk, nls_io, name, len, msdos_name,
    + &MSDOS_SB(dir->i_sb)->options);
             if (res < 0)
                     return -ENOENT;
             res = fat_scan(dir,msdos_name,bh,de,ino);
    @@ -164,8 +255,11 @@
             struct fat_mount_options *options = & (MSDOS_SB(dentry->d_sb)->options);
             int error;
             char msdos_name[MSDOS_NAME];
    + struct nls_table *nls_io = MSDOS_SB(dentry->d_sb)->nls_io,
    + *nls_disk = MSDOS_SB(dentry->d_sb)->nls_disk;
             
    - error = msdos_format_name(qstr->name, qstr->len, msdos_name, options);
    + error = msdos_format_name(nls_disk, nls_io, qstr->name, qstr->len,
    + msdos_name, options);
             if (!error)
                     qstr->hash = full_name_hash(msdos_name, MSDOS_NAME);
             return 0;
    @@ -180,11 +274,15 @@
             struct fat_mount_options *options = & (MSDOS_SB(dentry->d_sb)->options);
             int error;
             char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME];
    + struct nls_table *nls_io = MSDOS_SB(dentry->d_sb)->nls_io,
    + *nls_disk = MSDOS_SB(dentry->d_sb)->nls_disk;
     
    - error = msdos_format_name(a->name, a->len, a_msdos_name, options);
    + error = msdos_format_name(nls_disk, nls_io, a->name, a->len,
    + a_msdos_name, options);
             if (error)
                     goto old_compare;
    - error = msdos_format_name(b->name, b->len, b_msdos_name, options);
    + error = msdos_format_name(nls_disk, nls_io, b->name, b->len,
    + b_msdos_name, options);
             if (error)
                     goto old_compare;
             error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME);
    @@ -282,9 +380,11 @@
             struct inode *inode;
             int ino,res,is_hid;
             char msdos_name[MSDOS_NAME];
    + struct nls_table *nls_io = MSDOS_SB(dentry->d_sb)->nls_io,
    + *nls_disk = MSDOS_SB(dentry->d_sb)->nls_disk;
     
    - res = msdos_format_name(dentry->d_name.name,dentry->d_name.len,
    - msdos_name, &MSDOS_SB(sb)->options);
    + res = msdos_format_name(nls_disk, nls_io, dentry->d_name.name,
    + dentry->d_name.len, msdos_name, &MSDOS_SB(sb)->options);
             if (res < 0)
                     return res;
             is_hid = (dentry->d_name.name[0]=='.') && (msdos_name[0]!='.');
    @@ -354,9 +454,11 @@
             int res,is_hid;
             char msdos_name[MSDOS_NAME];
             int ino;
    + struct nls_table *nls_io = MSDOS_SB(dentry->d_sb)->nls_io,
    + *nls_disk = MSDOS_SB(dentry->d_sb)->nls_disk;
     
    - res = msdos_format_name(dentry->d_name.name,dentry->d_name.len,
    - msdos_name, &MSDOS_SB(sb)->options);
    + res = msdos_format_name(nls_disk, nls_io, dentry->d_name.name,
    + dentry->d_name.len, msdos_name, &MSDOS_SB(sb)->options);
             if (res < 0)
                     return res;
             is_hid = (dentry->d_name.name[0]=='.') && (msdos_name[0]!='.');
    @@ -546,13 +648,15 @@
             int old_ino, error;
             int is_hid,old_hid; /* if new file and old file are hidden */
             char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME];
    + struct nls_table *nls_io = MSDOS_SB(old_dentry->d_sb)->nls_io,
    + *nls_disk = MSDOS_SB(old_dentry->d_sb)->nls_disk;
     
    - error = msdos_format_name(old_dentry->d_name.name,
    + error = msdos_format_name(nls_disk, nls_io, old_dentry->d_name.name,
                                       old_dentry->d_name.len,old_msdos_name,
                                       &MSDOS_SB(old_dir->i_sb)->options);
             if (error < 0)
                     goto rename_done;
    - error = msdos_format_name(new_dentry->d_name.name,
    + error = msdos_format_name(nls_disk, nls_io, new_dentry->d_name.name,
                                       new_dentry->d_name.len,new_msdos_name,
                                       &MSDOS_SB(new_dir->i_sb)->options);
             if (error < 0)
    *************************************************************************

    -- 
    =======================================================================
      Andrzej M. Krzysztofowicz               ankry@mif.pg.gda.pl
      tel.  (0-58) 347 14 61
    Wydz.Fizyki Technicznej i Matematyki Stosowanej Politechniki Gdanskiej
    

    - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/



    This archive was generated by hypermail 2b29 : Sat May 27 2000 - 19:16:19 EDT