page->mapping == NULL recreated without vmware...

From: Petr Vandrovec (vandrove@vc.cvut.cz)
Date: Fri Oct 06 2000 - 13:25:58 EDT

  • Next message: Anil kumar: "disk partition & RAID"

    Hi,
      A month ago I reported here that VMware causes an oops on exit.
    After some time and tons of tweaking, I was able to recreate
    it without vmmon: 2.4.0-test9, no special patches, no
    vmware modules loaded.

      Machine is dual PIII/450, 256MB RAM, 18GB IDE disk.

      Here it is. Adjust MSIZE so that it covers almost all of your
    memory, to force swapping (0x0E000000 is the value for my 256MB machine).

    /*
    Makefile:

    CFLAGS = -W -Wall -O2 -D_GNU_SOURCE -D_BSD_SOURCE

    oopsdemo: oopsdemo.c
    */

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <sys/raw.h>
    #include <sys/shm.h>
    #include <sys/stat.h>
    #include <sys/sysmacros.h>
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    /* One-byte scratch buffer passed to the pipe read()/write() handshakes;
       its value is never inspected. */
    unsigned char zero;

    /* Size in bytes of the shared file mapping.  Pick a value close to the
       amount of physical RAM so that touching it forces swapping. */
    #define MSIZE 0x0E000000

    /* Descriptor of the bound raw device; opened in main() and read from by
       the child processes in x4768(). */
    int mfd;

    void x4768(void) {
            int fd;
            pid_t pid;
            int x4778_1[2];
            int x4778_2[2];
            int x4779_1[2];
            int x4779_2[2];
            int from4778;
            int to4778;
            int from4779;
            int to4779;
            char* mma[65536];
            unsigned int mml[65536];
            unsigned int mmi = 0;
            unsigned int ln = 0;
            unsigned int x;
            
    #define MAL2(a,l) ftruncate(fd, ln+l); mml[mmi] = l; mma[mmi++] = mmap(a, l, PROT_READ|PROT_WRITE, MAP_SHARED, fd, ln); ln += l; mma[mmi-1][0] = 99;
    #define MAL(l) MAL2(NULL,l)

            fd = open("ram0", O_RDWR | O_CREAT, 0600);
            unlink("ram0");
            MAL(MSIZE);
            pipe(x4778_1);
            pipe(x4778_2);
            pid = fork();
            if (!pid) {
                    int from4768 = x4778_1[0];
                    int to4768 = x4778_2[1];
                    close(x4778_1[1]);
                    close(x4778_2[0]);
                    close(fd);
                    read(from4768, &zero, 1);
    #if 1
                    for (x = 0; x < mml[mmi - 1]; x += 4096)
                            mma[mmi-1][x] = 98;
    #endif
                    write(to4768, &zero, 1);
                    read(from4768, &zero, 1);
                    read(mfd, mma[mmi - 1], mml[mmi - 1]);
                    printf("1a: %02X\n", mma[mmi-1][0]);
                    fflush(stdout);
                    mma[mmi-1][0] = 99;
                    printf("1b: %02X\n", mma[mmi-1][0]);
                    /* well, now we have it... */
    #if 1
                    { char* p; int fda;
                      fda = open("/dev/zero", O_RDONLY);
                      p = mmap(NULL, MSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
                      for (x = 0; x < MSIZE; x += 4096)
                        p[x] = 97;
                      close(fda);
                    }
    #endif
                    
                    exit(0);
                    
            } else if (pid < 0) {
                    perror("fork failed");
                    exit(222);
            }
            to4778 = x4778_1[1];
            from4778 = x4778_2[0];
            close(x4778_1[0]);
            close(x4778_2[1]);

    #if 1
            pipe(x4779_1);
            pipe(x4779_2);
            pid = fork();
            if (!pid) {
                    int from4768 = x4779_1[0];
                    int to4768 = x4779_2[1];
                    close(x4779_1[1]);
                    close(x4779_2[0]);
                    close(to4778);
                    close(from4778);
                    close(fd);
                    read(from4768, &zero, 1);
    #if 0
                    for (x = 0; x < mml[mmi - 1]; x += 4096)
                            mma[mmi-1][x] = 96;
    #endif
                    write(to4768, &zero, 1);
                    read(from4768, &zero, 1);
                    read(mfd, mma[mmi - 1], mml[mmi - 1]);
                    printf("2a: %02X\n", mma[mmi-1][0]);
                    fflush(stdout);
                    mma[mmi-1][0] = 95;
                    printf("2b: %02X\n", mma[mmi-1][0]);
                    exit(0);
                    
            } else if (pid < 0) {
                    perror("fork failed");
                    exit(222);
            }
            to4779 = x4779_1[1];
            from4779 = x4779_2[0];
            close(x4779_1[0]);
            close(x4779_2[1]);
    #else
            /* so that read/write to this fails... */
            from4779 = -1;
            to4779 = -1;
    #endif
            {
                    write(to4778, &zero, 1);
                    write(to4779, &zero, 1);
                    read(from4778, &zero, 1);
                    read(from4779, &zero, 1);

                    write(to4778, &zero, 1);
                    write(to4779, &zero, 1);
                    sleep(5);
                    ftruncate(fd, 0);
            }
            close(fd);
            while (mmi--) {
                    munmap(mma[mmi], mml[mmi]);
            }
            exit(0);
    }

    int main(int argc, char* argv[]) {
            struct stat stb;
            int q;
            char rawsubdev[100];
            int min;

            if (argc < 2) {
                    fprintf(stderr, "testit /dev/hda\n");
                    return 97;
            }
            q = stat(argv[1], &stb);
            if (q != 0) {
                    fprintf(stderr, "%s: Cannot stat: %m\n", argv[1]);
                    return 98;
            }
            q = open("/dev/raw", O_RDWR);
            if (q == -1) {
                    if (errno == ENOENT) {
                            q = mknod("/dev/raw", S_IFCHR | 0600, makedev(RAW_MAJOR, 0));
                            if (q != -1) {
                                    q = open("/dev/raw", O_RDWR);
                            }
                    }
                    if (q == -1) {
                            perror("Opening /dev/raw");
                            exit(99);
                    }
            }
            for (min = 1; min < 256; min++) {
                    struct raw_config_request rcr;
                    int err;
                    
                    rcr.raw_minor = min;
                    rcr.block_major = major(stb.st_rdev);
                    rcr.block_minor = minor(stb.st_rdev);
                    err = ioctl(q, RAW_SETBIND, &rcr);
                    if (!err)
                            break;
                    if (errno != EBUSY) {
                            fprintf(stderr, "Unexpected error from mapping raw %d: ioctl: %m\n", min);
                    }
            }
            if (min >= 256) {
                    close(q);
                    fprintf(stderr, "No usable raw device found...\n");
                    return 97;
            }
            sprintf(rawsubdev, "/dev/raw%u", min);
            mfd = open(rawsubdev, O_RDONLY);
            if (mfd == -1) {
                    if (errno == ENOENT) {
                            mfd = mknod(rawsubdev, S_IFCHR | 0600, makedev(RAW_MAJOR, min));
                            if (mfd != -1) {
                                    mfd = open(rawsubdev, O_RDWR);
                            }
                    }
                    if (mfd == -1) {
                            fprintf(stderr, "Opening %s: %m\n", rawsubdev);
                            exit(99);
                    }
            }

            printf("Go\n");
            
            mlockall(MCL_CURRENT); /* so we do not swap out code, so
                                      signaling through pipes works as
                                      expected... */
            
            x4768();
            close(mfd);
            close(q);
            return 0;
    }

    When I started it as 'strace -f -o logx -r ./oopsdemo /dev/hda', I was rewarded with

    Go
    Process 30900 attached
    Process 30901 attached
    2a: 33
    1a: 33
    1b: 63
    Process 30900 detached
    Process 30901 detached

    (Well, it looks like my HDD is still bootable, so the write probably went
    into the wastebasket... erm, I opened /dev/raw1 with O_RDONLY. How is it possible
    that I could even create a PROT_READ|PROT_WRITE, MAP_SHARED mapping?)

    Looks strange, doesn't it? Both processes could read the memory, and the first process
    could even write to its memory - even after read() failed due to vmtruncate...
    And as the first process forced everything to be swapped out of memory (by touching
    the /dev/zero mmapped pages), the logfile filled up with

    page->mapping == NULL

    (caused by filemap_swap_out... without the patch (see the URL below) you'll get
    a nice oops)

    Another failure mode is -EIO or page->mapping == NULL in filemap_write_page, when
    invoked through do_exit -> ... -> filemap_sync -> ... -> filemap_write_page.

    For other details, look at http://platan.vc.cvut.cz/listit. From my findings it
    looks to me like it has something to do with holding a page reference
    while vmtruncate runs, but I do not know for sure... You can try playing with
    the #if [01] switches to get different failure modes...

    I know that only root can perform operations on /dev/raw, but... I'm not sure that you
    have to play with raw devices to cause this problem... There are other places
    which increment page->count temporarily... Or is something else wrong?
                                                    Best regards,
                                                            Petr Vandrovec
                                                            vandrove@vc.cvut.cz

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    Please read the FAQ at http://www.tux.org/lkml/



    This archive was generated by hypermail 2b29 : Fri Oct 06 2000 - 13:29:58 EDT