上篇文章Android脱壳(基于第一代加壳)介绍了如何使用ida pro在运行Dalvik虚拟机的Android4.4手机上脱壳。其中有一个很重要的步骤就是给libdvm.so的_Z21dvmDexFileOpenPartialPKviPP6DvmDex函数下断点,用于确定dex在内存空间中的起始位置和大小,那这篇文章就挖一下Dalvik虚拟机是如何加载dex文件的。

Android源码

## 开始

Dalvik虚拟机介绍

  • Dalvik虚拟机在加载dex之前,会判断本地是否存在odex文件(参见:Dex/ELF文件格式、ODex与Dex比较)
  • 如果存在,则直接加载odex
  • 如果不存在,使用dexopt优化dex文件为odex(odex的后缀类型仍然为dex)
  • 除此之外,dexopt还承担dex的校验工作。它会检查DexHeader中的时间戳和crc校验值,从而判断该dex是否合法(弱校验,可欺骗绕过)

由上可知,dexopt是APP运行过程中很重要的一环。因此,我们只需在dexopt中找到加载dex的那段代码,即可跟踪Dalvik虚拟机加载dex的流程。

dexopt的主函数位于OptMain.cpp

/*
 * Entry point for dexopt.  Picks the handler named by the first
 * argument; anything else prints a short usage note and fails.
 */
int main(int argc, char* const argv[])
{
    set_process_name("dexopt");
    setvbuf(stdout, NULL, _IONBF, 0);   /* no buffering on stdout */

    const char* mode = (argc > 1) ? argv[1] : NULL;
    if (mode != NULL && !strcmp(mode, "--zip"))
        return fromZip(argc, argv);
    if (mode != NULL && !strcmp(mode, "--dex"))
        return fromDex(argc, argv);
    if (mode != NULL && !strcmp(mode, "--preopt"))
        return preopt(argc, argv);

    /* No mode, or a mode we do not know: explain and report failure. */
    fputs(
        "Usage:\n\n"
        "Short version: Don't use this.\n\n"
        "Slightly longer version: This system-internal tool is used to\n"
        "produce optimized dex files. See the source code for details.\n",
        stderr);

    return 1;
}

跟踪上面代码第14行的 return fromDex(argc, argv); 进入fromDex(即参数为 --dex 的分支)。
这里面先进行的是一些参数和环境的判断,配置dexOptMode、verifyMode,初始化vm相关环境,然后进入dvmContinueOptimization函数继续接下来的优化过程。

/*
 * Handle "--dex": optimize a DEX that already sits inside an open file.
 *
 * Arguments expected after the program name, in order:
 *   --dex vmBuildVersion fd offset length debugFileName modWhen crc flags
 *   [bootclasspath entry ...]
 *
 * Any arguments left after "flags" are joined with ':' to form the
 * bootclasspath handed to dvmPrepForDexOpt(); with none left, an empty
 * string is used.
 *
 * Returns 0 on success, -1 on any failure (bad arguments, allocation
 * failure, VM init failure, or optimization failure).
 */
static int fromDex(int argc, char* const argv[])
{
    int result = -1;
    bool vmStarted = false;
    char* bootClassPath = NULL;
    int fd, flags, vmBuildVersion;
    long offset, length;
    const char* debugFileName;
    u4 crc, modWhen;
    char* endp;
    bool onlyOptVerifiedDex = false;
    DexClassVerifyMode verifyMode;
    DexOptimizerMode dexOptMode;

    /* program name + "--dex" + 8 mandatory values */
    if (argc < 10) {
        /* don't have all mandatory args */
        ALOGE("Not enough arguments for --dex (found %d)", argc);
        goto bail;
    }

    /* skip "--dex" */
    argc--;
    argv++;

    /*
     * Extract the args.
     */
    GET_ARG(vmBuildVersion, strtol, "bad vm build");
    if (vmBuildVersion != DALVIK_VM_BUILD) {
        ALOGE("DexOpt: build rev does not match VM: %d vs %d",
            vmBuildVersion, DALVIK_VM_BUILD);
        goto bail;
    }
    GET_ARG(fd, strtol, "bad fd");
    GET_ARG(offset, strtol, "bad offset");
    GET_ARG(length, strtol, "bad length");
    /* the file name is taken verbatim, so it is consumed by hand */
    debugFileName = *++argv;
    --argc;
    GET_ARG(modWhen, strtoul, "bad modWhen");
    GET_ARG(crc, strtoul, "bad crc");
    GET_ARG(flags, strtol, "bad flags");

    ALOGV("Args: fd=%d off=%ld len=%ld name='%s' mod=%#x crc=%#x flg=%d (argc=%d)",
        fd, offset, length, debugFileName, modWhen, crc, flags, argc);
    assert(argc > 0);

    if (--argc == 0) {
        bootClassPath = strdup("");
        if (bootClassPath == NULL) {
            ALOGE("Unable to allocate bootclasspath");
            goto bail;
        }
    } else {
        int i, bcpLen;
        char* const* argp;
        char* cp;

        /* first pass: total length, counting one separator per entry */
        bcpLen = 0;
        for (i = 0, argp = argv; i < argc; i++) {
            ++argp;
            ALOGV("DEP: '%s'", *argp);
            bcpLen += strlen(*argp) + 1;
        }

        cp = bootClassPath = (char*) malloc(bcpLen +1);
        if (bootClassPath == NULL) {
            /* previously unchecked: the copy loop below would write to NULL */
            ALOGE("Unable to allocate %d bytes for bootclasspath", bcpLen + 1);
            goto bail;
        }

        /* second pass: copy the entries, ':'-separated */
        for (i = 0, argp = argv; i < argc; i++) {
            int strLen;

            ++argp;
            strLen = strlen(*argp);
            if (i != 0)
                *cp++ = ':';
            memcpy(cp, *argp, strLen);
            cp += strLen;
        }
        *cp = '\0';

        assert((int) strlen(bootClassPath) == bcpLen-1);
    }
    ALOGV("  bootclasspath is '%s'", bootClassPath);

    /* start the VM partway */

    /* ugh -- upgrade these to a bit field if they get any more complex */
    if ((flags & DEXOPT_VERIFY_ENABLED) != 0) {
        if ((flags & DEXOPT_VERIFY_ALL) != 0)
            verifyMode = VERIFY_MODE_ALL;
        else
            verifyMode = VERIFY_MODE_REMOTE;
    } else {
        verifyMode = VERIFY_MODE_NONE;
    }
    if ((flags & DEXOPT_OPT_ENABLED) != 0) {
        if ((flags & DEXOPT_OPT_ALL) != 0)
            dexOptMode = OPTIMIZE_MODE_ALL;
        else
            dexOptMode = OPTIMIZE_MODE_VERIFIED;
    } else {
        dexOptMode = OPTIMIZE_MODE_NONE;
    }

    if (dvmPrepForDexOpt(bootClassPath, dexOptMode, verifyMode, flags) != 0) {
        ALOGE("VM init failed");
        goto bail;
    }

    vmStarted = true;

    /* do the optimization */
    if (!dvmContinueOptimization(fd, offset, length, debugFileName,
            modWhen, crc, (flags & DEXOPT_IS_BOOTSTRAP) != 0))
    {
        ALOGE("Optimization failed");
        goto bail;
    }

    result = 0;

bail:
    /*
     * In theory we should gracefully shut the VM down at this point.  In
     * practice that only matters if we're checking for memory leaks with
     * valgrind -- simply exiting is much faster.
     *
     * As it turns out, the DEX optimizer plays a little fast and loose
     * with class loading.  We load all of the classes from a partially-
     * formed DEX file, which is unmapped when we're done.  If we want to
     * do clean shutdown here, perhaps for testing with valgrind, we need
     * to skip the munmap call there.
     */
#if 0
    if (vmStarted) {
        ALOGI("DexOpt shutting down, result=%d", result);
        dvmShutdown();
    }
#endif

    /* free(NULL) is a no-op, so this is safe on every failure path */
    free(bootClassPath);
    ALOGV("DexOpt command complete (result=%d)", result);
    return result;
}

而在dvmContinueOptimization函数中,在简单判断dex是否合法之后,会将整个dex文件通过mmap映射到内存中,然后原地重写文件,包括字节的重排序、结构对齐、类验证及字节码优化等,方便后续对文件进行处理。

/*
 * Do the actual optimization.  This is executed in the dexopt process.
 *
 * For best use of disk/memory, we want to extract once and perform
 * optimizations in place.  If the file has to expand or contract
 * to match local structure padding/alignment expectations, we want
 * to do the rewrite as part of the extract, rather than extracting
 * into a temp file and slurping it back out.  (The structure alignment
 * is currently correct for all platforms, and this isn't expected to
 * change, so we should be okay with having it already extracted.)
 *
 * Parameters:
 *   fd          - descriptor that is mmap()ed, read/write and shared,
 *                 from offset 0 for dexOffset + dexLength bytes
 *   dexOffset   - offset of the DEX data within that mapping; rejected
 *                 if smaller than sizeof(DexOptHeader)
 *   dexLength   - length of the DEX data; rejected if smaller than
 *                 sizeof(DexHeader)
 *   fileName    - used for logging in the visible portion
 *   modWhen,crc - not referenced in the visible portion of this excerpt
 *   isBootstrap - stored into gDvm.optimizingBootstrapClass
 *
 * NOTE: this listing is an excerpt.  The rest of the function, including
 * the "bail" label targeted by the goto below, is elided.
 *
 * Returns "true" on success.
 */
bool dvmContinueOptimization(int fd, off_t dexOffset, long dexLength,
    const char* fileName, u4 modWhen, u4 crc, bool isBootstrap)
{
    DexClassLookup* pClassLookup = NULL;
    RegisterMapBuilder* pRegMapBuilder = NULL;

    assert(gDvm.optimizing);

    ALOGV("Continuing optimization (%s, isb=%d)", fileName, isBootstrap);

    assert(dexOffset >= 0);

    /* quick test so we don't blow up on empty file */
    if (dexLength < (int) sizeof(DexHeader)) {
        ALOGE("too small to be DEX");
        return false;
    }
    /* the DEX data must be preceded by space for the opt header */
    if (dexOffset < (int) sizeof(DexOptHeader)) {
        ALOGE("not enough room for opt header");
        return false;
    }

    bool result = false;

    /*
     * Drop this into a global so we don't have to pass it around.  We could
     * also add a field to DexFile, but since it only pertains to DEX
     * creation that probably doesn't make sense.
     */
    gDvm.optimizingBootstrapClass = isBootstrap;

    {
        /*
         * Map the entire file (so we don't have to worry about page
         * alignment).  The expectation is that the output file contains
         * our DEX data plus room for a small header.
         */
        bool success;
        void* mapAddr;
        mapAddr = mmap(NULL, dexOffset + dexLength, PROT_READ|PROT_WRITE,
                    MAP_SHARED, fd, 0);
        if (mapAddr == MAP_FAILED) {
            ALOGE("unable to mmap DEX cache: %s", strerror(errno));
            goto bail;
        }

        /*
         * Decide whether to verify: never for MODE_NONE, only
         * non-bootstrap classes for MODE_REMOTE, always otherwise.
         */
        bool doVerify, doOpt;
        if (gDvm.classVerifyMode == VERIFY_MODE_NONE) {
            doVerify = false;
        } else if (gDvm.classVerifyMode == VERIFY_MODE_REMOTE) {
            doVerify = !gDvm.optimizingBootstrapClass;
        } else /*if (gDvm.classVerifyMode == VERIFY_MODE_ALL)*/ {
            doVerify = true;
        }

        /*
         * Decide whether to optimize: in the VERIFIED and FULL modes
         * optimization simply follows the verification decision.
         */
        if (gDvm.dexOptMode == OPTIMIZE_MODE_NONE) {
            doOpt = false;
        } else if (gDvm.dexOptMode == OPTIMIZE_MODE_VERIFIED ||
                   gDvm.dexOptMode == OPTIMIZE_MODE_FULL) {
            doOpt = doVerify;
        } else /*if (gDvm.dexOptMode == OPTIMIZE_MODE_ALL)*/ {
            doOpt = true;
        }

        /*
         * Rewrite the file.  Byte reordering, structure realigning,
         * class verification, and bytecode optimization are all performed
         * here.
         *
         * In theory the file could change size and bits could shift around.
         * In practice this would be annoying to deal with, so the file
         * layout is designed so that it can always be rewritten in place.
         *
         * This creates the class lookup table as part of doing the processing.
         */
        success = rewriteDex(((u1*) mapAddr) + dexOffset, dexLength,
                    doVerify, doOpt, &pClassLookup, NULL);

        if (success) {
            DvmDex* pDvmDex = NULL;
            /* start of the DEX data inside the mapping */
            u1* dexAddr = ((u1*) mapAddr) + dexOffset;

            /*
             * dexAddr and dexLength describe the in-memory DEX: its start
             * address and its size.
             */
            if (dvmDexFileOpenPartial(dexAddr, dexLength, &pDvmDex) != 0) {
                ALOGE("Unable to create DexFile");
                success = false;
            } else {
                /*
                 * If configured to do so, generate register map output
                 * for all verified classes.  The register maps were
                 * generated during verification, and will now be serialized.
                 */
                if (gDvm.generateRegisterMaps) {
                    pRegMapBuilder = dvmGenerateRegisterMaps(pDvmDex);
                    if (pRegMapBuilder == NULL) {
                        ALOGE("Failed generating register maps");
                        success = false;
                    }
                }

                /*
                 * Runs even when register map generation failed above.
                 * NOTE(review): presumably refreshes the checksum stored in
                 * the DEX header after the in-place rewrite -- confirm
                 * against updateChecksum().
                 */
                DexHeader* pHeader = (DexHeader*)pDvmDex->pHeader;
                updateChecksum(dexAddr, dexLength, pHeader);

                dvmDexFileFree(pDvmDex);
            }
        }

      ..........
}

而如果重写成功,则会计算出dex在内存中的实际地址(这个地址由mmap映射的起始地址加上dexOffset偏移量确定)。
这时就会调用dvmDexFileOpenPartial(const void* addr, int len, DvmDex** ppDvmDex),其中第一个和第二个参数就是上篇文章提到的dex起始地址和dex长度,它们分别存放在R0和R1寄存器中。有了这两个参数,我们就能将dex直接从内存中抠出来。

## 未完待续:继续接下来的加载流程