Dalvik虚拟机加载Dex的流程
上篇文章Android脱壳(基于第一代加壳)介绍了如何使用ida pro在运行Dalvik虚拟机的Android4.4手机上脱壳。其中有一个很重要的步骤就是给libdvm.so的_Z21dvmDexFileOpenPartialPKviPP6DvmDex函数下断点,用于确定dex在内存空间中的起始位置和大小,那这篇文章就挖一下Dalvik虚拟机是如何加载dex文件的。
## 开始
- Dalvik虚拟机在加载dex之前,会判断本地是否存在odex文件(Dex\ELF文件格式, ODex与Dex比较)
- 如果存在,则直接加载odex
- 如果不存在,使用dexopt优化dex文件为odex(odex的后缀类型仍然为dex)
- 除此之外,dexopt还担任dex的校验功能。它会判断DexHeader中的时间戳和crc校验值,从而判断该dex是否合法(弱校验,可欺骗绕过)
由此可知,dexopt是APP运行过程中很重要的一环。因此,我们只需在dexopt中找到加载dex的那段代码,即可跟踪Dalvik虚拟机加载dex的流程。
dexopt的主函数位于OptMain.cpp
/*
 * dexopt entry point.
 *
 * Selects the operating mode from the first command-line argument
 * ("--zip", "--dex" or "--preopt") and hands the full argument list to
 * the matching handler, returning whatever that handler returns.  If no
 * mode is given, or it is not recognized, prints a usage message to
 * stderr and returns 1.
 */
int main(int argc, char* const argv[])
{
    set_process_name("dexopt");

    /* switch stdout to unbuffered mode */
    setvbuf(stdout, NULL, _IONBF, 0);

    if (argc >= 2) {
        const char* mode = argv[1];

        if (!strcmp(mode, "--zip")) {
            return fromZip(argc, argv);
        }
        if (!strcmp(mode, "--dex")) {
            return fromDex(argc, argv);
        }
        if (!strcmp(mode, "--preopt")) {
            return preopt(argc, argv);
        }
    }

    /* missing or unknown mode flag */
    fprintf(stderr,
        "Usage:\n\n"
        "Short version: Don't use this.\n\n"
        "Slightly longer version: This system-internal tool is used to\n"
        "produce optimized dex files. See the source code for details.\n");
    return 1;
}
跟踪main函数中`--dex`分支的`return fromDex(argc, argv);`调用,进入fromDex函数。
这里面先进行的是一些参数和环境的判断,配置dexOptMode、verifyMode,初始化vm相关环境,然后进入dvmContinueOptimization函数继续接下来的优化过程。
/*
 * Handle "dexopt --dex ...": parse the arguments, start the VM part way,
 * and run the optimizer on DEX data inside an already-open file.
 *
 * Arguments expected after "--dex", in order:
 *   vmBuildVersion fd offset length debugFileName modWhen crc flags
 *   [bootclasspath entry ...]
 *
 * Any arguments following "flags" are joined with ':' to form the boot
 * class path handed to dvmPrepForDexOpt(); with none, an empty string is
 * used.
 *
 * NOTE(review): GET_ARG is defined outside this function; it is presumed
 * to advance argv, decrement argc and "goto bail" on a parse error (the
 * manual handling of debugFileName below mirrors that) -- confirm against
 * the macro.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int fromDex(int argc, char* const argv[])
{
    /*
     * All function-scope locals are declared before the first "goto bail"
     * so that no jump crosses an initialization.
     */
    int result = -1;
    bool vmStarted = false;
    char* bootClassPath = NULL;
    int fd, flags, vmBuildVersion;
    long offset, length;
    const char* debugFileName;
    u4 crc, modWhen;
    char* endp;
    bool onlyOptVerifiedDex = false;
    DexClassVerifyMode verifyMode;
    DexOptimizerMode dexOptMode;

    if (argc < 10) {
        /* don't have all mandatory args */
        ALOGE("Not enough arguments for --dex (found %d)", argc);
        goto bail;
    }

    /* skip "--dex" */
    argc--;
    argv++;

    /*
     * Extract the args.
     */
    GET_ARG(vmBuildVersion, strtol, "bad vm build");
    if (vmBuildVersion != DALVIK_VM_BUILD) {
        ALOGE("DexOpt: build rev does not match VM: %d vs %d",
            vmBuildVersion, DALVIK_VM_BUILD);
        goto bail;
    }
    GET_ARG(fd, strtol, "bad fd");
    GET_ARG(offset, strtol, "bad offset");
    GET_ARG(length, strtol, "bad length");

    /* the file name is taken verbatim, so it bypasses GET_ARG */
    debugFileName = *++argv;
    --argc;

    GET_ARG(modWhen, strtoul, "bad modWhen");
    GET_ARG(crc, strtoul, "bad crc");
    GET_ARG(flags, strtol, "bad flags");

    ALOGV("Args: fd=%d off=%ld len=%ld name='%s' mod=%#x crc=%#x flg=%d (argc=%d)",
        fd, offset, length, debugFileName, modWhen, crc, flags, argc);
    assert(argc > 0);

    if (--argc == 0) {
        /* no bootclasspath entries were supplied */
        bootClassPath = strdup("");
        if (bootClassPath == NULL) {
            ALOGE("Unable to allocate bootclasspath");
            goto bail;
        }
    } else {
        int i, bcpLen;
        char* const* argp;
        char* cp;

        /* first pass: total length, one extra byte per entry for ':' */
        bcpLen = 0;
        for (i = 0, argp = argv; i < argc; i++) {
            ++argp;
            ALOGV("DEP: '%s'", *argp);
            bcpLen += strlen(*argp) + 1;
        }

        cp = bootClassPath = (char*) malloc(bcpLen + 1);
        if (bootClassPath == NULL) {
            /* previously unchecked: the copy loop would write through NULL */
            ALOGE("Unable to allocate %d bytes for bootclasspath", bcpLen + 1);
            goto bail;
        }

        /* second pass: copy the entries, separated by ':' */
        for (i = 0, argp = argv; i < argc; i++) {
            int strLen;

            ++argp;
            strLen = strlen(*argp);
            if (i != 0)
                *cp++ = ':';
            memcpy(cp, *argp, strLen);
            cp += strLen;
        }
        *cp = '\0';

        /* one separator fewer than entries, hence bcpLen-1 */
        assert((int) strlen(bootClassPath) == bcpLen-1);
    }
    ALOGV(" bootclasspath is '%s'", bootClassPath);

    /* start the VM partway */

    /* ugh -- upgrade these to a bit field if they get any more complex */
    if ((flags & DEXOPT_VERIFY_ENABLED) != 0) {
        if ((flags & DEXOPT_VERIFY_ALL) != 0)
            verifyMode = VERIFY_MODE_ALL;
        else
            verifyMode = VERIFY_MODE_REMOTE;
    } else {
        verifyMode = VERIFY_MODE_NONE;
    }
    if ((flags & DEXOPT_OPT_ENABLED) != 0) {
        if ((flags & DEXOPT_OPT_ALL) != 0)
            dexOptMode = OPTIMIZE_MODE_ALL;
        else
            dexOptMode = OPTIMIZE_MODE_VERIFIED;
    } else {
        dexOptMode = OPTIMIZE_MODE_NONE;
    }

    if (dvmPrepForDexOpt(bootClassPath, dexOptMode, verifyMode, flags) != 0) {
        ALOGE("VM init failed");
        goto bail;
    }
    vmStarted = true;

    /* do the optimization */
    if (!dvmContinueOptimization(fd, offset, length, debugFileName,
            modWhen, crc, (flags & DEXOPT_IS_BOOTSTRAP) != 0))
    {
        ALOGE("Optimization failed");
        goto bail;
    }

    result = 0;

bail:
    /*
     * In theory we should gracefully shut the VM down at this point. In
     * practice that only matters if we're checking for memory leaks with
     * valgrind -- simply exiting is much faster.
     *
     * As it turns out, the DEX optimizer plays a little fast and loose
     * with class loading. We load all of the classes from a partially-
     * formed DEX file, which is unmapped when we're done. If we want to
     * do clean shutdown here, perhaps for testing with valgrind, we need
     * to skip the munmap call there.
     */
#if 0
    if (vmStarted) {
        ALOGI("DexOpt shutting down, result=%d", result);
        dvmShutdown();
    }
#endif

    free(bootClassPath);
    ALOGV("DexOpt command complete (result=%d)", result);
    return result;
}
而在dvmContinueOptimization函数中,在简单检查dex的长度和偏移是否合法之后,会将整个dex文件通过mmap映射到内存中,方便对文件进行处理;然后原地重写文件,包括字节重排序、结构对齐、类验证及字节码优化等。
/*
* Do the actual optimization. This is executed in the dexopt process.
*
* For best use of disk/memory, we want to extract once and perform
* optimizations in place. If the file has to expand or contract
* to match local structure padding/alignment expectations, we want
* to do the rewrite as part of the extract, rather than extracting
* into a temp file and slurping it back out. (The structure alignment
* is currently correct for all platforms, and this isn't expected to
* change, so we should be okay with having it already extracted.)
*
* Parameters:
*   fd          descriptor of the file to process; it is mmap()ed
*               read/write and shared, starting at file offset 0
*   dexOffset   byte offset of the DEX data within that file; must be at
*               least sizeof(DexOptHeader)
*   dexLength   length of the DEX data; must be at least sizeof(DexHeader)
*   fileName    only used for logging in the portion shown here
*   modWhen     not referenced in the portion shown here
*   crc         not referenced in the portion shown here
*   isBootstrap stored in gDvm.optimizingBootstrapClass
*
* Returns "true" on success.
*
* NOTE(review): this listing is an excerpt -- part of the body is elided
* (see the "....." line near the end), including the "bail" label that
* the mmap failure path jumps to.
*/
bool dvmContinueOptimization(int fd, off_t dexOffset, long dexLength,
const char* fileName, u4 modWhen, u4 crc, bool isBootstrap)
{
DexClassLookup* pClassLookup = NULL;
RegisterMapBuilder* pRegMapBuilder = NULL;
assert(gDvm.optimizing);
ALOGV("Continuing optimization (%s, isb=%d)", fileName, isBootstrap);
assert(dexOffset >= 0);
/* quick test so we don't blow up on empty file */
if (dexLength < (int) sizeof(DexHeader)) {
ALOGE("too small to be DEX");
return false;
}
/* the DEX data must start far enough in to leave room for a DexOptHeader */
if (dexOffset < (int) sizeof(DexOptHeader)) {
ALOGE("not enough room for opt header");
return false;
}
bool result = false;
/*
* Drop this into a global so we don't have to pass it around. We could
* also add a field to DexFile, but since it only pertains to DEX
* creation that probably doesn't make sense.
*/
gDvm.optimizingBootstrapClass = isBootstrap;
{
/*
* Map the entire file (so we don't have to worry about page
* alignment). The expectation is that the output file contains
* our DEX data plus room for a small header.
*/
bool success;
void* mapAddr;
mapAddr = mmap(NULL, dexOffset + dexLength, PROT_READ|PROT_WRITE,
MAP_SHARED, fd, 0);
if (mapAddr == MAP_FAILED) {
ALOGE("unable to mmap DEX cache: %s", strerror(errno));
goto bail;
}
/*
* Decide whether to verify: never for VERIFY_MODE_NONE, only for
* non-bootstrap DEX for VERIFY_MODE_REMOTE, always otherwise.
*/
bool doVerify, doOpt;
if (gDvm.classVerifyMode == VERIFY_MODE_NONE) {
doVerify = false;
} else if (gDvm.classVerifyMode == VERIFY_MODE_REMOTE) {
doVerify = !gDvm.optimizingBootstrapClass;
} else /*if (gDvm.classVerifyMode == VERIFY_MODE_ALL)*/ {
doVerify = true;
}
/*
* Decide whether to optimize: never for OPTIMIZE_MODE_NONE; for
* OPTIMIZE_MODE_VERIFIED or OPTIMIZE_MODE_FULL only when verification
* is also being done; always otherwise.
*/
if (gDvm.dexOptMode == OPTIMIZE_MODE_NONE) {
doOpt = false;
} else if (gDvm.dexOptMode == OPTIMIZE_MODE_VERIFIED ||
gDvm.dexOptMode == OPTIMIZE_MODE_FULL) {
doOpt = doVerify;
} else /*if (gDvm.dexOptMode == OPTIMIZE_MODE_ALL)*/ {
doOpt = true;
}
/*
* Rewrite the file. Byte reordering, structure realigning,
* class verification, and bytecode optimization are all performed
* here.
*
* In theory the file could change size and bits could shift around.
* In practice this would be annoying to deal with, so the file
* layout is designed so that it can always be rewritten in place.
*
* This creates the class lookup table as part of doing the processing.
*/
success = rewriteDex(((u1*) mapAddr) + dexOffset, dexLength,
doVerify, doOpt, &pClassLookup, NULL);
if (success) {
DvmDex* pDvmDex = NULL;
/* address of the DEX data inside the mapping: map base + dexOffset */
u1* dexAddr = ((u1*) mapAddr) + dexOffset;
/* open the rewritten DEX straight from memory (start address, length) */
if (dvmDexFileOpenPartial(dexAddr, dexLength, &pDvmDex) != 0) {
ALOGE("Unable to create DexFile");
success = false;
} else {
/*
* If configured to do so, generate register map output
* for all verified classes. The register maps were
* generated during verification, and will now be serialized.
*/
if (gDvm.generateRegisterMaps) {
pRegMapBuilder = dvmGenerateRegisterMaps(pDvmDex);
if (pRegMapBuilder == NULL) {
ALOGE("Failed generating register maps");
success = false;
}
}
/*
* NOTE(review): the checksum update and the free below run even
* when register map generation failed above. updateChecksum() is
* defined elsewhere; presumably it refreshes the checksum stored
* in the DEX header -- confirm.
*/
DexHeader* pHeader = (DexHeader*)pDvmDex->pHeader;
updateChecksum(dexAddr, dexLength, pHeader);
dvmDexFileFree(pDvmDex);
}
}
/* (remainder of the function is elided in this excerpt) */
..........
}
而如果重写成功,则会获取到dex在内存中的实际地址(这个地址由mmap映射的起始地址加上dexOffset偏移量确定)。
这时就会调用`dvmDexFileOpenPartial(const void* addr, int len, DvmDex** ppDvmDex)`,其中第一和第二个参数,就是上篇文章提到的dex起始地址和dex长度,它们分别存放在R0和R1寄存器。有了这两个参数,我们就能将dex直接从内存中抠出来。
## 未完待续:继续接下来的加载流程