本文仅做理性上的愉悦,无实际用途。
scanf实际的调用
我们直接使用的scanf其实是这样写的
/*
 * scanf - formatted-input entry point.
 *
 * format - format string; the variadic arguments that follow it are
 *          collected into a va_list and forwarded.
 *
 * Forwards to vscanf_fn() with _input_l as the input routine and NULL as
 * the locale argument, and returns vscanf_fn()'s result unchanged.
 */
int __cdecl scanf(const char *format, ...)
{
    va_list arglist;
    int retval;

    va_start(arglist, format);
    retval = vscanf_fn(_input_l, format, NULL, arglist);
    /* Every va_start must be paired with a va_end in the same function. */
    va_end(arglist);

    return retval;
}
我们可以看到,它其实用到了这三个名字:va_list(类型)、va_start(宏)和 vscanf_fn(函数)
我们跳转到vscanf_fn的实现
/*
 * vscanf_fn - stdin 'SCAN', 'F'ormatted: run an input routine against stdin
 * while the stream is locked.
 *
 * inputfn  - input routine that performs the actual scan
 * format   - format string; checked against NULL before anything else
 * plocinfo - locale argument handed through to inputfn
 * arglist  - variadic arguments handed through to inputfn
 *
 * Returns whatever inputfn returns. A NULL format is handled by
 * _VALIDATE_RETURN with EINVAL / EOF (presumably errno value and return
 * value -- confirm against the macro definition).
 */
int __cdecl vscanf_fn(INPUTFN inputfn,
                      const char *format,
                      _locale_t plocinfo,
                      va_list arglist)
{
    int result = 0;

    _VALIDATE_RETURN( (format != NULL), EINVAL, EOF);

    _lock_str2(0, stdin);
    __try {
        result = inputfn(stdin, format, plocinfo, arglist);
    }
    __finally {
        /* Unlock lives in __finally so it is paired with the lock above. */
        _unlock_str2(0, stdin);
    }

    return result;
}
/***
*int _input(stream, format, arglist), static int input(format, arglist)
*
*Purpose:
*       get input items (data items or literal matches) from the input stream
*       and assign them if appropriate to the items thru the arglist. this
*       function is intended for internal library use only, not for the user
*
*       The _input entry point is for the normal scanf() functions
*       The input entry point is used when compiling for _cscanf() [CPRFLAG
*       defined] and is a static function called only by _cscanf() -- reads from
*       console.
*
*       This code also defines _input_s, which works differently for %c, %s & %[.
*       For these, _input_s first picks up the next argument from the variable
*       argument list & uses it as the maximum size of the character array pointed
*       to by the next argument in the list.
*
*Entry:
*       FILE *stream - file to read from
*       char *format - format string to determine the data to read
*       arglist - list of pointer to data items
*
*Exit:
*       returns number of items assigned and fills in data items
*       returns EOF if error or EOF found on stream before 1st data item matched
*
*Exceptions:
*
*******************************************************************************/
有几个关键函数:
/*
 * _inc - fetch the next character from a stream.
 *
 * fileptr - stream to read from
 *
 * Returns the value produced by _gettc_nolock() unchanged.
 */
static _TINT __cdecl _inc(FILE *fileptr)
{
    _TINT next = _gettc_nolock(fileptr);

    return next;
}
_inc的功能是调出缓冲区第一个字符
/*
 * _un_inc - push a character back onto a stream.
 *
 * chr     - character to push back; _TEOF is ignored
 * fileptr - stream that receives the character
 */
static void __cdecl _un_inc(_TINT chr, FILE *fileptr)
{
    /* Nothing to push back for the end-of-file marker. */
    if (chr == _TEOF) {
        return;
    }

    _ungettc_nolock(chr, fileptr);
}
_un_inc函数,将刚才_inc调出的字符重新放回缓冲区(如果该字符是EOF则不放回)
/*
 * _whiteout - consume characters from a stream until a non-whitespace
 * character or _TEOF is read.
 *
 * counter - incremented once for every character read, including the one
 *           that ends the scan
 * fileptr - stream to read from
 *
 * Returns the character that ended the scan (the first non-whitespace
 * character, or _TEOF).
 */
static _TINT __cdecl _whiteout(int *counter, FILE *fileptr)
{
    _TINT current;

    for (;;) {
        *counter += 1;
        current = _inc(fileptr);

        /* Stop on end of input or on the first non-blank character. */
        if (current == _TEOF || !_istspace((_TUCHAR)current)) {
            return current;
        }
    }
}
_whiteout函数,将从缓冲区开头开始的所有连续空白字符调出
最后一步检查缓冲区,如果缓冲区可读字符为0,那么清空缓冲区
while (*format) { if (_istspace((_TUCHAR)*format)) { UN_INC(EAT_WHITE()); /* put first non-space char back */ do { tch = *++format; } while (_istspace((_TUCHAR)tch)); continue; ………………
这里的UN_INC(EAT_WHITE()),是把当初EAT_WHITE读出的第一个非空白字符再放入缓冲区。
上面代码完成对键盘缓冲区中空白符的清理,直到正常读取第一个字符。
当读入%号,进行处理:
if (_T('%') == *format && _T('%') != *(format + 1))
解析完格式后,开始真正的实现,我们以%d做例子:
if (_T('^') == *scanptr) { ++scanptr; --reject; /* set reject to 255 */ } /* Allocate "table" on first %[] spec */#if ALLOC_TABLE if (table == NULL) { table = (char*)_malloc_crt(TABLESIZE); if ( table == NULL) goto error_return; malloc_flag = 1; }zuolizi#endif /* ALLOC_TABLE */ memset(table, 0, TABLESIZE); if (LEFT_BRACKET == comchr) if (_T(']') == *scanptr) { prevchar = _T(']'); ++scanptr; table[ _T(']') >> 3] = 1 << (_T(']') & 7); } while (_T(']') != *scanptr) { rngch = *scanptr++; if (_T('-') != rngch || !prevchar || /* first char */ _T(']') == *scanptr) /* last char */ table[(prevchar = rngch) >> 3] |= 1 << (rngch & 7); else { /* handle a-z type set */ rngch = *scanptr++; /* get end of range */ if (prevchar < rngch) /* %[a-z] */ last = rngch; else { /* %[z-a] */ last = prevchar; prevchar = rngch; } /* last could be 0xFF, so we handle it at the end of the for loop */ for (rngch = prevchar; rngch < last; ++rngch) { table[rngch >> 3] |= 1 << (rngch & 7); } table[last >> 3] |= 1 << (last & 7); prevchar = 0; } }
reject是反转标记:如果出现 ^ ,则 reject = 0xFF,方便之后用异或运算(^)对查表结果进行反转。
对于[ ]字符集,有一个char table[32]来保存256个ascii字符。(此处每个char为8bits,所以有32组可以完全包含256个ascii字符)
微软对table中字符做了这样的处理:
table[rngch >> 3] |= 1 << (rngch & 7);
即:将所读的字符分到32组中【rngch>>3相当于除以8】,每个table[n]有8bits,rngch & 7 选出组内对应的bit,出现过的字符对应的bit会被置为1,未出现则为0,这样就完美囊括了256个字符。
判断字符是否存在,直接这样处理:
(table[ch >> 3] ^ reject) & (1 << (ch & 7))
%d跳过了\n的读取,继续读取下一个字符。
if (_T('%') == *format && _T('%') != *(format + 1)) { …………………… ++format; /* skip to next char */ } else /* ('%' != *format) */ { ……………………… }
在读代码时候读到一个函数 hextodec
/*
 * _hextodec - map a hexadecimal digit character onto a code relative to
 * _T('0').
 *
 * chr - character to convert
 *
 * Returns chr itself when _ISDIGIT(chr) holds. Otherwise the case
 * difference (_T('a') - _T('A')) is masked out and the letter's offset
 * from _T('A') is returned as _T('0') + 10 + offset.
 */
static _TINT __cdecl _hextodec(_TCHAR chr)
{
    if (_ISDIGIT(chr)) {
        return chr;
    }

    /* Clearing the case bit folds lower-case letters onto upper-case ones. */
    return (chr & ~(_T('a') - _T('A'))) - _T('A') + 10 + _T('0');
}
将读取的16进制字符 0 - F 转成以 '0' 为基准的编码:数字字符原样返回,字母 A-F(或 a-f)返回 '0' + 10 ~ '0' + 15,结果减去 '0' 即为对应的10进制数值 0~15。然后scanf就结束了
我们有一个非常简单的scanf的实现(臭不要脸的直接调用标准库的vscanf)
#include <stdarg.h>
#include <stdio.h>

/*
 * my_scanf - minimal scanf look-alike that forwards to vscanf().
 *
 * fmt - scanf-style format string; the remaining arguments are the
 *       pointers that receive the converted values.
 *
 * Returns the value reported by vscanf(): the number of items assigned,
 * or EOF if input fails before the first conversion.
 */
int my_scanf(const char *fmt, ...)
{
    va_list args;
    int ret;

    va_start(args, fmt);
    ret = vscanf(fmt, args); /* propagate the result instead of dropping it */
    va_end(args);

    return ret;
}

/* Reads one integer from stdin and echoes it back. */
int main(void)
{
    int a;

    /* Do not print `a` unless a value was actually stored in it. */
    if (my_scanf("%d", &a) != 1) {
        return 1;
    }

    printf("%d", a);
    return 0;
}