Sunday, July 24, 2016

Code behaving differently in C90, C99, C11, C++98, and C++11

There are some subtle differences between the revisions of the C standard that makes it possible to create programs that behaves differently depending on if they are compiled as C90, C99, or C11. Similarly, C++ is mostly a superset of C, but there are constructs that produce different results for C and C++.

This is used in Don Yang's contribution to the 2015 International Obfuscated C Code Contest to create a program that produces different output depending on if it is compiled as C89, C99, C11, C++98, C++11. For C90 it prints stars of the form
*********************************************              ***               **
***********************     *****************             ******             **
*********************        ****************           **********           **
********************         ****************         **************         **
******        ******         *****************     ****************************
******           **          **************************************************
******                      *************************(O************************
*******                     *     *********************************************
*********                              ****************************************
***********                             ***************************************
************                            ***************************************
*********                             *****************************************
*******                     ***************************************************
******                       ****************)d));o((d=************************
******           **          **************************************************
******         *****         *********************      ***********************
********************(         O******************        **********************
**********************       *******************          *********************
************************     *******************          **************)p)-p);
o(d-(p=*****************************************          *********************
For C99 the stars have eyes, C++11 prints circles, etc. (There is more to this. The program reads text from standard input, and the output is obfuscated C90 source code that prints that text — all the * characters are pointer dereferences!)

The source code for the program is a little bit hard to read
                                           #define r(R) R"()"
                          /*[*/#include  /**/<stdio.h>
                   float I,bu,k,i,F,u,U,K,O;char o[5200];int
              #define R(U) (sizeof('U')==1||sizeof(U"1"[0])==1)
            h=0,t=-1,m=80,n=26,d,g,p=0,q=0,v=0,y=112,x=40;  float
           N(float/*x*/_){g=1<<30;d=-~d*1103515245&--g;return  d*_
          /g;}void/**/w(int/**/_){if(t<0){for(g=0;g<5200;o[g++   ]=
          0);for(;g;o[g+79]=10)g-=80;for(t=37;g<62;o[80+g++]=32)   ;
        n>1600&&R()){O=0;F=(x=0x1!=sizeof(' '))?k=1+N(2),i=12-k+N(
        8),N(4):(k=17+N(5),i=0,r()[0]?O=.1:  0);for(u=U=-.05;u<32;
        U=k+i+i*.5*sin((u+=.05)+F))for( K=0   ;K< U;K+=.1)if((bu=K*
       sin(u/5),g=I+cos( u/5) *K)>=0&&g  <     79  )o[g+(int)(t+44+
       bu*(.5-(bu>0?3*O:  O)   ) )%64*  80      ]  =32;x*=02//* */2
      -1;n=O+x?n=I+(x?0   :N     (k)-   k           /2),g=(t+42  )%
      64,m=-~g%64,x?g=m          =-~        m%64:0  ,n>5?o[g*80   +
     n-3]=o[m*80+n-3]=       0:   0              ,n <75?o[g*80+n
     +2]=o[m*80+n+2]=0   :0:0;                      x=I;}h=-~h%64
    ;m=0;}putchar((g=o [h*                          80+m++])?g:_);
   if(g){w(_);}}void W                               (const char*_
  ){for(;*_;w(*_++));}                               int main(int a
  ,char**_){while(a--)d              +=_[a          ]-(char*)0;W( \
 "#include<stdio.h>typed"             "e"         "f\40int\40O;v"
 "oid o(O _){putchar(_);}O"                    "\40main(){O"  ""
"*_[512],**p=_,**d,b,q;for(b=0;b"        "++<512;p=_+q)_[q"    \
"=(p-_+1)*9%512]=(O*)p;") ;      for(;(g= getchar())-EOF;p=
q){q=p;for(v=512;p-q-g&&q-p-              g;  v--)q=-~q*9%512
;W("o(");if(p>q)w(y),w(45);w(                      40);w(y^=20
);w(075);for(a=0;a<v;a++)w(42);                      for(W("(O**"
 );a--;w(42)){}w(41);w(y^024);w(                      41);if(p<=q)w(
   45),w(y^20);W(");");}for(a=7;a-6                      ;W(a<6?"{;}":""
      ))for(a  =0;a  <6 &&   !o[h*80+m                       +a];a++){}W("r"
         "etu"  /*J   */       "rn+0;}\n"                             );return
             /*                      "#*/0                                   ;}
but it is, as far as I can tell, using three tricks in order to detect which C or C++ dialect is used:

  • // comments
    C90 does not have // comments, so constructs of the form
    int i = 2 //**/2
    can be used to differentiate it from the other C and C++ revisions, as C90 compiles this as
    int i = 2 //**/2
    while C++ and the more recent revisions of C compiles this as
    int i = 2 //**/2
  • Type of character constants
    Character constants, such as 'a', have type int in C while C++ use the type char. This means that sizeof('a') evaluates to a different value for C and C++.
  • Wide string literals
    C11 and C++11 have wide string literals where for example U"hello!" is a string with characters of type char32_t. This can be used with a macro
    #define R(U) sizeof(U"a"[0])
    that is used as R(""). For C11 and C++ this expands to
    which evaluates to 4, while the older revisions of the standards treat U and "a" as two tokens, and the macro expands to
    which evaluates to 1.


  1. #define r(R) R"()"

    C++11 Raw string literals, which are used in r()[0], so in c++11 it results '\0', in prior standards - '('.

  2. Oh, I missed that. So the program is using four tricks!