More static analysis with CIL

Years ago I played around with CIL to analyze libvirt. More recently Dan used CIL to analyze libvirt’s locking code.

We didn’t get so far either time, but I’ve been taking a deeper look at CIL in an attempt to verify error handling in libguestfs.

Here is my partly working code so far.

(*
 * Analyse libguestfs APIs to find error overwriting.
 * Copyright (C) 2008-2013 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 * Author: Richard W.M. Jones <rjones@redhat.com>
 *)

open Unix
open Printf

open Cil

let debug = ref false

(* Set of ints. *)
module IntSet = Set.Make (struct type t = int let compare = compare end)

(* A module for storing any set (unordered list) of functions. *)
module FunctionSet = Set.Make (
  struct
    type t = varinfo
    let compare v1 v2 = compare v1.vid v2.vid
  end
)

(* Directed graph of functions.
 *
 * Function = a node in the graph
 * FunctionDigraph = the directed graph
 * FunctionPathChecker = path checker module using Dijkstra's algorithm
 *)
module Function =
struct
  type t = varinfo
  let compare f1 f2 = compare f1.vid f2.vid
  let hash f = Hashtbl.hash f.vid
  let equal f1 f2 = f1.vid = f2.vid
end
module FunctionDigraph = Graph.Imperative.Digraph.Concrete (Function)
module FunctionPathChecker = Graph.Path.Check (FunctionDigraph)

(* Module used to analyze the paths through each function. *)
module ErrorCounter =
struct
  let name = "ErrorCounter"
  let debug = debug

  (* Our current state is very simple, just the number of error
   * function calls did encountered up to this statement.
   *)
  type t = int

  let copy errcalls = errcalls

  (* Start data for each statement. *)
  let stmtStartData = Inthash.create 97

  let printable errcalls = sprintf "(errcalls=%d)" errcalls

  let pretty () t = Pretty.text (printable t)

  let computeFirstPredecessor stmt x = x (* XXX??? *)

  let combinePredecessors stmt ~old:old_t new_t =
    if old_t = new_t then None
    else Some new_t

  (* This will be initialized after we have calculated the set of all
   * functions which can call an error function, in main() below.
   *)
  let error_functions_set = ref FunctionSet.empty

  (* Handle a Cil.Instr. *)
  let doInstr instr _ =
    match instr with
    (* A call to an error function. *)
    | Call (_, Lval (Var callee, _), _, _)
        when FunctionSet.mem callee !error_functions_set ->
      Dataflow.Post (fun errcalls -> errcalls+1)

    | _ -> Dataflow.Default

  (* Handle a Cil.Stmt. *)
  let doStmt _ _ = Dataflow.SDefault

  (* Handle a Cil.Guard. *)
  let doGuard _ _ = Dataflow.GDefault

  (* Filter statements we've seen already to prevent loops. *)
  let filter_set = ref IntSet.empty
  let filterStmt { sid = sid } =
    if IntSet.mem sid !filter_set then false
    else (
      filter_set := IntSet.add sid !filter_set;
      true
    )

  (* Initialize the module before each function that we examine. *)
  let init stmts =
    filter_set := IntSet.empty;
    Inthash.clear stmtStartData;
    (* Add the initial statement(s) to the hash. *)
    List.iter (fun stmt -> Inthash.add stmtStartData stmt.sid 0) stmts
end

module ForwardsErrorCounter = Dataflow.ForwardsDataFlow (ErrorCounter)

(* The always useful filter + map function. *)
let rec filter_map f = function
  | [] -> []
  | x :: xs ->
      match f x with
      | Some y -> y :: filter_map f xs
      | None -> filter_map f xs

let rec main () =
  (* Read the list of input C files. *)
  let files =
    let chan = open_process_in "find src -name '*.i' | sort" in
    let files = input_chan chan in
    if close_process_in chan <> WEXITED 0 then
      failwith "failed to read input list of files";
    if files = [] then
      failwith "no input files; is the program running from the top directory? did you compile with make -C src CFLAGS=\"-save-temps\"?";
    files in

  (* Load and parse each input file. *)
  let files =
    List.map (
      fun filename ->
        printf "loading %s\n%!" filename;
        Frontc.parse filename ()
    ) files in

  (* Merge the files. *)
  printf "merging files\n%!";
  let sourcecode = Mergecil.merge files "libguestfs" in

  (* CFG analysis. *)
  printf "computing control flow\n%!";
  Cfg.computeFileCFG sourcecode;

  let functions =
    filter_map (function GFun (f, loc) -> Some (f, loc) | _ -> None)
      sourcecode.globals in

  (* Examine which functions directly call which other functions. *)
  printf "computing call graph\n%!";
  let call_graph = make_call_graph functions in
  (*
  FunctionDigraph.iter_edges (
    fun caller callee ->
      printf "%s calls %s\n" caller.vname callee.vname
  ) call_graph;
  *)

  (* The libguestfs error functions.  These are global function names,
   * but to be any use to us we have to look these up in the list of
   * all global functions (ie. 'functions') and turn them into the
   * corresponding varinfo structures.
   *)
  let error_function_names = [ "guestfs_error_errno";
                               "guestfs_perrorf" ] in

  let find_function name =
    try List.find (fun ({ svar = { vname = n }}, _) -> n = name) functions
    with Not_found -> failwith ("function '" ^ name ^ "' does not exist")
  in
  let error_function_names = List.map (
    fun f -> (fst (find_function f)).svar
  ) error_function_names in

  (* Get a list of functions that might (directly or indirectly) call
   * one of the error functions.
   *)
  let error_functions, non_error_functions =
    functions_which_call call_graph error_function_names functions in

  (*
  List.iter (
    fun f -> printf "%s can call an error function\n" f.vname
  ) error_functions;

  List.iter (
    fun f -> printf "%s can NOT call an error function\n" f.vname
  ) non_error_functions;
  *)

  (* Save the list of error functions in a global set for fast lookups. *)
  let set =
    List.fold_left (
      fun set elt -> FunctionSet.add elt set
    ) FunctionSet.empty error_functions in
  ErrorCounter.error_functions_set := set;

  (* Analyze each top-level function to ensure it calls an error
   * function exactly once on error paths, and never on normal return
   * paths.
   *)
  printf "analyzing correctness of error paths\n%!";
  List.iter compute_error_paths functions;

  ()

(* Make a directed graph of which functions directly call which other
 * functions.
 *)
and make_call_graph functions =
  let graph = FunctionDigraph.create () in

  List.iter (
    fun ({ svar = caller; sallstmts = sallstmts }, _) ->
      (* Evaluate which other functions 'caller' calls.  First pull
       * out every 'Call' instruction anywhere in the function.
       *)
      let insns =  List.concat (
        filter_map (
          function
          | { skind = Instr insns } -> Some insns
          | _ -> None
        ) sallstmts
      ) in
      let calls = List.filter (function Call _ -> true | _ -> false) insns in
      (* Then examine what function is being called at each place. *)
      let callees = filter_map (
        function
        | Call (_, Lval (Var callee, _), _, _) -> Some callee
        | _ -> None
      ) calls in

      List.iter (
        fun callee ->
          FunctionDigraph.add_edge graph caller callee
      ) callees
  ) functions;

  graph

(* [functions_which_call g endpoints functions] partitions the
 * [functions] list, returning those functions that call directly or
 * indirectly one of the functions in [endpoints], and a separate list
 * of functions which do not.  [g] is the direct call graph.
 *)
and functions_which_call g endpoints functions =
  let functions = List.map (fun ({ svar = svar }, _) -> svar) functions in

  let checker = FunctionPathChecker.create g in
  List.partition (
    fun f ->
      (* Does a path exist from f to any of the endpoints? *)
      List.exists (
        fun endpoint ->
          try FunctionPathChecker.check_path checker f endpoint
          with
          (* It appears safe to ignore this exception.  It seems to
           * mean that this function is in a part of the graph which
           * is completely disconnected from the other part of the graph
           * that contains the endpoint.
           *)
          | Invalid_argument "[ocamlgraph] iter_succ" -> false
      ) endpoints
  ) functions

and compute_error_paths ({ svar = svar } as f, loc) =
  (*ErrorCounter.debug := true;*)

  (* Find the initial statement in this function (assumes that the
   * function can only be entered in one place, which is normal for C
   * functions).
   *)
  let initial_stmts =
    match f.sbody.bstmts with
    | [] -> []
    | first::_ -> [first] in

  (* Initialize ErrorCounter. *)
  ErrorCounter.init initial_stmts;

  (* Compute the error counters along paths through the function. *)
  ForwardsErrorCounter.compute initial_stmts;

  (* Process all Return statements in this function. *)
  List.iter (
    fun stmt ->
      try
        let errcalls = Inthash.find ErrorCounter.stmtStartData stmt.sid in

        match stmt with
        (* return -1; *)
        | { skind = Return (Some i, loc) } when is_literal_minus_one i ->
          if errcalls = 0 then
            printf "%s:%d: %s: may return an error code without calling error/perrorf\n"
              loc.file loc.line svar.vname
          else if errcalls > 1 then
            printf "%s:%d: %s: may call error/perrorf %d times (more than once) along an error path\n"
          loc.file loc.line svar.vname errcalls

        (* return 0; *)
        | { skind = Return (Some i, loc) } when is_literal_zero i ->
          if errcalls >= 1 then
            printf "%s:%d: %s: may call error/perrorf along a non-error return path\n"
              loc.file loc.line svar.vname

        (* return; (void return) *)
        | { skind = Return (None, loc) } ->
          if errcalls >= 1 then
            printf "%s:%d: %s: may call error/perrorf and return void\n"
              loc.file loc.line svar.vname

        | _ -> ()

      with
        Not_found ->
          printf "%s:%d: %s: may contain unreachable code\n"
            loc.file loc.line svar.vname
  ) f.sallstmts

(* Some convenience CIL matching functions. *)
and is_literal_minus_one = function
  | Const (CInt64 (-1L, _, _)) -> true
  | _ -> false

and is_literal_zero = function
  | Const (CInt64 (0L, _, _)) -> true
  | _ -> false

(* Convenient routine to load the contents of a channel into a list of
 * strings.
 *)
and input_chan chan =
  let lines = ref [] in
  try while true; do lines := input_line chan :: !lines done; []
  with End_of_file -> List.rev !lines

and input_file filename =
  let chan = open_in filename in
  let r = input_chan chan in
  close_in chan;
  r

let () =
  try main ()
  with 
    exn ->
      prerr_endline (Printexc.to_string exn);
      Printexc.print_backtrace Pervasives.stderr;
      exit 1

Advertisement

8 Comments

Filed under Uncategorized

8 responses to “More static analysis with CIL

  1. Gabriel

    Do not hesitate on the CIL-users mailing-list if you have any question!

  2. Gabriel

    You might be interested in the extensive tutorial Zach posted to the list recently: http://www.inf.ethz.ch/personal/azachary/teaching/ciltut.pdf

    Source is available too: hg clone https://bitbucket.org/zanderso/cil-template

    I planed to improve the doc and website, but I’ve postponed this for a (very) long time. Your post is welcome in reminding me to do it as soon as possible.

  3. Julien Signoles

    There is also Frama-C (http://frama-c.com), an extensible open-source platform based upon Cil and dedicated to the analysis of C programs.

    • rich

      Right — we even package Frama-C in Fedora. However I’ve never really been sure what frama-c is. It starts up a GUI (so no use for our automated test tools). There’s a lot of it and it was a major achievement just to package it. It has a peculiar website

      • Julien Signoles

        There are both a GUI and a command-line tool (namely frama-c-gui and frama-c), so you could use it for your automated test tools. Maybe the best pointer to learn what Frama-C is, is the SEFM’12 reference paper : “Frama-C, A Software Analysis Perspective”, and also the “Frama-C User Manual”. Next, if you want to learn how to write your own Frama-C analyzer (in OCaml), you would read the “Frama-C Plug-in Development Guide”.

      • rich

        Thanks — I will read these with interest.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

This site uses Akismet to reduce spam. Learn how your comment data is processed.