Years ago I played around with CIL to analyze libvirt. More recently Dan used CIL to analyze libvirt’s locking code.
We didn’t get so far either time, but I’ve been taking a deeper look at CIL in an attempt to verify error handling in libguestfs.
Here is my partly working code so far.
(* * Analyse libguestfs APIs to find error overwriting. * Copyright (C) 2008-2013 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see * <http://www.gnu.org/licenses/>. * * Author: Daniel P. Berrange <berrange@redhat.com> * Author: Richard W.M. Jones <rjones@redhat.com> *) open Unix open Printf open Cil let debug = ref false (* Set of ints. *) module IntSet = Set.Make (struct type t = int let compare = compare end) (* A module for storing any set (unordered list) of functions. *) module FunctionSet = Set.Make ( struct type t = varinfo let compare v1 v2 = compare v1.vid v2.vid end ) (* Directed graph of functions. * * Function = a node in the graph * FunctionDigraph = the directed graph * FunctionPathChecker = path checker module using Dijkstra's algorithm *) module Function = struct type t = varinfo let compare f1 f2 = compare f1.vid f2.vid let hash f = Hashtbl.hash f.vid let equal f1 f2 = f1.vid = f2.vid end module FunctionDigraph = Graph.Imperative.Digraph.Concrete (Function) module FunctionPathChecker = Graph.Path.Check (FunctionDigraph) (* Module used to analyze the paths through each function. *) module ErrorCounter = struct let name = "ErrorCounter" let debug = debug (* Our current state is very simple, just the number of error * function calls did encountered up to this statement. *) type t = int let copy errcalls = errcalls (* Start data for each statement. *) let stmtStartData = Inthash.create 97 let printable errcalls = sprintf "(errcalls=%d)" errcalls let pretty () t = Pretty.text (printable t) let computeFirstPredecessor stmt x = x (* XXX??? *) let combinePredecessors stmt ~old:old_t new_t = if old_t = new_t then None else Some new_t (* This will be initialized after we have calculated the set of all * functions which can call an error function, in main() below. *) let error_functions_set = ref FunctionSet.empty (* Handle a Cil.Instr. *) let doInstr instr _ = match instr with (* A call to an error function. *) | Call (_, Lval (Var callee, _), _, _) when FunctionSet.mem callee !error_functions_set -> Dataflow.Post (fun errcalls -> errcalls+1) | _ -> Dataflow.Default (* Handle a Cil.Stmt. *) let doStmt _ _ = Dataflow.SDefault (* Handle a Cil.Guard. *) let doGuard _ _ = Dataflow.GDefault (* Filter statements we've seen already to prevent loops. *) let filter_set = ref IntSet.empty let filterStmt { sid = sid } = if IntSet.mem sid !filter_set then false else ( filter_set := IntSet.add sid !filter_set; true ) (* Initialize the module before each function that we examine. *) let init stmts = filter_set := IntSet.empty; Inthash.clear stmtStartData; (* Add the initial statement(s) to the hash. *) List.iter (fun stmt -> Inthash.add stmtStartData stmt.sid 0) stmts end module ForwardsErrorCounter = Dataflow.ForwardsDataFlow (ErrorCounter) (* The always useful filter + map function. *) let rec filter_map f = function | [] -> [] | x :: xs -> match f x with | Some y -> y :: filter_map f xs | None -> filter_map f xs let rec main () = (* Read the list of input C files. *) let files = let chan = open_process_in "find src -name '*.i' | sort" in let files = input_chan chan in if close_process_in chan <> WEXITED 0 then failwith "failed to read input list of files"; if files = [] then failwith "no input files; is the program running from the top directory? did you compile with make -C src CFLAGS=\"-save-temps\"?"; files in (* Load and parse each input file. *) let files = List.map ( fun filename -> printf "loading %s\n%!" filename; Frontc.parse filename () ) files in (* Merge the files. *) printf "merging files\n%!"; let sourcecode = Mergecil.merge files "libguestfs" in (* CFG analysis. *) printf "computing control flow\n%!"; Cfg.computeFileCFG sourcecode; let functions = filter_map (function GFun (f, loc) -> Some (f, loc) | _ -> None) sourcecode.globals in (* Examine which functions directly call which other functions. *) printf "computing call graph\n%!"; let call_graph = make_call_graph functions in (* FunctionDigraph.iter_edges ( fun caller callee -> printf "%s calls %s\n" caller.vname callee.vname ) call_graph; *) (* The libguestfs error functions. These are global function names, * but to be any use to us we have to look these up in the list of * all global functions (ie. 'functions') and turn them into the * corresponding varinfo structures. *) let error_function_names = [ "guestfs_error_errno"; "guestfs_perrorf" ] in let find_function name = try List.find (fun ({ svar = { vname = n }}, _) -> n = name) functions with Not_found -> failwith ("function '" ^ name ^ "' does not exist") in let error_function_names = List.map ( fun f -> (fst (find_function f)).svar ) error_function_names in (* Get a list of functions that might (directly or indirectly) call * one of the error functions. *) let error_functions, non_error_functions = functions_which_call call_graph error_function_names functions in (* List.iter ( fun f -> printf "%s can call an error function\n" f.vname ) error_functions; List.iter ( fun f -> printf "%s can NOT call an error function\n" f.vname ) non_error_functions; *) (* Save the list of error functions in a global set for fast lookups. *) let set = List.fold_left ( fun set elt -> FunctionSet.add elt set ) FunctionSet.empty error_functions in ErrorCounter.error_functions_set := set; (* Analyze each top-level function to ensure it calls an error * function exactly once on error paths, and never on normal return * paths. *) printf "analyzing correctness of error paths\n%!"; List.iter compute_error_paths functions; () (* Make a directed graph of which functions directly call which other * functions. *) and make_call_graph functions = let graph = FunctionDigraph.create () in List.iter ( fun ({ svar = caller; sallstmts = sallstmts }, _) -> (* Evaluate which other functions 'caller' calls. First pull * out every 'Call' instruction anywhere in the function. *) let insns = List.concat ( filter_map ( function | { skind = Instr insns } -> Some insns | _ -> None ) sallstmts ) in let calls = List.filter (function Call _ -> true | _ -> false) insns in (* Then examine what function is being called at each place. *) let callees = filter_map ( function | Call (_, Lval (Var callee, _), _, _) -> Some callee | _ -> None ) calls in List.iter ( fun callee -> FunctionDigraph.add_edge graph caller callee ) callees ) functions; graph (* [functions_which_call g endpoints functions] partitions the * [functions] list, returning those functions that call directly or * indirectly one of the functions in [endpoints], and a separate list * of functions which do not. [g] is the direct call graph. *) and functions_which_call g endpoints functions = let functions = List.map (fun ({ svar = svar }, _) -> svar) functions in let checker = FunctionPathChecker.create g in List.partition ( fun f -> (* Does a path exist from f to any of the endpoints? *) List.exists ( fun endpoint -> try FunctionPathChecker.check_path checker f endpoint with (* It appears safe to ignore this exception. It seems to * mean that this function is in a part of the graph which * is completely disconnected from the other part of the graph * that contains the endpoint. *) | Invalid_argument "[ocamlgraph] iter_succ" -> false ) endpoints ) functions and compute_error_paths ({ svar = svar } as f, loc) = (*ErrorCounter.debug := true;*) (* Find the initial statement in this function (assumes that the * function can only be entered in one place, which is normal for C * functions). *) let initial_stmts = match f.sbody.bstmts with | [] -> [] | first::_ -> [first] in (* Initialize ErrorCounter. *) ErrorCounter.init initial_stmts; (* Compute the error counters along paths through the function. *) ForwardsErrorCounter.compute initial_stmts; (* Process all Return statements in this function. *) List.iter ( fun stmt -> try let errcalls = Inthash.find ErrorCounter.stmtStartData stmt.sid in match stmt with (* return -1; *) | { skind = Return (Some i, loc) } when is_literal_minus_one i -> if errcalls = 0 then printf "%s:%d: %s: may return an error code without calling error/perrorf\n" loc.file loc.line svar.vname else if errcalls > 1 then printf "%s:%d: %s: may call error/perrorf %d times (more than once) along an error path\n" loc.file loc.line svar.vname errcalls (* return 0; *) | { skind = Return (Some i, loc) } when is_literal_zero i -> if errcalls >= 1 then printf "%s:%d: %s: may call error/perrorf along a non-error return path\n" loc.file loc.line svar.vname (* return; (void return) *) | { skind = Return (None, loc) } -> if errcalls >= 1 then printf "%s:%d: %s: may call error/perrorf and return void\n" loc.file loc.line svar.vname | _ -> () with Not_found -> printf "%s:%d: %s: may contain unreachable code\n" loc.file loc.line svar.vname ) f.sallstmts (* Some convenience CIL matching functions. *) and is_literal_minus_one = function | Const (CInt64 (-1L, _, _)) -> true | _ -> false and is_literal_zero = function | Const (CInt64 (0L, _, _)) -> true | _ -> false (* Convenient routine to load the contents of a channel into a list of * strings. *) and input_chan chan = let lines = ref [] in try while true; do lines := input_line chan :: !lines done; [] with End_of_file -> List.rev !lines and input_file filename = let chan = open_in filename in let r = input_chan chan in close_in chan; r let () = try main () with exn -> prerr_endline (Printexc.to_string exn); Printexc.print_backtrace Pervasives.stderr; exit 1
Do not hesitate on the CIL-users mailing-list if you have any question!
Thanks .. We both agreed that documentation and examples leave a lot to be desired.
You might be interested in the extensive tutorial Zach posted to the list recently: http://www.inf.ethz.ch/personal/azachary/teaching/ciltut.pdf
Source is available too: hg clone https://bitbucket.org/zanderso/cil-template
I planed to improve the doc and website, but I’ve postponed this for a (very) long time. Your post is welcome in reminding me to do it as soon as possible.
Thanks – I sent it to Dan too.
There is also Frama-C (http://frama-c.com), an extensible open-source platform based upon Cil and dedicated to the analysis of C programs.
Right — we even package Frama-C in Fedora. However I’ve never really been sure what frama-c is. It starts up a GUI (so no use for our automated test tools). There’s a lot of it and it was a major achievement just to package it. It has a peculiar website …
There are both a GUI and a command-line tool (namely frama-c-gui and frama-c), so you could use it for your automated test tools. Maybe the best pointer to learn what Frama-C is, is the SEFM’12 reference paper : “Frama-C, A Software Analysis Perspective”, and also the “Frama-C User Manual”. Next, if you want to learn how to write your own Frama-C analyzer (in OCaml), you would read the “Frama-C Plug-in Development Guide”.
Thanks — I will read these with interest.