Years ago I played around with CIL to analyze libvirt. More recently Dan used CIL to analyze libvirt’s locking code.

We didn’t get so far either time, but I’ve been taking a deeper look at CIL in an attempt to verify error handling in libguestfs.

Here is my partly working code so far.

*(**
* * Analyse libguestfs APIs to find error overwriting.*
* * Copyright (C) 2008-2013 Red Hat, Inc.*
* **
* * This library is free software; you can redistribute it and/or*
* * modify it under the terms of the GNU Lesser General Public*
* * License as published by the Free Software Foundation; either*
* * version 2.1 of the License, or (at your option) any later version.*
* **
* * This library is distributed in the hope that it will be useful,*
* * but WITHOUT ANY WARRANTY; without even the implied warranty of*
* * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU*
* * Lesser General Public License for more details.*
* **
* * You should have received a copy of the GNU Lesser General Public*
* * License along with this library. If not, see*
* * <http://www.gnu.org/licenses/>.*
* **
* * Author: Daniel P. Berrange <berrange@redhat.com>*
* * Author: Richard W.M. Jones <rjones@redhat.com>*
* *)*
**open** Unix
**open** Printf
**open** Cil
**let** debug = ref **false**
*(* Set of ints. *)*
**module** IntSet = **Set**.Make (**struct** **type** t = int **let** compare = compare **end**)
*(* A module for storing any set (unordered list) of functions. *)*
**module** FunctionSet = **Set**.Make (
**struct**
**type** t = varinfo
**let** compare v1 v2 = compare v1.vid v2.vid
**end**
)
*(* Directed graph of functions.*
* **
* * Function = a node in the graph*
* * FunctionDigraph = the directed graph*
* * FunctionPathChecker = path checker module using Dijkstra's algorithm*
* *)*
**module** Function =
**struct**
**type** t = varinfo
**let** compare f1 f2 = compare f1.vid f2.vid
**let** hash f = **Hashtbl**.hash f.vid
**let** equal f1 f2 = f1.vid = f2.vid
**end**
**module** FunctionDigraph = **Graph**.**Imperative**.**Digraph**.Concrete (Function)
**module** FunctionPathChecker = **Graph**.**Path**.Check (FunctionDigraph)
*(* Module used to analyze the paths through each function. *)*
**module** ErrorCounter =
**struct**
**let** name = "ErrorCounter"
**let** debug = debug
*(* Our current state is very simple, just the number of error*
* * function calls did encountered up to this statement.*
* *)*
**type** t = int
**let** copy errcalls = errcalls
*(* Start data for each statement. *)*
**let** stmtStartData = **Inthash**.create 97
**let** printable errcalls = sprintf "(errcalls=%d)" errcalls
**let** pretty () t = **Pretty**.text (printable t)
**let** computeFirstPredecessor stmt x = x *(* XXX??? *)*
**let** combinePredecessors stmt ~old:old_t new_t =
**if** old_t = new_t **then** None
**else** Some new_t
*(* This will be initialized after we have calculated the set of all*
* * functions which can call an error function, in main() below.*
* *)*
**let** error_functions_set = ref **FunctionSet**.empty
*(* Handle a Cil.Instr. *)*
**let** doInstr instr _ =
**match** instr **with**
*(* A call to an error function. *)*
| Call (_, Lval (Var callee, _), _, _)
**when** **FunctionSet**.mem callee !error_functions_set ->
**Dataflow**.Post (**fun** errcalls -> errcalls+1)
| _ -> **Dataflow**.Default
*(* Handle a Cil.Stmt. *)*
**let** doStmt _ _ = **Dataflow**.SDefault
*(* Handle a Cil.Guard. *)*
**let** doGuard _ _ = **Dataflow**.GDefault
*(* Filter statements we've seen already to prevent loops. *)*
**let** filter_set = ref **IntSet**.empty
**let** filterStmt { sid = sid } =
**if** **IntSet**.mem sid !filter_set **then** **false**
**else** (
filter_set := **IntSet**.add sid !filter_set;
**true**
)
*(* Initialize the module before each function that we examine. *)*
**let** init stmts =
filter_set := **IntSet**.empty;
**Inthash**.clear stmtStartData;
*(* Add the initial statement(s) to the hash. *)*
**List**.iter (**fun** stmt -> **Inthash**.add stmtStartData stmt.sid 0) stmts
**end**
**module** ForwardsErrorCounter = **Dataflow**.ForwardsDataFlow (ErrorCounter)
*(* The always useful filter + map function. *)*
**let** **rec** filter_map f = **function**
| [] -> []
| x :: xs ->
**match** f x **with**
| Some y -> y :: filter_map f xs
| None -> filter_map f xs
**let** **rec** main () =
*(* Read the list of input C files. *)*
**let** files =
**let** chan = open_process_in "find src -name '*.i' | sort" **in**
**let** files = input_chan chan **in**
**if** close_process_in chan <> WEXITED 0 **then**
failwith "failed to read input list of files";
**if** files = [] **then**
failwith "no input files; is the program running from the top directory? did you compile with make -C src CFLAGS=\"-save-temps\"?";
files **in**
*(* Load and parse each input file. *)*
**let** files =
**List**.map (
**fun** filename ->
printf "loading %s\n%!" filename;
**Frontc**.parse filename ()
) files **in**
*(* Merge the files. *)*
printf "merging files\n%!";
**let** sourcecode = **Mergecil**.merge files "libguestfs" **in**
*(* CFG analysis. *)*
printf "computing control flow\n%!";
**Cfg**.computeFileCFG sourcecode;
**let** functions =
filter_map (**function** GFun (f, loc) -> Some (f, loc) | _ -> None)
sourcecode.globals **in**
*(* Examine which functions directly call which other functions. *)*
printf "computing call graph\n%!";
**let** call_graph = make_call_graph functions **in**
*(**
* FunctionDigraph.iter_edges (*
* fun caller callee ->*
* printf "%s calls %s\n" caller.vname callee.vname*
* ) call_graph;*
* *)*
*(* The libguestfs error functions. These are global function names,*
* * but to be any use to us we have to look these up in the list of*
* * all global functions (ie. 'functions') and turn them into the*
* * corresponding varinfo structures.*
* *)*
**let** error_function_names = [ "guestfs_error_errno";
"guestfs_perrorf" ] **in**
**let** find_function name =
**try** **List**.find (**fun** ({ svar = { vname = n }}, _) -> n = name) functions
**with** Not_found -> failwith ("function '" ^ name ^ "' does not exist")
**in**
**let** error_function_names = **List**.map (
**fun** f -> (fst (find_function f)).svar
) error_function_names **in**
*(* Get a list of functions that might (directly or indirectly) call*
* * one of the error functions.*
* *)*
**let** error_functions, non_error_functions =
functions_which_call call_graph error_function_names functions **in**
*(**
* List.iter (*
* fun f -> printf "%s can call an error function\n" f.vname*
* ) error_functions;*
* List.iter (*
* fun f -> printf "%s can NOT call an error function\n" f.vname*
* ) non_error_functions;*
* *)*
*(* Save the list of error functions in a global set for fast lookups. *)*
**let** set =
**List**.fold_left (
**fun** set elt -> **FunctionSet**.add elt set
) **FunctionSet**.empty error_functions **in**
**ErrorCounter**.error_functions_set := set;
*(* Analyze each top-level function to ensure it calls an error*
* * function exactly once on error paths, and never on normal return*
* * paths.*
* *)*
printf "analyzing correctness of error paths\n%!";
**List**.iter compute_error_paths functions;
()
*(* Make a directed graph of which functions directly call which other*
* * functions.*
* *)*
**and** make_call_graph functions =
**let** graph = **FunctionDigraph**.create () **in**
**List**.iter (
**fun** ({ svar = caller; sallstmts = sallstmts }, _) ->
*(* Evaluate which other functions 'caller' calls. First pull*
* * out every 'Call' instruction anywhere in the function.*
* *)*
**let** insns = **List**.concat (
filter_map (
**function**
| { skind = Instr insns } -> Some insns
| _ -> None
) sallstmts
) **in**
**let** calls = **List**.filter (**function** Call _ -> **true** | _ -> **false**) insns **in**
*(* Then examine what function is being called at each place. *)*
**let** callees = filter_map (
**function**
| Call (_, Lval (Var callee, _), _, _) -> Some callee
| _ -> None
) calls **in**
**List**.iter (
**fun** callee ->
**FunctionDigraph**.add_edge graph caller callee
) callees
) functions;
graph
*(* [functions_which_call g endpoints functions] partitions the*
* * [functions] list, returning those functions that call directly or*
* * indirectly one of the functions in [endpoints], and a separate list*
* * of functions which do not. [g] is the direct call graph.*
* *)*
**and** functions_which_call g endpoints functions =
**let** functions = **List**.map (**fun** ({ svar = svar }, _) -> svar) functions **in**
**let** checker = **FunctionPathChecker**.create g **in**
**List**.partition (
**fun** f ->
*(* Does a path exist from f to any of the endpoints? *)*
**List**.exists (
**fun** endpoint ->
**try** **FunctionPathChecker**.check_path checker f endpoint
**with**
*(* It appears safe to ignore this exception. It seems to*
* * mean that this function is in a part of the graph which*
* * is completely disconnected from the other part of the graph*
* * that contains the endpoint.*
* *)*
| Invalid_argument "[ocamlgraph] iter_succ" -> **false**
) endpoints
) functions
**and** compute_error_paths ({ svar = svar } **as** f, loc) =
*(*ErrorCounter.debug := true;*)*
*(* Find the initial statement in this function (assumes that the*
* * function can only be entered in one place, which is normal for C*
* * functions).*
* *)*
**let** initial_stmts =
**match** f.sbody.bstmts **with**
| [] -> []
| first::_ -> [first] **in**
*(* Initialize ErrorCounter. *)*
**ErrorCounter**.init initial_stmts;
*(* Compute the error counters along paths through the function. *)*
**ForwardsErrorCounter**.compute initial_stmts;
*(* Process all Return statements in this function. *)*
**List**.iter (
**fun** stmt ->
**try**
**let** errcalls = **Inthash**.find **ErrorCounter**.stmtStartData stmt.sid **in**
**match** stmt **with**
*(* return -1; *)*
| { skind = Return (Some i, loc) } **when** is_literal_minus_one i ->
**if** errcalls = 0 **then**
printf "%s:%d: %s: may return an error code without calling error/perrorf\n"
loc.file loc.line svar.vname
**else** **if** errcalls > 1 **then**
printf "%s:%d: %s: may call error/perrorf %d times (more than once) along an error path\n"
loc.file loc.line svar.vname errcalls
*(* return 0; *)*
| { skind = Return (Some i, loc) } **when** is_literal_zero i ->
**if** errcalls >= 1 **then**
printf "%s:%d: %s: may call error/perrorf along a non-error return path\n"
loc.file loc.line svar.vname
*(* return; (void return) *)*
| { skind = Return (None, loc) } ->
**if** errcalls >= 1 **then**
printf "%s:%d: %s: may call error/perrorf and return void\n"
loc.file loc.line svar.vname
| _ -> ()
**with**
Not_found ->
printf "%s:%d: %s: may contain unreachable code\n"
loc.file loc.line svar.vname
) f.sallstmts
*(* Some convenience CIL matching functions. *)*
**and** is_literal_minus_one = **function**
| Const (CInt64 (-1L, _, _)) -> **true**
| _ -> **false**
**and** is_literal_zero = **function**
| Const (CInt64 (0L, _, _)) -> **true**
| _ -> **false**
*(* Convenient routine to load the contents of a channel into a list of*
* * strings.*
* *)*
**and** input_chan chan =
**let** lines = ref [] **in**
**try** **while** **true**; **do** lines := input_line chan :: !lines **done**; []
**with** End_of_file -> **List**.rev !lines
**and** input_file filename =
**let** chan = open_in filename **in**
**let** r = input_chan chan **in**
close_in chan;
r
**let** () =
**try** main ()
**with**
exn ->
prerr_endline (**Printexc**.to_string exn);
**Printexc**.print_backtrace **Pervasives**.stderr;
exit 1