Years ago I played around with CIL to analyze libvirt. More recently Dan used CIL to analyze libvirt’s locking code.
We didn’t get so far either time, but I’ve been taking a deeper look at CIL in an attempt to verify error handling in libguestfs.
Here is my partly working code so far.
(*
* Analyse libguestfs APIs to find error overwriting.
* Copyright (C) 2008-2013 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <http://www.gnu.org/licenses/>.
*
* Author: Daniel P. Berrange <berrange@redhat.com>
* Author: Richard W.M. Jones <rjones@redhat.com>
*)
open Unix
open Printf
open Cil
let debug = ref false
(* Set of ints. *)
module IntSet = Set.Make (struct type t = int let compare = compare end)
(* A module for storing any set (unordered list) of functions. *)
module FunctionSet = Set.Make (
struct
type t = varinfo
let compare v1 v2 = compare v1.vid v2.vid
end
)
(* Directed graph of functions.
*
* Function = a node in the graph
* FunctionDigraph = the directed graph
* FunctionPathChecker = path checker module using Dijkstra's algorithm
*)
module Function =
struct
type t = varinfo
let compare f1 f2 = compare f1.vid f2.vid
let hash f = Hashtbl.hash f.vid
let equal f1 f2 = f1.vid = f2.vid
end
module FunctionDigraph = Graph.Imperative.Digraph.Concrete (Function)
module FunctionPathChecker = Graph.Path.Check (FunctionDigraph)
(* Module used to analyze the paths through each function. *)
module ErrorCounter =
struct
let name = "ErrorCounter"
let debug = debug
(* Our current state is very simple, just the number of error
* function calls did encountered up to this statement.
*)
type t = int
let copy errcalls = errcalls
(* Start data for each statement. *)
let stmtStartData = Inthash.create 97
let printable errcalls = sprintf "(errcalls=%d)" errcalls
let pretty () t = Pretty.text (printable t)
let computeFirstPredecessor stmt x = x (* XXX??? *)
let combinePredecessors stmt ~old:old_t new_t =
if old_t = new_t then None
else Some new_t
(* This will be initialized after we have calculated the set of all
* functions which can call an error function, in main() below.
*)
let error_functions_set = ref FunctionSet.empty
(* Handle a Cil.Instr. *)
let doInstr instr _ =
match instr with
(* A call to an error function. *)
| Call (_, Lval (Var callee, _), _, _)
when FunctionSet.mem callee !error_functions_set ->
Dataflow.Post (fun errcalls -> errcalls+1)
| _ -> Dataflow.Default
(* Handle a Cil.Stmt. *)
let doStmt _ _ = Dataflow.SDefault
(* Handle a Cil.Guard. *)
let doGuard _ _ = Dataflow.GDefault
(* Filter statements we've seen already to prevent loops. *)
let filter_set = ref IntSet.empty
let filterStmt { sid = sid } =
if IntSet.mem sid !filter_set then false
else (
filter_set := IntSet.add sid !filter_set;
true
)
(* Initialize the module before each function that we examine. *)
let init stmts =
filter_set := IntSet.empty;
Inthash.clear stmtStartData;
(* Add the initial statement(s) to the hash. *)
List.iter (fun stmt -> Inthash.add stmtStartData stmt.sid 0) stmts
end
module ForwardsErrorCounter = Dataflow.ForwardsDataFlow (ErrorCounter)
(* The always useful filter + map function. *)
let rec filter_map f = function
| [] -> []
| x :: xs ->
match f x with
| Some y -> y :: filter_map f xs
| None -> filter_map f xs
let rec main () =
(* Read the list of input C files. *)
let files =
let chan = open_process_in "find src -name '*.i' | sort" in
let files = input_chan chan in
if close_process_in chan <> WEXITED 0 then
failwith "failed to read input list of files";
if files = [] then
failwith "no input files; is the program running from the top directory? did you compile with make -C src CFLAGS=\"-save-temps\"?";
files in
(* Load and parse each input file. *)
let files =
List.map (
fun filename ->
printf "loading %s\n%!" filename;
Frontc.parse filename ()
) files in
(* Merge the files. *)
printf "merging files\n%!";
let sourcecode = Mergecil.merge files "libguestfs" in
(* CFG analysis. *)
printf "computing control flow\n%!";
Cfg.computeFileCFG sourcecode;
let functions =
filter_map (function GFun (f, loc) -> Some (f, loc) | _ -> None)
sourcecode.globals in
(* Examine which functions directly call which other functions. *)
printf "computing call graph\n%!";
let call_graph = make_call_graph functions in
(*
FunctionDigraph.iter_edges (
fun caller callee ->
printf "%s calls %s\n" caller.vname callee.vname
) call_graph;
*)
(* The libguestfs error functions. These are global function names,
* but to be any use to us we have to look these up in the list of
* all global functions (ie. 'functions') and turn them into the
* corresponding varinfo structures.
*)
let error_function_names = [ "guestfs_error_errno";
"guestfs_perrorf" ] in
let find_function name =
try List.find (fun ({ svar = { vname = n }}, _) -> n = name) functions
with Not_found -> failwith ("function '" ^ name ^ "' does not exist")
in
let error_function_names = List.map (
fun f -> (fst (find_function f)).svar
) error_function_names in
(* Get a list of functions that might (directly or indirectly) call
* one of the error functions.
*)
let error_functions, non_error_functions =
functions_which_call call_graph error_function_names functions in
(*
List.iter (
fun f -> printf "%s can call an error function\n" f.vname
) error_functions;
List.iter (
fun f -> printf "%s can NOT call an error function\n" f.vname
) non_error_functions;
*)
(* Save the list of error functions in a global set for fast lookups. *)
let set =
List.fold_left (
fun set elt -> FunctionSet.add elt set
) FunctionSet.empty error_functions in
ErrorCounter.error_functions_set := set;
(* Analyze each top-level function to ensure it calls an error
* function exactly once on error paths, and never on normal return
* paths.
*)
printf "analyzing correctness of error paths\n%!";
List.iter compute_error_paths functions;
()
(* Make a directed graph of which functions directly call which other
* functions.
*)
and make_call_graph functions =
let graph = FunctionDigraph.create () in
List.iter (
fun ({ svar = caller; sallstmts = sallstmts }, _) ->
(* Evaluate which other functions 'caller' calls. First pull
* out every 'Call' instruction anywhere in the function.
*)
let insns = List.concat (
filter_map (
function
| { skind = Instr insns } -> Some insns
| _ -> None
) sallstmts
) in
let calls = List.filter (function Call _ -> true | _ -> false) insns in
(* Then examine what function is being called at each place. *)
let callees = filter_map (
function
| Call (_, Lval (Var callee, _), _, _) -> Some callee
| _ -> None
) calls in
List.iter (
fun callee ->
FunctionDigraph.add_edge graph caller callee
) callees
) functions;
graph
(* [functions_which_call g endpoints functions] partitions the
* [functions] list, returning those functions that call directly or
* indirectly one of the functions in [endpoints], and a separate list
* of functions which do not. [g] is the direct call graph.
*)
and functions_which_call g endpoints functions =
let functions = List.map (fun ({ svar = svar }, _) -> svar) functions in
let checker = FunctionPathChecker.create g in
List.partition (
fun f ->
(* Does a path exist from f to any of the endpoints? *)
List.exists (
fun endpoint ->
try FunctionPathChecker.check_path checker f endpoint
with
(* It appears safe to ignore this exception. It seems to
* mean that this function is in a part of the graph which
* is completely disconnected from the other part of the graph
* that contains the endpoint.
*)
| Invalid_argument "[ocamlgraph] iter_succ" -> false
) endpoints
) functions
and compute_error_paths ({ svar = svar } as f, loc) =
(*ErrorCounter.debug := true;*)
(* Find the initial statement in this function (assumes that the
* function can only be entered in one place, which is normal for C
* functions).
*)
let initial_stmts =
match f.sbody.bstmts with
| [] -> []
| first::_ -> [first] in
(* Initialize ErrorCounter. *)
ErrorCounter.init initial_stmts;
(* Compute the error counters along paths through the function. *)
ForwardsErrorCounter.compute initial_stmts;
(* Process all Return statements in this function. *)
List.iter (
fun stmt ->
try
let errcalls = Inthash.find ErrorCounter.stmtStartData stmt.sid in
match stmt with
(* return -1; *)
| { skind = Return (Some i, loc) } when is_literal_minus_one i ->
if errcalls = 0 then
printf "%s:%d: %s: may return an error code without calling error/perrorf\n"
loc.file loc.line svar.vname
else if errcalls > 1 then
printf "%s:%d: %s: may call error/perrorf %d times (more than once) along an error path\n"
loc.file loc.line svar.vname errcalls
(* return 0; *)
| { skind = Return (Some i, loc) } when is_literal_zero i ->
if errcalls >= 1 then
printf "%s:%d: %s: may call error/perrorf along a non-error return path\n"
loc.file loc.line svar.vname
(* return; (void return) *)
| { skind = Return (None, loc) } ->
if errcalls >= 1 then
printf "%s:%d: %s: may call error/perrorf and return void\n"
loc.file loc.line svar.vname
| _ -> ()
with
Not_found ->
printf "%s:%d: %s: may contain unreachable code\n"
loc.file loc.line svar.vname
) f.sallstmts
(* Some convenience CIL matching functions. *)
and is_literal_minus_one = function
| Const (CInt64 (-1L, _, _)) -> true
| _ -> false
and is_literal_zero = function
| Const (CInt64 (0L, _, _)) -> true
| _ -> false
(* Convenient routine to load the contents of a channel into a list of
* strings.
*)
and input_chan chan =
let lines = ref [] in
try while true; do lines := input_line chan :: !lines done; []
with End_of_file -> List.rev !lines
and input_file filename =
let chan = open_in filename in
let r = input_chan chan in
close_in chan;
r
let () =
try main ()
with
exn ->
prerr_endline (Printexc.to_string exn);
Printexc.print_backtrace Pervasives.stderr;
exit 1