Tag Archives: libvirt

Fedora 19 virtualization test day 2013-05-28

Put it in your calendars .. May 28th is Fedora 19 virtualization test day.

New features include nested virtualization on Intel, new Boxes, new libosinfo, new qemu, KMS-based spice driver, live storage migration and virtio RNG.

Every day is libguestfs test day. Just follow the instructions here.

2 Comments

Filed under Uncategorized

More static analysis with CIL

Years ago I played around with CIL to analyze libvirt. More recently Dan used CIL to analyze libvirt’s locking code.

We didn’t get very far either time, but I’ve been taking a deeper look at CIL in an attempt to verify error handling in libguestfs.

Here is my partly working code so far.

(*
 * Analyse libguestfs APIs to find error overwriting.
 * Copyright (C) 2008-2013 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 * Author: Richard W.M. Jones <rjones@redhat.com>
 *)

open Unix
open Printf

open Cil

(* Global debugging switch.  ErrorCounter re-exports this same ref as
 * its own [debug] value, so setting either one affects both.
 *)
let debug = ref false

(* Ordered sets of plain integers (ErrorCounter uses one to remember
 * the statement IDs it has already visited).
 *)
module IntSet = Set.Make (
  struct
    type t = int
    let compare (a : int) (b : int) = compare a b
  end
)

(* Sets of functions.  A function is represented by its CIL varinfo and
 * two varinfos are treated as the same element when their [vid]
 * fields are equal.
 *)
module FunctionSet = Set.Make (struct
  type t = varinfo
  let compare a b = compare a.vid b.vid
end)

(* Call-graph plumbing built on ocamlgraph.
 *
 * Function            = a vertex: a CIL varinfo identified by its vid
 * FunctionDigraph     = imperative directed graph over those vertices
 * FunctionPathChecker = answers "is there a path from A to B" queries
 *)
module Function =
struct
  type t = varinfo

  (* Identity, ordering and hashing are all derived from the vid. *)
  let equal a b = a.vid = b.vid
  let compare a b = compare a.vid b.vid
  let hash v = Hashtbl.hash v.vid
end

module FunctionDigraph = Graph.Imperative.Digraph.Concrete (Function)

module FunctionPathChecker = Graph.Path.Check (FunctionDigraph)

(* Module used to analyze the paths through each function.
 *
 * It is passed to Dataflow.ForwardsDataFlow below, so the value names
 * used here (name, debug, copy, stmtStartData, pretty,
 * computeFirstPredecessor, combinePredecessors, doInstr, doStmt,
 * doGuard, filterStmt) are presumably the ones that functor expects;
 * verify against the CIL Dataflow interface before renaming anything.
 *
 * The module keeps mutable state (stmtStartData, filter_set,
 * error_functions_set), so [init] must be called before each function
 * is analyzed.
 *)
module ErrorCounter =
struct
  let name = "ErrorCounter"
  (* Shares the top-level [debug] ref. *)
  let debug = debug

  (* Our current state is very simple, just the number of error
   * function calls encountered up to this statement.
   *)
  type t = int

  (* The state is an immutable int, so copying is the identity. *)
  let copy errcalls = errcalls

  (* Start data for each statement: maps a statement ID to the error
   * call count recorded for the start of that statement.
   *)
  let stmtStartData = Inthash.create 97

  (* Render a state for debugging output. *)
  let printable errcalls = sprintf "(errcalls=%d)" errcalls

  let pretty () t = Pretty.text (printable t)

  (* Returns the incoming state unchanged.  The original author was
   * unsure whether this is right (hence the XXX).
   *)
  let computeFirstPredecessor stmt x = x (* XXX??? *)

  (* Returns None when the incoming state equals the stored one,
   * otherwise Some new_t.  Note that the new state replaces the old
   * one; the two are not merged (no max or sum is taken).
   * NOTE(review): presumably None tells the framework that nothing
   * changed - confirm against the CIL Dataflow documentation.
   *)
  let combinePredecessors stmt ~old:old_t new_t =
    if old_t = new_t then None
    else Some new_t

  (* This will be initialized after we have calculated the set of all
   * functions which can call an error function, in main() below.
   *)
  let error_functions_set = ref FunctionSet.empty

  (* Handle a Cil.Instr.  Only direct calls (callee is a plain
   * variable) to a member of error_functions_set change the state.
   *)
  let doInstr instr _ =
    match instr with
    (* A call to an error function: add one to the count. *)
    | Call (_, Lval (Var callee, _), _, _)
        when FunctionSet.mem callee !error_functions_set ->
      Dataflow.Post (fun errcalls -> errcalls+1)

    (* Anything else, including calls through function pointers. *)
    | _ -> Dataflow.Default

  (* Handle a Cil.Stmt. *)
  let doStmt _ _ = Dataflow.SDefault

  (* Handle a Cil.Guard. *)
  let doGuard _ _ = Dataflow.GDefault

  (* Filter statements we've seen already to prevent loops.  Returns
   * true the first time a given statement ID is seen (and records
   * it), false on every later call until [init] resets the set.
   *)
  let filter_set = ref IntSet.empty
  let filterStmt { sid = sid } =
    if IntSet.mem sid !filter_set then false
    else (
      filter_set := IntSet.add sid !filter_set;
      true
    )

  (* Initialize the module before each function that we examine:
   * forget visited statements and previous start data, then seed each
   * of the given statements with a count of zero.
   *)
  let init stmts =
    filter_set := IntSet.empty;
    Inthash.clear stmtStartData;
    (* Add the initial statement(s) to the hash. *)
    List.iter (fun stmt -> Inthash.add stmtStartData stmt.sid 0) stmts
end

(* The forwards dataflow analysis instantiated with ErrorCounter. *)
module ForwardsErrorCounter = Dataflow.ForwardsDataFlow (ErrorCounter)

(* The always useful filter + map function.
 *
 * [filter_map f xs] applies [f] to the elements of [xs] from left to
 * right and returns, in the same order, the [y] of every [Some y]
 * result.  Elements for which [f] returns [None] are dropped.
 *
 * The loop is tail-recursive (accumulate, then reverse) so that stack
 * use does not grow with the length of the input list.
 *)
let filter_map f xs =
  let rec loop acc = function
    | [] -> List.rev acc
    | x :: rest ->
        match f x with
        | Some y -> loop (y :: acc) rest
        | None -> loop acc rest
  in
  loop [] xs

let rec main () =
  (* Ask find(1) for the preprocessed C sources under src/, sorted.
   * A failing pipeline or an empty result is fatal.
   *)
  let filenames =
    let pipe = open_process_in "find src -name '*.i' | sort" in
    let names = input_chan pipe in
    (match close_process_in pipe with
     | WEXITED 0 -> ()
     | WEXITED _ | WSIGNALED _ | WSTOPPED _ ->
       failwith "failed to read input list of files");
    match names with
    | [] ->
      failwith "no input files; is the program running from the top directory? did you compile with make -C src CFLAGS=\"-save-temps\"?"
    | _ :: _ -> names in

  (* Parse every file with the CIL front end, announcing each one. *)
  let parse_one path =
    printf "loading %s\n%!" path;
    Frontc.parse path () in
  let parsed = List.map parse_one filenames in

  (* Combine all the parsed files into a single CIL file. *)
  printf "merging files\n%!";
  let sourcecode = Mergecil.merge parsed "libguestfs" in

  (* Build the control flow graph of every function. *)
  printf "computing control flow\n%!";
  Cfg.computeFileCFG sourcecode;

  (* Keep only the function definitions, paired with their location. *)
  let functions =
    filter_map (
      fun global ->
        match global with
        | GFun (fundec, loc) -> Some (fundec, loc)
        | _ -> None
    ) sourcecode.globals in

  (* Work out which functions directly call which other functions. *)
  printf "computing call graph\n%!";
  let call_graph = make_call_graph functions in
  (*
  FunctionDigraph.iter_edges (
    fun caller callee ->
      printf "%s calls %s\n" caller.vname callee.vname
  ) call_graph;
  *)

  (* The libguestfs error functions are known by name only.  Each name
   * is resolved to the varinfo of the matching function definition;
   * a name with no definition is fatal.
   *)
  let lookup name =
    let has_name (fundec, _) = fundec.svar.vname = name in
    try (fst (List.find has_name functions)).svar
    with Not_found -> failwith ("function '" ^ name ^ "' does not exist") in
  let error_function_names =
    List.map lookup [ "guestfs_error_errno"; "guestfs_perrorf" ] in

  (* Split the functions into those that can reach an error function
   * through the call graph and those that cannot.
   *)
  let error_functions, _non_error_functions =
    functions_which_call call_graph error_function_names functions in

  (*
  List.iter (
    fun f -> printf "%s can call an error function\n" f.vname
  ) error_functions;

  List.iter (
    fun f -> printf "%s can NOT call an error function\n" f.vname
  ) non_error_functions;
  *)

  (* Publish the error functions as a set so that the dataflow
   * analysis can test membership quickly.
   *)
  ErrorCounter.error_functions_set :=
    List.fold_left (
      fun acc fn -> FunctionSet.add fn acc
    ) FunctionSet.empty error_functions;

  (* Check every function: an error function should be called exactly
   * once on error paths and never on normal return paths.
   *)
  printf "analyzing correctness of error paths\n%!";
  List.iter compute_error_paths functions

(* Make a directed graph of which functions directly call which other
 * functions.
 *
 * [functions] is a list of (fundec, location) pairs.  Every function
 * in the list becomes a vertex of the result, even when it neither
 * calls nor is called by anything, so that later path queries never
 * meet a vertex that is missing from the graph.  An edge from caller
 * to callee is added for each direct call found in an [Instr]
 * statement of the caller; calls whose target is not a plain variable
 * (for example calls through function pointers) are ignored.
 *)
and make_call_graph functions =
  let graph = FunctionDigraph.create () in

  List.iter (
    fun ({ svar = caller; sallstmts = sallstmts }, _) ->
      (* Register the caller itself; add_edge alone would only create
       * vertices for functions that take part in at least one call.
       *)
      FunctionDigraph.add_vertex graph caller;

      (* Walk every statement of the function and record an edge for
       * each direct call instruction.
       *)
      List.iter (
        fun stmt ->
          match stmt.skind with
          | Instr insns ->
            List.iter (
              function
              | Call (_, Lval (Var callee, _), _, _) ->
                FunctionDigraph.add_edge graph caller callee
              | _ -> ()
            ) insns
          | _ -> ()
      ) sallstmts
  ) functions;

  graph

(* [functions_which_call g endpoints functions] partitions the
 * [functions] list, returning those functions that call directly or
 * indirectly one of the functions in [endpoints], and a separate list
 * of functions which do not.  [g] is the direct call graph.
 *
 * [functions] is a list of (fundec, location) pairs; both returned
 * lists contain the corresponding varinfos.  A function or endpoint
 * that is not a vertex of [g] cannot be on any path, so it is treated
 * as unreachable rather than being passed to the path checker.
 *)
and functions_which_call g endpoints functions =
  let functions = List.map (fun ({ svar = svar }, _) -> svar) functions in

  let checker = FunctionPathChecker.create g in

  (* The path checker rejects vertices it does not know with
   * Invalid_argument, and the text carried by that exception is not a
   * stable interface, so membership is tested explicitly up front
   * instead of matching on the exception message.
   *)
  let endpoints = List.filter (FunctionDigraph.mem_vertex g) endpoints in

  List.partition (
    fun f ->
      (* Does a path exist from f to any of the endpoints? *)
      FunctionDigraph.mem_vertex g f &&
      List.exists (
        fun endpoint -> FunctionPathChecker.check_path checker f endpoint
      ) endpoints
  ) functions

(* Run the error-call dataflow analysis over one function and print a
 * diagnostic for each statement that looks wrong.  The argument is a
 * (fundec, location) pair; the location is that of the function and
 * is used only for the unreachable code message.
 *)
and compute_error_paths (f, loc) =
  (*ErrorCounter.debug := true;*)
  let fname = f.svar.vname in

  (* The analysis starts at the first statement of the body, on the
   * assumption that a C function is only ever entered at the top.  An
   * empty body gives no start statement at all.
   *)
  let entry =
    match f.sbody.bstmts with
    | first :: _ -> [first]
    | [] -> [] in

  (* Reset the analysis state, then propagate the error counters. *)
  ErrorCounter.init entry;
  ForwardsErrorCounter.compute entry;

  (* Examine one statement, given the counter recorded at its start.
   * Only Return statements are judged; a statement with no recorded
   * counter was never reached by the analysis.
   *)
  let check_stmt stmt =
    let state =
      try Some (Inthash.find ErrorCounter.stmtStartData stmt.sid)
      with Not_found -> None in
    match state, stmt.skind with
    | None, _ ->
      printf "%s:%d: %s: may contain unreachable code\n"
        loc.file loc.line fname

    (* return -1; *)
    | Some errcalls, Return (Some e, rloc) when is_literal_minus_one e ->
      if errcalls = 0 then
        printf "%s:%d: %s: may return an error code without calling error/perrorf\n"
          rloc.file rloc.line fname
      else if errcalls > 1 then
        printf "%s:%d: %s: may call error/perrorf %d times (more than once) along an error path\n"
          rloc.file rloc.line fname errcalls

    (* return 0; *)
    | Some errcalls, Return (Some e, rloc) when is_literal_zero e ->
      if errcalls >= 1 then
        printf "%s:%d: %s: may call error/perrorf along a non-error return path\n"
          rloc.file rloc.line fname

    (* return; (void return) *)
    | Some errcalls, Return (None, rloc) ->
      if errcalls >= 1 then
        printf "%s:%d: %s: may call error/perrorf and return void\n"
          rloc.file rloc.line fname

    | Some _, _ -> ()
  in
  List.iter check_stmt f.sallstmts

(* Some convenience CIL matching functions. *)

(* True only for an expression that is the integer constant -1. *)
and is_literal_minus_one exp =
  match exp with
  | Const (CInt64 (n, _, _)) -> n = Int64.minus_one
  | _ -> false

(* True only for an expression that is the integer constant 0. *)
and is_literal_zero exp =
  match exp with
  | Const (CInt64 (n, _, _)) -> n = Int64.zero
  | _ -> false

(* Read a channel to the end and return its lines, in order, as a list
 * of strings.  The channel is left open; closing it is up to the
 * caller.
 *)
and input_chan chan =
  let rec gather acc =
    let next =
      try Some (input_line chan)
      with End_of_file -> None in
    match next with
    | Some line -> gather (line :: acc)
    | None -> List.rev acc
  in
  gather []

(* Return the lines of the file [filename] as a list of strings.
 *
 * The file is always closed before returning.  If reading fails, the
 * channel is closed (ignoring any error from the close itself) and the
 * original exception is re-raised, so no descriptor is leaked.
 * Opening a file that cannot be read raises whatever [open_in] raises.
 *)
and input_file filename =
  let chan = open_in filename in
  let lines =
    (try input_chan chan
     with exn -> close_in_noerr chan; raise exn) in
  close_in chan;
  lines

(* Program entry point: run [main]; if it raises, print the exception
 * followed by its backtrace on standard error and exit with status 1.
 *)
let () =
  try main ()
  with exn ->
    (* Fetch the backtrace first, before any other call gets a chance
     * to overwrite it.
     *)
    let backtrace = Printexc.get_backtrace () in
    prerr_endline (Printexc.to_string exn);
    (* The name stderr is shadowed by the Unix file descriptor because
     * of the open Unix at the top of the file, and the Pervasives
     * module is not available in newer OCaml releases, so the
     * backtrace text is written with prerr_string instead of naming
     * the channel.
     *)
    prerr_string backtrace;
    exit 1

8 Comments

Filed under Uncategorized

Extracting filesystems from guest images, reconstructing guest images from filesystems, part 3

In part 1 I “exploded” a disk image into its constituent filesystems. In part 2 I “imploded” those filesystems back into a disk image without LVM. Now let’s see if we can get this thing to boot.

Since there is no boot sector nor grub, the disk image produced by virt-implode won’t boot normally. You could boot it using an external kernel and initrd, but let’s see if we can install grub first.

Using virt-rescue we can interactively run programs from the guest:

$ virt-rescue -a output.img
...
><rescue> mount /dev/sda2 /sysroot
><rescue> mount /dev/sda1 /sysroot/boot
><rescue> mount --bind /dev /sysroot/dev
><rescue> mount --bind /sys /sysroot/sys
><rescue> mount --bind /proc /sysroot/proc
><rescue> chroot /sysroot
sh: no job control in this shell
><rescue> cat /etc/redhat-release
Red Hat Enterprise Linux AS release 4 (Nahant Update 8)
><rescue> vi /etc/fstab

You may need to fix /etc/fstab in the guest so that it points to the new partitions. For guests using LABELs or UUIDs, this won’t be necessary.

At this point, it should be simply a matter of running grub-install. But here’s where I remember how much I hate grub, because it just throws up peculiar, non-actionable error messages for every conceivable variation in the command.

So I gave up on that, and decided to extract the kernel and initrd and use the external boot method after all:

$ guestfish --ro -a output.img -i

Welcome to guestfish, the libguestfs filesystem interactive shell for
editing virtual machine filesystems.

Type: 'help' for help on commands
      'man' to read the manual
      'quit' to quit the shell

Operating system: Red Hat Enterprise Linux AS release 4 (Nahant Update 8)
/dev/sda2 mounted on /
/dev/sda1 mounted on /boot

><fs> ll /boot
total 4397
drwxr-xr-x.  5 root root    1024 Nov 15 13:49 .
drwxr-xr-x. 24 root root    4096 Nov 15 13:50 ..
-rw-r--r--.  1 root root  909034 Apr 20  2009 System.map-2.6.9-89.EL
drwxr-xr-x   3 root root    1024 Nov 15 13:49 boot
-rw-r--r--.  1 root root   45145 Apr 20  2009 config-2.6.9-89.EL
drwxr-xr-x.  2 root root    1024 Nov 15 13:58 grub
-rw-r--r--.  1 root root 1546843 Oct 27  2010 initrd-2.6.9-89.EL.img
drwx------   2 root root   12288 Oct 27  2010 lost+found
-rw-r--r--.  1 root root   23108 Aug  3  2005 message
-rw-r--r--.  1 root root   21282 Aug  3  2005 message.ja
-rw-r--r--.  1 root root   67352 Apr 20  2009 symvers-2.6.9-89.EL.gz
-rw-r--r--.  1 root root 1829516 Apr 20  2009 vmlinuz-2.6.9-89.EL
><fs> download /boot/vmlinuz-2.6.9-89.EL /tmp/vmlinuz-2.6.9-89.EL
><fs> download /boot/initrd-2.6.9-89.EL.img /tmp/initrd-2.6.9-89.EL.img
$ qemu-kvm -m 512 \
  -kernel vmlinuz-2.6.9-89.EL \
  -initrd initrd-2.6.9-89.EL.img \
  -append 'ro root=/dev/hda2' \
  -hda output.img

(I should really use libvirt, but this quick test proves that the guest works fine)

1 Comment

Filed under Uncategorized

Thursday is Fedora virt test day

Thursday (1st Nov) is Fedora virtualization test day. Help us out by testing libguestfs!

Fedora 18 has definitely been a struggle. It is possibly the most delayed Fedora release ever. In libguestfs (in Fedora only) we switched to using libvirt to launch the appliance, revealing a lot of bugs and problems in libvirt in the process.

At the same time we’ve added dozens of major new features to libguestfs.

So there are likely to be a lot of bugs, and you can make a difference.

Leave a Comment

Filed under Uncategorized

Hotplugging in libguestfs

I just posted a new set of patches upstream which add hotplugging support for libguestfs.

Since the project started, most libguestfs programs have looked like this:

/* create the handle */
g = guestfs_create ();
/* add 1 or more disks to examine */
guestfs_add_drive (g, "disk.img");
/* launch the handle */
guestfs_launch (g);

It was not possible to add further disks after launch, because of the way libguestfs works: You could not add disks to the small appliance that we use after the appliance has started up.

Except that you can: Linux and qemu have supported hotplugging disks for a long time, but we didn’t expose this through libguestfs.

Now that you can use libvirt to run the appliance it becomes relatively easy for us to add hotplugging, implemented via libvirt’s virDomainAttachDevice API. (The actual implementation inside libvirt is very complex, hence the reason why we didn’t try to reimplement it in libguestfs). From the point of view of libguestfs callers, you can now call the add-drive* APIs after launch.

All good news? Apart from the obvious limitation that you have to be using the libvirt backend for it to work at all, is it a good idea to use hotplugging?

From a security point of view, there is a trade-off: If you have to modify lots of guests, it’s faster to use hotplugging because you save having to start a new appliance for each guest. But it’s also less secure (significantly so) because one guest may be able to exploit and hijack the appliance and interfere with the disks of other appliances.

In virt-df we accept this risk. It is largely mitigated because the disks are read-only so an exploit from one guest cannot make a permanent change to any other guest.

But if you are considering using hotplugging to modify lots of mutually untrustworthy disks, don’t do it. Either use one appliance per group of mutually trusting guests, or (easier) just launch an appliance per guest. (Reading this page may help you to get the best performance in this case).

Leave a Comment

Filed under Uncategorized

New in libguestfs: Use libvirt to launch the appliance

[libvirt logo] From libguestfs ≥ 1.19.24 you can, if you want, use libvirt to manage the libguestfs appliance. To enable this, set the environment variable:

export LIBGUESTFS_ATTACH_METHOD=libvirt

and (hopefully) any libguestfs program, guestfish, virt tool etc will just work as normal. As well as the absolute latest libguestfs you’ll also need libvirt ≥ 0.9.13 and a recent qemu.

What’s the point of this change? There are a couple of advantages. Firstly libvirt confines guests using sVirt mandatory access control (using SELinux or AppArmor). This will [it's not working right now] provide additional defences against rogue disk images subverting the appliance or qemu.

Secondly libvirt already does disk hotplugging, and it will allow us to add this easily to libguestfs.

2 Comments

Filed under Uncategorized

Using libguestfs live without libvirt

You should use libguestfs live with libvirt because it takes the pain out of it.

However if for some reason you don’t want to use libvirt, here is how to use it directly. Note normally you should use libvirt and you do not need to do any of this!

(1) Your guest needs to have guestfsd -r installed and running. [Normally you just install libguestfs-live-service which does everything necessary]

(2) You need to start up qemu (the guest) with these special options, or the equivalent. [Normally you just set this up through libvirt]

-chardev pty,id=charserial0
-device isa-serial,chardev=charserial0,id=serial0
-chardev socket,id=charchannel0,path=/tmp/socket,server,nowait
-device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=org.libguestfs.channel.0

(3) Start guestfish and use the attach-method command to point to the socket. [Normally using '-d Guest --live' does all this automatically for you by getting the necessary information out of libvirt]

$ guestfish
><fs> attach-method unix:/tmp/socket
><fs> run
><fs> ll /
[use libguestfs command as normal here]

Leave a Comment

Filed under Uncategorized

Tip: Detecting guest activity: three methods

(1) Is the guest generating disk and network activity (a.k.a. are the lights flashing)?

I don’t have nice code for this, but you can see the technique that Oz uses here. Oz uses the libvirt monitoring APIs to look for disk and network activity, and signals when it hasn’t seen any after a certain timeout period.

(2) When was the last time a user logged in?

If this is the sort of “activity” you’re after, then you can use virt-cat on Linux, or virt-win-reg [sorry, no actual example yet] on Windows.

(3) What was the last file updated in a guest?

Finally, if you’re interested in the newest file updated in a guest, see this technique which will work for any Linux or Windows guest.

Are there other kinds of “activity” that it’s interesting to find from guests?

Leave a Comment

Filed under Uncategorized

Half-baked idea: Autodetect dependencies

For more half-baked ideas, see the ideas tag.

Large “coordination” libraries like libvirt unify a lot of disparate features through one API, and as a result they depend on many other libraries and external programs. Libvirt directly links to 20 libraries and requires countless other external programs.

We want users to be able to compile libvirt even when they don’t have the full set of libraries (you don’t need, say, PolKit, libvirt will still work with reduced functionality). The issue though is you end up with code which looks like:

switch (cred[i].type) {
case VIR_CRED_EXTERNAL: {
    if (STRNEQ(cred[i].challenge, "PolicyKit"))
        return -1;

#if defined(POLKIT_AUTH)
    if (virConnectAuthGainPolkit(cred[i].prompt) < 0)
        return -1;
#else
    /* Ignore & carry on. Although we can't auth
     * directly, the user may have authenticated
     * themselves already outside context of libvirt
     */
#endif
    break;
}

This code is fragile because (a) it’s hard to reason about all the pathways and (b) it’s combinatorially difficult to test all the different permutations of available libraries. This fragility leads to bug reports and possibly worse.

Before I get to the half-baked idea, I’ll throw in another thought: at the moment we do most of this detection at compile time using a long configure script. It might be better to do it at run time. You could imagine how this could work if you were a very patient programmer who liked writing tedious boilerplate:

libaudit = dlopen ("libaudit.so", 0);
//...
if (libaudit) {
  int (*audit_add_watch) (struct audit_rule_data **rulep,
                          const char *path);
  audit_add_watch = dlsym (libaudit, "audit_add_watch");
  if (audit_add_watch)
    r = audit_add_watch (rule, path);
  else
    goto no_func;
} else {
 no_func:
  // no libaudit, do something else
}

The half-baked idea is this: Write the code as if all the functions exist. Then transform the code into the runtime/dlsym version above. In the first iteration, for each libvirt API entry point we compute the sum of all optional libraries/functions that are required to execute that entry point, and we generate checks like this:

virFoo ()
{
  // The following checks are generated automatically:
  if (!libaudit)
    return error ("virFoo: you need to install libaudit");
  if (!libaudit_audit_add_watch)
    return error ("virFoo: wrong version of libaudit, "
                  "requires audit_add_watch function");
  //..
  // Here we run the programmer's code:
  //..
}

The first iteration is very conservative. In the second iteration of the project we’d allow the programmer to write fallback code, so that partial API functionality is available even if not all the libraries are. But how to do that and avoid the #ifdef problem?

I think you should be allowed to write alternate functions:

authenticate ()
{
  return polkit_context_is_caller_authorized (pkcontext, ...);
}

authenticate ()
{
  return 1;
}

(Remember this is not C, but some sort of C with transformations applied to it).

Our C transformation chooses the “best” function to call at runtime, where best is simply the one which has the most libraries available. In the above case, the first version of authenticate is chosen if the PolKit library is available, the second version if not.

5 Comments

Filed under Uncategorized

Setting up a serial console in qemu and libvirt

I always forget how to do this, but in fact it’s quite simple.

First ensure your libvirt XML contains a fragment like this (my guest, installed using virt-install, already had this).

<serial type='pty'>
  <target port='0'/>
</serial>
<console type='pty'>
  <target type='serial' port='0'/>
</console>

Second, edit /boot/grub/grub.conf inside the guest, adding the console=ttyS0 element to the kernel command line:

# virt-edit Guestname /boot/grub/grub.conf
...
title Fedora (2.6.38.6-26.rc1.fc15.x86_64)
	root (hd0,0)
	kernel /vmlinuz ro [...] console=ttyS0
...

You don’t need to set the speed. I believe it defaults to 115200 8N1, but I don’t think that qemu serial ports have a “speed” as such, since the hardware is emulated.

Third, start the guest and dump out the running XML:

# virsh start Guestname && virsh dumpxml Guestname
...
    <console type='pty' tty='/dev/pts/8'>

Notice the randomly assigned pty on the host side (/dev/pts/8). Connect to that with Minicom[1], and you should see boot messages and/or a login prompt.

[1] Is there something better than minicom? It’s a horrible program, always has been.

15 Comments

Filed under Uncategorized