Tuesday, February 24, 2015

PauseMonitor - take action when a VM resumes from paused status

Depending on your virtualization software of choice, your VMs can be paused or suspended. If this is done without interacting with the OS in your VM, your VM is totally unaware that it has been paused. The clock stops when it is paused, along with everything else.

Last week I was challenged on this. I got a phone call from a nice girl who had some VMs running with some sort of automation. She was using the VirtualBox hypervisor, but was administering (or scripting) her VMs with libvirt. She wanted to run a script within her VMs whenever they resumed from paused status.

As the OS is unaware it has been paused, I figured the simplest way to accomplish this was to run a daemon in the guest that repeatedly checks against a time source outside of the VM, to identify situations where the guest had been paused.


PauseMonitor

I wrote a tiny program for this, and called it "PauseMonitor". It is Linux only. Choose whatever license you like for it, and use it as you wish.
Configuration is done in the /etc/pausemonitor.conf file. Here is an example:

 accepted_diff = 3  
 delay_between_checks = 5  
 ntp_server_ip = "x.x.x.x"  
 on_resume_command = "echo I have been paused! > ~/pausemonitor.txt"  

The accepted_diff variable is there to adjust for any delays you might have talking to your ntp server. (If you have a local ntp server, this is a good time to use it.) The higher the number, the more time difference will be accepted without taking action. Values are in full seconds.

The delay_between_checks variable tells the program how often it should get a time code from the ntp server. If you have a local ntp server, use a low number. The load on the guest is minimal. The on_resume_command, the actual action being taken after resuming from paused status, is run using the C library function system(), so it runs any script or command you can fit in there. Perhaps you want to adjust your time here, using another ntp utility. Adjusting the time on the guest will not affect the program.

The program runs as a daemon. That means it returns to the command prompt immediately while a copy of itself keeps running in the background. Stop it by killing it ("killall pausemonitor").

The program does some basic logging to syslog, mostly for errors.

I have not made up any initialization scripts for running this program automatically on boot, I am sure you can figure something out...

The code

This is the code for the program as described above. Compile it with libconfig linked in, for example: gcc -o pausemonitor pausemonitor.c -lconfig

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <syslog.h>
#include <libconfig.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>

/* Path of the libconfig file that main() reads at startup. */
#define CONFIG_FILE "/etc/pausemonitor.conf"
/* Program name; used as the syslog ident and in the startup log message. */
#define MYNAME "pausemonitor"

/* Queries the NTP server at the given address; main() treats a return value of 0 as failure. */
int ntpseconds(char* ntpserver);

/*
 * Daemon entry point.
 *
 * Forks into the background, reads CONFIG_FILE, then polls the NTP server
 * every delay_between_checks seconds. When more server time has passed
 * between two successful polls than the guest slept for (plus
 * accepted_diff seconds of tolerance), the guest is assumed to have been
 * paused and on_resume_command is run through system().
 *
 * Returns/exits with EXIT_FAILURE when daemonizing, reading the
 * configuration or the first NTP query fails. Once the polling loop has
 * started the process runs until it is killed.
 */
int main(void)
{
    pid_t pid, sid;
    int accepted_diff;
    int delay_between_checks;
    const char *ntp_server_ip;
    const char *on_resume_command;
    int now_stamp;
    int prev_stamp;
    int expected_elapsed;
    int elapsed;
    config_t cfg;

    /* Fork off the parent process; the parent returns to the shell. */
    pid = fork();
    if (pid < 0)
    {
        exit(EXIT_FAILURE);
    }
    if (pid > 0)
    {
        exit(EXIT_SUCCESS);
    }

    /* Change the file mode mask */
    umask(0);

    /* Open the log before anything else can fail, so failures are visible. */
    setlogmask (LOG_UPTO (LOG_NOTICE));
    openlog (MYNAME, LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1);

    /* Create a new SID for the child process */
    sid = setsid();
    if (sid < 0)
    {
        syslog (LOG_ERR, "Unable to create a new session, exiting...");
        exit(EXIT_FAILURE);
    }

    config_init(&cfg);

    /* Read the CONFIG_FILE file. If there is an error, log it and exit. */
    if(! config_read_file(&cfg, CONFIG_FILE))
    {
        syslog (LOG_ERR, "Config file error (%s) %s:%d - %s\n", CONFIG_FILE, config_error_file(&cfg),
                config_error_line(&cfg), config_error_text(&cfg));
        config_destroy(&cfg);
        return(EXIT_FAILURE);
    }
    /* Read all values, if they're not all there, log it and exit. */
    if(!(config_lookup_int(&cfg, "accepted_diff", &accepted_diff)
            && config_lookup_int(&cfg, "delay_between_checks", &delay_between_checks)
            && config_lookup_string(&cfg, "ntp_server_ip", &ntp_server_ip)
            && config_lookup_string(&cfg, "on_resume_command", &on_resume_command)))
    {
        config_destroy(&cfg);
        syslog (LOG_ERR, "Unable to read all values from %s, exiting...", CONFIG_FILE);
        return(EXIT_FAILURE);
    }
    /* A delay below one second would busy-loop against the NTP server (or,
       if negative, turn into a huge unsigned sleep); a negative tolerance
       makes no sense either. */
    if (delay_between_checks < 1 || accepted_diff < 0)
    {
        config_destroy(&cfg);
        syslog (LOG_ERR, "Invalid delay_between_checks or accepted_diff in %s, exiting...", CONFIG_FILE);
        return(EXIT_FAILURE);
    }

    /* The two strings point into cfg, so cfg has to stay alive for as long
       as they are used. ntpseconds() takes a non-const pointer, hence the
       cast. */
    prev_stamp = ntpseconds((char *)ntp_server_ip);
    if (prev_stamp == 0)
    {
        syslog (LOG_ERR, "Unable to contact ntp server, exiting...");
        config_destroy(&cfg);
        return(EXIT_FAILURE);
    }

    syslog (LOG_NOTICE, "%s has been loaded.", MYNAME);

    /* Seconds the guest has slept since prev_stamp was taken. */
    expected_elapsed = 0;

    /* The Big Loop */
    while (1)
    {
        sleep((unsigned int)delay_between_checks);
        expected_elapsed += delay_between_checks;

        now_stamp = ntpseconds((char *)ntp_server_ip);
        if (now_stamp == 0)
        {
            /* Query failed: keep the last good baseline instead of replacing
               it with the failure sentinel. Time spent inside the failed
               query is not counted, so accepted_diff has to absorb it. */
            continue;
        }

        elapsed = now_stamp - prev_stamp;
        if (elapsed > expected_elapsed + accepted_diff)
        {
            if (system(on_resume_command) == -1)
            {
                syslog (LOG_ERR, "Unable to run on_resume_command.");
            }
            syslog (LOG_NOTICE, "It seems we where paused for up to %i seconds.", (elapsed - expected_elapsed));
        }

        prev_stamp = now_stamp;
        expected_elapsed = 0;
    }

    /* Not reached: the loop above only ends when the process is killed. */
}

/*
 * Ask an NTP server for its current time.
 *
 * ntpserver: numeric IPv4 address of the server, in a form accepted by
 *            inet_addr(). Host names are not resolved.
 *
 * Returns the seconds field of the reply's transmit timestamp, narrowed to
 * int, or 0 on any failure (bad address, socket error, timeout, short
 * reply); failures are logged to syslog. NTP timestamps count seconds from
 * 1900, so the narrowed value is only meaningful for the zero check and for
 * taking the difference between two results.
 */
int ntpseconds(char* ntpserver)
{
    enum
    {
        NTP_PORT = 123,
        NTP_PACKET_LEN = 48,        /* size of a basic NTP packet */
        NTP_TRANSMIT_OFFSET = 40,   /* byte offset of the transmit timestamp */
        NTP_REPLY_TIMEOUT_SEC = 2   /* upper bound on waiting for the reply */
    };
    /* First header byte 010 (octal) encodes LI=0, VN=1, Mode=0; the rest of
       the request is zero. */
    unsigned char request[NTP_PACKET_LEN] = {010, 0};
    unsigned char reply[NTP_PACKET_LEN];
    struct sockaddr_in server_addr;
    struct timeval timeout;
    in_addr_t server_ip;
    const unsigned char *stamp;
    unsigned long seconds;
    ssize_t n;
    int s;
    int result = 0;

    /* Validate the address before touching the network. */
    if (ntpserver == NULL)
    {
        syslog (LOG_ERR, "Invalid ntp server address.");
        return 0;
    }
    server_ip = inet_addr(ntpserver);
    if (server_ip == INADDR_NONE)
    {
        syslog (LOG_ERR, "Invalid ntp server address.");
        return 0;
    }

    s = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (s < 0)
    {
        /* Nothing to close: no descriptor was created. */
        syslog (LOG_ERR, "Error opening socket.");
        return 0;
    }

    /* UDP replies can get lost; without a timeout recvfrom() would block
       the daemon forever. */
    timeout.tv_sec = NTP_REPLY_TIMEOUT_SEC;
    timeout.tv_usec = 0;
    if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) < 0)
    {
        syslog (LOG_ERR, "Error setting socket timeout.");
        goto done;
    }

    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_addr.s_addr = server_ip;
    server_addr.sin_port = htons(NTP_PORT);

    n = sendto(s, request, sizeof(request), 0,
               (struct sockaddr *)&server_addr, sizeof(server_addr));
    if (n != (ssize_t)sizeof(request))
    {
        syslog (LOG_ERR, "Error writing to socket.");
        goto done;
    }

    n = recvfrom(s, reply, sizeof(reply), 0, NULL, NULL);
    if (n < 0)
    {
        syslog (LOG_ERR, "Error reading from socket.");
        goto done;
    }
    if (n < (ssize_t)sizeof(reply))
    {
        syslog (LOG_ERR, "Short reply from ntp server.");
        goto done;
    }

    /* Assemble the big-endian 32-bit seconds field byte by byte, so the
       result does not depend on the host's word size or byte order. */
    stamp = reply + NTP_TRANSMIT_OFFSET;
    seconds = ((unsigned long)stamp[0] << 24)
            | ((unsigned long)stamp[1] << 16)
            | ((unsigned long)stamp[2] << 8)
            |  (unsigned long)stamp[3];
    result = (int)seconds;

done:
    close(s);
    return result;
}

If you are like me, and you like stuff small, then hardcode the configuration in the code, and leave out libconfig. Drop the syslog logging, and you can end up with this much simpler code:


#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>

/* Extra seconds tolerated on top of the polling interval before a pause is assumed. */
#define ACCEPTED_DIFF 3
/* Seconds to sleep between two NTP queries. */
#define DELAY_BETWEEN_CHECKS 5
/* Address of the NTP server to query; it is handed to inet_addr(), so it must be numeric. */
#define NTP_SERVER_IP "129.240.2.6"
/* Shell command passed to system() when a pause is detected. */
#define ON_RESUME_COMMAND "echo I have been paused! > ~/pausemonitor.txt"

/* Queries the NTP server at the given address; main() treats a return value of 0 as failure. */
int ntpseconds(char* ntpserver);

/*
 * Daemon entry point for the hard-coded variant.
 *
 * Forks into the background, then polls NTP_SERVER_IP every
 * DELAY_BETWEEN_CHECKS seconds. When more server time has passed between
 * two successful polls than the guest slept for (plus ACCEPTED_DIFF seconds
 * of tolerance), ON_RESUME_COMMAND is run through system().
 *
 * Exits with EXIT_FAILURE when daemonizing or the first NTP query fails.
 * Once the polling loop has started the process runs until it is killed.
 */
int main(void)
{
    pid_t pid, sid;
    int now_stamp;
    int prev_stamp;
    int expected_elapsed;

    /* Detach: the parent returns to the shell, the child carries on. */
    pid = fork();
    if (pid < 0)
    {
        exit(EXIT_FAILURE);
    }
    if (pid > 0)
    {
        exit(EXIT_SUCCESS);
    }
    umask(0);
    sid = setsid();
    if (sid < 0)
    {
        exit(EXIT_FAILURE);
    }

    prev_stamp = ntpseconds(NTP_SERVER_IP);
    if (prev_stamp == 0)
    {
        return(EXIT_FAILURE);
    }

    /* Seconds the guest has slept since prev_stamp was taken. */
    expected_elapsed = 0;

    while (1)
    {
        sleep(DELAY_BETWEEN_CHECKS);
        expected_elapsed += DELAY_BETWEEN_CHECKS;

        now_stamp = ntpseconds(NTP_SERVER_IP);
        if (now_stamp == 0)
        {
            /* Query failed: keep the last good baseline instead of replacing
               it with the failure sentinel. */
            continue;
        }

        if ((now_stamp - prev_stamp) > (expected_elapsed + ACCEPTED_DIFF))
        {
            system(ON_RESUME_COMMAND);
        }
        prev_stamp = now_stamp;
        expected_elapsed = 0;
    }

    /* Not reached: the loop above only ends when the process is killed. */
}

/*
 * Ask an NTP server for its current time.
 *
 * ntpserver: numeric IPv4 address of the server, in a form accepted by
 *            inet_addr(). Host names are not resolved.
 *
 * Returns the seconds field of the reply's transmit timestamp, narrowed to
 * int, or 0 on any failure (bad address, socket error, short reply). NTP
 * timestamps count seconds from 1900, so the narrowed value is only
 * meaningful for the zero check and for taking the difference between two
 * results.
 *
 * NOTE(review): no receive timeout is set, so a lost reply blocks in
 * recvfrom() indefinitely. Setting SO_RCVTIMEO would fix that but needs
 * <sys/time.h>, which this program does not include.
 */
int ntpseconds(char* ntpserver)
{
    enum
    {
        NTP_PORT = 123,
        NTP_PACKET_LEN = 48,        /* size of a basic NTP packet */
        NTP_TRANSMIT_OFFSET = 40    /* byte offset of the transmit timestamp */
    };
    /* First header byte 010 (octal) encodes LI=0, VN=1, Mode=0; the rest of
       the request is zero. */
    unsigned char request[NTP_PACKET_LEN] = {010, 0};
    unsigned char reply[NTP_PACKET_LEN];
    struct sockaddr_in server_addr = {0};
    in_addr_t server_ip;
    const unsigned char *stamp;
    unsigned long seconds;
    ssize_t n;
    int s;
    int result = 0;

    /* Validate the address before touching the network. */
    if (ntpserver == NULL)
    {
        return 0;
    }
    server_ip = inet_addr(ntpserver);
    if (server_ip == INADDR_NONE)
    {
        return 0;
    }

    s = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (s < 0)
    {
        /* Nothing to close: no descriptor was created. */
        return 0;
    }

    server_addr.sin_family = AF_INET;
    server_addr.sin_addr.s_addr = server_ip;
    server_addr.sin_port = htons(NTP_PORT);

    n = sendto(s, request, sizeof(request), 0,
               (struct sockaddr *)&server_addr, sizeof(server_addr));
    if (n == (ssize_t)sizeof(request))
    {
        n = recvfrom(s, reply, sizeof(reply), 0, NULL, NULL);
        if (n >= (ssize_t)sizeof(reply))
        {
            /* Assemble the big-endian 32-bit seconds field byte by byte, so
               the result does not depend on word size or byte order. */
            stamp = reply + NTP_TRANSMIT_OFFSET;
            seconds = ((unsigned long)stamp[0] << 24)
                    | ((unsigned long)stamp[1] << 16)
                    | ((unsigned long)stamp[2] << 8)
                    |  (unsigned long)stamp[3];
            result = (int)seconds;
        }
    }

    close(s);
    return result;
}