Sunday, December 22, 2013

Don't parse output from system utilities

It is common to notice that some system utility prints exactly the information you need, and then to do the wrong thing: parse the output of that utility. We'll do the opposite. As an example, let's get the broadcast address of an interface:
/sbin/ifconfig eth1
eth1      Link encap:Ethernet  HWaddr 00:0A:CD:14:CD:77  
          inet addr:192.168.44.177  Bcast:192.168.44.255  Mask:255.255.255.0
          inet6 addr: fe80::20a:cdff:fe14:cd77/64 Scope:Link
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:791580 errors:0 dropped:0 overruns:0 frame:0
          TX packets:381581 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000 
          RX bytes:468681235 (446.9 Mb)  TX bytes:47469801 (45.2 Mb)
          Interrupt:18 Base address:0xc000
What does ifconfig do to get this info?
strace /sbin/ifconfig eth1
...
socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 4
...
ioctl(4, SIOCGIFBRDADDR, {ifr_name="eth1", ifr_broadaddr={AF_INET, inet_addr("192.168.44.255")}}) = 0
...
strace shows that descriptor 4 (the socket opened above) is passed to ioctl. In Python one can do the same:
# get the constant beforehand
grep -R SIOCGIFBRDADDR /usr/include/*                         
/usr/include/bits/ioctls.h:#define SIOCGIFBRDADDR  0x8919  /* get broadcast PA address */


import fcntl
import socket
import struct

# Value of SIOCGIFBRDADDR as defined in /usr/include/bits/ioctls.h.
SIOCGIFBRDADDR = 0x8919

# Same kind of socket that strace showed ifconfig opening.
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_IP)
# The 's' format needs a bytes object on Python 3; a bytes literal also works
# on Python 2.6+. '256s' pads the interface name with NUL bytes.
iface = struct.pack('256s', b'eth1')
info = fcntl.ioctl(s.fileno(), SIOCGIFBRDADDR, iface)
# Bytes 20..23 of the returned buffer hold the IPv4 broadcast address.
socket.inet_ntoa(info[20:24])
'192.168.44.255'
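Why do we take bytes 20 to 24? The ioctl is given a struct ifreq (see netdevice(7)). Its first member, ifr_name, is IFNAMSIZ = 16 bytes long; adding offsetof(struct sockaddr_in, sin_addr), which is 4, gives offset 20. The address itself (declared below as unsigned long) occupies 4 bytes, so it lives in bytes 20 through 23, i.e. the slice [20:24].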
/* Request structure passed to the interface ioctls (see netdevice(7)). */
struct ifreq {
    char ifr_name[IFNAMSIZ]; /* Interface name */
    /* Per-request payload; SIOCGIFBRDADDR fills in ifr_broadaddr. */
    union {
        struct sockaddr ifr_addr;
        struct sockaddr ifr_dstaddr;
        struct sockaddr ifr_broadaddr;
        struct sockaddr ifr_netmask;
        struct sockaddr ifr_hwaddr;
        short           ifr_flags;
        int             ifr_ifindex;
        int             ifr_metric;
        int             ifr_mtu;
        struct ifmap    ifr_map;
        char            ifr_slave[IFNAMSIZ];
        char            ifr_newname[IFNAMSIZ];
        char *          ifr_data;
    };
};
/* IPv4 socket address; the test program below reports sin_addr at offset 4. */
struct sockaddr_in {
    short            sin_family;
    unsigned short   sin_port;
    struct in_addr   sin_addr;
    char             sin_zero[8];
};
/*
 * IPv4 address wrapper as printed in this listing.
 * NOTE(review): the text treats s_addr as 4 bytes; the system header
 * presumably declares it with a 32-bit type rather than a plain
 * unsigned long -- confirm against <netinet/in.h>.
 */
struct in_addr {
    unsigned long s_addr;
};
Note that there are no holes (padding) in these structs. One might expect a 4-byte hole before sin_addr on 64-bit systems, but there is none: the system headers declare these structures in a way that avoids holes. A simple test shows this:
#include <stdio.h>
#include <stddef.h>

#include <netinet/in.h>


/* Look-alike of struct in_addr that uses a plain unsigned long. */
struct in_addr2 {
    unsigned long s_addr;
};
/* Look-alike of struct sockaddr_in built on in_addr2, for comparison. */
struct sockaddr_in2 {
    short            sin_family;
    unsigned short   sin_port;
    struct in_addr2  sin_addr;
    char             sin_zero[8];
};

/* Print the offset of sin_addr in the system struct and in the look-alike. */
int main(void) {
    /* offsetof yields a size_t, so it must be printed with %zu, not %d. */
    printf("%zu\n", offsetof(struct sockaddr_in, sin_addr));
    printf("%zu\n", offsetof(struct sockaddr_in2, sin_addr));
    return 0;
}

gcc 1.c
./a.out
4
8
With the help of strace you can find out a lot about how utilities work. One more example:
strace ps
...
open("/proc", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 5
fcntl(5, F_GETFD)                       = 0x1 (flags FD_CLOEXEC)
getdents64(5, /* 211 entries */, 32768) = 5552
stat("/proc/1", {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0
open("/proc/1/stat", O_RDONLY)          = 6
read(6, "1 (init) S 0 1 1 0 -1 4202752 31"..., 1023) = 187
close(6)                                = 0
open("/proc/1/status", O_RDONLY)        = 6
read(6, "Name:\tinit\nState:\tS (sleeping)\nT"..., 1023) = 675
close(6)
stat("/proc/2", {st_mode=S_IFDIR|0555, st_size=0, ...}) = 0
... 
So, the advice is: do not parse the output of utilities. It is not a reliable thing to do, because the output format is subject to change. Use ioctl, sysctl, /proc, etc. to gather the information you need, and use strace and similar tools to find out which of them to use.

P.S. The funniest bug I've seen was when someone mixed stdout and stderr and got a different result on every parse, because the two streams were interleaved in an unpredictable way. Another one was caused by an i18n feature.

No comments:

Post a Comment