Logo Search packages:      
Sourcecode: strongswan version File versions  Download package

kernel.c

/* routines that interface with the kernel's IPsec mechanism
 * Copyright (C) 1997 Angelos D. Keromytis.
 * Copyright (C) 1998-2002  D. Hugh Redelmeier.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * RCSID $Id: kernel.c,v 1.26 2006/04/29 18:16:02 as Exp $
 */

#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <wait.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/queue.h>

#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#include <freeswan.h>
#include <freeswan/ipsec_policy.h>

#ifdef KLIPS
#include <signal.h>
#include <sys/time.h>   /* for select(2) */
#include <sys/types.h>  /* for select(2) */
#include <pfkeyv2.h>
#include <pfkey.h>
#include "kameipsec.h"
#endif /* KLIPS */

#include "constants.h"
#include "defs.h"
#include "rnd.h"
#include "id.h"
#include "connections.h"
#include "state.h"
#include "timer.h"
#include "kernel.h"
#include "kernel_netlink.h"
#include "kernel_pfkey.h"
#include "kernel_noklips.h"
#include "log.h"
#include "ca.h"
#include "server.h"
#include "whack.h"      /* for RC_LOG_SERIOUS */
#include "keys.h"

#ifdef NAT_TRAVERSAL
#include "packet.h"  /* for pb_stream in nat_traversal.h */
#include "nat_traversal.h"
#endif

#include "alg_info.h"
#include "kernel_alg.h"


bool can_do_IPcomp = TRUE;  /* can system actually perform IPCOMP? */

/* How far can IPsec messages arrive out of order before the anti-replay
 * logic loses track and swats them?  64 is the best KLIPS can do.
 * And 32 is the best XFRM can do...
 */
#define REPLAY_WINDOW   64
#define REPLAY_WINDOW_XFRM      32

/* test if the routes required for two different connections agree
 * It is assumed that the destination subnets agree; we are only
 * testing that the interfaces and nexthops match.
 */
#define routes_agree(c, d) ((c)->interface == (d)->interface \
      && sameaddr(&(c)->spd.this.host_nexthop, &(d)->spd.this.host_nexthop))

#ifndef KLIPS

bool no_klips = TRUE;   /* don't actually use KLIPS */

#else /* !KLIPS */

/* bare (connectionless) shunt (eroute) table
 *
 * Bare shunts are those that don't "belong" to a connection.
 * This happens because some %trapped traffic hasn't yet or cannot be
 * assigned to a connection.  The usual reason is that we cannot discover
 * the peer SG.  Another is that even when the peer has been discovered,
 * it may be that no connection matches all the particulars.
 * We record them so that, with scanning, we can discover
 * which %holds are news and which others should expire.
 */

#define SHUNT_SCAN_INTERVAL   (60 * 2)   /* time between scans of eroutes */

/* SHUNT_PATIENCE only has resolution down to a multiple of the sample rate,
 * SHUNT_SCAN_INTERVAL.
 * By making SHUNT_PATIENCE an odd multiple of half of SHUNT_SCAN_INTERVAL,
 * we minimize the effects of jitter.
 */
#define SHUNT_PATIENCE  (SHUNT_SCAN_INTERVAL * 15 / 2)      /* inactivity timeout */

struct bare_shunt {
    policy_prio_t policy_prio;
    ip_subnet ours;
    ip_subnet his;
    ip_said said;
    int transport_proto;
    unsigned long count;
    time_t last_activity;
    char *why;
    struct bare_shunt *next;
};

static struct bare_shunt *bare_shunts = NULL;

#ifdef DEBUG
static void
DBG_bare_shunt(const char *op, const struct bare_shunt *bs)
{
    DBG(DBG_KLIPS,
      {
          int ourport = ntohs(portof(&(bs)->ours.addr));
          int hisport = ntohs(portof(&(bs)->his.addr));
          char ourst[SUBNETTOT_BUF];
          char hist[SUBNETTOT_BUF];
          char sat[SATOT_BUF];
          char prio[POLICY_PRIO_BUF];

          subnettot(&(bs)->ours, 0, ourst, sizeof(ourst));
          subnettot(&(bs)->his, 0, hist, sizeof(hist));
          satot(&(bs)->said, 0, sat, sizeof(sat));
          fmt_policy_prio(bs->policy_prio, prio);
          DBG_log("%s bare shunt %p %s:%d -> %s:%d => %s:%d %s    %s"
            , op, (const void *)(bs), ourst, ourport, hist, hisport
            , sat, (bs)->transport_proto, prio, (bs)->why);
      });
}
#else /* !DEBUG */
#define DBG_bare_shunt(op, bs) {}
#endif /* !DEBUG */

/* The orphaned_holds table records %holds for which we
 * scan_proc_shunts found no representation of in any connection.
 * The corresponding ACQUIRE message might have been lost.
 */
struct eroute_info *orphaned_holds = NULL;

/* forward declaration */
static bool shunt_eroute(struct connection *c
                   , struct spd_route *sr
                   , enum routing_t rt_kind
                   , unsigned int op, const char *opname);
static void set_text_said(char *text_said
                    , const ip_address *dst
                    , ipsec_spi_t spi
                    , int proto);

bool no_klips = FALSE;  /* don't actually use KLIPS */

static const struct pfkey_proto_info null_proto_info[2] = {
      {
            proto: IPPROTO_ESP,
            encapsulation: ENCAPSULATION_MODE_TRANSPORT,
            reqid: 0
      },
      {
              proto: 0,
            encapsulation: 0,
            reqid: 0
      }
};

void
record_and_initiate_opportunistic(const ip_subnet *ours
                          , const ip_subnet *his
                          , int transport_proto
                          , const char *why)
{
    passert(samesubnettype(ours, his));

    /* Add to bare shunt list.
     * We need to do this because the shunt was installed by KLIPS
     * which can't do this itself.
     */
    {
      struct bare_shunt *bs = alloc_thing(struct bare_shunt, "bare shunt");

      bs->why = clone_str(why, "story for bare shunt");
      bs->ours = *ours;
      bs->his = *his;
      bs->transport_proto = transport_proto;
      bs->policy_prio = BOTTOM_PRIO;

      bs->said.proto = SA_INT;
      bs->said.spi = htonl(SPI_HOLD);
      bs->said.dst = *aftoinfo(subnettypeof(ours))->any;

      bs->count = 0;
      bs->last_activity = now();

      bs->next = bare_shunts;
      bare_shunts = bs;
      DBG_bare_shunt("add", bs);
    }

    /* actually initiate opportunism */
    {
      ip_address src, dst;

      networkof(ours, &src);
      networkof(his, &dst);
      initiate_opportunistic(&src, &dst, transport_proto, TRUE, NULL_FD);
    }

    /* if present, remove from orphaned_holds list.
     * NOTE: we do this last in case ours or his is a pointer into a member.
     */
    {
      struct eroute_info **pp, *p;

      for (pp = &orphaned_holds; (p = *pp) != NULL; pp = &p->next)
      {
          if (samesubnet(ours, &p->ours)
          && samesubnet(his, &p->his)
          && transport_proto == p->transport_proto
          && portof(&ours->addr) == portof(&p->ours.addr)
          && portof(&his->addr) == portof(&p->his.addr))
          {
            *pp = p->next;
            pfree(p);
            break;
          }
      }
    }
}

#endif /* KLIPS */

static unsigned get_proto_reqid(unsigned base, int proto)
{
    switch (proto)
    {
    default:
    case IPPROTO_COMP:
      base++;
      /* fall through */
    case IPPROTO_ESP:
      base++;
      /* fall through */
    case IPPROTO_AH:
      break;
    }

    return base;
}

/* Generate Unique SPI numbers.
 *
 * The specs say that the number must not be less than IPSEC_DOI_SPI_MIN.
 * Pluto generates numbers not less than IPSEC_DOI_SPI_OUR_MIN,
 * reserving numbers in between for manual keying (but we cannot so
 * restrict numbers generated by our peer).
 * XXX This should be replaced by a call to the kernel when
 * XXX we get an API.
 * The returned SPI is in network byte order.
 * We use a random number as the initial SPI so that there is
 * a good chance that different Pluto instances will choose
 * different SPIs.  This is good for two reasons.
 * - the keying material for the initiator and responder only
 *   differs if the SPIs differ.
 * - if Pluto is restarted, it would otherwise recycle the SPI
 *   numbers and confuse everything.  When the kernel generates
 *   SPIs, this will no longer matter.
 * We then allocate numbers sequentially.  Thus we don't have to
 * check if the number was previously used (assuming that no
 * SPI lives longer than 4G of its successors).
 */
ipsec_spi_t
get_ipsec_spi(ipsec_spi_t avoid, int proto, struct spd_route *sr, bool tunnel)
{
    static ipsec_spi_t spi = 0;     /* host order, so not returned directly! */
    char text_said[SATOT_BUF];

    set_text_said(text_said, &sr->this.host_addr, 0, proto);

    if (kernel_ops->get_spi)
      return kernel_ops->get_spi(&sr->that.host_addr
          , &sr->this.host_addr, proto, tunnel
          , get_proto_reqid(sr->reqid, proto)
          , IPSEC_DOI_SPI_OUR_MIN, 0xffffffff
          , text_said);

    spi++;
    while (spi < IPSEC_DOI_SPI_OUR_MIN || spi == ntohl(avoid))
      get_rnd_bytes((u_char *)&spi, sizeof(spi));

    DBG(DBG_CONTROL,
      {
          ipsec_spi_t spi_net = htonl(spi);

          DBG_dump("generate SPI:", (u_char *)&spi_net, sizeof(spi_net));
      });

    return htonl(spi);
}

/* Generate Unique CPI numbers.
 * The result is returned as an SPI (4 bytes) in network order!
 * The real bits are in the nework-low-order 2 bytes.
 * Modelled on get_ipsec_spi, but range is more limited:
 * 256-61439.
 * If we can't find one easily, return 0 (a bad SPI,
 * no matter what order) indicating failure.
 */
ipsec_spi_t
get_my_cpi(struct spd_route *sr, bool tunnel)
{
    static cpi_t
      first_busy_cpi = 0,
      latest_cpi;
    char text_said[SATOT_BUF];

    set_text_said(text_said, &sr->this.host_addr, 0, IPPROTO_COMP);

    if (kernel_ops->get_spi)
      return kernel_ops->get_spi(&sr->that.host_addr
          , &sr->this.host_addr, IPPROTO_COMP, tunnel
          , get_proto_reqid(sr->reqid, IPPROTO_COMP)
          , IPCOMP_FIRST_NEGOTIATED, IPCOMP_LAST_NEGOTIATED
          , text_said);

    while (!(IPCOMP_FIRST_NEGOTIATED <= first_busy_cpi && first_busy_cpi < IPCOMP_LAST_NEGOTIATED))
    {
      get_rnd_bytes((u_char *)&first_busy_cpi, sizeof(first_busy_cpi));
      latest_cpi = first_busy_cpi;
    }

    latest_cpi++;

    if (latest_cpi == first_busy_cpi)
      find_my_cpi_gap(&latest_cpi, &first_busy_cpi);

    if (latest_cpi > IPCOMP_LAST_NEGOTIATED)
      latest_cpi = IPCOMP_FIRST_NEGOTIATED;

    return htonl((ipsec_spi_t)latest_cpi);
}

/* invoke the updown script to do the routing and firewall commands required
 *
 * The user-specified updown script is run.  Parameters are fed to it in
 * the form of environment variables.  All such environment variables
 * have names starting with "PLUTO_".
 *
 * The operation to be performed is specified by PLUTO_VERB.  This
 * verb has a suffix "-host" if the client on this end is just the
 * host; otherwise the suffix is "-client".  If the address family
 * of the host is IPv6, an extra suffix of "-v6" is added.
 *
 * "prepare-host" and "prepare-client" are used to delete a route
 * that may exist (due to forces outside of Pluto).  It is used to
 * prepare for pluto creating a route.
 *
 * "route-host" and "route-client" are used to install a route.
 * Since routing is based only on destination, the PLUTO_MY_CLIENT_*
 * values are probably of no use (using them may signify a bug).
 *
 * "unroute-host" and "unroute-client" are used to delete a route.
 * Since routing is based only on destination, the PLUTO_MY_CLIENT_*
 * values are probably of no use (using them may signify a bug).
 *
 * "up-host" and "up-client" are run when an eroute is added (not replaced).
 * They are useful for adjusting a firewall: usually for adding a rule
 * to let processed packets flow between clients.  Note that only
 * one eroute may exist for a pair of client subnets but inbound
 * IPsec SAs may persist without an eroute.
 *
 * "down-host" and "down-client" are run when an eroute is deleted.
 * They are useful for adjusting a firewall.
 */

#ifndef DEFAULT_UPDOWN
# define DEFAULT_UPDOWN "ipsec _updown"
#endif

static bool
do_command(struct connection *c, struct spd_route *sr, const char *verb)
{
    char cmd[1536];     /* arbitrary limit on shell command length */
    const char *verb_suffix;

    /* figure out which verb suffix applies */
    {
      const char *hs, *cs;

      switch (addrtypeof(&sr->this.host_addr))
      {
          case AF_INET:
            hs = "-host";
            cs = "-client";
            break;
          case AF_INET6:
            hs = "-host-v6";
            cs = "-client-v6";
            break;
          default:
            loglog(RC_LOG_SERIOUS, "unknown address family");
            return FALSE;
      }
      verb_suffix = subnetisaddr(&sr->this.client, &sr->this.host_addr)
          ? hs : cs;
    }

    /* form the command string */
    {
      char
            nexthop_str[sizeof("PLUTO_NEXT_HOP='' ") +ADDRTOT_BUF] = "",
            srcip_str[sizeof("PLUTO_MY_SOURCEIP='' ")+ADDRTOT_BUF] = "",
          me_str[ADDRTOT_BUF],
          myid_str[BUF_LEN],
          myclient_str[SUBNETTOT_BUF],
          myclientnet_str[ADDRTOT_BUF],
          myclientmask_str[ADDRTOT_BUF],
          peer_str[ADDRTOT_BUF],
          peerid_str[BUF_LEN],
          peerclient_str[SUBNETTOT_BUF],
          peerclientnet_str[ADDRTOT_BUF],
          peerclientmask_str[ADDRTOT_BUF],
          peerca_str[BUF_LEN],
          secure_myid_str[BUF_LEN] = "",
          secure_peerid_str[BUF_LEN] = "",
          secure_peerca_str[BUF_LEN] = "";
      ip_address ta;
      pubkey_list_t *p;

      if (addrbytesptr(&sr->this.host_nexthop, NULL)
      && !isanyaddr(&sr->this.host_nexthop))
      {
          char *n;

          strcpy(nexthop_str, "PLUTO_NEXT_HOP='");
          n = nexthop_str + strlen(nexthop_str);

          addrtot(&sr->this.host_nexthop, 0
                ,n , sizeof(nexthop_str)-strlen(nexthop_str));
          strncat(nexthop_str, "' ", sizeof(nexthop_str));
      }

      if (addrbytesptr(&sr->this.host_srcip, NULL)
      && !isanyaddr(&sr->this.host_srcip))
        {
          char *n;

          strcpy(srcip_str, "PLUTO_MY_SOURCEIP='");
          n = srcip_str + strlen(srcip_str);

          addrtot(&sr->this.host_srcip, 0
                ,n , sizeof(srcip_str)-strlen(srcip_str));
            strncat(srcip_str, "' ", sizeof(srcip_str));
        }

      addrtot(&sr->this.host_addr, 0, me_str, sizeof(me_str));
      idtoa(&sr->this.id, myid_str, sizeof(myid_str));
      escape_metachar(myid_str, secure_myid_str, sizeof(secure_myid_str));
      subnettot(&sr->this.client, 0, myclient_str, sizeof(myclientnet_str));
      networkof(&sr->this.client, &ta);
      addrtot(&ta, 0, myclientnet_str, sizeof(myclientnet_str));
      maskof(&sr->this.client, &ta);
      addrtot(&ta, 0, myclientmask_str, sizeof(myclientmask_str));

      addrtot(&sr->that.host_addr, 0, peer_str, sizeof(peer_str));
      idtoa(&sr->that.id, peerid_str, sizeof(peerid_str));
      escape_metachar(peerid_str, secure_peerid_str, sizeof(secure_peerid_str));
      subnettot(&sr->that.client, 0, peerclient_str, sizeof(peerclientnet_str));
      networkof(&sr->that.client, &ta);
      addrtot(&ta, 0, peerclientnet_str, sizeof(peerclientnet_str));
      maskof(&sr->that.client, &ta);
      addrtot(&ta, 0, peerclientmask_str, sizeof(peerclientmask_str));

      for (p = pubkeys; p != NULL; p = p->next)
      {
          pubkey_t *key = p->key;
          int pathlen;

          if (key->alg == PUBKEY_ALG_RSA && same_id(&sr->that.id, &key->id)
          && trusted_ca(key->issuer, sr->that.ca, &pathlen))
          {
            dntoa_or_null(peerca_str, BUF_LEN, key->issuer, "");
            escape_metachar(peerca_str, secure_peerca_str, sizeof(secure_peerca_str));
            break;
          }
      }

      if (-1 == snprintf(cmd, sizeof(cmd)
          , "2>&1 "     /* capture stderr along with stdout */
          "PLUTO_VERSION='1.1' "    /* change VERSION when interface spec changes */
          "PLUTO_VERB='%s%s' "
          "PLUTO_CONNECTION='%s' "
          "%s"    /* optional PLUTO_NEXT_HOP */
          "PLUTO_INTERFACE='%s' "
          "%s"    /* optional PLUTO_HOST_ACCESS */
          "PLUTO_REQID='%u' "
          "PLUTO_ME='%s' "
          "PLUTO_MY_ID='%s' "
          "PLUTO_MY_CLIENT='%s' "
          "PLUTO_MY_CLIENT_NET='%s' "
          "PLUTO_MY_CLIENT_MASK='%s' "
          "PLUTO_MY_PORT='%u' "
          "PLUTO_MY_PROTOCOL='%u' "
          "PLUTO_PEER='%s' "
          "PLUTO_PEER_ID='%s' "
          "PLUTO_PEER_CLIENT='%s' "
          "PLUTO_PEER_CLIENT_NET='%s' "
          "PLUTO_PEER_CLIENT_MASK='%s' "
          "PLUTO_PEER_PORT='%u' "
          "PLUTO_PEER_PROTOCOL='%u' "
          "PLUTO_PEER_CA='%s' "
          "%s"    /* optional PLUTO_MY_SRCIP */
          "%s"    /* actual script */
          , verb, verb_suffix
          , c->name
          , nexthop_str
          , c->interface->vname
          , sr->this.hostaccess? "PLUTO_HOST_ACCESS='1' " : ""
          , sr->reqid + 1     /* ESP requid */
          , me_str
          , secure_myid_str
          , myclient_str
          , myclientnet_str
          , myclientmask_str
          , sr->this.port
          , sr->this.protocol
          , peer_str
          , secure_peerid_str
          , peerclient_str
          , peerclientnet_str
          , peerclientmask_str
          , sr->that.port
          , sr->that.protocol
          , secure_peerca_str
          , srcip_str
          , sr->this.updown == NULL? DEFAULT_UPDOWN : sr->this.updown))
      {
          loglog(RC_LOG_SERIOUS, "%s%s command too long!", verb, verb_suffix);
          return FALSE;
      }
    }

    DBG(DBG_CONTROL, DBG_log("executing %s%s: %s"
      , verb, verb_suffix, cmd));

#ifdef KLIPS
    if (!no_klips)
    {
      /* invoke the script, catching stderr and stdout
       * It may be of concern that some file descriptors will
       * be inherited.  For the ones under our control, we
       * have done fcntl(fd, F_SETFD, FD_CLOEXEC) to prevent this.
       * Any used by library routines (perhaps the resolver or syslog)
       * will remain.
       */
      FILE *f = popen(cmd, "r");

      if (f == NULL)
      {
          loglog(RC_LOG_SERIOUS, "unable to popen %s%s command", verb, verb_suffix);
          return FALSE;
      }

      /* log any output */
      for (;;)
      {
          /* if response doesn't fit in this buffer, it will be folded */
          char resp[256];

          if (fgets(resp, sizeof(resp), f) == NULL)
          {
            if (ferror(f))
            {
                log_errno((e, "fgets failed on output of %s%s command"
                  , verb, verb_suffix));
                return FALSE;
            }
            else
            {
                passert(feof(f));
                break;
            }
          }
          else
          {
            char *e = resp + strlen(resp);

            if (e > resp && e[-1] == '\n')
                e[-1] = '\0'; /* trim trailing '\n' */
            plog("%s%s output: %s", verb, verb_suffix, resp);
          }
      }

      /* report on and react to return code */
      {
          int r = pclose(f);

          if (r == -1)
          {
            log_errno((e, "pclose failed for %s%s command"
                , verb, verb_suffix));
            return FALSE;
          }
          else if (WIFEXITED(r))
          {
            if (WEXITSTATUS(r) != 0)
            {
                loglog(RC_LOG_SERIOUS, "%s%s command exited with status %d"
                  , verb, verb_suffix, WEXITSTATUS(r));
                return FALSE;
            }
          }
          else if (WIFSIGNALED(r))
          {
            loglog(RC_LOG_SERIOUS, "%s%s command exited with signal %d"
                , verb, verb_suffix, WTERMSIG(r));
            return FALSE;
          }
          else
          {
            loglog(RC_LOG_SERIOUS, "%s%s command exited with unknown status %d"
                , verb, verb_suffix, r);
            return FALSE;
          }
      }
    }
#endif /* KLIPS */
    return TRUE;
}

/* Check that we can route (and eroute).  Diagnose if we cannot. */

enum routability {
    route_impossible = 0,
    route_easy = 1,
    route_nearconflict = 2,
    route_farconflict = 3
};

static enum routability
could_route(struct connection *c)
{
    struct spd_route *esr, *rosr;
    struct connection *ero    /* who, if anyone, owns our eroute? */
      , *ro = route_owner(c, &rosr, &ero, &esr); /* who owns our route? */

    /* it makes no sense to route a connection that is ISAKMP-only */
    if (!NEVER_NEGOTIATE(c->policy) && !HAS_IPSEC_POLICY(c->policy))
    {
      loglog(RC_ROUTE, "cannot route an ISAKMP-only connection");
      return route_impossible;
    }

    /* if this is a Road Warrior template, we cannot route.
     * Opportunistic template is OK.
     */
    if (c->kind == CK_TEMPLATE && !(c->policy & POLICY_OPPO))
    {
      loglog(RC_ROUTE, "cannot route Road Warrior template");
      return route_impossible;
    }

    /* if we don't know nexthop, we cannot route */
    if (isanyaddr(&c->spd.this.host_nexthop))
    {
      loglog(RC_ROUTE, "cannot route connection without knowing our nexthop");
      return route_impossible;
    }

    /* if routing would affect IKE messages, reject */
    if (!no_klips
#ifdef NAT_TRAVERSAL
    && c->spd.this.host_port != NAT_T_IKE_FLOAT_PORT
#endif
    && c->spd.this.host_port != IKE_UDP_PORT
    && addrinsubnet(&c->spd.that.host_addr, &c->spd.that.client))
    {
      loglog(RC_LOG_SERIOUS, "cannot install route: peer is within its client");
      return route_impossible;
    }

    /* If there is already a route for peer's client subnet
     * and it disagrees about interface or nexthop, we cannot steal it.
     * Note: if this connection is already routed (perhaps for another
     * state object), the route will agree.
     * This is as it should be -- it will arise during rekeying.
     */
    if (ro != NULL && !routes_agree(ro, c))
    {
      loglog(RC_LOG_SERIOUS, "cannot route -- route already in use for \"%s\""
          , ro->name);
      return route_impossible;  /* another connection already
                             using the eroute */
    }

#ifdef KLIPS
    /* if there is an eroute for another connection, there is a problem */
    if (ero != NULL && ero != c)
    {
      struct connection *ero2, *ero_top;
      struct connection *inside, *outside;

      /*
       * note, wavesec (PERMANENT) goes *outside* and
       * OE goes *inside* (TEMPLATE)
       */
      inside = NULL;
      outside= NULL;
      if (ero->kind == CK_PERMANENT
         && c->kind == CK_TEMPLATE)
      {
          outside = ero;
          inside = c;
      }
      else if (c->kind == CK_PERMANENT
            && ero->kind == CK_TEMPLATE)
      {
          outside = c;
          inside = ero;
      }

      /* okay, check again, with correct order */
      if (outside && outside->kind == CK_PERMANENT
          && inside && inside->kind == CK_TEMPLATE)
      {
          char inst[CONN_INST_BUF];

          /* this is a co-terminal attempt of the "near" kind. */
          /* when chaining, we chain from inside to outside */

          /* XXX permit multiple deep connections? */
          passert(inside->policy_next == NULL);

          inside->policy_next = outside;

          /* since we are going to steal the eroute from the secondary
           * policy, we need to make sure that it no longer thinks that
           * it owns the eroute.
           */
          outside->spd.eroute_owner = SOS_NOBODY;
          outside->spd.routing = RT_UNROUTED_KEYED;

          /* set the priority of the new eroute owner to be higher
           * than that of the current eroute owner
           */
          inside->prio = outside->prio + 1;

          fmt_conn_instance(inside, inst);

          loglog(RC_LOG_SERIOUS
               , "conflict on eroute (%s), switching eroute to %s and linking %s"
               , inst, inside->name, outside->name);

          return route_nearconflict;
      }

      /* look along the chain of policies for one with the same name */
      ero_top = ero;

      for (ero2 = ero; ero2 != NULL; ero2 = ero->policy_next)
      {
          if (ero2->kind == CK_TEMPLATE
          && streq(ero2->name, c->name))
            break;
      }

      /* If we fell of the end of the list, then we found no TEMPLATE
       * so there must be a conflict that we can't resolve.
       * As the names are not equal, then we aren't replacing/rekeying.
       */
      if (ero2 == NULL)
      {
          char inst[CONN_INST_BUF];

          fmt_conn_instance(ero, inst);

          loglog(RC_LOG_SERIOUS
            , "cannot install eroute -- it is in use for \"%s\"%s #%lu"
            , ero->name, inst, esr->eroute_owner);
          return FALSE; /* another connection already using the eroute */
      }
    }
#endif /* KLIPS */
    return route_easy;
}

bool
trap_connection(struct connection *c)
{
    switch (could_route(c))
    {
    case route_impossible:
      return FALSE;

    case route_nearconflict:
    case route_easy:
      /* RT_ROUTED_TUNNEL is treated specially: we don't override
       * because we don't want to lose track of the IPSEC_SAs etc.
       */
      if (c->spd.routing < RT_ROUTED_TUNNEL)
      {
          return route_and_eroute(c, &c->spd, NULL);
      }
      return TRUE;

    case route_farconflict:
      return FALSE;
    }

    return FALSE;
}

/* delete any eroute for a connection and unroute it if route isn't shared */
void
unroute_connection(struct connection *c)
{
    struct spd_route *sr;
    enum routing_t cr;

    for (sr = &c->spd; sr; sr = sr->next)
    {
      cr = sr->routing;

      if (erouted(cr))
      {
          /* cannot handle a live one */
          passert(sr->routing != RT_ROUTED_TUNNEL);
#ifdef KLIPS
          shunt_eroute(c, sr, RT_UNROUTED, ERO_DELETE, "delete");
#endif
      }

      sr->routing = RT_UNROUTED;  /* do now so route_owner won't find us */

      /* only unroute if no other connection shares it */
      if (routed(cr) && route_owner(c, NULL, NULL, NULL) == NULL)
          (void) do_command(c, sr, "unroute");
    }
}


#ifdef KLIPS

static void
set_text_said(char *text_said, const ip_address *dst, ipsec_spi_t spi, int proto)
{
    ip_said said;

    initsaid(dst, spi, proto, &said);
    satot(&said, 0, text_said, SATOT_BUF);
}

/* find an entry in the bare_shunt table.
 * Trick: return a pointer to the pointer to the entry;
 * this allows the entry to be deleted.
 */
static struct bare_shunt **
bare_shunt_ptr(const ip_subnet *ours, const ip_subnet *his, int transport_proto)
{
    struct bare_shunt *p, **pp;

    for (pp = &bare_shunts; (p = *pp) != NULL; pp = &p->next)
    {
      if (samesubnet(ours, &p->ours)
      && samesubnet(his, &p->his)
      && transport_proto == p->transport_proto
      && portof(&ours->addr) == portof(&p->ours.addr)
      && portof(&his->addr) == portof(&p->his.addr))
          return pp;
    }
    return NULL;
}

/* free a bare_shunt entry, given a pointer to the pointer */
static void
free_bare_shunt(struct bare_shunt **pp)
{
    if (pp == NULL)
    {
      DBG(DBG_CONTROL,
          DBG_log("delete bare shunt: null pointer")
      )
    }
    else
    {
      struct bare_shunt *p = *pp;

      *pp = p->next;
      DBG_bare_shunt("delete", p);
      pfree(p->why);
      pfree(p);
    }
}

void
show_shunt_status(void)
{
    struct bare_shunt *bs;

    for (bs = bare_shunts; bs != NULL; bs = bs->next)
    {
      /* Print interesting fields.  Ignore count and last_active. */

      int ourport = ntohs(portof(&bs->ours.addr));
      int hisport = ntohs(portof(&bs->his.addr));
      char ourst[SUBNETTOT_BUF];
      char hist[SUBNETTOT_BUF];
      char sat[SATOT_BUF];
      char prio[POLICY_PRIO_BUF];

      subnettot(&(bs)->ours, 0, ourst, sizeof(ourst));
      subnettot(&(bs)->his, 0, hist, sizeof(hist));
      satot(&(bs)->said, 0, sat, sizeof(sat));
      fmt_policy_prio(bs->policy_prio, prio);

      whack_log(RC_COMMENT, "%s:%d -> %s:%d => %s:%d %s    %s"
          , ourst, ourport, hist, hisport, sat, bs->transport_proto
          , prio, bs->why);
    }
    if (bare_shunts != NULL)
      whack_log(RC_COMMENT, BLANK_FORMAT);      /* spacer */
}

/* Setup an IPsec route entry.
 * op is one of the ERO_* operators.
 */

static bool
raw_eroute(const ip_address *this_host
         , const ip_subnet *this_client
         , const ip_address *that_host
         , const ip_subnet *that_client
         , ipsec_spi_t spi
         , unsigned int proto
         , unsigned int satype
         , unsigned int transport_proto
         , const struct pfkey_proto_info *proto_info
         , time_t use_lifetime
         , unsigned int op
         , const char *opname USED_BY_DEBUG)
{
    char text_said[SATOT_BUF];

    set_text_said(text_said, that_host, spi, proto);

    DBG(DBG_CONTROL | DBG_KLIPS,
      {
          int sport = ntohs(portof(&this_client->addr));
          int dport = ntohs(portof(&that_client->addr));
          char mybuf[SUBNETTOT_BUF];
          char peerbuf[SUBNETTOT_BUF];

          subnettot(this_client, 0, mybuf, sizeof(mybuf));
          subnettot(that_client, 0, peerbuf, sizeof(peerbuf));
          DBG_log("%s eroute %s:%d -> %s:%d => %s:%d"
            , opname, mybuf, sport, peerbuf, dport
            , text_said, transport_proto);
      });

    return kernel_ops->raw_eroute(this_host, this_client
      , that_host, that_client, spi, satype, transport_proto, proto_info
      , use_lifetime, op, text_said);
}

/* test to see if %hold remains */
bool
has_bare_hold(const ip_address *src, const ip_address *dst, int transport_proto)
{
    ip_subnet this_client, that_client;
    struct bare_shunt **bspp;

    passert(addrtypeof(src) == addrtypeof(dst));
    happy(addrtosubnet(src, &this_client));
    happy(addrtosubnet(dst, &that_client));
    bspp = bare_shunt_ptr(&this_client, &that_client, transport_proto);
    return bspp != NULL
      && (*bspp)->said.proto == SA_INT && (*bspp)->said.spi == htonl(SPI_HOLD);
}


/* Replace (or delete) a shunt that is in the bare_shunts table.
 * Issues the PF_KEY commands and updates the bare_shunts table.
 */
bool
replace_bare_shunt(const ip_address *src, const ip_address *dst
               , policy_prio_t policy_prio
               , ipsec_spi_t shunt_spi    /* in host order! */
               , bool repl    /* if TRUE, replace; if FALSE, delete */
               , unsigned int transport_proto
               , const char *why)
{
    ip_subnet this_client, that_client;
    ip_subnet this_broad_client, that_broad_client;
    const ip_address *null_host = aftoinfo(addrtypeof(src))->any;

    passert(addrtypeof(src) == addrtypeof(dst));
    happy(addrtosubnet(src, &this_client));
    happy(addrtosubnet(dst, &that_client));
    this_broad_client = this_client;
    that_broad_client = that_client;
    setportof(0, &this_broad_client.addr);
    setportof(0, &that_broad_client.addr);

    if (repl)
    {
      struct bare_shunt **bs_pp = bare_shunt_ptr(&this_broad_client
                                     , &that_broad_client, 0);

      /* is there already a broad host-to-host bare shunt? */
      if (bs_pp == NULL)
      {
          if (raw_eroute(null_host, &this_broad_client, null_host, &that_broad_client
                     , htonl(shunt_spi), SA_INT, SADB_X_SATYPE_INT
                     , 0, null_proto_info
                     , SHUNT_PATIENCE, ERO_ADD, why))
          {
            struct bare_shunt *bs = alloc_thing(struct bare_shunt, "bare shunt");

            bs->ours = this_broad_client;
            bs->his =  that_broad_client;
            bs->transport_proto = 0;
            bs->said.proto = SA_INT;
            bs->why = clone_str(why, "bare shunt story");
            bs->policy_prio = policy_prio;
            bs->said.spi = htonl(shunt_spi);
            bs->said.dst = *null_host;
            bs->count = 0;
            bs->last_activity = now();
            bs->next = bare_shunts;
            bare_shunts = bs;
            DBG_bare_shunt("add", bs);
          }
      }
      shunt_spi = SPI_HOLD;
    }

    if (raw_eroute(null_host, &this_client, null_host, &that_client
               , htonl(shunt_spi), SA_INT, SADB_X_SATYPE_INT
               , transport_proto, null_proto_info
               , SHUNT_PATIENCE, ERO_DELETE, why))
    {
      struct bare_shunt **bs_pp = bare_shunt_ptr(&this_client, &that_client
                              , transport_proto);

      /* delete bare eroute */
      free_bare_shunt(bs_pp);
      return TRUE;
    }
    else
    {
      return FALSE;
    }
}

static bool
eroute_connection(struct spd_route *sr
, ipsec_spi_t spi, unsigned int proto, unsigned int satype
, const struct pfkey_proto_info *proto_info
, unsigned int op, const char *opname)
{
    const ip_address *peer = &sr->that.host_addr;
    char buf2[256];

    snprintf(buf2, sizeof(buf2)
           , "eroute_connection %s", opname);

    if (proto == SA_INT)
      peer = aftoinfo(addrtypeof(peer))->any;

    return raw_eroute(&sr->this.host_addr, &sr->this.client
                  , peer
                  , &sr->that.client
                  , spi, proto, satype
                  , sr->this.protocol, proto_info, 0, op, buf2);
}

/* assign a bare hold to a connection */

bool
assign_hold(struct connection *c USED_BY_DEBUG
          , struct spd_route *sr
          , int transport_proto
          , const ip_address *src, const ip_address *dst)
{
    /* either the automatically installed %hold eroute is broad enough
     * or we try to add a broader one and delete the automatic one.
     * Beware: this %hold might be already handled, but still squeak
     * through because of a race.
     */
    enum routing_t ro = sr->routing /* routing, old */
      , rn = ro;              /* routing, new */

    passert(LHAS(LELEM(CK_PERMANENT) | LELEM(CK_INSTANCE), c->kind));
    /* figure out what routing should become */
    switch (ro)
    {
    case RT_UNROUTED:
      rn = RT_UNROUTED_HOLD;
      break;
    case RT_ROUTED_PROSPECTIVE:
      rn = RT_ROUTED_HOLD;
      break;
    default:
      /* no change: this %hold is old news and should just be deleted */
      break;
    }

    /* we need a broad %hold, not the narrow one.
     * First we ensure that there is a broad %hold.
     * There may already be one (race condition): no need to create one.
     * There may already be a %trap: replace it.
     * There may not be any broad eroute: add %hold.
     * Once the broad %hold is in place, delete the narrow one.
     */
    if (rn != ro)
    {
      if (erouted(ro)
      ? !eroute_connection(sr, htonl(SPI_HOLD), SA_INT, SADB_X_SATYPE_INT
            , null_proto_info
            , ERO_REPLACE, "replace %trap with broad %hold")
      : !eroute_connection(sr, htonl(SPI_HOLD), SA_INT, SADB_X_SATYPE_INT
            , null_proto_info
            , ERO_ADD, "add broad %hold"))
      {
          return FALSE;
      }
    }
    if (!replace_bare_shunt(src, dst, BOTTOM_PRIO, SPI_HOLD, FALSE
    , transport_proto, "delete narrow %hold"))
    {
      return FALSE;
    }
    sr->routing = rn;
    return TRUE;
}

/* install or remove eroute for SA Group */
static bool
sag_eroute(struct state *st, struct spd_route *sr
         , unsigned op, const char *opname)
{
    u_int inner_proto = 0;
    u_int inner_satype = 0;
    ipsec_spi_t inner_spi = 0;
    struct pfkey_proto_info proto_info[4];
    int i;
    bool tunnel;

    /* figure out the SPI and protocol (in two forms)
     * for the innermost transformation.
     */

    i = sizeof(proto_info) / sizeof(proto_info[0]) - 1;
    proto_info[i].proto = 0;
    tunnel = FALSE;

    if (st->st_ah.present)
    {
      inner_spi = st->st_ah.attrs.spi;
      inner_proto = SA_AH;
      inner_satype = SADB_SATYPE_AH;

      i--;
      proto_info[i].proto = IPPROTO_AH;
      proto_info[i].encapsulation = st->st_ah.attrs.encapsulation;
      tunnel |= proto_info[i].encapsulation == ENCAPSULATION_MODE_TUNNEL;
      proto_info[i].reqid = sr->reqid;
    }

    if (st->st_esp.present)
    {
      inner_spi = st->st_esp.attrs.spi;
      inner_proto = SA_ESP;
      inner_satype = SADB_SATYPE_ESP;

      i--;
      proto_info[i].proto = IPPROTO_ESP;
      proto_info[i].encapsulation = st->st_esp.attrs.encapsulation;
      tunnel |= proto_info[i].encapsulation == ENCAPSULATION_MODE_TUNNEL;
      proto_info[i].reqid = sr->reqid + 1;
    }

    if (st->st_ipcomp.present)
    {
      inner_spi = st->st_ipcomp.attrs.spi;
      inner_proto = SA_COMP;
      inner_satype = SADB_X_SATYPE_COMP;

      i--;
      proto_info[i].proto = IPPROTO_COMP;
      proto_info[i].encapsulation = st->st_ipcomp.attrs.encapsulation;
      tunnel |= proto_info[i].encapsulation == ENCAPSULATION_MODE_TUNNEL;
      proto_info[i].reqid = sr->reqid + 2;
    }

    if (i == sizeof(proto_info) / sizeof(proto_info[0]) - 1)
    {
      impossible();     /* no transform at all! */
    }

    if (tunnel)
    {
      int j;

      inner_spi = st->st_tunnel_out_spi;
      inner_proto = SA_IPIP;
      inner_satype = SADB_X_SATYPE_IPIP;

      proto_info[i].encapsulation = ENCAPSULATION_MODE_TUNNEL;
      for (j = i + 1; proto_info[j].proto; j++)
      {
          proto_info[j].encapsulation = ENCAPSULATION_MODE_TRANSPORT;
      }
    }

    return eroute_connection(sr
      , inner_spi, inner_proto, inner_satype, proto_info + i
      , op, opname);
}

/* compute a (host-order!) SPI to implement the policy in connection c */
ipsec_spi_t
shunt_policy_spi(struct connection *c, bool prospective)
{
    /* note: these are in host order :-( */
    static const ipsec_spi_t shunt_spi[] =
    {
      SPI_TRAP,   /* --initiateontraffic */
      SPI_PASS,   /* --pass */
      SPI_DROP,   /* --drop */
      SPI_REJECT, /* --reject */
    };

    static const ipsec_spi_t fail_spi[] =
    {
      0,    /* --none*/
      SPI_PASS,   /* --failpass */
      SPI_DROP,   /* --faildrop */
      SPI_REJECT, /* --failreject */
    };

    return prospective
      ? shunt_spi[(c->policy & POLICY_SHUNT_MASK) >> POLICY_SHUNT_SHIFT]
      : fail_spi[(c->policy & POLICY_FAIL_MASK) >> POLICY_FAIL_SHIFT];
}

/* Add/replace/delete a shunt eroute.
 * Such an eroute determines the fate of packets without the use
 * of any SAs.  These are defaults, in effect.
 * If a negotiation has not been attempted, use %trap.
 * If negotiation has failed, the choice between %trap/%pass/%drop/%reject
 * is specified in the policy of connection c.
 */
static bool
shunt_eroute(struct connection *c
, struct spd_route *sr
, enum routing_t rt_kind
, unsigned int op, const char *opname)
{
    /* We are constructing a special SAID for the eroute.
     * The destination doesn't seem to matter, but the family does.
     * The protocol is SA_INT -- mark this as shunt.
     * The satype has no meaning, but is required for PF_KEY header!
     * The SPI signifies the kind of shunt.
     */
    ipsec_spi_t spi = shunt_policy_spi(c, rt_kind == RT_ROUTED_PROSPECTIVE);
    bool ok;

    if (spi == 0)
    {
      /* we're supposed to end up with no eroute: rejig op and opname */
      switch (op)
      {
      case ERO_REPLACE:
          /* replace with nothing == delete */
          op = ERO_DELETE;
          opname = "delete";
          break;
      case ERO_ADD:
          /* add nothing == do nothing */
          return TRUE;
      case ERO_DELETE:
          /* delete remains delete */
          break;
      default:
          bad_case(op);
      }
    }
    if (sr->routing == RT_ROUTED_ECLIPSED && c->kind == CK_TEMPLATE)
    {
      /* We think that we have an eroute, but we don't.
       * Adjust the request and account for eclipses.
       */
      passert(eclipsable(sr));
      switch (op)
      {
      case ERO_REPLACE:
          /* really an add */
          op = ERO_ADD;
          opname = "replace eclipsed";
          eclipse_count--;
          break;
      case ERO_DELETE:
          /* delete unnecessary: we don't actually have an eroute */
          eclipse_count--;
          return TRUE;
      case ERO_ADD:
      default:
          bad_case(op);
      }
    }
    else if (eclipse_count > 0 && op == ERO_DELETE && eclipsable(sr))
    {
      /* maybe we are uneclipsing something */
      struct spd_route *esr;
      struct connection *ue = eclipsed(c, &esr);

      if (ue != NULL)
      {
          esr->routing = RT_ROUTED_PROSPECTIVE;
          return shunt_eroute(ue, esr
                        , RT_ROUTED_PROSPECTIVE, ERO_REPLACE, "restoring eclipsed");
      }
    }

    ok = TRUE;
    if (kernel_ops->inbound_eroute)
    {
      ok = raw_eroute(&c->spd.that.host_addr, &c->spd.that.client
          , &c->spd.this.host_addr, &c->spd.this.client
          , htonl(spi), SA_INT, SADB_X_SATYPE_INT
          , 0, null_proto_info, 0
          , op | (SADB_X_SAFLAGS_INFLOW << ERO_FLAG_SHIFT), opname);
    }
    return eroute_connection(sr, htonl(spi), SA_INT, SADB_X_SATYPE_INT
      , null_proto_info, op, opname) && ok;
}


/*
 * This is only called when s is a likely SAID with  trailing protocol i.e.
 * it has the form :-
 *
 *   %<keyword>:p
 *   <ip-proto><spi>@a.b.c.d:p
 *
 * The task here is to remove the ":p" part so that the rest can be read
 * by another routine.
 */
static const char *
read_proto(const char * s, size_t * len, int * transport_proto)
{
    const char * p;
    const char * ugh;
    unsigned long proto;
    size_t l;

    l = *len;
    p = memchr(s, ':', l);
    if (p == 0) {
        *transport_proto = 0;
        return 0;
    }
    ugh = ttoul(p+1, l-((p-s)+1), 10, &proto);
    if (ugh != 0)
        return ugh;
    if (proto > 65535)
        return "protocol number is too large, legal range is 0-65535";
    *len = p-s;
    *transport_proto = proto;
    return 0;
}


/* scan /proc/net/ipsec_eroute every once in a while, looking for:
 *
 * - %hold shunts of which Pluto isn't aware.  This situation could
 *   be caused by lost ACQUIRE messages.  When found, they will
 *   added to orphan_holds.  This in turn will lead to Opportunistic
 *   initiation.
 *
 * - other kinds of shunts that haven't been used recently.  These will be
 *   deleted.  They represent OE failures.
 *
 * - recording recent uses of tunnel eroutes so that rekeying decisions
 *   can be made for OE connections.
 *
 * Here are some sample lines:
 * 10         10.3.2.1.0/24    -> 0.0.0.0/0          => %trap
 * 259        10.3.2.1.115/32  -> 10.19.75.161/32    => tun0x1002@10.19.75.145
 * 71         10.44.73.97/32   -> 0.0.0.0/0          => %trap
 * 4119       10.44.73.97/32   -> 10.114.121.41/32   => %pass
 * Newer versions of KLIPS start each line with a 32-bit packet count.
 * If available, the count is used to detect whether a %pass shunt is in use.
 *
 * NOTE: execution time is quadratic in the number of eroutes since the
 * searching for each is sequential.  If this becomes a problem, faster
 * searches could be implemented (hash or radix tree, for example).
 */
void
scan_proc_shunts(void)
{
    static const char procname[] = "/proc/net/ipsec_eroute";
    FILE *f;
    time_t nw = now();
    int lino;
    struct eroute_info *expired = NULL;

    event_schedule(EVENT_SHUNT_SCAN, SHUNT_SCAN_INTERVAL, NULL);

    DBG(DBG_CONTROL,
      DBG_log("scanning for shunt eroutes")
    )

    /* free any leftover entries: they will be refreshed if still current */
    while (orphaned_holds != NULL)
    {
      struct eroute_info *p = orphaned_holds;

      orphaned_holds = p->next;
      pfree(orphaned_holds);
    }

    /* decode the /proc file.  Don't do anything strenuous to it
     * (certainly no PF_KEY stuff) to minimize the chance that it
     * might change underfoot.
     */

    f = fopen(procname, "r");
    if (f == NULL)
      return;

    /* for each line... */
    for (lino = 1; ; lino++)
    {
      unsigned char buf[1024];  /* should be big enough */
      chunk_t field[10];        /* 10 is loose upper bound */
      chunk_t *ff = NULL;       /* fixed fields (excluding optional count) */
      int fi;
      struct eroute_info eri;
      char *cp;
      err_t context = ""
          , ugh = NULL;

      cp = fgets(buf, sizeof(buf), f);
      if (cp == NULL)
          break;

      /* break out each field
       * Note: if there are too many fields, just stop;
       * it will be diagnosed a little later.
       */
      for (fi = 0; fi < (int)elemsof(field); fi++)
      {
          static const char sep[] = " \t\n";    /* field-separating whitespace */
          size_t w;

          cp += strspn(cp, sep);    /* find start of field */
          w = strcspn(cp, sep);     /* find width of field */
          setchunk(field[fi], cp, w);
          cp += w;
          if (w == 0)
            break;
      }

      /* This odd do-hickey is to share error reporting code.
       * A break will get to that common code.  The setting
       * of "ugh" and "context" parameterize it.
       */
      do {
          /* Old entries have no packet count; new ones do.
           * check if things are as they should be.
           */
          if (fi == 5)
            ff = &field[0];   /* old form, with no count */
          else if (fi == 6)
            ff = &field[1];   /* new form, with count */
          else
          {
            ugh = "has wrong number of fields";
            break;
          }

          if (ff[1].len != 2
          || strncmp(ff[1].ptr, "->", 2) != 0
          || ff[3].len != 2
          || strncmp(ff[3].ptr, "=>", 2) != 0)
          {
            ugh = "is missing -> or =>";
            break;
          }

          /* actually digest fields of interest */

          /* packet count */

          eri.count = 0;
          if (ff != field)
          {
            context = "count field is malformed: ";
            ugh = ttoul(field[0].ptr, field[0].len, 10, &eri.count);
            if (ugh != NULL)
                break;
          }

          /* our client */

          context = "source subnet field malformed: ";
          ugh = ttosubnet(ff[0].ptr, ff[0].len, AF_INET, &eri.ours);
          if (ugh != NULL)
            break;

          /* his client */

          context = "destination subnet field malformed: ";
          ugh = ttosubnet(ff[2].ptr, ff[2].len, AF_INET, &eri.his);
          if (ugh != NULL)
            break;

          /* SAID */

          context = "SA ID field malformed: ";
            ugh = read_proto(ff[4].ptr, &ff[4].len, &eri.transport_proto);
          if (ugh != NULL)
            break;
          ugh = ttosa(ff[4].ptr, ff[4].len, &eri.said);
      } while (FALSE);

      if (ugh != NULL)
      {
          plog("INTERNAL ERROR: %s line %d %s%s"
            , procname, lino, context, ugh);
          continue;     /* ignore rest of line */
      }

      /* Now we have decoded eroute, let's consider it.
       * For shunt eroutes:
       *
       * %hold: if not known, add to orphaned_holds list for initiation
       *    because ACQUIRE might have been lost.
       *
       * %pass, %drop, %reject: determine if idle; if so, blast it away.
       *    Can occur bare (if DNS provided insufficient information)
       *    or with a connection (failure context).
       *    Could even be installed by ipsec manual.
       *
       * %trap: always welcome.
       *
       * For other eroutes: find state and record count change
       */
      if (eri.said.proto == SA_INT)
      {
          /* shunt eroute */
          switch (ntohl(eri.said.spi))
          {
          case SPI_HOLD:
            if (bare_shunt_ptr(&eri.ours, &eri.his, eri.transport_proto) == NULL
            && shunt_owner(&eri.ours, &eri.his) == NULL)
            {
                int ourport = ntohs(portof(&eri.ours.addr));
                int hisport = ntohs(portof(&eri.his.addr));
                char ourst[SUBNETTOT_BUF];
                char hist[SUBNETTOT_BUF];
                char sat[SATOT_BUF];

                subnettot(&eri.ours, 0, ourst, sizeof(ourst));
                subnettot(&eri.his, 0, hist, sizeof(hist));
                satot(&eri.said, 0, sat, sizeof(sat));

                DBG(DBG_CONTROL,
                  DBG_log("add orphaned shunt %s:%d -> %s:%d => %s:%d"
                      , ourst, ourport, hist, hisport, sat, eri.transport_proto)
                 )
                eri.next = orphaned_holds;
                orphaned_holds = clone_thing(eri, "orphaned %hold");
            }
            break;

          case SPI_PASS:
          case SPI_DROP:
          case SPI_REJECT:
            /* nothing sensible to do if we don't have counts */
            if (ff != field)
            {
                struct bare_shunt **bs_pp
                  = bare_shunt_ptr(&eri.ours, &eri.his, eri.transport_proto);

                if (bs_pp != NULL)
                {
                  struct bare_shunt *bs = *bs_pp;

                  if (eri.count != bs->count)
                  {
                      bs->count = eri.count;
                      bs->last_activity = nw;
                  }
                  else if (nw - bs->last_activity > SHUNT_PATIENCE)
                  {
                      eri.next = expired;
                      expired = clone_thing(eri, "expired %pass");
                  }
                }
            }
            break;

          case SPI_TRAP:
            break;

          default:
            bad_case(ntohl(eri.said.spi));
          }
      }
      else
      {
          /* regular (non-shunt) eroute */
          state_eroute_usage(&eri.ours, &eri.his, eri.count, nw);
      }
    } /* for each line */
    fclose(f);

    /* Now that we've finished processing the /proc file,
     * it is safe to delete the expired %pass shunts.
     */
    while (expired != NULL)
    {
      struct eroute_info *p = expired;
      ip_address src, dst;

      networkof(&p->ours, &src);
      networkof(&p->his, &dst);
      (void) replace_bare_shunt(&src, &dst
          , BOTTOM_PRIO /* not used because we are deleting.  This value is a filler */
          , SPI_PASS    /* not used because we are deleting.  This value is a filler */
          , FALSE, p->transport_proto, "delete expired bare shunts");
      expired = p->next;
      pfree(p);
    }
}

static bool
del_spi(ipsec_spi_t spi, int proto
, const ip_address *src, const ip_address *dest)
{
    char text_said[SATOT_BUF];
    struct kernel_sa sa;

    set_text_said(text_said, dest, spi, proto);

    DBG(DBG_KLIPS, DBG_log("delete %s", text_said));

    memset(&sa, 0, sizeof(sa));
    sa.spi = spi;
    sa.proto = proto;
    sa.src = src;
    sa.dst = dest;
    sa.text_said = text_said;

    return kernel_ops->del_sa(&sa);
}

/* Setup a pair of SAs. Code taken from setsa.c and spigrp.c, in
 * ipsec-0.5.
 */

static bool
setup_half_ipsec_sa(struct state *st, bool inbound)
{
    /* Build an inbound or outbound SA */

    struct connection *c = st->st_connection;
    ip_subnet src, dst;
    ip_subnet src_client, dst_client;
    ipsec_spi_t inner_spi = 0;
    u_int proto = 0;
    u_int satype = SADB_SATYPE_UNSPEC;
    bool replace;

    /* SPIs, saved for spigrouping or undoing, if necessary */
    struct kernel_sa
      said[EM_MAXRELSPIS],
      *said_next = said;

    char text_said[SATOT_BUF];
    int encapsulation;

    replace = inbound && (kernel_ops->get_spi != NULL);

    src.maskbits = 0;
    dst.maskbits = 0;

    if (inbound)
    {
      src.addr = c->spd.that.host_addr;
      dst.addr = c->spd.this.host_addr;
      src_client = c->spd.that.client;
      dst_client = c->spd.this.client;
    }
    else
    {
      src.addr = c->spd.this.host_addr,
      dst.addr = c->spd.that.host_addr;
      src_client = c->spd.this.client;
      dst_client = c->spd.that.client;
    }

    encapsulation = ENCAPSULATION_MODE_TRANSPORT;
    if (st->st_ah.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL
      || st->st_esp.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL
      || st->st_ipcomp.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL)
    {
      encapsulation = ENCAPSULATION_MODE_TUNNEL;
    }

    memset(said, 0, sizeof(said));

    /* If we are tunnelling, set up IP in IP pseudo SA */

    if (kernel_ops->inbound_eroute)
    {
      inner_spi = 256;
      proto = SA_IPIP;
      satype = SADB_SATYPE_UNSPEC;
    }
    else if (encapsulation == ENCAPSULATION_MODE_TUNNEL)
    {
      /* XXX hack alert -- we SHOULD NOT HAVE TO HAVE A DIFFERENT SPI
       * XXX FOR IP-in-IP ENCAPSULATION!
       */

      ipsec_spi_t ipip_spi;

      /* Allocate an SPI for the tunnel.
       * Since our peer will never see this,
       * and it comes from its own number space,
       * it is purely a local implementation wart.
       */
      {
          static ipsec_spi_t last_tunnel_spi = IPSEC_DOI_SPI_OUR_MIN;

          ipip_spi = htonl(++last_tunnel_spi);
          if (inbound)
            st->st_tunnel_in_spi = ipip_spi;
          else
            st->st_tunnel_out_spi = ipip_spi;
      }

      set_text_said(text_said
          , &c->spd.that.host_addr, ipip_spi, SA_IPIP);

      said_next->src = &src.addr;
      said_next->dst = &dst.addr;
      said_next->src_client = &src_client;
      said_next->dst_client = &dst_client;
      said_next->spi = ipip_spi;
      said_next->satype = SADB_X_SATYPE_IPIP;
      said_next->text_said = text_said;

      if (!kernel_ops->add_sa(said_next, replace))
          goto fail;

      said_next++;

      inner_spi = ipip_spi;
      proto = SA_IPIP;
      satype = SADB_X_SATYPE_IPIP;
    }

    /* set up IPCOMP SA, if any */

    if (st->st_ipcomp.present)
    {
      ipsec_spi_t ipcomp_spi = inbound? st->st_ipcomp.our_spi : st->st_ipcomp.attrs.spi;
      unsigned compalg;

      switch (st->st_ipcomp.attrs.transid)
      {
          case IPCOMP_DEFLATE:
            compalg = SADB_X_CALG_DEFLATE;
            break;

          default:
            loglog(RC_LOG_SERIOUS, "IPCOMP transform %s not implemented"
                , enum_name(&ipcomp_transformid_names, st->st_ipcomp.attrs.transid));
            goto fail;
      }

      set_text_said(text_said, &dst.addr, ipcomp_spi, SA_COMP);

      said_next->src = &src.addr;
      said_next->dst = &dst.addr;
      said_next->src_client = &src_client;
      said_next->dst_client = &dst_client;
      said_next->spi = ipcomp_spi;
      said_next->satype = SADB_X_SATYPE_COMP;
      said_next->compalg = compalg;
      said_next->encapsulation = encapsulation;
      said_next->reqid = c->spd.reqid + 2;
      said_next->text_said = text_said;

      if (!kernel_ops->add_sa(said_next, replace))
          goto fail;

      said_next++;

      encapsulation = ENCAPSULATION_MODE_TRANSPORT;
    }

    /* set up ESP SA, if any */

    if (st->st_esp.present)
    {
      ipsec_spi_t esp_spi = inbound? st->st_esp.our_spi : st->st_esp.attrs.spi;
      u_char *esp_dst_keymat = inbound? st->st_esp.our_keymat : st->st_esp.peer_keymat;
      const struct esp_info *ei;
      u_int16_t key_len;

      static const struct esp_info esp_info[] = {
          { ESP_NULL, AUTH_ALGORITHM_HMAC_MD5,
            0, HMAC_MD5_KEY_LEN,
            SADB_EALG_NULL, SADB_AALG_MD5_HMAC },
          { ESP_NULL, AUTH_ALGORITHM_HMAC_SHA1,
            0, HMAC_SHA1_KEY_LEN,
            SADB_EALG_NULL, SADB_AALG_SHA1_HMAC },

          { ESP_DES, AUTH_ALGORITHM_NONE,
            DES_CBC_BLOCK_SIZE, 0,
            SADB_EALG_DES_CBC, SADB_AALG_NONE },
          { ESP_DES, AUTH_ALGORITHM_HMAC_MD5,
            DES_CBC_BLOCK_SIZE, HMAC_MD5_KEY_LEN,
            SADB_EALG_DES_CBC, SADB_AALG_MD5_HMAC },
          { ESP_DES, AUTH_ALGORITHM_HMAC_SHA1,
            DES_CBC_BLOCK_SIZE,
            HMAC_SHA1_KEY_LEN, SADB_EALG_DES_CBC, SADB_AALG_SHA1_HMAC },

          { ESP_3DES, AUTH_ALGORITHM_NONE,
            DES_CBC_BLOCK_SIZE * 3, 0,
            SADB_EALG_3DES_CBC, SADB_AALG_NONE },
          { ESP_3DES, AUTH_ALGORITHM_HMAC_MD5,
            DES_CBC_BLOCK_SIZE * 3, HMAC_MD5_KEY_LEN,
            SADB_EALG_3DES_CBC, SADB_AALG_MD5_HMAC },
          { ESP_3DES, AUTH_ALGORITHM_HMAC_SHA1,
            DES_CBC_BLOCK_SIZE * 3, HMAC_SHA1_KEY_LEN,
            SADB_EALG_3DES_CBC, SADB_AALG_SHA1_HMAC },
      };

#ifdef NAT_TRAVERSAL
      u_int8_t natt_type = 0;
      u_int16_t natt_sport = 0, natt_dport = 0;
      ip_address natt_oa;

      if (st->nat_traversal & NAT_T_DETECTED) {
          natt_type = (st->nat_traversal & NAT_T_WITH_PORT_FLOATING) ?
            ESPINUDP_WITH_NON_ESP : ESPINUDP_WITH_NON_IKE;
          natt_sport = inbound? c->spd.that.host_port : c->spd.this.host_port;
          natt_dport = inbound? c->spd.this.host_port : c->spd.that.host_port;
          natt_oa = st->nat_oa;
      }
#endif

      for (ei = esp_info; ; ei++)
      {
          if (ei == &esp_info[elemsof(esp_info)])
          {
            /* Check for additional kernel alg */
#ifndef NO_KERNEL_ALG
            if ((ei=kernel_alg_esp_info(st->st_esp.attrs.transid, 
                              st->st_esp.attrs.auth))!=NULL) {
                  break;
            }
#endif

            /* note: enum_show may use a static buffer, so two
             * calls in one printf would be a mistake.
             * enum_name does the same job, without a static buffer,
             * assuming the name will be found.
             */
            loglog(RC_LOG_SERIOUS, "ESP transform %s / auth %s not implemented yet"
                , enum_name(&esp_transformid_names, st->st_esp.attrs.transid)
                , enum_name(&auth_alg_names, st->st_esp.attrs.auth));
            goto fail;
          }

          if (st->st_esp.attrs.transid == ei->transid
          && st->st_esp.attrs.auth == ei->auth)
            break;
      }

      key_len = st->st_esp.attrs.key_len/8;
      if (key_len) {
            /* XXX: must change to check valid _range_ key_len */
            if (key_len > ei->enckeylen) {
                  loglog(RC_LOG_SERIOUS, "ESP transform %s passed key_len=%d > %d",
                  enum_name(&esp_transformid_names, st->st_esp.attrs.transid),
                  (int)key_len, (int)ei->enckeylen);
                  goto fail;
            }
      } else {
            key_len = ei->enckeylen;
      }
      /* Grrrrr.... f*cking 7 bits jurassic algos  */

      /* 168 bits in kernel, need 192 bits for keymat_len */
      if (ei->transid == ESP_3DES && key_len == 21) 
            key_len = 24;

      /* 56 bits in kernel, need 64 bits for keymat_len */
      if (ei->transid == ESP_DES && key_len == 7) 
            key_len = 8;

      /* divide up keying material */
      /* passert(st->st_esp.keymat_len == ei->enckeylen + ei->authkeylen); */
      DBG(DBG_KLIPS|DBG_CONTROL|DBG_PARSING, 
            if(st->st_esp.keymat_len != key_len + ei->authkeylen)
                  DBG_log("keymat_len=%d key_len=%d authkeylen=%d",
                        st->st_esp.keymat_len, (int)key_len, (int)ei->authkeylen);
      );
      passert(st->st_esp.keymat_len == key_len + ei->authkeylen);

      set_text_said(text_said, &dst.addr, esp_spi, SA_ESP);

      said_next->src = &src.addr;
      said_next->dst = &dst.addr;
      said_next->src_client = &src_client;
      said_next->dst_client = &dst_client;
      said_next->spi = esp_spi;
      said_next->satype = SADB_SATYPE_ESP;
      said_next->replay_window = (kernel_ops->type == KERNEL_TYPE_KLIPS) ? REPLAY_WINDOW : REPLAY_WINDOW_XFRM;
      said_next->authalg = ei->authalg;
      said_next->authkeylen = ei->authkeylen;
      /* said_next->authkey = esp_dst_keymat + ei->enckeylen; */
      said_next->authkey = esp_dst_keymat + key_len;
      said_next->encalg = ei->encryptalg;
      /* said_next->enckeylen = ei->enckeylen; */
      said_next->enckeylen = key_len;
      said_next->enckey = esp_dst_keymat;
      said_next->encapsulation = encapsulation;
      said_next->reqid = c->spd.reqid + 1;
#ifdef NAT_TRAVERSAL
      said_next->natt_sport = natt_sport;
      said_next->natt_dport = natt_dport;
      said_next->transid = st->st_esp.attrs.transid;
      said_next->natt_type = natt_type;
      said_next->natt_oa = &natt_oa;
#endif      
      said_next->text_said = text_said;

      if (!kernel_ops->add_sa(said_next, replace))
          goto fail;

      said_next++;

      encapsulation = ENCAPSULATION_MODE_TRANSPORT;
    }

    /* set up AH SA, if any */

    if (st->st_ah.present)
    {
      ipsec_spi_t ah_spi = inbound? st->st_ah.our_spi : st->st_ah.attrs.spi;
      u_char *ah_dst_keymat = inbound? st->st_ah.our_keymat : st->st_ah.peer_keymat;

      unsigned char authalg;

      switch (st->st_ah.attrs.auth)
      {
      case AUTH_ALGORITHM_HMAC_MD5:
          authalg = SADB_AALG_MD5_HMAC;
          break;

      case AUTH_ALGORITHM_HMAC_SHA1:
          authalg = SADB_AALG_SHA1_HMAC;
          break;

      default:
          loglog(RC_LOG_SERIOUS, "%s not implemented yet"
            , enum_show(&auth_alg_names, st->st_ah.attrs.auth));
          goto fail;
      }

      set_text_said(text_said, &dst.addr, ah_spi, SA_AH);

      said_next->src = &src.addr;
      said_next->dst = &dst.addr;
      said_next->src_client = &src_client;
      said_next->dst_client = &dst_client;
      said_next->spi = ah_spi;
      said_next->satype = SADB_SATYPE_AH;
      said_next->replay_window = (kernel_ops->type == KERNEL_TYPE_KLIPS) ? REPLAY_WINDOW : REPLAY_WINDOW_XFRM;
      said_next->authalg = authalg;
      said_next->authkeylen = st->st_ah.keymat_len;
      said_next->authkey = ah_dst_keymat;
      said_next->encapsulation = encapsulation;
      said_next->reqid = c->spd.reqid;
      said_next->text_said = text_said;

      if (!kernel_ops->add_sa(said_next, replace))
          goto fail;

      said_next++;

      encapsulation = ENCAPSULATION_MODE_TRANSPORT;
    }

    if (st->st_ah.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL
    || st->st_esp.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL
    || st->st_ipcomp.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL)
    {
      encapsulation = ENCAPSULATION_MODE_TUNNEL;
    }

    if (kernel_ops->inbound_eroute ? c->spd.eroute_owner == SOS_NOBODY
      : encapsulation == ENCAPSULATION_MODE_TUNNEL)
    {
      /* If inbound, and policy does not specifie DISABLEARRIVALCHECK,
       * tell KLIPS to enforce the IP addresses appropriate for this tunnel.
       * Note reversed ends.
       * Not much to be done on failure.
       */
      if (inbound && (c->policy & POLICY_DISABLEARRIVALCHECK) == 0)
      {
          struct pfkey_proto_info proto_info[4];
          int i = 0;
          
          if (st->st_ipcomp.present)
          {
            proto_info[i].proto = IPPROTO_COMP;
            proto_info[i].encapsulation = st->st_ipcomp.attrs.encapsulation;
            proto_info[i].reqid = c->spd.reqid + 2;
            i++;
          }
          
          if (st->st_esp.present)
          {
            proto_info[i].proto = IPPROTO_ESP;
            proto_info[i].encapsulation = st->st_esp.attrs.encapsulation;
            proto_info[i].reqid = c->spd.reqid + 1;
            i++;
          }
          
          if (st->st_ah.present)
          {
            proto_info[i].proto = IPPROTO_AH;
            proto_info[i].encapsulation = st->st_ah.attrs.encapsulation;
            proto_info[i].reqid = c->spd.reqid;
            i++;
          }
          
          proto_info[i].proto = 0;
          
          if (kernel_ops->inbound_eroute
            && encapsulation == ENCAPSULATION_MODE_TUNNEL)
          {
            proto_info[0].encapsulation = ENCAPSULATION_MODE_TUNNEL;
            for (i = 1; proto_info[i].proto; i++)
            {
                proto_info[i].encapsulation = ENCAPSULATION_MODE_TRANSPORT;
            }
          }
          
          /* MCR - should be passed a spd_eroute structure here */
          (void) raw_eroute(&c->spd.that.host_addr, &c->spd.that.client
                        , &c->spd.this.host_addr, &c->spd.this.client
                        , inner_spi, proto, satype, c->spd.this.protocol
                        , proto_info, 0
                        , ERO_ADD_INBOUND, "add inbound");
      }
    }

    /* If there are multiple SPIs, group them. */
    
    if (kernel_ops->grp_sa && said_next > &said[1])
    {
      struct kernel_sa *s;
      
      /* group SAs, two at a time, inner to outer (backwards in said[])
       * The grouping is by pairs.  So if said[] contains ah esp ipip,
       * the grouping would be ipip:esp, esp:ah.
       */
      for (s = said; s < said_next-1; s++)
      {
          char
            text_said0[SATOT_BUF],
            text_said1[SATOT_BUF];

          /* group s[1] and s[0], in that order */
          
          set_text_said(text_said0, s[0].dst, s[0].spi, s[0].proto);
          set_text_said(text_said1, s[1].dst, s[1].spi, s[1].proto);
          
          DBG(DBG_KLIPS, DBG_log("grouping %s and %s", text_said1, text_said0));
          
          s[0].text_said = text_said0;
          s[1].text_said = text_said1;
          
          if (!kernel_ops->grp_sa(s + 1, s))
            goto fail;
      }
      /* could update said, but it will not be used */
    }

    return TRUE;

fail:
    {
      /* undo the done SPIs */
      while (said_next-- != said)
          (void) del_spi(said_next->spi, said_next->proto
            , &src.addr, said_next->dst);
      return FALSE;
    }
}

/* teardown_ipsec_sa is a canibalized version of setup_ipsec_sa */

static bool
teardown_half_ipsec_sa(struct state *st, bool inbound)
{
    /* We need to delete AH, ESP, and IP in IP SPIs.
     * But if there is more than one, they have been grouped
     * so deleting any one will do.  So we just delete the
     * first one found.  It may or may not be the only one.
     */
    struct connection *c = st->st_connection;
    struct {
      unsigned proto;
      struct ipsec_proto_info *info;
    } protos[4];
    int i;
    bool result;

    i = 0;
    if (kernel_ops->inbound_eroute && inbound
      && c->spd.eroute_owner == SOS_NOBODY)
    {
      (void) raw_eroute(&c->spd.that.host_addr, &c->spd.that.client
          , &c->spd.this.host_addr, &c->spd.this.client
          , 256, IPSEC_PROTO_ANY, SADB_SATYPE_UNSPEC, c->spd.this.protocol
          , null_proto_info, 0
          , ERO_DEL_INBOUND, "delete inbound");
    }

    if (!kernel_ops->grp_sa)
    {
      if (st->st_ah.present)
      {
          protos[i].info = &st->st_ah;
          protos[i].proto = SA_AH;
          i++;
      }

      if (st->st_esp.present)
      {
          protos[i].info = &st->st_esp;
          protos[i].proto = SA_ESP;
          i++;
      }

      if (st->st_ipcomp.present)
      {
          protos[i].info = &st->st_ipcomp;
          protos[i].proto = SA_COMP;
          i++;
      }
    }
    else if (st->st_ah.present)
    {
      protos[i].info = &st->st_ah;
      protos[i].proto = SA_AH;
      i++;
    }
    else if (st->st_esp.present)
    {
      protos[i].info = &st->st_esp;
      protos[i].proto = SA_ESP;
      i++;
    }
    else
    {
      impossible();     /* neither AH nor ESP in outbound SA bundle! */
    }
    protos[i].proto = 0;

    result = TRUE;
    for (i = 0; protos[i].proto; i++)
    {
      unsigned proto = protos[i].proto;
      ipsec_spi_t spi;
      const ip_address *src, *dst;

      if (inbound)
      {
          spi = protos[i].info->our_spi;
          src = &c->spd.that.host_addr;
          dst = &c->spd.this.host_addr;
      }
      else
      {
          spi = protos[i].info->attrs.spi;
          src = &c->spd.this.host_addr;
          dst = &c->spd.that.host_addr;
      }

      result &= del_spi(spi, proto, src, dst);
    }
    return result;
}

/*
 * get information about a given sa
 */
bool
get_sa_info(struct state *st, bool inbound, u_int *bytes, time_t *use_time)
{
    char text_said[SATOT_BUF];
    struct kernel_sa sa;
    struct connection *c = st->st_connection;

    *use_time = UNDEFINED_TIME;

    if (kernel_ops->get_sa == NULL || !st->st_esp.present)
      return FALSE;

    memset(&sa, 0, sizeof(sa));
    sa.proto = SA_ESP;

    if (inbound)
    {
      sa.src = &c->spd.that.host_addr;
      sa.dst = &c->spd.this.host_addr;
      sa.spi = st->st_esp.our_spi;
    }
    else
    {
      sa.src = &c->spd.this.host_addr;
      sa.dst = &c->spd.that.host_addr;
      sa.spi = st->st_esp.attrs.spi;
    }
    set_text_said(text_said, sa.dst, sa.spi, sa.proto);

    sa.text_said = text_said;

    DBG(DBG_KLIPS,
      DBG_log("get %s", text_said)
    )
    if (!kernel_ops->get_sa(&sa, bytes))
      return FALSE;
    DBG(DBG_KLIPS,
      DBG_log("  current: %d bytes", *bytes)
    )

    if (st->st_serialno == c->spd.eroute_owner)
    {
      DBG(DBG_KLIPS,
          DBG_log("get %sbound policy with reqid %u"
            , inbound? "in":"out", (u_int)c->spd.reqid + 1)
      )
      sa.transport_proto = c->spd.this.protocol;
      sa.encapsulation = st->st_esp.attrs.encapsulation;

      if (inbound)
      {
          sa.src_client = &c->spd.that.client;
          sa.dst_client = &c->spd.this.client;
      }
      else
      {
          sa.src_client = &c->spd.this.client;
          sa.dst_client = &c->spd.that.client;
      }
      if (!kernel_ops->get_policy(&sa, inbound, use_time))
          return FALSE;
      DBG(DBG_KLIPS,
          DBG_log("  use_time: %s", timetoa(use_time, FALSE))
      )
    }
    return TRUE;
}

const struct kernel_ops *kernel_ops;

#endif /* KLIPS */

void
init_kernel(void)
{
#ifdef KLIPS

    if (no_klips)
    {
      kernel_ops = &noklips_kernel_ops;
      return;
    }

    init_pfkey();

    kernel_ops = &klips_kernel_ops;

#if defined(linux) && defined(KERNEL26_SUPPORT)
    {
      bool linux_ipsec = 0;
      struct stat buf;

      linux_ipsec = (stat("/proc/net/pfkey", &buf) == 0);
      if (linux_ipsec)
          {
            plog("Using Linux 2.6 IPsec interface code");
            kernel_ops = &linux_kernel_ops;
          }
      else
          {
            plog("Using KLIPS IPsec interface code");
          }
    }
#endif

    if (kernel_ops->init)
    {
      kernel_ops->init();
    }

    /* register SA types that we can negotiate */
    can_do_IPcomp = FALSE;  /* until we get a response from KLIPS */
    kernel_ops->pfkey_register();

    if (!kernel_ops->policy_lifetime)
    {
      event_schedule(EVENT_SHUNT_SCAN, SHUNT_SCAN_INTERVAL, NULL);
    }
#endif
}

/* Note: install_inbound_ipsec_sa is only used by the Responder.
 * The Responder will subsequently use install_ipsec_sa for the outbound.
 * The Initiator uses install_ipsec_sa to install both at once.
 */
bool
install_inbound_ipsec_sa(struct state *st)
{
    struct connection *const c = st->st_connection;

    /* If our peer has a fixed-address client, check if we already
     * have a route for that client that conflicts.  We will take this
     * as proof that that route and the connections using it are
     * obsolete and should be eliminated.  Interestingly, this is
     * the only case in which we can tell that a connection is obsolete.
     */
    passert(c->kind == CK_PERMANENT || c->kind == CK_INSTANCE);
    if (c->spd.that.has_client)
    {
      for (;;)
      {
          struct spd_route *esr;
          struct connection *o = route_owner(c, &esr, NULL, NULL);

          if (o == NULL)
            break;      /* nobody has a route */

          /* note: we ignore the client addresses at this end */
          if (sameaddr(&o->spd.that.host_addr, &c->spd.that.host_addr)
          && o->interface == c->interface)
            break;      /* existing route is compatible */

          if (o->kind == CK_TEMPLATE && streq(o->name, c->name))
            break;      /* ??? is this good enough?? */

          loglog(RC_LOG_SERIOUS, "route to peer's client conflicts with \"%s\" %s; releasing old connection to free the route"
            , o->name, ip_str(&o->spd.that.host_addr));
          release_connection(o, FALSE);
      }
    }

    DBG(DBG_CONTROL, DBG_log("install_inbound_ipsec_sa() checking if we can route"));
    /* check that we will be able to route and eroute */
    switch (could_route(c))
    {
    case route_easy:
    case route_nearconflict:
      break;

    default:
      return FALSE;
    }

#ifdef KLIPS
    /* (attempt to) actually set up the SAs */
    return setup_half_ipsec_sa(st, TRUE);
#else /* !KLIPS */
    DBG(DBG_CONTROL, DBG_log("install_inbound_ipsec_sa()"));
    return TRUE;
#endif /* !KLIPS */
}

/* Install a route and then a prospective shunt eroute or an SA group eroute.
 * Assumption: could_route gave a go-ahead.
 * Any SA Group must have already been created.
 * On failure, steps will be unwound.
 */
bool
route_and_eroute(struct connection *c USED_BY_KLIPS
             , struct spd_route *sr USED_BY_KLIPS
             , struct state *st USED_BY_KLIPS)
{
#ifdef KLIPS
    struct spd_route *esr;
    struct spd_route *rosr;
    struct connection *ero    /* who, if anyone, owns our eroute? */
      , *ro = route_owner(c, &rosr, &ero, &esr);
    bool eroute_installed = FALSE
      , firewall_notified = FALSE
      , route_installed = FALSE;

    struct connection *ero_top;
    struct bare_shunt **bspp;

    DBG(DBG_CONTROLMORE,
      DBG_log("route_and_eroute with c: %s (next: %s) ero:%s esr:{%p} ro:%s rosr:{%p} and state: %lu"
            , c->name
            , (c->policy_next ? c->policy_next->name : "none")
            , ero ? ero->name : "null"
            , esr 
            , ro ? ro->name : "null"
            , rosr
            , st ? st->st_serialno : 0));

    /* look along the chain of policies for one with the same name */
    ero_top = ero;

#if 0
    /* XXX - mcr this made sense before, and likely will make sense
     * again, so I'l leaving this to remind me what is up */
    if (ero!= NULL && ero->routing == RT_UNROUTED_KEYED)
      ero = NULL;

    for (ero2 = ero; ero2 != NULL; ero2 = ero->policy_next)
      if ((ero2->kind == CK_TEMPLATE || ero2->kind==CK_SECONDARY)
      && streq(ero2->name, c->name))
          break;
#endif

    bspp = (ero == NULL)
      ? bare_shunt_ptr(&sr->this.client, &sr->that.client, sr->this.protocol)
      : NULL;

    /* install the eroute */

    passert(bspp == NULL || ero == NULL); /* only one non-NULL */

    if (bspp != NULL || ero != NULL)
    {
      /* We're replacing an eroute */

      /* if no state provided, then install a shunt for later */
      if (st == NULL)
          eroute_installed = shunt_eroute(c, sr, RT_ROUTED_PROSPECTIVE
                                  , ERO_REPLACE, "replace");
      else
          eroute_installed = sag_eroute(st, sr, ERO_REPLACE, "replace");

#if 0
      /* XXX - MCR. I previously felt that this was a bogus check */
      if (ero != NULL && ero != c && esr != sr)
      {
          /* By elimination, we must be eclipsing ero.  Check. */
          passert(ero->kind == CK_TEMPLATE && streq(ero->name, c->name));
          passert(LHAS(LELEM(RT_ROUTED_PROSPECTIVE) | LELEM(RT_ROUTED_ECLIPSED)
            , esr->routing));
          passert(samesubnet(&esr->this.client, &sr->this.client)
            && samesubnet(&esr->that.client, &sr->that.client));
      }
#endif
      /* remember to free bspp iff we make it out of here alive */
    }
    else
    {
      /* we're adding an eroute */

      /* if no state provided, then install a shunt for later */
      if (st == NULL)
          eroute_installed = shunt_eroute(c, sr, RT_ROUTED_PROSPECTIVE
                                  , ERO_ADD, "add");
      else
          eroute_installed = sag_eroute(st, sr, ERO_ADD, "add");
    }

    /* notify the firewall of a new tunnel */

    if (eroute_installed)
    {
      /* do we have to notify the firewall?  Yes, if we are installing
       * a tunnel eroute and the firewall wasn't notified
       * for a previous tunnel with the same clients.  Any Previous
       * tunnel would have to be for our connection, so the actual
       * test is simple.
       */
      firewall_notified = st == NULL      /* not a tunnel eroute */
          || sr->eroute_owner != SOS_NOBODY     /* already notified */
          || do_command(c, sr, "up");     /* go ahead and notify */
    }

    /* install the route */

    DBG(DBG_CONTROL,
      DBG_log("route_and_eroute: firewall_notified: %s"
            , firewall_notified ? "true" : "false"));
    if (!firewall_notified)
    {
      /* we're in trouble -- don't do routing */
    }
    else if (ro == NULL)
    {
      /* a new route: no deletion required, but preparation is */
      (void) do_command(c, sr, "prepare");      /* just in case; ignore failure */
      route_installed = do_command(c, sr, "route");
    }
    else if (routed(sr->routing)
    || routes_agree(ro, c))
    {
      route_installed = TRUE; /* nothing to be done */
    }
    else
    {
      /* Some other connection must own the route
       * and the route must disagree.  But since could_route
       * must have allowed our stealing it, we'll do so.
       *
       * A feature of LINUX allows us to install the new route
       * before deleting the old if the nexthops differ.
       * This reduces the "window of vulnerability" when packets
       * might flow in the clear.
       */
      if (sameaddr(&sr->this.host_nexthop, &esr->this.host_nexthop))
      {
          (void) do_command(ro, sr, "unroute");
          route_installed = do_command(c, sr, "route");
      }
      else
      {
          route_installed = do_command(c, sr, "route");
          (void) do_command(ro, sr, "unroute");
      }

      /* record unrouting */
      if (route_installed)
      {
          do {
            passert(!erouted(rosr->routing));
            rosr->routing = RT_UNROUTED;

            /* no need to keep old value */
            ro = route_owner(c, &rosr, NULL, NULL);
          } while (ro != NULL);
      }
    }

    /* all done -- clean up */
    if (route_installed)
    {
      /* Success! */

      if (bspp != NULL)
      {
          free_bare_shunt(bspp);
      }
      else if (ero != NULL && ero != c)
      {
          /* check if ero is an ancestor of c. */
          struct connection *ero2;

          for (ero2 = c; ero2 != NULL && ero2 != c; ero2 = ero2->policy_next)
            ;

          if (ero2 == NULL)
          {
            /* By elimination, we must be eclipsing ero.  Checked above. */
            if (ero->spd.routing != RT_ROUTED_ECLIPSED)
            {
                ero->spd.routing = RT_ROUTED_ECLIPSED;
                eclipse_count++;
            }
          }
      }

      if (st == NULL)
      {
          passert(sr->eroute_owner == SOS_NOBODY);
          sr->routing = RT_ROUTED_PROSPECTIVE;
      }
      else
      {
          char cib[CONN_INST_BUF];
          sr->routing = RT_ROUTED_TUNNEL;

          DBG(DBG_CONTROL,
            DBG_log("route_and_eroute: instance \"%s\"%s, setting eroute_owner {spd=%p,sr=%p} to #%ld (was #%ld) (newest_ipsec_sa=#%ld)"
                  , st->st_connection->name
                  , (fmt_conn_instance(st->st_connection, cib), cib)
                  , &st->st_connection->spd, sr
                  , st->st_serialno
                  , sr->eroute_owner
                  , st->st_connection->newest_ipsec_sa));
          sr->eroute_owner = st->st_serialno;
      }

      return TRUE;
    }
    else
    {
      /* Failure!  Unwind our work. */
      if (firewall_notified && sr->eroute_owner == SOS_NOBODY)
          (void) do_command(c, sr, "down");

      if (eroute_installed)
      {
          /* Restore original eroute, if we can.
           * Since there is nothing much to be done if the restoration
           * fails, ignore success or failure.
           */
          if (bspp != NULL)
          {
            /* Restore old bare_shunt.
             * I don't think that this case is very likely.
             * Normally a bare shunt would have been assigned
             * to a connection before we've gotten this far.
             */
            struct bare_shunt *bs = *bspp;

            (void) raw_eroute(&bs->said.dst     /* should be useless */
                , &bs->ours
                , &bs->said.dst     /* should be useless */
                , &bs->his
                , bs->said.spi      /* network order */
                , SA_INT
                , SADB_X_SATYPE_INT
                , 0
                , null_proto_info
                , SHUNT_PATIENCE
                , ERO_REPLACE, "restore");
          }
          else if (ero != NULL)
          {
            /* restore ero's former glory */
            if (esr->eroute_owner == SOS_NOBODY)
            {
                /* note: normal or eclipse case */
                (void) shunt_eroute(ero, esr
                              , esr->routing, ERO_REPLACE, "restore");
            }
            else
            {
                /* Try to find state that owned eroute.
                 * Don't do anything if it cannot be found.
                 * This case isn't likely since we don't run
                 * the updown script when replacing a SA group
                 * with its successor (for the same conn).
                 */
                struct state *ost = state_with_serialno(esr->eroute_owner);

                if (ost != NULL)
                  (void) sag_eroute(ost, esr, ERO_REPLACE, "restore");
            }
          }
          else
          {
            /* there was no previous eroute: delete whatever we installed */
            if (st == NULL)
                (void) shunt_eroute(c, sr
                              , sr->routing, ERO_DELETE, "delete");
            else
                (void) sag_eroute(st, sr
                              , ERO_DELETE, "delete");
          }
      }

      return FALSE;
    }
#else /* !KLIPS */
    return TRUE;
#endif /* !KLIPS */
}

bool
install_ipsec_sa(struct state *st, bool inbound_also USED_BY_KLIPS)
{
#ifdef KLIPS
    struct spd_route *sr;

    DBG(DBG_CONTROL, DBG_log("install_ipsec_sa() for #%ld: %s"
                       , st->st_serialno
                       , inbound_also?
                       "inbound and outbound" : "outbound only"));

    switch (could_route(st->st_connection))
    {
    case route_easy:
    case route_nearconflict:
      break;

    default:
      return FALSE;
    }

    /* (attempt to) actually set up the SA group */
    if ((inbound_also && !setup_half_ipsec_sa(st, TRUE))
    || !setup_half_ipsec_sa(st, FALSE))
      return FALSE;

    for (sr = &st->st_connection->spd; sr != NULL; sr = sr->next)
    {
      DBG(DBG_CONTROL, DBG_log("sr for #%ld: %s"
                         , st->st_serialno
                         , enum_name(&routing_story, sr->routing)));

      /*
       * if the eroute owner is not us, then make it us.
       * See test co-terminal-02, pluto-rekey-01, pluto-unit-02/oppo-twice
       */
      pexpect(sr->eroute_owner == SOS_NOBODY
            || sr->routing >= RT_ROUTED_TUNNEL);

      if (sr->eroute_owner != st->st_serialno
          && sr->routing != RT_UNROUTED_KEYED)
      {
          if (!route_and_eroute(st->st_connection, sr, st))
          {
            delete_ipsec_sa(st, FALSE);
            /* XXX go and unroute any SRs that were successfully
             * routed already.
             */
            return FALSE;
          }
      }
    }
#else /* !KLIPS */
    DBG(DBG_CONTROL, DBG_log("install_ipsec_sa() %s"
      , inbound_also? "inbound and oubound" : "outbound only"));

    switch (could_route(st->st_connection))
    {
    case route_easy:
    case route_nearconflict:
      break;

    default:
      return FALSE;
    }


#endif /* !KLIPS */

    return TRUE;
}

/* delete an IPSEC SA.
 * we may not succeed, but we bull ahead anyway because
 * we cannot do anything better by recognizing failure
 */
void
delete_ipsec_sa(struct state *st USED_BY_KLIPS, bool inbound_only USED_BY_KLIPS)
{
#ifdef KLIPS
    if (!inbound_only)
    {
      /* If the state is the eroute owner, we must adjust
       * the routing for the connection.
       */
      struct connection *c = st->st_connection;
      struct spd_route *sr;

      passert(st->st_connection);

      for (sr = &c->spd; sr; sr = sr->next)
      {
          if (sr->eroute_owner == st->st_serialno
          && sr->routing == RT_ROUTED_TUNNEL)
          {
            sr->eroute_owner = SOS_NOBODY;

            /* Routing should become RT_ROUTED_FAILURE,
             * but if POLICY_FAIL_NONE, then we just go
             * right back to RT_ROUTED_PROSPECTIVE as if no
             * failure happened.
             */
            sr->routing = (c->policy & POLICY_FAIL_MASK) == POLICY_FAIL_NONE
                ? RT_ROUTED_PROSPECTIVE : RT_ROUTED_FAILURE;

            (void) do_command(c, sr, "down");
            if ((c->policy & POLICY_DONT_REKEY)
            && c->kind == CK_INSTANCE)
            {
                /* in this special case, even if the connection
                 * is still alive (due to an ISAKMP SA),
                 * we get rid of routing.
                 * Even though there is still an eroute, the c->routing
                 * setting will convince unroute_connection to delete it.
                 * unroute_connection would be upset if c->routing == RT_ROUTED_TUNNEL
                 */
                unroute_connection(c);
            }
            else
            {
                (void) shunt_eroute(c, sr, sr->routing, ERO_REPLACE, "replace with shunt");
            }
          }
      }
      (void) teardown_half_ipsec_sa(st, FALSE);
    }
    (void) teardown_half_ipsec_sa(st, TRUE);
#else /* !KLIPS */
    DBG(DBG_CONTROL, DBG_log("if I knew how, I'd eroute() and teardown_ipsec_sa()"));
#endif /* !KLIPS */
}
#ifdef NAT_TRAVERSAL
#ifdef KLIPS
static bool update_nat_t_ipsec_esp_sa (struct state *st, bool inbound)
{
      struct connection *c = st->st_connection;
      char text_said[SATOT_BUF];
      struct kernel_sa sa;    
      ip_address
            src = inbound? c->spd.that.host_addr : c->spd.this.host_addr,
            dst = inbound? c->spd.this.host_addr : c->spd.that.host_addr;
            

      ipsec_spi_t esp_spi = inbound? st->st_esp.our_spi : st->st_esp.attrs.spi;

      u_int16_t
            natt_sport = inbound? c->spd.that.host_port : c->spd.this.host_port,
            natt_dport = inbound? c->spd.this.host_port : c->spd.that.host_port;

      set_text_said(text_said, &dst, esp_spi, SA_ESP);
            
      memset(&sa, 0, sizeof(sa));
      sa.spi = esp_spi;
      sa.src = &src;
      sa.dst = &dst;
      sa.text_said = text_said;
      sa.authalg = alg_info_esp_aa2sadb(st->st_esp.attrs.auth);
      sa.natt_sport = natt_sport;
      sa.natt_dport = natt_dport;
      sa.transid = st->st_esp.attrs.transid;
      
        return kernel_ops->add_sa(&sa, TRUE);

}
#endif

bool update_ipsec_sa (struct state *st USED_BY_KLIPS)
{
#ifdef KLIPS
      if (IS_IPSEC_SA_ESTABLISHED(st->st_state)) {
            if ((st->st_esp.present) && (
                  (!update_nat_t_ipsec_esp_sa (st, TRUE)) ||
                  (!update_nat_t_ipsec_esp_sa (st, FALSE)))) {
                  return FALSE;
            }
      }
      else if (IS_ONLY_INBOUND_IPSEC_SA_ESTABLISHED(st->st_state)) {
            if ((st->st_esp.present) && (!update_nat_t_ipsec_esp_sa (st, FALSE))) {
                  return FALSE;
            }
      }
      else {
            DBG_log("assert failed at %s:%d st_state=%d", __FILE__, __LINE__,
                  st->st_state);
            return FALSE;
      }
      return TRUE;
#else /* !KLIPS */
    DBG(DBG_CONTROL, DBG_log("if I knew how, I'd update_ipsec_sa()"));
    return TRUE;
#endif /* !KLIPS */
}
#endif

/* Check if there was traffic on given SA during the last idle_max
 * seconds. If TRUE, the SA was idle and DPD exchange should be performed.
 * If FALSE, DPD is not necessary. We also return TRUE for errors, as they
 * could mean that the SA is broken and needs to be replace anyway.
 */
bool
was_eroute_idle(struct state *st, time_t idle_max, time_t *idle_time)
{
    static const char procname[] = "/proc/net/ipsec_spi";
    FILE *f;
    char buf[1024];
    u_int bytes;
    int ret = TRUE;

    passert(st != NULL);

    f = fopen(procname, "r");
    if (f == NULL)
    {
      /* Can't open the file, perhaps were are on 26sec? */
      time_t use_time;

      if (get_sa_info(st, TRUE, &bytes, &use_time)
      && use_time != UNDEFINED_TIME)
      {
          *idle_time = time(NULL) - use_time;
          ret = *idle_time >= idle_max;
      }
    }
    else 
    {
      while (f != NULL)
      {
          char *line;
          char text_said[SATOT_BUF];
          u_int8_t proto = 0;
          ip_address dst;
          ip_said said;
          ipsec_spi_t spi = 0;
          static const char idle[] = "idle=";

          dst = st->st_connection->spd.this.host_addr; /* inbound SA */
          if (st->st_ah.present)
          {
            proto = SA_AH;
            spi = st->st_ah.our_spi;
          }
          if (st->st_esp.present)
          {
            proto = SA_ESP;
            spi = st->st_esp.our_spi;
          }

          if (proto == 0 && spi == 0)
          {
            ret = TRUE;
            break;
          }

          initsaid(&dst, spi, proto, &said);
          satot(&said, 'x', text_said, SATOT_BUF);

          line = fgets(buf, sizeof(buf), f);
          if (line == NULL)
          {
            /* Reached end of list */
            ret = TRUE;
            break;
          }

          if (strncmp(line, text_said, strlen(text_said)) == 0)
          {
            /* we found a match, now try to find idle= */
            char *p = strstr(line, idle);

            if (p == NULL)
            {
                /* SAs which haven't been used yet don't have it */
                ret = TRUE; /* it didn't have traffic */
                break;
            }
            p += sizeof(idle)-1;
            if (*p == '\0')
            {
                ret = TRUE; /* be paranoid */
                break;
            }
            if (sscanf(p, "%d", (int *) idle_time) <= 0)
            {
                ret = TRUE;
                break;
            }
            if (*idle_time >= idle_max)
            {
                ret = TRUE;
                break;
            }
            else
            {
                ret = FALSE;
                break;
            }
          }
      }
      fclose(f);
    }
    return ret;
}

Generated by  Doxygen 1.6.0   Back to index