Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:pbaudis
unscd
nscd-0.39.c
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File nscd-0.39.c of Package unscd
/* This file is part of unscd, a complete nscd replacement.
 * Copyright (C) 2007 Denys Vlasenko. Licensed under the GPL version 2.
 */

/* unscd is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * unscd is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You can download the GNU General Public License from the GNU website
 * at http://www.gnu.org/ or write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
Build instructions:

gcc -Os -o nscd nscd.c

gcc -fomit-frame-pointer -Wl,--sort-section -Wl,alignment -Wl,--sort-common
      -Os -o nscd nscd.c

Description:

nscd problems are not exactly unheard of. Over the years, there have been
quite a few bugs in it. This leads people to invent babysitters
which restart a crashed/hung nscd. This is ugly.

After looking at the nscd source in glibc I arrived at the conclusion
that its design is contributing to this significantly. Even if nscd's
code is 100.00% perfect and bug-free, it can still suffer from bugs
in the libraries it calls.

As designed, it's a multithreaded program which calls NSS libraries.
These libraries are not part of libc, they may be provided
by third-party projects (samba, ldap, you name it).

Thus nscd cannot be sure that the libraries it calls do not have memory
or file descriptor leaks and other bugs.

Since nscd is a multithreaded program with a single shared cache,
any resource leak in any NSS library has a cumulative effect.
Even if an NSS library leaks a file descriptor 0.01% of the time,
this will make nscd crash or hang after some time.

Of course bugs in NSS .so modules should be fixed, but meanwhile
I do want an nscd which does not crash or lock up.

So I went ahead and wrote a replacement. It is a single-threaded server process which offloads all NSS lookups to worker children (not threads, but fully independent processes). Cache hits are handled by parent. Only cache misses start worker children. This design is immune against resource leaks and hangs in NSS libraries. It is also many times smaller. Currently (v0.36) it emulates glibc nscd pretty closely (handles same command line flags and config file), and is moderately tested. Please note that as of 2008-08 it is not in wide use (yet?). If you have trouble compiling it, see an incompatibility with "standard" one or experience hangs/crashes, please report it to vda.linux@googlemail.com ***********************************************************************/ /* Make struct ucred appear in sys/socket.h */ #define _GNU_SOURCE 1 /* For all good things */ #include <stdio.h> #include <stddef.h> #include <stdlib.h> #include <stdarg.h> #include <unistd.h> #include <string.h> #include <ctype.h> #include <errno.h> #include <fcntl.h> #include <signal.h> #include <time.h> #include <netdb.h> #include <pwd.h> #include <grp.h> #include <getopt.h> #include <syscall.h> #include <sys/socket.h> #include <sys/time.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/poll.h> #include <sys/un.h> /* For INT_MAX */ #include <limits.h> /* For inet_ntoa (for debug build only) */ #include <arpa/inet.h> /* * 0.21 add SEGV reporting to worker * 0.22 don't do freeaddrinfo() in GETAI worker, it's crashy * 0.23 add parameter parsing * 0.24 add conf file parsing, not using results yet * 0.25 used some of conf file settings (not tested) * 0.26 almost all conf file settings are wired up * 0.27 a bit more of almost all conf file settings are wired up * 0.28 optimized cache aging * 0.29 implemented invalidate and shutdown options * 0.30 fixed buglet (sizeof(ptr) != sizeof(array)) * 0.31 reduced client_info by one member * 0.32 fix nttl/size defaults; simpler check for worker child in 
main() * 0.33 tweak includes so that it builds on my new machine (64-bit userspace); * do not die on unknown service name, just warn * ("services" is a new service we don't support) * 0.34 create /var/run/nscd/nscd.pid pidfile like glibc nscd 2.8 does; * delay setuid'ing itself to server-user after log and pidfile are open * 0.35 readlink /proc/self/exe and use result if execing /proc/self/exe fails * 0.36 excercise extreme paranoia handling server-user option; * a little bit more verbose logging: * L_DEBUG2 log level added, use debug-level 7 to get it * 0.37 users reported over-zealous "detected change in /etc/passwd", * apparently stat() returns random garbage in unused padding * on some systems. Made the check less paranoid. * 0.38 log POLLHUP better * 0.39 log answers to client better, log getpwnam in the worker, * pass debug level value down to worker. */ #define PROGRAM_VERSION "0.39" #define DEBUG_BUILD 1 /* ** Generic helpers */ #define NORETURN __attribute__ ((__noreturn__)) #ifdef MY_CPU_HATES_CHARS typedef int smallint; #else typedef signed char smallint; #endif enum { L_INFO = (1 << 0), L_DEBUG = ((1 << 1) * DEBUG_BUILD), L_DEBUG2 = ((1 << 2) * DEBUG_BUILD), L_DUMP = ((1 << 3) * DEBUG_BUILD), L_ALL = 0xf, D_DAEMON = (1 << 6), D_STAMP = (1 << 5), }; static smallint debug = D_DAEMON; static void verror(const char *s, va_list p, const char *strerr) { char msgbuf[1024]; int sz, rem, strerr_len; struct timeval tv; sz = 0; if (debug & D_STAMP) { gettimeofday(&tv, NULL); sz = sprintf(msgbuf, "%02u:%02u:%02u.%05u ", (unsigned)((tv.tv_sec / (60*60)) % 24), (unsigned)((tv.tv_sec / 60) % 60), (unsigned)(tv.tv_sec % 60), (unsigned)(tv.tv_usec / 10)); } rem = sizeof(msgbuf) - sz; sz += vsnprintf(msgbuf + sz, rem, s, p); rem = sizeof(msgbuf) - sz; /* can be negative after this! 
*/ if (strerr) { strerr_len = strlen(strerr); if (rem >= strerr_len + 4) { /* ": STRERR\n\0" */ msgbuf[sz++] = ':'; msgbuf[sz++] = ' '; strcpy(msgbuf + sz, strerr); sz += strerr_len; } } if (rem >= 2) { msgbuf[sz++] = '\n'; msgbuf[sz] = '\0'; } fflush(NULL); fputs(msgbuf, stderr); } static void error(const char *msg, ...) { va_list p; va_start(p, msg); verror(msg, p, NULL); va_end(p); } static void error_and_die(const char *msg, ...) NORETURN; static void error_and_die(const char *msg, ...) { va_list p; va_start(p, msg); verror(msg, p, NULL); va_end(p); _exit(1); } static void perror_and_die(const char *msg, ...) NORETURN; static void perror_and_die(const char *msg, ...) { va_list p; va_start(p, msg); /* Guard against "<error message>: Success" */ verror(msg, p, errno ? strerror(errno) : NULL); va_end(p); _exit(1); } static void nscd_log(int mask, const char *msg, ...) { if (debug & mask) { va_list p; va_start(p, msg); verror(msg, p, NULL); va_end(p); } } #define log(lvl, ...) do { if (lvl) nscd_log(lvl, __VA_ARGS__); } while (0) #if DEBUG_BUILD static void dump(const void *ptr, int len) { char text[18]; const unsigned char *buf; char *p; if (!(debug & L_DUMP)) return; buf = ptr; while (len > 0) { int chunk = ((len >= 16) ? 16 : len); fprintf(stderr, "%02x %02x %02x %02x %02x %02x %02x %02x " "%02x %02x %02x %02x %02x %02x %02x %02x " + (16-chunk) * 5, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9],buf[10],buf[11],buf[12],buf[13],buf[14],buf[15] ); fprintf(stderr, "%*s", (16-chunk) * 3, ""); len -= chunk; p = text; do { unsigned char c = *buf++; *p++ = (c >= 32 && c < 127 ? 
c : '.'); } while (--chunk); *p++ = '\n'; *p = '\0'; fputs(text, stderr); } } #else void dump(const void *ptr, int len); #endif #define hex_dump(p,n) do { if (L_DUMP) dump(p,n); } while (0) static int xopen3(const char *pathname, int flags, int mode) { int fd = open(pathname, flags, mode); if (fd < 0) perror_and_die("open"); return fd; } static void xpipe(int *fds) { if (pipe(fds) < 0) perror_and_die("pipe"); } static void xexecve(const char *filename, char **argv, char **envp) NORETURN; static void xexecve(const char *filename, char **argv, char **envp) { execve(filename, argv, envp); perror_and_die("cannot re-exec %s", filename); } static void ndelay_on(int fd) { int fl = fcntl(fd, F_GETFL); if (fl < 0) perror_and_die("F_GETFL"); if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) < 0) perror_and_die("setting O_NONBLOCK"); } static void close_on_exec(int fd) { if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) perror_and_die("setting FD_CLOEXEC"); } static unsigned monotonic_ms(void) { struct timespec ts; if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts)) perror_and_die("clock_gettime(MONOTONIC)"); return ts.tv_sec * 1000 + ts.tv_nsec / 1000000; } static unsigned strsize(const char *str) { return strlen(str) + 1; } static unsigned strsize_aligned4(const char *str) { return (strlen(str) + 1 + 3) & (~3); } static ssize_t safe_read(int fd, void *buf, size_t count) { ssize_t n; do { n = read(fd, buf, count); } while (n < 0 && errno == EINTR); return n; } static ssize_t full_read(int fd, void *buf, size_t len) { ssize_t cc; ssize_t total; total = 0; while (len) { cc = safe_read(fd, buf, len); if (cc < 0) return cc; /* read() returns -1 on failure. 
*/ if (cc == 0) break; buf = ((char *)buf) + cc; total += cc; len -= cc; } return total; } /* unused static void xsafe_read(int fd, void *buf, size_t len) { if (len != safe_read(fd, buf, len)) perror_and_die("short read"); } static void xfull_read(int fd, void *buf, size_t len) { if (len != full_read(fd, buf, len)) perror_and_die("short read"); } */ static ssize_t safe_write(int fd, const void *buf, size_t count) { ssize_t n; do { n = write(fd, buf, count); } while (n < 0 && errno == EINTR); return n; } static ssize_t full_write(int fd, const void *buf, size_t len) { ssize_t cc; ssize_t total; total = 0; while (len) { cc = safe_write(fd, buf, len); if (cc < 0) return cc; /* write() returns -1 on failure. */ total += cc; buf = ((const char *)buf) + cc; len -= cc; } return total; } static void xsafe_write(int fd, const void *buf, size_t count) { if (count != safe_write(fd, buf, count)) perror_and_die("short write of %ld bytes", (long)count); } static void xfull_write(int fd, const void *buf, size_t count) { if (count != full_write(fd, buf, count)) perror_and_die("short write of %ld bytes", (long)count); } static void xmovefd(int from_fd, int to_fd) { if (from_fd != to_fd) { if (dup2(from_fd, to_fd) < 0) perror_and_die("dup2"); close(from_fd); } } static unsigned getnum(const char *str) { if (str[0] >= '0' && str[0] <= '9') { char *p; unsigned long l = strtoul(str, &p, 10); /* must not overflow int even after x1000 */ if (!*p && l <= INT_MAX / 1000) return l; } error_and_die("malformed or too big number '%s'", str); }; static char *skip_whitespace(const char *s) { /* NB: isspace('\0') returns 0 */ while (isspace(*s)) ++s; return (char *) s; } static char *skip_non_whitespace(const char *s) { while (*s && !isspace(*s)) ++s; return (char *) s; } static void *xmalloc(unsigned sz) { void *p = malloc(sz); if (!p) error_and_die("out of memory"); return p; } static void *xzalloc(unsigned sz) { void *p = xmalloc(sz); memset(p, 0, sz); return p; } static void *xrealloc(void 
*p, unsigned size) { p = realloc(p, size); if (!p) error_and_die("out of memory"); return p; } static const char *xstrdup(const char *str) { const char *p = strdup(str); if (!p) error_and_die("out of memory"); return p; } /* ** Config data */ enum { SRV_PASSWD, SRV_GROUP, SRV_HOSTS, }; static struct { const char *logfile; const char *user; smallint srv_enable[3]; smallint check_files[3]; unsigned pttl[3]; unsigned nttl[3]; unsigned size[3]; } config = { /* We try to closely mimic glibc nscd */ .logfile = NULL, /* default is to not have a log file */ .user = NULL, .srv_enable = { 0, 0, 0 }, .check_files = { 1, 1, 1 }, .pttl = { 3600, 3600, 3600 }, .nttl = { 20, 60, 20 }, /* huh, what is the default cache size in glibc nscd? */ .size = { 256 * 8 / 3, 256 * 8 / 3, 256 * 8 / 3 }, }; static const char default_conffile[] = "/etc/nscd.conf"; static const char *self_exe_points_to = "/proc/self/exe"; /* ** Clients, workers machinery */ /* Header common to all requests */ #define USER_REQ_STRUCT \ int32_t version; /* Version number of the daemon interface */ \ int32_t type; /* Service requested */ \ int32_t key_len; /* Key length */ typedef struct user_req_header { USER_REQ_STRUCT } user_req_header; enum { NSCD_VERSION = 2, MAX_USER_REQ_SIZE = 1024, USER_HDR_SIZE = sizeof(user_req_header), /* DNS queries time out after 20 seconds, * we will allow for a bit more */ WORKER_TIMEOUT_SEC = 30, CLIENT_TIMEOUT_MS = 100, SMALL_POLL_TIMEOUT_MS = 200, }; typedef struct user_req { union { struct { /* as came from client */ USER_REQ_STRUCT }; struct { /* when stored in cache, overlaps .version */ unsigned refcount:8; /* actually, can be 1 or 0 only */ /* (timestamp24 * 256) == timestamp in ms */ unsigned timestamp24:24; }; }; char reqbuf[MAX_USER_REQ_SIZE - USER_HDR_SIZE]; } user_req; /* Compile-time check for correct size */ struct BUG_wrong_user_req_size { char BUG_wrong_user_req_size[sizeof(user_req) == MAX_USER_REQ_SIZE ? 
1 : -1]; }; enum { GETPWBYNAME, GETPWBYUID, GETGRBYNAME, GETGRBYGID, GETHOSTBYNAME, GETHOSTBYNAMEv6, GETHOSTBYADDR, GETHOSTBYADDRv6, SHUTDOWN, /* Shut the server down */ GETSTAT, /* Get the server statistic */ INVALIDATE, /* Invalidate one special cache */ GETFDPW, GETFDGR, GETFDHST, GETAI, INITGROUPS, GETSERVBYNAME, GETSERVBYPORT, GETFDSERV, LASTREQ }; #if DEBUG_BUILD static const char *const typestr[] = { "GETPWBYNAME", /* done */ "GETPWBYUID", /* done */ "GETGRBYNAME", /* done */ "GETGRBYGID", /* done */ "GETHOSTBYNAME", /* done */ "GETHOSTBYNAMEv6", /* done */ "GETHOSTBYADDR", /* done */ "GETHOSTBYADDRv6", /* done */ "SHUTDOWN", /* done */ "GETSTAT", /* info? */ "INVALIDATE", /* done */ /* won't do: nscd passes a name of shmem segment * which client can map and "see" the db */ "GETFDPW", "GETFDGR", /* won't do */ "GETFDHST", /* won't do */ "GETAI", /* done */ "INITGROUPS", /* done */ "GETSERVBYNAME", /* prio 3 (no caching?) */ "GETSERVBYPORT", /* prio 3 (no caching?) */ "GETFDSERV" /* won't do */ }; #else extern const char *const typestr[]; #endif static const smallint type_to_srv[] = { [GETPWBYNAME ] = SRV_PASSWD, [GETPWBYUID ] = SRV_PASSWD, [GETGRBYNAME ] = SRV_GROUP, [GETGRBYGID ] = SRV_GROUP, [GETHOSTBYNAME ] = SRV_HOSTS, [GETHOSTBYNAMEv6 ] = SRV_HOSTS, [GETHOSTBYADDR ] = SRV_HOSTS, [GETHOSTBYADDRv6 ] = SRV_HOSTS, [GETAI ] = SRV_HOSTS, [INITGROUPS ] = SRV_GROUP, }; static int unsupported_ureq_type(unsigned type) { if (type == GETAI) return 0; if (type == INITGROUPS) return 0; if (type > GETHOSTBYADDRv6) return 1; return 0; } /* Possible reductions: * fd, bufidx - uint8_t * started_ms -> uint16_t started_s * ureq - eliminate (derivable from bufidx?) * cell - eliminate (derivable from resptr?) 
*/ typedef struct client_info { /* if client_fd != 0, we are waiting for the reply from worker * on pfd[i].fd, and client_fd is saved client's fd * (we need to put it back into pfd[i].fd later) */ int client_fd; unsigned bytecnt; /* bytes read from client */ unsigned bufidx; /* buffer# in global client_buf[] */ unsigned started_ms; unsigned respos; /* response */ //unsigned resp_sz; user_req *resptr; /* response */ user_req *ureq; /* request (points to client_buf[x]) */ user_req **cell; /* cache cell ptr */ } client_info; static int min_closed = INT_MAX; static int cnt_closed = 0; static int num_clients = 2; /* two listening sockets are "clients" too */ /* We read up to max_reqnum requests in parallel */ static unsigned max_reqnum = 14; static int next_buf; /* Each of these points to [max_reqnum] sized array */ static char (*client_buf)[MAX_USER_REQ_SIZE]; static char *busy_cbuf; static struct pollfd *pfd; static client_info *cinfo; static inline unsigned ureq_size(const user_req *ureq) { return sizeof(user_req_header) + ureq->key_len; } static unsigned cache_age(unsigned now_ms, const user_req *ureq) { return (uint32_t)now_ms - (ureq->timestamp24 << 8); } static void set_cache_timestamp(user_req *ureq, unsigned now_ms) { ureq->timestamp24 = now_ms >> 8; } static int alloc_buf_no(void) { int n = next_buf; do { int cur = next_buf; next_buf = (next_buf + 1) % max_reqnum; if (!busy_cbuf[cur]) { busy_cbuf[cur] = 1; return cur; } } while (next_buf != n); error_and_die("no free bufs?!"); } static inline void *bufno2buf(int i) { return client_buf[i]; } static void close_client(int i) { log(L_DEBUG, "closing client %d (fd %d)", i, pfd[i].fd); close(pfd[i].fd); pfd[i].fd = 0; /* flag as unused */ busy_cbuf[cinfo[i].bufidx] = 0; cnt_closed++; if (i < min_closed) min_closed = i; } /* ** ncsd API <-> C API conversion */ typedef struct response_header { uint32_t version_or_size; int32_t found; char body[0]; } response_header; typedef struct initgr_response_header { uint32_t 
version_or_size; int32_t found; int32_t ngrps; /* code assumes gid_t == int32, let's check that */ int32_t gid[sizeof(gid_t) == sizeof(int32_t) ? 0 : -1]; /* char user_str[as_needed]; */ } initgr_response_header; static initgr_response_header *obtain_initgroups(const char *username) { struct initgr_response_header *resp; struct passwd *pw; enum { MAGIC_OFFSET = sizeof(*resp) / sizeof(int32_t) }; unsigned sz; int ngroups; pw = getpwnam(username); if (!pw) { resp = xzalloc(8); resp->version_or_size = sizeof(*resp); /*resp->found = 0;*/ /*resp->ngrps = 0;*/ goto ret; } /* getgrouplist may be very expensive, it's much better to allocate * a bit more than to run getgrouplist twice */ ngroups = 128; resp = NULL; do { sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups; resp = xrealloc(resp, sz); } while (getgrouplist(username, pw->pw_gid, (gid_t*) &resp->gid, &ngroups) == -1); log(L_DEBUG, "ngroups=%d", ngroups); sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups; /* resp = xrealloc(resp, sz); - why bother */ resp->version_or_size = sz; resp->found = 1; resp->ngrps = ngroups; ret: return resp; } typedef struct pw_response_header { uint32_t version_or_size; int32_t found; int32_t pw_name_len; int32_t pw_passwd_len; int32_t pw_uid; int32_t pw_gid; int32_t pw_gecos_len; int32_t pw_dir_len; int32_t pw_shell_len; /* char pw_name[pw_name_len]; */ /* char pw_passwd[pw_passwd_len]; */ /* char pw_gecos[pw_gecos_len]; */ /* char pw_dir[pw_dir_len]; */ /* char pw_shell[pw_shell_len]; */ } pw_response_header; static pw_response_header *marshal_passwd(struct passwd *pw) { char *p; pw_response_header *resp; unsigned pw_name_len; unsigned pw_passwd_len; unsigned pw_gecos_len; unsigned pw_dir_len; unsigned pw_shell_len; unsigned sz = sizeof(*resp); if (pw) { sz += (pw_name_len = strsize(pw->pw_name)); sz += (pw_passwd_len = strsize(pw->pw_passwd)); sz += (pw_gecos_len = strsize(pw->pw_gecos)); sz += (pw_dir_len = strsize(pw->pw_dir)); sz += (pw_shell_len = strsize(pw->pw_shell)); } 
resp = xzalloc(sz); resp->version_or_size = sz; if (!pw) { /*resp->found = 0;*/ goto ret; } resp->found = 1; resp->pw_name_len = pw_name_len; resp->pw_passwd_len = pw_passwd_len; resp->pw_uid = pw->pw_uid; resp->pw_gid = pw->pw_gid; resp->pw_gecos_len = pw_gecos_len; resp->pw_dir_len = pw_dir_len; resp->pw_shell_len = pw_shell_len; p = (char*)(resp + 1); strcpy(p, pw->pw_name); p += pw_name_len; strcpy(p, pw->pw_passwd); p += pw_passwd_len; strcpy(p, pw->pw_gecos); p += pw_gecos_len; strcpy(p, pw->pw_dir); p += pw_dir_len; strcpy(p, pw->pw_shell); p += pw_shell_len; log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp); ret: return resp; } typedef struct gr_response_header { uint32_t version_or_size; int32_t found; int32_t gr_name_len; /* strlen(gr->gr_name) + 1; */ int32_t gr_passwd_len; /* strlen(gr->gr_passwd) + 1; */ int32_t gr_gid; /* gr->gr_gid */ int32_t gr_mem_cnt; /* while (gr->gr_mem[gr_mem_cnt]) ++gr_mem_cnt; */ /* int32_t gr_mem_len[gr_mem_cnt]; */ /* char gr_name[gr_name_len]; */ /* char gr_passwd[gr_passwd_len]; */ /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */ /* char gr_gid_str[as_needed]; - huh? */ /* char orig_key[as_needed]; - needed?? I don't do this ATM... 
*/ /* glibc adds gr_gid_str, but client doesn't get/use it: writev(3, [{"\2\0\0\0\2\0\0\0\5\0\0\0", 12}, {"root\0", 5}], 2) = 17 poll([{fd=3, events=POLLIN|POLLERR|POLLHUP, revents=POLLIN}], 1, 5000) = 1 read(3, "\2\0\0\0\1\0\0\0\10\0\0\0\4\0\0\0\0\0\0\0\0\0\0\0", 24) = 24 readv(3, [{"", 0}, {"root\0\0\0\0\0\0\0\0", 12}], 2) = 12 read(3, NULL, 0) = 0 */ } gr_response_header; static gr_response_header *marshal_group(struct group *gr) { char *p; gr_response_header *resp; unsigned gr_mem_cnt; unsigned sz = sizeof(*resp); if (gr) { sz += strsize(gr->gr_name); sz += strsize(gr->gr_passwd); gr_mem_cnt = 0; while (gr->gr_mem[gr_mem_cnt]) { sz += strsize(gr->gr_mem[gr_mem_cnt]); gr_mem_cnt++; } /* for int32_t gr_mem_len[gr_mem_cnt]; */ sz += gr_mem_cnt * sizeof(int32_t); } resp = xzalloc(sz); resp->version_or_size = sz; if (!gr) { /*resp->found = 0;*/ goto ret; } resp->found = 1; resp->gr_name_len = strsize(gr->gr_name); resp->gr_passwd_len = strsize(gr->gr_passwd); resp->gr_gid = gr->gr_gid; resp->gr_mem_cnt = gr_mem_cnt; p = (char*)(resp + 1); /* int32_t gr_mem_len[gr_mem_cnt]; */ gr_mem_cnt = 0; while (gr->gr_mem[gr_mem_cnt]) { *(uint32_t*)p = strsize(gr->gr_mem[gr_mem_cnt]); p += 4; gr_mem_cnt++; } /* char gr_name[gr_name_len]; */ strcpy(p, gr->gr_name); p += strsize(gr->gr_name); /* char gr_passwd[gr_passwd_len]; */ strcpy(p, gr->gr_passwd); p += strsize(gr->gr_passwd); /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */ gr_mem_cnt = 0; while (gr->gr_mem[gr_mem_cnt]) { strcpy(p, gr->gr_mem[gr_mem_cnt]); p += strsize(gr->gr_mem[gr_mem_cnt]); gr_mem_cnt++; } log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp); ret: return resp; } typedef struct hst_response_header { uint32_t version_or_size; int32_t found; int32_t h_name_len; int32_t h_aliases_cnt; int32_t h_addrtype; /* AF_INET or AF_INET6 */ int32_t h_length; /* 4 or 16 */ int32_t h_addr_list_cnt; int32_t error; /* char h_name[h_name_len]; - we pad it to 4 bytes */ /* uint32_t h_aliases_len[h_aliases_cnt]; */ /* char 
h_addr_list[h_addr_list_cnt][h_length]; - every one is the same size [h_length] (4 or 16) */ /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */ } hst_response_header; static hst_response_header *marshal_hostent(struct hostent *h) { char *p; hst_response_header *resp; unsigned h_name_len; unsigned h_aliases_cnt; unsigned h_addr_list_cnt; unsigned sz = sizeof(*resp); if (h) { /* char h_name[h_name_len] */ sz += h_name_len = strsize_aligned4(h->h_name); h_addr_list_cnt = 0; while (h->h_addr_list[h_addr_list_cnt]) { h_addr_list_cnt++; } /* char h_addr_list[h_addr_list_cnt][h_length] */ sz += h_addr_list_cnt * h->h_length; h_aliases_cnt = 0; while (h->h_aliases[h_aliases_cnt]) { /* char h_aliases[h_aliases_cnt][h_aliases_len[i]] */ sz += strsize(h->h_aliases[h_aliases_cnt]); h_aliases_cnt++; } /* uint32_t h_aliases_len[h_aliases_cnt] */ sz += h_aliases_cnt * 4; } resp = xzalloc(sz); resp->version_or_size = sz; if (!h) { /*resp->found = 0;*/ resp->error = HOST_NOT_FOUND; goto ret; } resp->found = 1; resp->h_name_len = h_name_len; resp->h_aliases_cnt = h_aliases_cnt; resp->h_addrtype = h->h_addrtype; resp->h_length = h->h_length; resp->h_addr_list_cnt = h_addr_list_cnt; /*resp->error = 0;*/ p = (char*)(resp + 1); /* char h_name[h_name_len]; */ strcpy(p, h->h_name); p += h_name_len; /* uint32_t h_aliases_len[h_aliases_cnt]; */ h_aliases_cnt = 0; while (h->h_aliases[h_aliases_cnt]) { *(uint32_t*)p = strsize(h->h_aliases[h_aliases_cnt]); p += 4; h_aliases_cnt++; } /* char h_addr_list[h_addr_list_cnt][h_length]; */ h_addr_list_cnt = 0; while (h->h_addr_list[h_addr_list_cnt]) { memcpy(p, h->h_addr_list[h_addr_list_cnt], h->h_length); p += h->h_length; h_addr_list_cnt++; } /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */ h_aliases_cnt = 0; while (h->h_aliases[h_aliases_cnt]) { strcpy(p, h->h_aliases[h_aliases_cnt]); p += strsize(h->h_aliases[h_aliases_cnt]); h_aliases_cnt++; } log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp); ret: return resp; } /* Reply to 
addrinfo query */ typedef struct ai_response_header { uint32_t version_or_size; int32_t found; int32_t naddrs; int32_t addrslen; int32_t canonlen; int32_t error; /* char ai_addr[naddrs][4 or 16]; - addrslen bytes in total */ /* char ai_family[naddrs]; - AF_INET[6] each (determines ai_addr[i] length) */ /* char ai_canonname[canonlen]; */ } ai_response_header; static ai_response_header *obtain_addrinfo(const char *hostname) { struct addrinfo hints; struct addrinfo *ai; struct addrinfo *ap; ai_response_header *resp; char *p, *family; int err; unsigned sz; unsigned naddrs = 0; unsigned addrslen = 0; unsigned canonlen = 0; memset(&hints, 0, sizeof(hints)); hints.ai_flags = AI_CANONNAME; /* hinst.ai_socktype = SOCK_STREAM; - can kill dups (one for each possible SOCK_xxx) */ ai = NULL; /* on failure getaddrinfo may leave it as-is */ err = getaddrinfo(hostname, NULL, &hints, &ai); sz = sizeof(*resp); if (!err) { if (ai->ai_canonname) sz += canonlen = strsize(ai->ai_canonname); ap = ai; do { naddrs++; addrslen += (ap->ai_family == AF_INET ? 4 : 16); ap = ap->ai_next; } while (ap); sz += naddrs + addrslen; } resp = xzalloc(sz); resp->version_or_size = sz; resp->error = err; if (err) { /*resp->found = 0;*/ goto ret; } resp->found = 1; resp->naddrs = naddrs; resp->addrslen = addrslen; resp->canonlen = canonlen; p = (char*)(resp + 1); family = p + addrslen; ap = ai; do { /* char ai_family[naddrs]; */ *family++ = ap->ai_family; /* char ai_addr[naddrs][4 or 16]; */ if (ap->ai_family == AF_INET) { memcpy(p, &(((struct sockaddr_in*)(ap->ai_addr))->sin_addr), 4); p += 4; } else { memcpy(p, &(((struct sockaddr_in6*)(ap->ai_addr))->sin6_addr), 16); p += 16; } ap = ap->ai_next; } while (ap); /* char ai_canonname[canonlen]; */ if (ai->ai_canonname) strcpy(family, ai->ai_canonname); log(L_DEBUG, "sz:%u realsz:%u", sz, family + strsize(ai->ai_canonname) - (char*)resp); ret: /* glibc 2.3.6 segfaults here sometimes * (maybe my mistake, fixed by "ai = NULL;" above). 
* Since we are in worker and are going to exit anyway, why bother? */ /*freeaddrinfo(ai);*/ return resp; } /* ** Cache management */ /* one 8-element "cacheline" */ typedef user_req *cacheline_t[8]; static unsigned cache_size; /* Points to cacheline_t cache[cache_size] array, or in other words, * points to user_req* cache[cache_size][8] array */ static cacheline_t *cache; static unsigned cached_cnt; static unsigned cache_access_cnt = 1; /* prevent division by zero */ static unsigned cache_hit_cnt = 1; static unsigned last_age_time; static unsigned aging_interval_ms; static unsigned min_aging_interval_ms; static response_header *ureq_response(user_req *ureq) { /* Skip query part, find answer part * (answer is 32-bit aligned) */ return (void*) ((char*)ureq + ((ureq_size(ureq) + 3) & ~3)); } /* This hash is supposed to be good for short textual data */ static uint32_t bernstein_hash(void *p, unsigned sz, uint32_t hash) { uint8_t *key = p; do { hash = (32 * hash + hash) ^ *key++; } while (--sz); return hash; } static user_req *find_cell_and_response(user_req ***cellp, user_req *ureq) { user_req **cell; unsigned hash; unsigned i; unsigned ureq_sz = ureq_size(ureq); /* prevent overflow and division by zero */ if ((int)(cache_access_cnt+1) < 0) { cache_access_cnt = (cache_access_cnt >> 1) + 1; cache_hit_cnt = (cache_hit_cnt >> 1) + 1; } cache_access_cnt++; hash = bernstein_hash(&ureq->key_len, ureq_sz - offsetof(user_req, key_len), ureq->type); log(L_DEBUG, "hash:%08x", hash); hash = hash % cache_size; (*cellp) = cell = cache[hash]; for (i = 0; i < 8; i++) { if (!cell[i]) continue; // TODO: do secondary hash match /* ureq->version is always 2 and is reused in cache * for other purposes, we need to skip it here */ if (memcmp(&ureq->type, &cell[i]->type, ureq_sz - offsetof(user_req, type)) == 0) { log(L_DEBUG, "found in cache[%u][%u]", hash, i); cache_hit_cnt++; return cell[i]; } } log(L_DEBUG, "not found in cache[%u][x]", hash); return NULL; } static void 
free_refcounted_ureq(user_req **ureqp) { user_req *ureq = *ureqp; /* is it in use? */ if (ureq->refcount) { ureq->refcount = 0; /* since it can be only 1 or 0... */ } else { free(ureq); } *ureqp = NULL; } static void save_in_cell(user_req **cell, user_req *new_cached, unsigned now_ms) { unsigned oldest_idx = 0; unsigned oldest_age = 0; unsigned age; unsigned i; for (i = 0; i < 8; i++) { if (!cell[i]) { log(L_DEBUG, "using free cache[x][%u]", i); cached_cnt++; cell[i] = new_cached; aging_interval_ms = min_aging_interval_ms; return; } age = cache_age(now_ms, cell[i]); if (age > oldest_age) { oldest_age = age; oldest_idx = i; } } log(L_DEBUG, "freeing and reusing cache[x][%u] (age %u)", oldest_idx, oldest_age); if (cell[oldest_idx]) { free_refcounted_ureq(&cell[oldest_idx]); } else { cached_cnt++; } cell[oldest_idx] = new_cached; aging_interval_ms = min_aging_interval_ms; } static void age_cache(unsigned now_ms, int srv) { user_req **cp = *cache; int i; unsigned sv = cached_cnt; log(L_DEBUG, "aging cache, srv:%d, now:%u", srv, now_ms); if (srv == -1 || !now_ms) aging_interval_ms = INT_MAX; i = cache_size * 8; do { user_req *cached = *cp; if (cached) { int csrv = type_to_srv[cached->type]; if (srv == -1 || srv == csrv) { if (!now_ms) { cached_cnt--; free_refcounted_ureq(cp); } else { unsigned age = cache_age(now_ms, cached); response_header *resp = ureq_response(cached); unsigned ttl = (resp->found ? 
config.pttl : config.nttl)[csrv]; if (age >= ttl) { log(L_DEBUG, "freeing: age %u positive %d ttl %u", age, resp->found, ttl); cached_cnt--; free_refcounted_ureq(cp); } else if (srv == -1) { ttl -= age; if (aging_interval_ms > ttl) aging_interval_ms = ttl; } } } } cp++; } while (--i); log(L_INFO, "aged cache, freed:%u, remain:%u", sv - cached_cnt, cached_cnt); if (srv == -1 || !now_ms) log(L_DEBUG, "aging interval now %u ms", aging_interval_ms); } /* ** Worker child */ /* Spawns a worker and feeds it with user query on stdin */ /* Returns stdout fd of the worker, in blocking mode */ static int create_and_feed_worker(user_req *ureq) { pid_t pid; struct { int rd; int wr; } to_child, to_parent; /* NB: these pipe fds are in blocking mode and non-CLOEXECed */ xpipe(&to_child.rd); xpipe(&to_parent.rd); pid = vfork(); if (pid < 0) /* error */ perror_and_die("vfork"); if (!pid) { /* child */ char param[16]; char *argv[3]; close(to_child.wr); close(to_parent.rd); xmovefd(to_child.rd, 0); xmovefd(to_parent.wr, 1); sprintf(param, "%u", debug); argv[0] = (char*) "worker_nscd"; argv[1] = param; argv[2] = NULL; /* Re-exec ourself, cleaning up all allocated memory. 
* fds in parent are marked CLOEXEC and will be closed too * (modulo bugs) */ execve("/proc/self/exe", argv, argv+2); xexecve(self_exe_points_to, argv, argv+2); } /* parent */ close(to_child.rd); close(to_parent.wr); /* We do not expect child to block for any noticeably long time, * and also we expect write to be one-piece one: * ureq size is <= 1k and pipes are guaranteed to accept * at least PIPE_BUF at once */ xsafe_write(to_child.wr, ureq, ureq_size(ureq)); close(to_child.wr); return to_parent.rd; } static user_req *worker_ureq; #if DEBUG_BUILD static const char *req_str(unsigned type, const char *buf) { if (type == GETHOSTBYADDR) { struct in_addr in; in.s_addr = *((uint32_t*)buf); return inet_ntoa(in); } if (type == GETHOSTBYADDRv6) { return "IPv6"; } return buf; } #else const char *req_str(unsigned type, const char *buf); #endif static void worker_signal_handler(int sig) { #if DEBUG_BUILD log(L_INFO, "worker:%d got sig:%d while handling req " "type:%d(%s) key_len:%d '%s'", getpid(), sig, worker_ureq->type, typestr[worker_ureq->type], worker_ureq->key_len, req_str(worker_ureq->type, worker_ureq->reqbuf) ); #else log(L_INFO, "worker:%d got sig:%d while handling req " "type:%d key_len:%d", getpid(), sig, worker_ureq->type, worker_ureq->key_len); #endif _exit(0); } static void worker(const char *param) NORETURN; static void worker(const char *param) { user_req ureq; void *resp; debug = atoi(param); worker_ureq = &ureq; /* for signal handler */ /* Make sure we won't hang, but rather die */ if (WORKER_TIMEOUT_SEC) alarm(WORKER_TIMEOUT_SEC); /* NB: fds 0, 1 are in blocking mode */ /* We block here (for a short time) */ /* Due to ureq size < PIPE_BUF read is atomic */ /* No error or size checking: we trust the parent */ safe_read(0, &ureq, sizeof(ureq)); signal(SIGSEGV, worker_signal_handler); signal(SIGBUS, worker_signal_handler); signal(SIGILL, worker_signal_handler); signal(SIGFPE, worker_signal_handler); signal(SIGABRT, worker_signal_handler); signal(SIGSTKFLT, 
worker_signal_handler); if (ureq.type == GETHOSTBYNAME || ureq.type == GETHOSTBYNAMEv6 ) { resp = marshal_hostent( ureq.type == GETHOSTBYNAME ? gethostbyname(ureq.reqbuf) : gethostbyname2(ureq.reqbuf, AF_INET6) ); } else if (ureq.type == GETHOSTBYADDR || ureq.type == GETHOSTBYADDRv6 ) { resp = marshal_hostent(gethostbyaddr(ureq.reqbuf, ureq.key_len, (ureq.type == GETHOSTBYADDR ? AF_INET : AF_INET6) )); } else if (ureq.type == GETPWBYNAME) { struct passwd *pw; log(L_DEBUG2, "getpwnam('%s')", ureq.reqbuf); pw = getpwnam(ureq.reqbuf); log(L_DEBUG2, "getpwnam result:%p", pw); resp = marshal_passwd(pw); } else if (ureq.type == GETPWBYUID) { resp = marshal_passwd(getpwuid(atoi(ureq.reqbuf))); } else if (ureq.type == GETGRBYNAME) { struct group *gr = getgrnam(ureq.reqbuf); resp = marshal_group(gr); } else if (ureq.type == GETGRBYGID) { struct group *gr = getgrgid(atoi(ureq.reqbuf)); resp = marshal_group(gr); } else if (ureq.type == GETAI) { resp = obtain_addrinfo(ureq.reqbuf); } else /*if (ureq.type == INITGROUPS)*/ { resp = obtain_initgroups(ureq.reqbuf); } if (!((response_header*)resp)->found) { /* Parent knows about this special case */ xfull_write(1, resp, 8); } else { /* Responses can be big (getgrnam("guest") on a big user db), * we cannot rely on them being atomic. full_write loops * if needed */ xfull_write(1, resp, ((response_header*)resp)->version_or_size); } _exit(0); } /* ** Main loop */ static const char checked_filenames[][sizeof("/etc/passwd")] = { [SRV_PASSWD] = "/etc/passwd", /* "/etc/shadow"? */ [SRV_GROUP] = "/etc/group", [SRV_HOSTS] = "/etc/hosts", /* "/etc/resolv.conf" "/etc/nsswitch.conf"? */ }; static long checked_status[sizeof(checked_filenames) / sizeof(checked_filenames[0])]; static void check_files(int srv) { struct stat tsb; const char *file = checked_filenames[srv]; long v; memset(&tsb, 0, sizeof(tsb)); stat(file, &tsb); /* ignore errors */ /* Comparing struct stat's was giving false positives. 
* Extracting only those fields which are interesting: */ v = (long)tsb.st_mtime ^ (long)tsb.st_size ^ (long)tsb.st_ino; /* ^ (long)tsb.st_dev ? */ if (v != checked_status[srv]) { checked_status[srv] = v; log(L_INFO, "detected change in %s", file); age_cache(0, srv); /* frees entire cache */ } } /* Returns 1 if we immediately have the answer */ static int handle_client(int i) { int srv; user_req *ureq = cinfo[i].ureq; user_req **cell; user_req *ureq_and_resp; log(L_DEBUG, "version:%d type:%d(%s) key_len:%d '%s'", ureq->version, ureq->type, typestr[ureq->type], ureq->key_len, req_str(ureq->type, ureq->reqbuf)); hex_dump(cinfo[i].ureq, cinfo[i].bytecnt); if (unsupported_ureq_type(ureq->type)) { /* We don't know this request. Just close the connection */ /* (glibc client interprets this like "not supported by this nscd") */ log(L_INFO, "unsupported query, dropping"); close_client(i); return 0; } srv = type_to_srv[ureq->type]; if (!config.srv_enable[srv]) { log(L_INFO, "service %d is disabled, dropping", srv); close_client(i); return 0; } if (cinfo[i].bytecnt < USER_HDR_SIZE + ureq->key_len) { log(L_INFO, "read %d, need %d more to read", cinfo[i].bytecnt, USER_HDR_SIZE + ureq->key_len); return 0; /* more to read */ } if (cinfo[i].bytecnt > USER_HDR_SIZE + ureq->key_len) { log(L_INFO, "read overflow"); close_client(i); return 0; } if (ureq->version != NSCD_VERSION) { log(L_INFO, "wrong version"); close_client(i); return 0; } if (ureq->type != GETHOSTBYADDR && ureq->type != GETHOSTBYADDRv6 ) { if (ureq->key_len && ureq->reqbuf[ureq->key_len - 1] != '\0') { log(L_INFO, "badly terminated buffer"); close_client(i); return 0; } } if (config.check_files[srv]) { check_files(srv); } /* If in cache, save ptr to response into cinfo and return */ ureq_and_resp = find_cell_and_response(&cell, ureq); if (ureq_and_resp) { response_header *resp = ureq_response(ureq_and_resp); unsigned sz = resp->version_or_size; log(L_DEBUG, "sz:%u", sz); hex_dump(resp, sz); ureq_and_resp->refcount = 
1; /* cache shouldn't free it under us! */ pfd[i].events = POLLOUT; /* we want to write out */ cinfo[i].resptr = ureq_and_resp; cinfo[i].respos = 0; //cinfo[i].resp_sz = sz; return 1; } /* Start worker thread */ cinfo[i].cell = cell; /* Now we will wait on worker's fd, not client's! */ cinfo[i].client_fd = pfd[i].fd; pfd[i].fd = create_and_feed_worker(ureq); /* We can do it here, but we don't really need to. * We need to have client_buf[] big enough anyway for worst case scenario, * so we can simply keep cbuf allocated until we close a client. cinfo[i].ureq = NULL; busy_cbuf[cinfo[i].bufidx] = 0; */ return 0; } /* When we return, reply is fully read and stored in cache, * worker's fd is closed, pfd[i] and cinfo[i] are updated. */ static void handle_worker_response(int i, unsigned now_ms) { response_header sz_and_found; user_req *cached; user_req *ureq = cinfo[i].ureq; response_header *resp; unsigned resp_sz; unsigned ureq_sz_aligned = (char*)ureq_response(ureq) - (char*)ureq; //TODO: optimization: //try to read more (say 1k). Many responses will fit in the very first read resp_sz = full_read(pfd[i].fd, &sz_and_found, 8); if (resp_sz != 8) { /* worker was killed? */ log(L_DEBUG, "worker gave short reply:%u != 8", resp_sz); goto err; } resp_sz = sz_and_found.version_or_size; if (resp_sz < 8 || resp_sz > 0xfffffff) { /* 256 mb */ error("BUG: bad size from worker:%u", resp_sz); goto err; } /* Create new block of cached info */ cached = xzalloc(ureq_sz_aligned + resp_sz); resp = (void*) ((char*)cached + ureq_sz_aligned); memcpy(cached, ureq, ureq_size(ureq)); resp->version_or_size = resp_sz; resp->found = sz_and_found.found; if (sz_and_found.found) { /* We need to read data only if it's found * (otherwise worker sends only 8 bytes). * Replies can be big (getgrnam("guest") on a big user db), * we cannot rely on them being atomic. 
However, we know that worker * _always_ gives reply in one full_write(), so loop and read it all * (looping is implemented inside full_read()) */ if (full_read(pfd[i].fd, resp->body, resp_sz - 8) != resp_sz - 8) { /* worker was killed? */ log(L_DEBUG, "worker gave short reply"); free(cached); err: cached = NULL; goto wo; } } hex_dump(resp, resp_sz); /* save in cache */ cached->refcount = 1; /* cache shouldn't free it under us! */ set_cache_timestamp(cached, now_ms); save_in_cell(cinfo[i].cell, cached, now_ms); wo: close(pfd[i].fd); /* schedule for writeout */ pfd[i].fd = cinfo[i].client_fd; cinfo[i].client_fd = 0; /* no, we don't wait for worker reply anymore */ pfd[i].events = POLLOUT; /* pfd[i].revents = 0; - not needed? */ /* writeout position etc */ cinfo[i].resptr = cached; cinfo[i].respos = 0; //cinfo[i].resp_sz = resp_sz; /* if worker took some time to get info (e.g. DNS query), * prevent client timeout from triggering at once */ cinfo[i].started_ms = now_ms; } static void main_loop(void) { /* 1/2 of smallest negative TTL */ min_aging_interval_ms = config.nttl[0]; if (min_aging_interval_ms > config.nttl[1]) min_aging_interval_ms = config.nttl[1]; if (min_aging_interval_ms > config.nttl[2]) min_aging_interval_ms = config.nttl[2]; min_aging_interval_ms = (min_aging_interval_ms / 2) | 1; aging_interval_ms = min_aging_interval_ms; while (1) { int i, j; int r; unsigned now_ms; r = SMALL_POLL_TIMEOUT_MS; if (num_clients <= 2 && !cached_cnt) r = -1; /* infinite */ else if (num_clients < max_reqnum) r = aging_interval_ms; #if 0 /* Debug: leak detector */ { static unsigned long long cnt; void *p = malloc(240); /* should not be too small */ void *s = sbrk(0); free(p); log(L_INFO, "entering poll %llu (%d ms). num_clients:%u cached:%u %u/%u next malloc:%p, sbrk:%p", cnt, r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt, p, s); cnt++; } #else log(L_DEBUG, "entering poll (%d ms). 
num_clients:%u cached:%u hit_ratio:%u/%u", r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt); #endif r = poll(pfd, num_clients, r); log(L_DEBUG2, "poll returns %d", r); if (r < 0) { if (errno != EINTR) perror_and_die("poll"); continue; } /* Everything between polls never sleeps. * There is no blocking I/O (except when we talk to worker thread * which is guaranteed to not block us for long) */ now_ms = monotonic_ms(); if (r == 0) goto skip_fd_checks; for (i = 0; i < 2; i++) { int cfd; if (!pfd[i].revents) continue; /* pfd[i].revents = 0; - not needed */ cfd = accept(pfd[i].fd, NULL, NULL); if (cfd < 0) { /* odd... poll() says we can accept but accept failed? */ log(L_DEBUG2, "accept failed with %s", strerror(errno)); continue; } ndelay_on(cfd); close_on_exec(cfd); /* x[num_clients] is next free element, taking it */ pfd[num_clients].fd = cfd; pfd[num_clients].events = POLLIN; /* this will make us do read() in next for() loop: */ pfd[num_clients].revents = POLLIN; memset(&cinfo[num_clients], 0, sizeof(cinfo[num_clients])); /* cinfo[num_clients].bytecnt = 0; - done */ cinfo[num_clients].started_ms = now_ms; cinfo[num_clients].bufidx = alloc_buf_no(); cinfo[num_clients].ureq = bufno2buf(cinfo[num_clients].bufidx); num_clients++; if (num_clients >= max_reqnum) { /* stop accepting new connects for now */ pfd[0].events = pfd[0].revents = 0; pfd[1].events = pfd[1].revents = 0; } } for (; i < num_clients; i++) { if (!pfd[i].revents) continue; log(L_DEBUG2, "pfd[%d].revents:0x%x", i, pfd[i].revents); /* pfd[i].revents = 0; - not needed */ /* "Write out result" case */ if (pfd[i].revents == POLLOUT) { response_header *resp; uint32_t resp_sz; if (!cinfo[i].resptr) { /* corner case: worker gave bad response earlier */ close_client(i); continue; } write_out: resp = ureq_response(cinfo[i].resptr); resp_sz = resp->version_or_size; resp->version_or_size = NSCD_VERSION; r = safe_write(pfd[i].fd, resp + cinfo[i].respos, resp_sz - cinfo[i].respos); resp->version_or_size = 
resp_sz; if (r < 0 && errno == EAGAIN) continue; if (r <= 0) { /* client isn't there anymore */ log(L_DEBUG, "client %d is gone (write returned %d)", i, r); write_out_is_done: free_refcounted_ureq(&cinfo[i].resptr); close_client(i); continue; } cinfo[i].respos += r; if (cinfo[i].respos >= resp_sz) { /* We wrote everything */ /* No point in trying to get next request, it won't come. * glibc 2.4 client closes its end after each request, * without testing for EOF from server. strace: * ... * read(3, "www.google.com\0\0", 16) = 16 * close(3) = 0 */ log(L_DEBUG, "client %d: sent answer %u bytes", i, cinfo[i].respos); goto write_out_is_done; } } /* "Read reply from worker" case. Worker may be * already dead, revents may contain other bits too */ if ((pfd[i].revents & POLLIN) && cinfo[i].client_fd) { log(L_DEBUG, "reading response for client %u", i); handle_worker_response(i, now_ms); /* We can immediately try to write a response * to client */ goto write_out; } if (pfd[i].revents == POLLHUP) { /* POLLHUP means we can't write to it anymore */ log(L_INFO, "client %u disappered (got POLLHUP)"); close_client(i); continue; } /* All strange and unexpected cases */ if (pfd[i].revents != POLLIN) { /* Not just "can read", but some other bits are there */ log(L_INFO, "client %u revents is strange:%x", i, pfd[i].revents); close_client(i); continue; } /* "Read request from client" case */ r = safe_read(pfd[i].fd, (char*)(cinfo[i].ureq) + cinfo[i].bytecnt, MAX_USER_REQ_SIZE - cinfo[i].bytecnt); if (r < 0) { log(L_DEBUG2, "error reading from client: %s", strerror(errno)); if (errno == EAGAIN) continue; close_client(i); continue; } if (r == 0) { log(L_INFO, "premature EOF from client, dropping"); close_client(i); continue; } cinfo[i].bytecnt += r; if (cinfo[i].bytecnt >= sizeof(user_req_header)) { if (cinfo[i].ureq->type == SHUTDOWN || cinfo[i].ureq->type == INVALIDATE ) { const char *service; unsigned len; #ifdef SO_PEERCRED struct ucred caller; socklen_t optlen = sizeof(caller); if 
(getsockopt(pfd[i].fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0) { log(L_INFO, "ignoring special request - cannot get caller's id: %s", strerror(errno)); close_client(i); continue; } if (caller.uid != 0) { log(L_INFO, "special request from non-root - ignoring"); close_client(i); continue; } #endif if (cinfo[i].ureq->type == SHUTDOWN) { log(L_INFO, "got shutdown request, exiting"); return; /* exits nscd */; } len = cinfo[i].ureq->key_len; service = (char*)&cinfo[i].ureq + len; if (sizeof(user_req_header) + len != cinfo[i].bytecnt || !len || service[len-1] != '\0' ) { log(L_INFO, "malformed invalidate request - ignoring"); close_client(i); continue; } log(L_INFO, "got invalidate request, flushing cache"); age_cache(0, -1); /* frees entire cache. TODO: replace -1 with service */ close_client(i); continue; } if (handle_client(i)) { /* Response is found in cache! */ goto write_out; } } } /* for each client[2..num_clients-1] */ skip_fd_checks: /* Age cache */ if ((now_ms - last_age_time) >= aging_interval_ms) { last_age_time = now_ms; age_cache(now_ms, -1); } /* Close timed out client connections */ for (i = 2; i < num_clients; i++) { if (pfd[i].fd && !cinfo[i].client_fd && (now_ms - cinfo[i].started_ms) > CLIENT_TIMEOUT_MS ) { log(L_INFO, "timed out waiting for client %u, dropping", i); close_client(i); } } if (!cnt_closed) continue; /* We closed at least one client, coalesce pfd[], cinfo[] */ if (min_closed + cnt_closed >= num_clients) { /* clients [min_closed..num_clients-1] are all closed */ /* log(L_DEBUG, "taking shortcut"); - almost always happens */ goto coalesce_done; } j = min_closed; i = min_closed + 1; while (i < num_clients) { while (1) { if (pfd[i].fd) break; if (++i >= num_clients) goto coalesce_done; } pfd[j] = pfd[i]; cinfo[j++] = cinfo[i++]; } coalesce_done: num_clients -= cnt_closed; log(L_DEBUG, "removing %d closed clients. 
num_clients:%d", cnt_closed, num_clients); min_closed = INT_MAX; cnt_closed = 0; /* start accepting new connects */ pfd[0].events = POLLIN; pfd[1].events = POLLIN; } /* while (1) */ } /* ** Initialization */ #define NSCD_PIDFILE "/var/run/nscd/nscd.pid" #define NSCD_DIR "/var/run/nscd" #define NSCD_SOCKET "/var/run/nscd/socket" #define NSCD_SOCKET_OLD "/var/run/.nscd_socket" static smallint wrote_pidfile; static void cleanup_on_signal(int sig) { if (wrote_pidfile) unlink(NSCD_PIDFILE); unlink(NSCD_SOCKET_OLD); unlink(NSCD_SOCKET); exit(0); } static void write_pid(void) { FILE *pid = fopen(NSCD_PIDFILE, "w"); if (!pid) return; fprintf(pid, "%d\n", getpid()); fclose(pid); wrote_pidfile = 1; } /* Open a listening nscd server socket */ static int open_socket(const char *name) { struct sockaddr_un sun; int sock = socket(AF_UNIX, SOCK_STREAM, 0); if (sock < 0) perror_and_die("cannot create unix domain socket"); ndelay_on(sock); close_on_exec(sock); sun.sun_family = AF_UNIX; strcpy(sun.sun_path, name); unlink(name); if (bind(sock, (struct sockaddr *) &sun, sizeof(sun)) < 0) perror_and_die("bind(%s)", name); if (chmod(name, 0666) < 0) perror_and_die("chmod(%s)", name); if (listen(sock, (max_reqnum/8) | 1) < 0) perror_and_die("listen"); return sock; } static const struct option longopt[] = { /* name, has_arg, int *flag, int val */ { "debug" , no_argument , NULL, 'd' }, { "config-file", required_argument, NULL, 'f' }, { "invalidate" , required_argument, NULL, 'i' }, { "shutdown" , no_argument , NULL, 'K' }, { "nthreads" , required_argument, NULL, 't' }, { "version" , no_argument , NULL, 'V' }, { "help" , no_argument , NULL, '?' }, { "usage" , no_argument , NULL, '?' }, /* just exit(0). TODO: "test" connect? */ { "statistic" , no_argument , NULL, 'g' }, { "secure" , no_argument , NULL, 'S' }, /* ? 
*/ { } }; static const char *const help[] = { "Do not daemonize; log to stderr", "File to read configuration from", "Invalidate cache", "Shut the server down", "Serve N requests in parallel", "Version", }; static void print_help_and_die(void) { const struct option *opt = longopt; const char *const *h = help; puts("Usage: nscd [OPTION...]\n" "Name Service Cache Daemon\n"); do { printf("\t" "-%c,--%-11s %s\n", opt->val, opt->name, *h); h++; opt++; } while (opt->val != '?'); exit(1); } static char *skip_service(int *srv, const char *s) { if (strcmp("passwd", s) == 0) { *srv = SRV_PASSWD; s++; } else if (strcmp("group", s) == 0) { *srv = SRV_GROUP; } else if (strcmp("hosts", s) == 0) { *srv = SRV_HOSTS; } else { return NULL; } return skip_whitespace(s + 6); } static void handle_null(const char *str, int srv) {} static void handle_logfile(const char *str, int srv) { config.logfile = xstrdup(str); } static void handle_debuglvl(const char *str, int srv) { debug |= (uint8_t) getnum(str); } static void handle_threads(const char *str, int srv) { unsigned n = getnum(str); if (max_reqnum < n) max_reqnum = n; } static void handle_user(const char *str, int srv) { config.user = xstrdup(str); } static void handle_enable(const char *str, int srv) { config.srv_enable[srv] = ((str[0] | 0x20) == 'y'); } static void handle_pttl(const char *str, int srv) { config.pttl[srv] = getnum(str); } static void handle_nttl(const char *str, int srv) { config.nttl[srv] = getnum(str); } static void handle_size(const char *str, int srv) { config.size[srv] = getnum(str); } static void handle_chfiles(const char *str, int srv) { config.check_files[srv] = ((str[0] | 0x20) == 'y'); } static void parse_conffile(const char *conffile, int warn) { static const struct confword { const char *str; void (*handler)(const char *, int); } conf_words[] = { { "_" "logfile" , handle_logfile }, { "_" "debug-level" , handle_debuglvl }, { "_" "threads" , handle_threads }, { "_" "max-threads" , handle_threads }, { "_" 
"server-user" , handle_user }, /* ignore: any user can stat */ { "_" "stat-user" , handle_null }, { "_" "paranoia" , handle_null }, /* ? */ /* ignore: design goal is to never crash/hang */ { "_" "reload-count" , handle_null }, { "_" "restart-interval" , handle_null }, { "S" "enable-cache" , handle_enable }, { "S" "positive-time-to-live" , handle_pttl }, { "S" "negative-time-to-live" , handle_nttl }, { "S" "suggested-size" , handle_size }, { "S" "check-files" , handle_chfiles }, { "S" "persistent" , handle_null }, /* ? */ { "S" "shared" , handle_null }, /* ? */ { "S" "auto-propagate" , handle_null }, /* ? */ { } }; char buf[128]; FILE *file = fopen(conffile, "r"); int lineno = 0; if (!file) { if (conffile != default_conffile) perror_and_die("cannot open %s", conffile); return; } while (fgets(buf, sizeof(buf), file) != NULL) { const struct confword *word; char *p; int len = strlen(buf); lineno++; if (len) { if (buf[len-1] != '\n') { if (len >= sizeof(buf) - 1) error_and_die("%s:%d: line is too long", conffile, lineno); len++; /* last line, not terminated by '\n' */ } buf[len-1] = '\0'; } p = strchr(buf, '#'); if (p) *p = '\0'; p = skip_whitespace(buf); if (!*p) continue; *skip_non_whitespace(p) = '\0'; word = conf_words; while (1) { if (strcmp(word->str + 1, p) == 0) { int srv; p = skip_whitespace(p + strlen(p) + 1); *skip_non_whitespace(p) = '\0'; if (word->str[0] == 'S') { char *p2 = skip_service(&srv, p); if (!p2) { if (warn) error("%s:%d: ignoring unknown service name '%s'", conffile, lineno, p); break; } p = p2; *skip_non_whitespace(p) = '\0'; } word->handler(p, srv); break; } word++; if (!word->str) { if (warn) error("%s:%d: ignoring unknown directive '%s'", conffile, lineno, p); break; } } } fclose(file); } /* "XX,XX[,XX]..." 
-> gid_t[] */ static gid_t* env_U_to_uid_and_gids(const char *str, int *sizep) { const char *sp; gid_t *ug, *gp; int ng; sp = str; ng = 1; while (*sp) if (*sp++ == ',') ng++; ug = xmalloc(ng * sizeof(ug[0])); ng = 0; gp = ug; sp = str; errno = 0; while (1) { ng++; *gp++ = strtoul(sp, (char**)&sp, 16); if (errno || (*sp != ',' && *sp != '\0')) error_and_die("internal error"); if (*sp == '\0') break; sp++; } *sizep = ng; return ug; } static char* user_to_env_U(const char *user) { int ng; char *ug_str, *sp; gid_t *ug, *gp; struct passwd *pw; pw = getpwnam(user); if (!pw) perror_and_die("user '%s' is not known", user); ng = 64; /* 0th cell will be used for uid */ ug = xmalloc((1 + ng) * sizeof(ug[0])); if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0) { ug = xrealloc(ug, (1 + ng) * sizeof(ug[0])); if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0) perror_and_die("can't get groups of user '%s'", user); } ng++; ug[0] = pw->pw_uid; /* How much do we need for "-Uxx,xx[,xx]..." string? */ ug_str = xmalloc((sizeof(unsigned long)+1)*2 * ng + 3); gp = ug; sp = ug_str; *sp++ = 'U'; *sp++ = '='; do { sp += sprintf(sp, "%lx,", (unsigned long)(*gp++)); } while (--ng); sp[-1] = '\0'; free(ug); return ug_str; } /* not static - don't inline me, compiler! 
*/ void readlink_self_exe(void) { char buf[PATH_MAX + 1]; ssize_t sz = readlink("/proc/self/exe", buf, sizeof(buf) - 1); if (sz < 0) perror_and_die("readlink %s failed", "/proc/self/exe"); buf[sz] = 0; self_exe_points_to = xstrdup(buf); } static void special_op(const char *arg) NORETURN; static void special_op(const char *arg) { static const user_req_header ureq = { NSCD_VERSION, SHUTDOWN, 0 }; struct sockaddr_un addr; int sock; sock = socket(PF_UNIX, SOCK_STREAM, 0); if (sock < 0) error_and_die("cannot create AF_UNIX socket"); addr.sun_family = AF_UNIX; strcpy(addr.sun_path, NSCD_SOCKET); if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0) error_and_die("cannot connect to %s", NSCD_SOCKET); if (!arg) { /* shutdown */ xfull_write(sock, &ureq, sizeof(ureq)); dup2(2, 1); error_and_die("sent shutdown request, exiting"); } else { /* invalidate */ size_t arg_len = strlen(arg) + 1; struct { user_req_header req; char arg[arg_len]; } reqdata; reqdata.req.version = NSCD_VERSION; reqdata.req.type = INVALIDATE; reqdata.req.key_len = arg_len; memcpy(reqdata.arg, arg, arg_len); xfull_write(sock, &reqdata, arg_len + sizeof(ureq)); dup2(2, 1); error_and_die("sent invalidate(%s) request, exiting", arg); } } /* This internal glibc function is called to disable trying to contact nscd. * We _are_ nscd, so we need to do the lookups, and not recurse. 
*/ void __nss_disable_nscd(void); int main(int argc, char **argv) { int n; const char *env_U; const char *conffile; /* make sure we don't get recursive calls */ __nss_disable_nscd(); if (argv[0][0] == 'w') /* "worker_nscd" */ worker(argv[1]); setlinebuf(stdout); setlinebuf(stderr); /* For idiotic kernels which disallow "exec /proc/self/exe" */ readlink_self_exe(); conffile = default_conffile; while ((n = getopt_long(argc, argv, "df:i:KVgt:", longopt, NULL)) != -1) { switch (n) { case 'd': debug &= ~D_DAEMON; break; case 'f': conffile = optarg; break; case 'i': /* invalidate */ special_op(optarg); /* exits */ case 'K': /* shutdown server */ special_op(NULL); /* exits */ case 'V': puts("unscd - nscd which does not hang, v."PROGRAM_VERSION); exit(0); case 'g': exit(0); case 't': /* N threads */ max_reqnum = getnum(optarg); break; case 'S': /* secure (?) */ break; default: print_help_and_die(); } } env_U = getenv("U"); /* Avoid duplicate warnings if $U exists */ parse_conffile(conffile, /* warn? */ (env_U == NULL)); /* I have a user report of (broken?) ldap nss library * opening and never closing a socket to a ldap server, * even across fork() and exec(). This messes up * worker child's operations for the reporter. * * This strenghtens my belief that nscd _must not_ trust * nss libs to be written correctly. * * Here, we need to jump through the hoops to guard against * such problems. If config file has server-user setting, we need * to setgroups + setuid. For that, we need to get uid and gid vector. * And that means possibly using buggy nss libs. * We will do it here, but then we will re-exec, passing uid+gids * in an environment variable. 
*/ if (!env_U && config.user) { /* user_to_env_U() does getpwnam and getgrouplist */ if (putenv(user_to_env_U(config.user))) error_and_die("out of memory"); /* fds leaked by nss will be closed by execed copy */ execv("/proc/self/exe", argv); xexecve(self_exe_points_to, argv, environ); } /* Allocate dynamically sized stuff */ max_reqnum += 2; /* account for 2 first "fake" clients */ if (max_reqnum < 8) max_reqnum = 8; /* sanitize */ if (max_reqnum > 0xffff) max_reqnum = 0xffff; log(L_DEBUG, "will handle %u requests in parallel", max_reqnum - 2); client_buf = xzalloc(max_reqnum * sizeof(client_buf[0])); busy_cbuf = xzalloc(max_reqnum * sizeof(busy_cbuf[0])); pfd = xzalloc(max_reqnum * sizeof(pfd[0])); cinfo = xzalloc(max_reqnum * sizeof(cinfo[0])); cache_size = (config.size[0] + config.size[1] + config.size[2]) / 8; if (cache_size < 64) cache_size = 64; /* 8*64 = 512 entries min */ if (cache_size > 0xffff) cache_size = 0xffff; /* 8*64k entries max */ cache_size |= 1; /* force it to be odd */ log(L_DEBUG, "cache size %u x 8 entries", cache_size); cache = xzalloc(cache_size * sizeof(cache[0])); /* Make sure stdio is not closed */ n = xopen3("/dev/null", O_RDWR, 0); while (n < 2) n = dup(n); /* Close unexpected open file descriptors */ n |= 0xff; /* start from at least fd# 255 */ do { close(n--); } while (n > 2); /* Register cleanup hooks */ signal(SIGINT, cleanup_on_signal); signal(SIGTERM, cleanup_on_signal); /* Don't die if a client closes a socket on us */ signal(SIGPIPE, SIG_IGN); /* Avoid creating zombies */ signal(SIGCHLD, SIG_IGN); #if !DEBUG_BUILD /* Ensure workers don't have SIGALRM ignored */ signal(SIGALRM, SIG_DFL); #endif mkdir(NSCD_DIR, 0777); pfd[0].fd = open_socket(NSCD_SOCKET); pfd[1].fd = open_socket(NSCD_SOCKET_OLD); pfd[0].events = POLLIN; pfd[1].events = POLLIN; if (debug & D_DAEMON) { daemon(/*nochdir*/ 1, /*noclose*/ 0); if (config.logfile) { /* nochdir=1: relative paths still work as expected */ xmovefd(xopen3(config.logfile, 
O_WRONLY|O_CREAT|O_TRUNC, 0666), 2); debug |= D_STAMP; } else { debug = 0; /* why bother? it's /dev/null'ed anyway */ } chdir("/"); /* compat */ write_pid(); setsid(); /* ignore job control signals */ signal(SIGTTOU, SIG_IGN); signal(SIGTTIN, SIG_IGN); signal(SIGTSTP, SIG_IGN); } if (env_U) { int size; gid_t *ug = env_U_to_uid_and_gids(env_U, &size); if (setgroups(size, &ug[1])) perror_and_die("cannot set groups for user '%s'", config.user); if (setuid(ug[0])) perror_and_die("cannot set uid to %u", (unsigned)(ug[0])); free(ug); } log(L_ALL, "nscd v" PROGRAM_VERSION ", debug level %x", debug & L_ALL); log(L_DEBUG, "passwd cache: %d pttl %u nttl %u", config.srv_enable[SRV_PASSWD], config.pttl[SRV_PASSWD], config.nttl[SRV_PASSWD]); log(L_DEBUG, " group cache: %d pttl %u nttl %u", config.srv_enable[SRV_GROUP ], config.pttl[SRV_GROUP], config.nttl[SRV_GROUP]); log(L_DEBUG, " hosts cache: %d pttl %u nttl %u", config.srv_enable[SRV_HOSTS ], config.pttl[SRV_HOSTS], config.nttl[SRV_HOSTS]); for (n = 0; n < 3; n++) { config.pttl[n] *= 1000; config.nttl[n] *= 1000; } main_loop(); return 0; }
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor