/* Cache memory handling.
   Copyright (C) 2004-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <obstack.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"


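/* qsort comparator ordering hash entries by their address in the
   data area; gc uses it to walk the entries in memory order.  */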
static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}


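/* qsort comparator ordering hash entries by the offset of the data
   record (packet) they reference; all entries sharing one DATAHEAD
   object compare equal.  */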
static int
sort_he_data (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1->packet < h2->packet)
    return -1;
  if (h1->packet > h2->packet)
    return 1;
  return 0;
}


/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
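/* With the uint8_t choice above these work out to BITS == 8,
   ALLBITS == 0xff and HIGHBIT == 0x80; the shift in ALLBITS does not
   overflow because integer promotion widens the operand to int before
   the shift by BITS.  */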


static void
markrange (BITMAP_T *mark, ref_t start, size_t len)
{
  /* Adjust parameters for block alignment.  */
  assert ((start & BLOCK_ALIGN_M1) == 0);
  start /= BLOCK_ALIGN;
  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;

  size_t elem = start / BITS;

  if (start % BITS != 0)
    {
      if (start % BITS + len <= BITS)
	{
	  /* All fits in the partial byte.  */
	  mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
	  return;
	}

      mark[elem++] |= ALLBITS << (start % BITS);
      len -= BITS - (start % BITS);
    }

  while (len >= BITS)
    {
      mark[elem++] = ALLBITS;
      len -= BITS;
    }

  if (len > 0)
    mark[elem] |= ALLBITS >> (BITS - len);
}
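/* Illustration (assuming BLOCK_ALIGN is 8, so each mark byte covers
   8 blocks): markrange (mark, 8, 20) spans blocks 1 to 3 and reduces
   to mark[0] |= (0xff >> 5) << 1, setting bits 1, 2 and 3.  */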


void
gc (struct database_dyn *db)
{
  /* We need write access.  */
  pthread_rwlock_wrlock (&db->lock);

  /* And the memory handling lock.  */
  pthread_mutex_lock (&db->memlock);

  /* We need an array representing the data area.  All memory
     allocation is BLOCK_ALIGN aligned so this is the level at which
     we have to look at the memory.  We use a mark and sweep algorithm
     where the marks are placed in this array.  */
  assert (db->head->first_free % BLOCK_ALIGN == 0);

  BITMAP_T *mark;
  bool mark_use_malloc;
  /* In prune_cache we are also using a dynamically allocated array.
     If the array in the caller is too large we have malloc'ed it.  */
  size_t stack_used = sizeof (bool) * db->head->module;
  if (__glibc_unlikely (stack_used > MAX_STACK_USE))
    stack_used = 0;
  size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
  size_t memory_needed = nmark * sizeof (BITMAP_T);
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
      mark_use_malloc = false;
      memset (mark, '\0', memory_needed);
    }
  else
    {
      mark = (BITMAP_T *) xcalloc (1, memory_needed);
      mark_use_malloc = true;
    }

  /* Create an array which can hold pointers to all the hash entries.
     It is allocated twice over: the second half (HE_DATA) holds the
     same pointers, sorted by data offset.  */
  memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
  struct hashentry **he;
  struct hashentry **he_data;
  bool he_use_malloc;
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      he = alloca_account (memory_needed, stack_used);
      he_use_malloc = false;
    }
  else
    {
      he = xmalloc (memory_needed);
      he_use_malloc = true;
    }
  he_data = &he[db->head->nentries];

  size_t cnt = 0;
  for (size_t idx = 0; idx < db->head->module; ++idx)
    {
      ref_t *prevp = &db->head->array[idx];
      ref_t run = *prevp;

      while (run != ENDREF)
	{
	  assert (cnt < db->head->nentries);
	  he[cnt] = (struct hashentry *) (db->data + run);

	  he[cnt]->prevp = prevp;
	  prevp = &he[cnt]->next;

	  /* This is the hash entry itself.  */
	  markrange (mark, run, sizeof (struct hashentry));

	  /* Add the information for the data itself.  We do this
	     only for the one special entry marked with FIRST.  */
	  if (he[cnt]->first)
	    {
	      struct datahead *dh
		= (struct datahead *) (db->data + he[cnt]->packet);
	      markrange (mark, he[cnt]->packet, dh->allocsize);
	    }

	  run = he[cnt]->next;

	  ++cnt;
	}
    }
  assert (cnt == db->head->nentries);

  /* Sort the entries by the addresses of the referenced data.  All
     the entries pointing to the same DATAHEAD object will have the
     same key.  Stability of the sorting is unimportant.  */
  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);

  /* Sort the entries by their address.  */
  qsort (he, cnt, sizeof (struct hashentry *), sort_he);

#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
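  /* The obstack machinery requires obstack_chunk_alloc and
     obstack_chunk_free to be defined before an obstack is used; the
     obstack below backs moveinfo records that do not fit within
     MAX_STACK_USE.  */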
  struct obstack ob;
  obstack_init (&ob);

  /* Determine the highest used address.  */
  size_t high = nmark;
  while (high > 0 && mark[high - 1] == 0)
    --high;

  /* No memory used.  */
  if (high == 0)
    {
      db->head->first_free = 0;
      goto out;
    }

  /* Determine the highest offset.  */
  BITMAP_T mask = HIGHBIT;
  ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
  while ((mark[high - 1] & mask) == 0)
    {
      mask >>= 1;
      highref -= BLOCK_ALIGN;
    }

  /* Now we can iterate over the MARK array and find bits which are not
     set.  These represent memory which can be recovered.  */
  size_t byte = 0;
  /* Find the first gap.  */
  while (byte < high && mark[byte] == ALLBITS)
    ++byte;

  if (byte == high
      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
    /* No gap.  */
    goto out;

  mask = 1;
  cnt = 0;
  while ((mark[byte] & mask) != 0)
    {
      ++cnt;
      mask <<= 1;
    }
  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
  assert (off_free <= db->head->first_free);

  struct hashentry **next_hash = he;
  struct hashentry **next_data = he_data;

  /* Skip over the hash entries in the first block which does not get
     moved.  */
  while (next_hash < &he[db->head->nentries]
	 && *next_hash < (struct hashentry *) (db->data + off_free))
    ++next_hash;

  while (next_data < &he_data[db->head->nentries]
	 && (*next_data)->packet < off_free)
    ++next_data;


  /* Now we start modifying the data.  Make sure all readers of the
     data are aware of this and temporarily don't use the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 1);
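  /* Readers of the mapped database are expected to sample GC_CYCLE
     before and after using the data: an odd value (or a value that
     changed in between) means a garbage collection was in progress
     and the result must be discarded and the lookup retried.  */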


  /* We do not perform the move operations right away since the
     he_data array is not sorted by the address of the data.  */
  struct moveinfo
  {
    void *from;
    void *to;
    size_t size;
    struct moveinfo *next;
  } *moves = NULL;

  while (byte < high)
    {
      /* Search for the next filled block.  BYTE is the index of the
	 entry in MARK, MASK is the bit, and CNT is the bit number.
	 OFF_ALLOC is the corresponding offset.  */
      if ((mark[byte] & ~(mask - 1)) == 0)
	{
	  /* No other bit set in the same element of MARK.  Search in the
	     following memory.  */
	  do
	    ++byte;
	  while (byte < high && mark[byte] == 0);

	  if (byte == high)
	    /* That was it.  */
	    break;

	  mask = 1;
	  cnt = 0;
	}
      /* Find the exact bit.  */
      while ((mark[byte] & mask) == 0)
	{
	  ++cnt;
	  mask <<= 1;
	}

      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_alloc <= db->head->first_free);

      /* Find the end of the used area.  */
      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
	{
	  /* All other bits set.  Search the next bytes in MARK.  */
	  do
	    ++byte;
	  while (byte < high && mark[byte] == ALLBITS);

	  mask = 1;
	  cnt = 0;
	}
      if (byte < high)
	{
	  /* Find the exact bit.  */
	  while ((mark[byte] & mask) != 0)
	    {
	      ++cnt;
	      mask <<= 1;
	    }
	}

      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_allocend <= db->head->first_free);
      /* Now we know that we can copy the area from OFF_ALLOC to
	 OFF_ALLOCEND (not included) to the memory starting at
	 OFF_FREE.  First fix up all the entries for the
	 displacement.  */
      ref_t disp = off_alloc - off_free;

      struct moveinfo *new_move;
      if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
			    1))
	new_move = alloca_account (sizeof (*new_move), stack_used);
      else
	new_move = obstack_alloc (&ob, sizeof (*new_move));
      new_move->from = db->data + off_alloc;
      new_move->to = db->data + off_free;
      new_move->size = off_allocend - off_alloc;
      /* Create a circular list to be always able to append at the end.  */
      if (moves == NULL)
	moves = new_move->next = new_move;
      else
	{
	  new_move->next = moves->next;
	  moves = moves->next = new_move;
	}
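      /* Invariant: MOVES points at the newest element (the tail) and
	 MOVES->next at the oldest (the head).  Blocks are appended in
	 ascending address order, so the replay below copies lower
	 addresses first and never overwrites a source that has not
	 been moved yet.  */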

      /* The following loop will prepare to move this much data.  */
      off_free += off_allocend - off_alloc;

      while (off_alloc < off_allocend)
	{
	  /* Determine whether the next entry is for a hash entry or
	     the data.  */
	  if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
	    {
	      /* Just correct the forward reference.  */
	      *(*next_hash++)->prevp -= disp;

	      off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
			    & ~BLOCK_ALIGN_M1);
	    }
	  else
	    {
	      assert (next_data < &he_data[db->head->nentries]);
	      assert ((*next_data)->packet == off_alloc);

	      struct datahead *dh = (struct datahead *) (db->data + off_alloc);
	      do
		{
		  assert ((*next_data)->key >= (*next_data)->packet);
		  assert ((*next_data)->key + (*next_data)->len
			  <= (*next_data)->packet + dh->allocsize);

		  (*next_data)->packet -= disp;
		  (*next_data)->key -= disp;
		  ++next_data;
		}
	      while (next_data < &he_data[db->head->nentries]
		     && (*next_data)->packet == off_alloc);

	      off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
	    }
	}
      assert (off_alloc == off_allocend);

      assert (off_alloc <= db->head->first_free);
      if (off_alloc == db->head->first_free)
	/* We are done, that was the last block.  */
	break;
    }
  assert (next_hash == &he[db->head->nentries]);
  assert (next_data == &he_data[db->head->nentries]);

  /* Now perform the actual moves.  */
  if (moves != NULL)
    {
      struct moveinfo *runp = moves->next;
      do
	{
	  assert ((char *) runp->to >= db->data);
	  assert ((char *) runp->to + runp->size
		  <= db->data + db->head->first_free);
	  assert ((char *) runp->from >= db->data);
	  assert ((char *) runp->from + runp->size
		  <= db->data + db->head->first_free);

	  /* The regions may overlap.  */
	  memmove (runp->to, runp->from, runp->size);
	  runp = runp->next;
	}
      while (runp != moves->next);

      if (__glibc_unlikely (debug_level >= 3))
	dbg_log (_("freed %zu bytes in %s cache"),
		 (size_t) (db->head->first_free
			   - ((char *) moves->to + moves->size - db->data)),
		 dbnames[db - dbs]);

      /* The byte past the end of the last copied block is the next
	 available byte.  */
      db->head->first_free = (char *) moves->to + moves->size - db->data;

      /* Consistency check.  */
      if (__glibc_unlikely (debug_level >= 3))
	{
	  for (size_t idx = 0; idx < db->head->module; ++idx)
	    {
	      ref_t run = db->head->array[idx];
	      size_t cnt = 0;

	      while (run != ENDREF)
		{
		  if (run + sizeof (struct hashentry) > db->head->first_free)
		    {
		      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
			       "%" PRIu32 "+%zu > %zu\n",
			       cnt, idx, run, sizeof (struct hashentry),
			       (size_t) db->head->first_free);
		      break;
		    }

		  struct hashentry *he = (struct hashentry *) (db->data + run);

		  if (he->key + he->len > db->head->first_free)
		    dbg_log ("key of entry %zu in hash bucket %zu out of "
			     "bounds: %" PRIu32 "+%zu > %zu\n",
			     cnt, idx, he->key, (size_t) he->len,
			     (size_t) db->head->first_free);

		  if (he->packet + sizeof (struct datahead)
		      > db->head->first_free)
		    dbg_log ("packet of entry %zu in hash bucket %zu out of "
			     "bounds: %" PRIu32 "+%zu > %zu\n",
			     cnt, idx, he->packet, sizeof (struct datahead),
			     (size_t) db->head->first_free);
		  else
		    {
		      struct datahead *dh = (struct datahead *) (db->data
								 + he->packet);
		      if (he->packet + dh->allocsize
			  > db->head->first_free)
			dbg_log ("full key of entry %zu in hash bucket %zu "
				 "out of bounds: %" PRIu32 "+%zu > %zu",
				 cnt, idx, he->packet, (size_t) dh->allocsize,
				 (size_t) db->head->first_free);
		    }

		  run = he->next;
		  ++cnt;
		}
	    }
	}
    }

  /* Make sure the data on disk is updated.  */
  if (db->persistent)
    msync (db->head, db->data + db->head->first_free - (char *) db->head,
	   MS_ASYNC);


  /* Now we are done modifying the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 0);

  /* We are done.  */
 out:
  pthread_mutex_unlock (&db->memlock);
  pthread_rwlock_unlock (&db->lock);

  if (he_use_malloc)
    free (he);
  if (mark_use_malloc)
    free (mark);

  obstack_free (&ob, NULL);
}


void *
mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory in multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
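  /* For example, if BLOCK_ALIGN is 8 (BLOCK_ALIGN_M1 == 7), a request
     of 13 bytes has 13 & 7 == 5 and grows by 8 - 5 == 3 to the next
     multiple, 16.  */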

  if (data_alloc)
    pthread_rwlock_rdlock (&db->lock);

  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__glibc_unlikely (db->head->first_free + len > db->head->data_size))
    {
      if (! tried_resize)
	{
	  /* Try to resize the database.  Grow the size by 1/8th, or by
	     2 * LEN if that is larger, capped at MAX_DB_SIZE.  */
	  size_t oldtotal = (sizeof (struct database_pers_head)
			     + roundup (db->head->module * sizeof (ref_t),
					ALIGN)
			     + db->head->data_size);
	  size_t new_data_size = (db->head->data_size
				  + MAX (2 * len, db->head->data_size / 8));
	  size_t newtotal = (sizeof (struct database_pers_head)
			     + roundup (db->head->module * sizeof (ref_t), ALIGN)
			     + new_data_size);
	  if (newtotal > db->max_db_size)
	    {
	      new_data_size -= newtotal - db->max_db_size;
	      newtotal = db->max_db_size;
	    }
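	  /* For example, with data_size at 16 MiB and LEN of 64 bytes,
	     MAX (2 * 64, 16 MiB / 8) is 2 MiB, giving an 18 MiB
	     new_data_size before the max_db_size cap is applied.  */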

	  if (db->mmap_used && newtotal > oldtotal
	      /* We only have to adjust the file size.  The new pages
		 become magically available.  */
	      && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
							  newtotal
							  - oldtotal)) == 0)
	    {
	      db->head->data_size = new_data_size;
	      tried_resize = true;
	      goto retry;
	    }
	}

      if (data_alloc)
	pthread_rwlock_unlock (&db->lock);

      if (! db->last_alloc_failed)
	{
	  dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

	  db->last_alloc_failed = true;
	}

      ++db->head->addfailed;

      /* No luck.  */
      res = NULL;
    }
  else
    {
      db->head->first_free += len;

      db->last_alloc_failed = false;
    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}
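

/* Usage sketch (hypothetical caller; the actual callers are the
   cache add routines):

     struct datahead *dh = mempool_alloc (db, total, 1);
     if (dh == NULL)
       ... no room, even after attempting to grow the database ...
     else
       ... fill in *DH; because DATA_ALLOC was nonzero the caller
	   still holds the read lock on DB->lock and must release
	   it when done ...  */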