1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-02-14 03:12:10 +02:00

di-set: manipulate sets of dev/inode pairs efficiently

* gl/lib/di-set.c: Implementation.
* gl/lib/di-set.h: Declarations.
* gl/modules/di-set: Define module.
* gl/modules/di-set-tests: Define test module.
* gl/tests/test-di-set.c: Likewise.
This commit is contained in:
Jim Meyering
2010-06-27 23:29:07 +02:00
parent 6357909ee6
commit f42496b72b
5 changed files with 424 additions and 0 deletions

276
gl/lib/di-set.c Normal file
View File

@@ -0,0 +1,276 @@
/* Set operations for device-inode pairs stored in a space-efficient manner.
Copyright 2009-2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* written by Jim Meyering */
#include <config.h>
#include "di-set.h"
#include <stdio.h>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "verify.h"
/* Set operations for device-inode pairs stored in a space-efficient manner.
A naive mapping uses 16 bytes to save a single st_dev, st_ino pair.
However, in many applications, the vast majority of actual device,inode
number pairs can be efficiently compressed to fit in 8 or even 4 bytes,
by using a separate table to map a relatively small number of devices
to small integers. */
#define N_DEV_BITS_4 5
#define N_INO_BITS_4 (32 - N_DEV_BITS_4 - 2 - 1)
#define N_DEV_BITS_8 8
#define N_INO_BITS_8 (64 - N_DEV_BITS_8 - 2 - 1)
/* Note how the last bit is always set.
This is required, in order to be able to distinguish
an encoded di_ent value from a malloc-returned pointer,
which must be 4-byte-aligned or better. */
struct dev_ino_4
{
uint32_t mode:2; /* must be first */
uint32_t short_ino:N_INO_BITS_4;
uint32_t mapped_dev:N_DEV_BITS_4;
uint32_t always_set:1;
};
verify (N_DEV_BITS_4 <= 8 * sizeof (int));
verify (sizeof (struct dev_ino_4) == 4);
struct dev_ino_8
{
uint32_t mode:2; /* must be first */
uint64_t short_ino:N_INO_BITS_8;
uint32_t mapped_dev:N_DEV_BITS_8;
uint32_t always_set:1;
};
verify (sizeof (struct dev_ino_8) == 8);
struct dev_ino_full
{
uint32_t mode:2; /* must be first */
dev_t dev;
ino_t ino;
};
enum di_mode
{
DI_MODE_4 = 1,
DI_MODE_8 = 2,
DI_MODE_FULL = 3
};
/*
di_mode raw_inode mapped dev always_set
\____________|_______________\_____/
4-byte | 2| 25 | 5 |1| mapped_dev
`----------------------------------------------------|-----.
8-byte | 2| 53 | 8 |1|
`----------------------------------------------------------'
*/
struct di_ent
{
union
{
struct dev_ino_4 di4;
struct dev_ino_8 di8;
struct dev_ino_full full;
uint32_t u32;
uint64_t u64;
void *ptr;
} u;
};
struct dev_map_ent
{
dev_t dev;
uint32_t mapped_dev;
};
static inline bool
is_encoded_ptr (struct di_ent const *v)
{
return (size_t) v % 4;
}
static struct di_ent
decode_ptr (struct di_ent const *v)
{
if (!is_encoded_ptr (v))
return *v;
struct di_ent di;
di.u.ptr = (void *) v;
return di;
}
static size_t
di_ent_hash (void const *x, size_t table_size)
{
struct di_ent e = decode_ptr (x);
return (e.u.di4.mode == DI_MODE_4
? e.u.u32
: (e.u.di4.mode == DI_MODE_8
? e.u.u64
: e.u.full.ino)) % table_size;
}
/* Compare two di_ent structs.
Return true if they are the same. */
static bool
di_ent_compare (void const *x, void const *y)
{
struct di_ent a = decode_ptr (x);
struct di_ent b = decode_ptr (y);
if (a.u.di4.mode != b.u.di4.mode)
return false;
if (a.u.di4.mode == DI_MODE_4)
return (a.u.di4.short_ino == b.u.di4.short_ino
&& a.u.di4.mapped_dev == b.u.di4.mapped_dev);
if (a.u.di8.mode == DI_MODE_8)
return (a.u.di8.short_ino == b.u.di8.short_ino
&& a.u.di8.mapped_dev == b.u.di8.mapped_dev);
return (a.u.full.ino == b.u.full.ino
&& a.u.full.dev == b.u.full.dev);
}
static void
di_ent_free (void *v)
{
if ( ! is_encoded_ptr (v))
free (v);
}
int
di_set_init (struct di_set_state *dis, size_t initial_size)
{
if (dev_map_init (&dis->dev_map) < 0)
return -1;
dis->di_set = hash_initialize (initial_size, NULL,
di_ent_hash, di_ent_compare, di_ent_free);
return dis->di_set ? 0 : -1;
}
void
di_set_free (struct di_set_state *dis)
{
dev_map_free (&dis->dev_map);
hash_free (dis->di_set);
}
/* Given a device-inode set, DIS, create an entry for the DEV,INO
pair, and store it in *V. If possible, encode DEV,INO into the pointer
itself, but if not, allocate space for a full "struct di_ent" and set *V
to that pointer. Upon memory allocation failure, return -1.
Otherwise return 0. */
int
di_ent_create (struct di_set_state *dis,
dev_t dev, ino_t ino,
struct di_ent **v)
{
static int prev_m = -1;
static dev_t prev_dev = -1;
struct di_ent di_ent;
int mapped_dev;
if (dev == prev_dev)
mapped_dev = prev_m;
else
{
mapped_dev = dev_map_insert (&dis->dev_map, dev);
if (mapped_dev < 0)
return -1;
prev_dev = dev;
prev_m = mapped_dev;
}
if (mapped_dev < (1 << N_DEV_BITS_4)
&& ino < (1 << N_INO_BITS_4))
{
#if lint
/* When this struct is smaller than a pointer, initialize
the pointer so tools like valgrind don't complain about
the uninitialized bytes. */
if (sizeof di_ent.u.di4 < sizeof di_ent.u.ptr)
di_ent.u.ptr = NULL;
#endif
di_ent.u.di4.mode = DI_MODE_4;
di_ent.u.di4.short_ino = ino;
di_ent.u.di4.mapped_dev = mapped_dev;
di_ent.u.di4.always_set = 1;
*v = di_ent.u.ptr;
}
else if (mapped_dev < (1 << N_DEV_BITS_8)
&& ino < ((uint64_t) 1 << N_INO_BITS_8))
{
di_ent.u.di8.mode = DI_MODE_8;
di_ent.u.di8.short_ino = ino;
di_ent.u.di8.mapped_dev = mapped_dev;
di_ent.u.di8.always_set = 1;
*v = di_ent.u.ptr;
}
else
{
/* Handle the case in which INO is too large or in which (far less
likely) we encounter hard-linked files on 2^N_DEV_BITS_8
different devices. */
struct di_ent *p = malloc (sizeof *p);
if (!p)
return -1;
assert ((size_t) p % 4 == 0);
p->u.full.mode = DI_MODE_FULL;
p->u.full.ino = ino;
p->u.full.dev = dev;
*v = p;
}
return 0;
}
/* Attempt to insert the DEV,INO pair into the set, DIS.
If it matches a pair already in DIS, don't modify DIS and return 0.
Otherwise, if insertion is successful, return 1.
Upon any failure return -1. */
int
di_set_insert (struct di_set_state *dis, dev_t dev, ino_t ino)
{
struct di_ent *v;
if (di_ent_create (dis, dev, ino, &v) < 0)
return -1;
int err = hash_insert0 (dis->di_set, v, NULL);
if (err == -1) /* Insertion failed due to lack of memory. */
return -1;
if (err == 1) /* Insertion succeeded. */
return 1;
/* That pair is already in the table, so ENT was not inserted. Free it. */
if (! is_encoded_ptr (v))
free (v);
return 0;
}

28
gl/lib/di-set.h Normal file
View File

@@ -0,0 +1,28 @@
#include "dev-map.h"
struct di_set_state
{
/* A map to help compact device numbers. */
struct dev_map dev_map;
/* A set of compact dev,inode pairs. */
struct hash_table *di_set;
};
#undef _ATTRIBUTE_NONNULL_
#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__
# define _ATTRIBUTE_NONNULL_(m,...) __attribute__ ((__nonnull__ (m)))
#else
# define _ATTRIBUTE_NONNULL_(m,...)
#endif
int di_set_init (struct di_set_state *, size_t) _ATTRIBUTE_NONNULL_ (1);
void di_set_free (struct di_set_state *) _ATTRIBUTE_NONNULL_ (1);
int di_set_insert (struct di_set_state *, dev_t, ino_t)
_ATTRIBUTE_NONNULL_ (1);
struct di_ent;
int di_ent_create (struct di_set_state *di_set_state,
dev_t dev, ino_t ino,
struct di_ent **di_ent)
_ATTRIBUTE_NONNULL_ (1,4);

25
gl/modules/di-set Normal file
View File

@@ -0,0 +1,25 @@
Description:
manipulate sets of device-inode pairs efficiently
Files:
lib/di-set.c
lib/di-set.h
Depends-on:
dev-map
hash
verify
configure.ac:
Makefile.am:
lib_SOURCES += di-set.c di-set.h
Include:
"di-set.h"
License
GPL
Maintainer:
Jim Meyering

10
gl/modules/di-set-tests Normal file
View File

@@ -0,0 +1,10 @@
Files:
tests/test-di-set.c
Depends-on:
configure.ac:
Makefile.am:
TESTS += test-di-set
check_PROGRAMS += test-di-set

85
gl/tests/test-di-set.c Normal file
View File

@@ -0,0 +1,85 @@
/* Test the dev-map module.
Copyright (C) 2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Written by Jim Meyering. */
#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#define ASSERT(expr) \
do \
{ \
if (!(expr)) \
{ \
fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
fflush (stderr); \
abort (); \
} \
} \
while (0)
#include "di-set.h"
/* FIXME: ugly duplication of code from di-set.c */
#define N_DEV_BITS_4 5
#define N_INO_BITS_4 (32 - N_DEV_BITS_4 - 2 - 1)
#define N_DEV_BITS_8 8
#define N_INO_BITS_8 (64 - N_DEV_BITS_8 - 2 - 1)
int
main (void)
{
/* set_program_name (argv[0]); placate overzealous "syntax-check" test. */
size_t initial_size = 61;
/* "real" code might prefer to avoid the allocation here, simply
declaring "struct di_set_state dis;", do a global substitution,
s/\<dis\>/\&dis/, and remove the final free. */
struct di_set_state *dis = malloc (sizeof *dis);
ASSERT (dis);
ASSERT (di_set_init (dis, initial_size) == 0);
struct di_ent *di_ent;
ASSERT (di_ent_create (dis, 1, 1, &di_ent) == 0);
ASSERT (di_ent_create (dis, 1 << N_DEV_BITS_4, 1, &di_ent) == 0);
ASSERT (di_ent_create (dis, 1, 1 << N_INO_BITS_4, &di_ent) == 0);
ASSERT (di_ent_create (dis, 1,
(uint64_t) 1 << N_INO_BITS_8, &di_ent) == 0);
free (di_ent);
ASSERT (di_set_insert (dis, 2, 5) == 1); /* first insertion succeeds */
ASSERT (di_set_insert (dis, 2, 5) == 0); /* duplicate fails */
ASSERT (di_set_insert (dis, 3, 5) == 1); /* diff dev, duplicate inode is ok */
ASSERT (di_set_insert (dis, 2, 8) == 1); /* same dev, different inode is ok */
/* very large inode number */
ASSERT (di_set_insert (dis, 5, (uint64_t) 1 << 63) == 1);
ASSERT (di_set_insert (dis, 5, (uint64_t) 1 << 63) == 0); /* dup */
unsigned int i;
for (i = 0; i < 3000; i++)
ASSERT (di_set_insert (dis, 9, i) == 1);
for (i = 0; i < 3000; i++)
ASSERT (di_set_insert (dis, 9, i) == 0); /* duplicate fails */
di_set_free (dis);
free (dis);
return 0;
}