librsync  2.0.1
sumset.c
1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2  *
3  * librsync -- library for network deltas
4  *
5  * Copyright (C) 1999, 2000, 2001 by Martin Pool <mbp@sourcefrog.net>
6  * Copyright (C) 1999 by Andrew Tridgell
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation; either version 2.1 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  */
22 
23 #include "config.h"
24 
25 #include <assert.h>
26 #include <stddef.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <string.h>
30 
31 #include "librsync.h"
32 #include "sumset.h"
33 #include "util.h"
34 #include "trace.h"
35 
36 const int RS_MD4_SUM_LENGTH = 16;
37 const int RS_BLAKE2_SUM_LENGTH = 32;
38 
39 void rs_block_sig_init(rs_block_sig_t *sig, rs_weak_sum_t weak_sum, rs_strong_sum_t *strong_sum, int strong_len)
40 {
41  sig->weak_sum = weak_sum;
42  memcpy(sig->strong_sum, strong_sum, strong_len);
43 }
44 
45 unsigned rs_block_sig_hash(const rs_block_sig_t *sig)
46 {
47  return (unsigned)sig->weak_sum;
48 }
49 
50 typedef struct rs_block_match {
51  rs_block_sig_t block_sig;
52  rs_signature_t *signature;
53  const void *buf;
54  size_t len;
56 
57 void rs_block_match_init(rs_block_match_t *match, rs_signature_t *sig, rs_weak_sum_t weak_sum, const void *buf,
58  size_t len)
59 {
60  match->block_sig.weak_sum = weak_sum;
61  match->signature = sig;
62  match->buf = buf;
63  match->len = len;
64 }
65 
66 int rs_block_match_cmp(rs_block_match_t *match, const rs_block_sig_t *block_sig)
67 {
68  /* If buf is not NULL, the strong sum is yet to be calculated. */
69  if (match->buf) {
70 #ifndef HASHTABLE_NSTATS
71  match->signature->calc_strong_count++;
72 #endif
73  rs_signature_calc_strong_sum(match->signature, match->buf, match->len, &(match->block_sig.strong_sum));
74  match->buf = NULL;
75  }
76  return memcmp(&match->block_sig.strong_sum, &block_sig->strong_sum, match->signature->strong_sum_len);
77 }
78 
79 /* Get the size of a packed rs_block_sig_t. */
80 static inline size_t rs_block_sig_size(const rs_signature_t *sig)
81 {
82  return offsetof(rs_block_sig_t, strong_sum) + sig->strong_sum_len;
83 }
84 
85 /* Get the pointer to the block_sig_t from a block index. */
86 static inline rs_block_sig_t *rs_block_sig_ptr(const rs_signature_t *sig, int block_idx)
87 {
88  return sig->block_sigs + block_idx * rs_block_sig_size(sig);
89 }
90 
91 /* Get the index of a block from a block_sig_t pointer. */
92 static inline int rs_block_sig_idx(const rs_signature_t *sig, rs_block_sig_t *block_sig)
93 {
94  return ((void *)block_sig - sig->block_sigs) / rs_block_sig_size(sig);
95 }
96 
97 rs_result rs_signature_init(rs_signature_t *sig, int magic, int block_len, int strong_len, rs_long_t sig_fsize)
98 {
99  int max_strong_len;
100 
101  /* Check and set default arguments. */
102  magic = magic ? magic : RS_BLAKE2_SIG_MAGIC;
103  switch (magic) {
104  case RS_BLAKE2_SIG_MAGIC:
105  max_strong_len = RS_BLAKE2_SUM_LENGTH;
106  break;
107  case RS_MD4_SIG_MAGIC:
108  max_strong_len = RS_MD4_SUM_LENGTH;
109  break;
110  default:
111  rs_error("invalid magic %#x", magic);
112  return RS_BAD_MAGIC;
113  }
114  strong_len = strong_len ? strong_len : max_strong_len;
115  if (strong_len < 1 || max_strong_len < strong_len) {
116  rs_error("invalid strong_sum_len %d for magic %#x", strong_len, magic);
117  return RS_PARAM_ERROR;
118  }
119  /* Set attributes from args. */
120  sig->magic = magic;
121  sig->block_len = block_len;
122  sig->strong_sum_len = strong_len;
123  sig->count = 0;
124  /* Calculate the number of blocks if we have the signature file size. */
125  /* Magic+header is 12 bytes, each block thereafter is 4 bytes weak_sum+strong_sum_len bytes */
126  sig->size = (int)(sig_fsize ? (sig_fsize - 12) / (4 + strong_len) : 0);
127  if (sig->size)
128  sig->block_sigs = rs_alloc(sig->size * rs_block_sig_size(sig), "signature->block_sigs");
129  else
130  sig->block_sigs = NULL;
131  sig->hashtable = NULL;
132 #ifndef HASHTABLE_NSTATS
133  sig->calc_strong_count = 0;
134 #endif
135  rs_signature_check(sig);
136  return RS_DONE;
137 }
138 
139 void rs_signature_done(rs_signature_t *sig)
140 {
141  hashtable_free(sig->hashtable);
142  rs_bzero(sig, sizeof(*sig));
143 }
144 
145 rs_block_sig_t *rs_signature_add_block(rs_signature_t *sig, rs_weak_sum_t weak_sum, rs_strong_sum_t *strong_sum)
146 {
147  rs_signature_check(sig);
148  /* If block_sigs is full, allocate more space. */
149  if (sig->count == sig->size) {
150  sig->size = sig->size ? sig->size * 2 : 16;
151  sig->block_sigs = rs_realloc(sig->block_sigs, sig->size * rs_block_sig_size(sig), "signature->block_sigs");
152  }
153  rs_block_sig_t *b = rs_block_sig_ptr(sig, sig->count++);
154  rs_block_sig_init(b, weak_sum, strong_sum, sig->strong_sum_len);
155  return b;
156 }
157 
158 rs_long_t rs_signature_find_match(rs_signature_t *sig, rs_weak_sum_t weak_sum, void const *buf, size_t len)
159 {
161  rs_block_sig_t *b;
162 
163  rs_signature_check(sig);
164  rs_block_match_init(&m, sig, weak_sum, buf, len);
165  if ((b = hashtable_find(sig->hashtable, &m))) {
166  return (rs_long_t)rs_block_sig_idx(sig, b) * sig->block_len;
167  }
168  return -1;
169 }
170 
171 void rs_signature_log_stats(rs_signature_t const *sig)
172 {
173 #ifndef HASHTABLE_NSTATS
174  hashtable_t *t = sig->hashtable;
175 
176  rs_log(RS_LOG_INFO|RS_LOG_NONAME,
177  "match statistics: signature[%ld searches, %ld (%.3f%%) matches, "
178  "%ld (%.3fx) weak sum compares, %ld (%.3f%%) strong sum compares, "
179  "%ld (%.3f%%) strong sum calcs]",
180  t->find_count,
181  t->match_count, 100.0 * (double)t->match_count / t->find_count,
182  t->hashcmp_count, (double)t->hashcmp_count / t->find_count,
183  t->entrycmp_count, 100.0 * (double)t->entrycmp_count / t->find_count,
184  sig->calc_strong_count, 100.0 * (double)sig->calc_strong_count / t->find_count);
185 #endif
186 }
187 
189 {
190  int i;
191 
192  rs_signature_check(sig);
193  sig->hashtable = hashtable_new(sig->count, (hash_f)&rs_block_sig_hash, (cmp_f)&rs_block_match_cmp);
194  if (!sig->hashtable)
195  return RS_MEM_ERROR;
196  for (i = 0; i < sig->count; i++)
197  hashtable_add(sig->hashtable, rs_block_sig_ptr(sig, i));
198  return RS_DONE;
199 }
200 
202 {
203  rs_signature_done(psums);
204  free(psums);
205 }
206 
208 {
209  int i;
210  rs_block_sig_t *b;
211  char strong_hex[RS_MAX_STRONG_SUM_LENGTH * 3];
212 
213  rs_log(RS_LOG_INFO, "sumset info: magic=%x, block_len=%d, block_num=%d", sums->magic, sums->block_len, sums->count);
214 
215  for (i = 0; i < sums->count; i++) {
216  b = rs_block_sig_ptr(sums, i);
217  rs_hexify(strong_hex, b->strong_sum, sums->strong_sum_len);
218  rs_log(RS_LOG_INFO, "sum %6d: weak=%08x, strong=%s", i, b->weak_sum, strong_hex);
219  }
220 }
int size
Total number of blocks allocated.
Definition: sumset.h:44
hashtable_t * hashtable
The hashtable for finding matches.
Definition: sumset.h:46
rs_result rs_build_hash_table(rs_signature_t *sums)
Call this after loading a signature to index it.
Definition: sumset.c:188
int count
Total number of blocks.
Definition: sumset.h:43
int block_len
The block length.
Definition: sumset.h:41
A signature file using the BLAKE2 hash.
Definition: librsync.h:107
Bad value passed in to library, probably an application bug.
Definition: librsync.h:217
Out of memory.
Definition: librsync.h:205
Bad magic number at start of stream.
Definition: librsync.h:209
void rs_hexify(char *to_buf, void const *from_buf, int from_len)
Convert from_len bytes at from_buf into a hex representation in to_buf, which must be twice as long p...
Definition: hex.c:34
long long rs_long_t
A long integer type that can handle the largest file offsets.
The hashtable type.
Definition: hashtable.h:123
rs_weak_sum_t weak_sum
Block&#39;s weak checksum.
Definition: sumset.h:29
int strong_sum_len
The block strong sum length.
Definition: sumset.h:42
void rs_free_sumset(rs_signature_t *)
Deep deallocation of checksums.
Definition: sumset.c:201
A signature file with MD4 signatures.
Definition: librsync.h:98
void * block_sigs
The packed block_sigs for all blocks.
Definition: sumset.h:45
Public header for librsync.
Signature of a whole file.
Definition: sumset.h:39
Informational.
Definition: librsync.h:126
rs_result
Return codes from nonblocking rsync operations.
Definition: librsync.h:193
int magic
The signature magic value.
Definition: sumset.h:40
long calc_strong_count
The count of strongsum calcs done.
Definition: sumset.h:49
void rs_sumset_dump(rs_signature_t const *)
Dump signatures to the log.
Definition: sumset.c:207
Signature of a single block.
Definition: sumset.h:28
Completed successfully.
Definition: librsync.h:194
rs_strong_sum_t strong_sum
Block&#39;s strong checksum.
Definition: sumset.h:30