librsync  2.0.1
scoop.c
1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2  *
3  * librsync -- the library for network deltas
4  *
5  * Copyright (C) 2000, 2001 by Martin Pool <mbp@sourcefrog.net>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public License
9  * as published by the Free Software Foundation; either version 2.1 of
10  * the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this program; if not, write to the Free Software
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21 
22 /*
23  * scoop.c -- This file deals with readahead from caller-supplied
24  * buffers.
25  *
26  * Many functions require a certain minimum amount of input to do their
27  * processing. For example, to calculate a strong checksum of a block
28  * we need at least a block of input.
29  *
30  * Since we put the buffers completely under the control of the caller,
31  * we can't count on ever getting this much data all in one go. We
32  * can't simply wait, because the caller might have a smaller buffer
33  * than we require and so we'll never get it. For the same reason we
34  * must always accept all the data we're given.
35  *
36  * So, stream input data that's required for readahead is put into a
37  * special buffer, from which the caller can then read. It's
38  * essentially like an internal pipe, which on any given read request
39  * may or may not be able to actually supply the data.
40  *
41  * As a future optimization, we might try to take data directly from the
42  * input buffer if there's already enough there.
43  */
44 
45 /*
46  * TODO: We probably know a maximum amount of data that can be scooped
47  * up, so we could just avoid dynamic allocation. However that can't
48  * be fixed at compile time, because when generating a delta it needs
49  * to be large enough to hold one full block. Perhaps we can set it
50  * up when the job is allocated? It would be kind of nice to not do
51  * any memory allocation after startup, as bzlib does this.
52  */
53 
54 
55  /*
56  | To walk on water you've gotta sink
57  | in the ice.
58  | -- Shihad, `The General Electric'.
59  */
60 
61 #include "config.h"
62 
63 #include <assert.h>
64 #include <stdlib.h>
65 #include <stdio.h>
66 #include <string.h>
67 
68 #include "librsync.h"
69 #include "job.h"
70 #include "stream.h"
71 #include "trace.h"
72 #include "util.h"
73 
74 
75 /**
76  * Try to accept a from the input buffer to get LEN bytes in the scoop.
77  */
78 void rs_scoop_input(rs_job_t *job, size_t len)
79 {
80  rs_buffers_t *stream = job->stream;
81  size_t tocopy;
82 
83  assert(len > job->scoop_avail);
84 
85  if (job->scoop_alloc < len) {
86  /* need to allocate a new buffer, too */
87  rs_byte_t *newbuf;
88  int newsize = 2 * len;
89  newbuf = rs_alloc(newsize, "scoop buffer");
90  if (job->scoop_avail)
91  memcpy(newbuf, job->scoop_next, job->scoop_avail);
92  if (job->scoop_buf)
93  free(job->scoop_buf);
94  job->scoop_buf = job->scoop_next = newbuf;
95  rs_trace("resized scoop buffer to " PRINTF_FORMAT_U64 " bytes from " PRINTF_FORMAT_U64 "",
96  PRINTF_CAST_U64(newsize), PRINTF_CAST_U64(job->scoop_alloc));
97  job->scoop_alloc = newsize;
98  } else {
99  /* this buffer size is fine, but move the existing
100  * data down to the front. */
101  memmove(job->scoop_buf, job->scoop_next, job->scoop_avail);
102  job->scoop_next = job->scoop_buf;
103  }
104 
105  /* take as much input as is available, to give up to LEN bytes
106  * in the scoop. */
107  tocopy = len - job->scoop_avail;
108  if (tocopy > stream->avail_in)
109  tocopy = stream->avail_in;
110  assert(tocopy + job->scoop_avail <= job->scoop_alloc);
111 
112  memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy);
113  rs_trace("accepted " PRINTF_FORMAT_U64 " bytes from input to scoop", PRINTF_CAST_U64(tocopy));
114  job->scoop_avail += tocopy;
115  stream->next_in += tocopy;
116  stream->avail_in -= tocopy;
117 }
118 
119 
120 /**
121  * Advance the input cursor forward \p len bytes. This is used after
122  * doing readahead, when you decide you want to keep it. \p len must
123  * be no more than the amount of available data, so you can't cheat.
124  *
125  * So when creating a delta, we require one block of readahead. But
126  * after examining that block, we might decide to advance over all of
127  * it (if there is a match), or just one byte (if not).
128  */
129 void rs_scoop_advance(rs_job_t *job, size_t len)
130 {
131  rs_buffers_t *stream = job->stream;
132 
133  /* It never makes sense to advance over a mixture of bytes from
134  * the scoop and input, because you couldn't possibly have looked
135  * at them all at the same time. */
136  if (job->scoop_avail) {
137  /* reading from the scoop buffer */
138  rs_trace("advance over %ld bytes from scoop", (long) len);
139  assert(len <= job->scoop_avail);
140  job->scoop_avail -= len;
141  job->scoop_next += len;
142  } else {
143  rs_trace("advance over %ld bytes from input buffer", (long) len);
144  assert(len <= stream->avail_in);
145  stream->avail_in -= len;
146  stream->next_in += len;
147  }
148 }
149 
150 
151 
152 /**
153  * \brief Read from scoop without advancing.
154  *
155  * Ask for LEN bytes of input from the stream. If that much data is
156  * available, then return a pointer to it in PTR, advance the stream
157  * input pointer over the data, and return RS_DONE. If there's not
158  * enough data, then accept whatever is there into a buffer, advance
159  * over it, and return RS_BLOCKED.
160  *
161  * The data is not actually removed from the input, so this function
162  * lets you do readahead. If you want to keep any of the data, you
163  * should also call rs_scoop_advance() to skip over it.
164  */
165 rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
166 {
167  rs_buffers_t *stream = job->stream;
168  rs_job_check(job);
169 
170  if (job->scoop_avail >= len) {
171  /* We have enough data queued to satisfy the request,
172  * so go straight from the scoop buffer. */
173  rs_trace("got " PRINTF_FORMAT_U64 " bytes direct from scoop", PRINTF_CAST_U64(len));
174  *ptr = job->scoop_next;
175  return RS_DONE;
176  } else if (job->scoop_avail) {
177  /* We have some data in the scoop, but not enough to
178  * satisfy the request. */
179  rs_trace("data is present in the scoop and must be used");
180  rs_scoop_input(job, len);
181 
182  if (job->scoop_avail < len) {
183  rs_trace("still have only " PRINTF_FORMAT_U64 " bytes in scoop",
184  PRINTF_CAST_U64(job->scoop_avail));
185  return RS_BLOCKED;
186  } else {
187  rs_trace("scoop now has " PRINTF_FORMAT_U64 " bytes, this is enough",
188  PRINTF_CAST_U64(job->scoop_avail));
189  *ptr = job->scoop_next;
190  return RS_DONE;
191  }
192  } else if (stream->avail_in >= len) {
193  /* There's enough data in the stream's input */
194  *ptr = stream->next_in;
195  rs_trace("got " PRINTF_FORMAT_U64 " bytes from input buffer", PRINTF_CAST_U64(len));
196  return RS_DONE;
197  } else if (stream->avail_in > 0) {
198  /* Nothing was queued before, but we don't have enough
199  * data to satisfy the request. So queue what little
200  * we have, and try again next time. */
201  rs_trace("couldn't satisfy request for " PRINTF_FORMAT_U64 ", scooping " PRINTF_FORMAT_U64 " bytes",
202  PRINTF_CAST_U64(len), PRINTF_CAST_U64(job->scoop_avail));
203  rs_scoop_input(job, len);
204  return RS_BLOCKED;
205  } else if (stream->eof_in) {
206  /* Nothing is queued before, and nothing is in the input
207  * buffer at the moment. */
208  rs_trace("reached end of input stream");
209  return RS_INPUT_ENDED;
210  } else {
211  /* Nothing queued at the moment. */
212  rs_trace("blocked with no data in scoop or input buffer");
213  return RS_BLOCKED;
214  }
215 }
216 
217 
218 
219 /**
220  * Read LEN bytes if possible, and remove them from the input scoop.
221  * If there's not enough data yet, return RS_BLOCKED.
222  *
223  * \param ptr will be updated to point to a read-only buffer holding
224  * the data, if enough is available.
225  *
226  * \return RS_DONE if all the data was available, RS_BLOCKED if it's
227  * not there.
228  */
229 rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
230 {
231  rs_result result;
232 
233  result = rs_scoop_readahead(job, len, ptr);
234  if (result == RS_DONE)
235  rs_scoop_advance(job, len);
236 
237  return result;
238 }
239 
240 
241 
242 /*
243  * Read whatever remains in the input stream, assuming that it runs up
244  * to the end of the file. Set LEN appropriately.
245  */
246 rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
247 {
248  rs_buffers_t *stream = job->stream;
249 
250  *len = job->scoop_avail + stream->avail_in;
251 
252  return rs_scoop_read(job, *len, ptr);
253 }
254 
255 
256 
257 /**
258  * Return the total number of bytes available including the scoop and input
259  * buffer.
260  */
261 size_t rs_scoop_total_avail(rs_job_t *job)
262 {
263  return job->scoop_avail + job->stream->avail_in;
264 }
Description of input and output buffers.
Definition: librsync.h:361
size_t avail_in
Number of bytes available at next_in References the length of available input.
Definition: librsync.h:376
Public header for librsync.
char * next_in
Next input byte.
Definition: librsync.h:367
rs_result
Return codes from nonblocking rsync operations.
Definition: librsync.h:193
Unexpected end of input file, perhaps due to a truncated file or dropped network connection.
Definition: librsync.h:208
Blocked waiting for more data.
Definition: librsync.h:195
rs_byte_t * scoop_buf
Buffer of data in the scoop.
Definition: job.h:82
int eof_in
True if there is no more data after this.
Definition: librsync.h:381
Completed successfully.
Definition: librsync.h:194
The contents of this structure are private.
Definition: job.h:29