librsync  2.3.0
scoop.c
Go to the documentation of this file.
1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2  *
3  * librsync -- the library for network deltas
4  *
5  * Copyright (C) 2000, 2001 by Martin Pool <mbp@sourcefrog.net>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public License
9  * as published by the Free Software Foundation; either version 2.1 of
10  * the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this program; if not, write to the Free Software
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21 
22  /*=
23  | To walk on water you've gotta sink
24  | in the ice.
25  | -- Shihad, `The General Electric'.
26  */
27 
28 /** \file scoop.c
29  * This file deals with readahead from caller-supplied buffers.
30  *
31  * Many functions require a certain minimum amount of input to do their
32  * processing. For example, to calculate a strong checksum of a block we need
33  * at least a block of input.
34  *
35  * Since we put the buffers completely under the control of the caller, we
36  * can't count on ever getting this much data all in one go. We can't simply
37  * wait, because the caller might have a smaller buffer than we require and so
38  * we'll never get it. For the same reason we must always accept all the data
39  * we're given.
40  *
41  * So, stream input data that's required for readahead is put into a special
42  * buffer, from which the caller can then read. It's essentially like an
43  * internal pipe, which on any given read request may or may not be able to
44  * actually supply the data.
45  *
46  * As a future optimization, we might try to take data directly from the input
47  * buffer if there's already enough there.
48  *
49  * \todo We probably know a maximum amount of data that can be scooped up, so
50  * we could just avoid dynamic allocation. However that can't be fixed at
51  * compile time, because when generating a delta it needs to be large enough to
52  * hold one full block. Perhaps we can set it up when the job is allocated? It
53  * would be kind of nice to not do any memory allocation after startup, as
54  * bzlib does this. */
55 
56 #include "config.h"
57 #include <assert.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include "librsync.h"
61 #include "job.h"
62 #include "stream.h"
63 #include "trace.h"
64 #include "util.h"
65 
66 /** Try to accept a from the input buffer to get LEN bytes in the scoop. */
67 void rs_scoop_input(rs_job_t *job, size_t len)
68 {
69  rs_buffers_t *stream = job->stream;
70  size_t tocopy;
71 
72  assert(len > job->scoop_avail);
73 
74  if (job->scoop_alloc < len) {
75  /* Need to allocate a larger scoop. */
76  rs_byte_t *newbuf;
77  size_t newsize;
78  for (newsize = 64; newsize < len; newsize <<= 1) ;
79  newbuf = rs_alloc(newsize, "scoop buffer");
80  if (job->scoop_avail)
81  memcpy(newbuf, job->scoop_next, job->scoop_avail);
82  if (job->scoop_buf)
83  free(job->scoop_buf);
84  job->scoop_buf = job->scoop_next = newbuf;
85  rs_trace("resized scoop buffer to " FMT_SIZE " bytes from " FMT_SIZE "",
86  newsize, job->scoop_alloc);
87  job->scoop_alloc = newsize;
88  } else if (job->scoop_buf != job->scoop_next) {
89  /* Move existing data to the front of the scoop. */
90  rs_trace("moving scoop " FMT_SIZE " bytes to reuse " FMT_SIZE " bytes",
91  job->scoop_avail, (size_t)(job->scoop_next - job->scoop_buf));
92  memmove(job->scoop_buf, job->scoop_next, job->scoop_avail);
93  job->scoop_next = job->scoop_buf;
94  }
95  /* take as much input as is available, to give up to LEN bytes in the
96  scoop. */
97  tocopy = len - job->scoop_avail;
98  if (tocopy > stream->avail_in)
99  tocopy = stream->avail_in;
100  assert(tocopy + job->scoop_avail <= job->scoop_alloc);
101 
102  memcpy(job->scoop_next + job->scoop_avail, stream->next_in, tocopy);
103  rs_trace("accepted " FMT_SIZE " bytes from input to scoop", tocopy);
104  job->scoop_avail += tocopy;
105  stream->next_in += tocopy;
106  stream->avail_in -= tocopy;
107 }
108 
109 /** Advance the input cursor forward \p len bytes.
110  *
111  * This is used after doing readahead, when you decide you want to keep it. \p
112  * len must be no more than the amount of available data, so you can't cheat.
113  *
114  * So when creating a delta, we require one block of readahead. But after
115  * examining that block, we might decide to advance over all of it (if there is
116  * a match), or just one byte (if not). */
117 void rs_scoop_advance(rs_job_t *job, size_t len)
118 {
119  rs_buffers_t *stream = job->stream;
120 
121  /* It never makes sense to advance over a mixture of bytes from the scoop
122  and input, because you couldn't possibly have looked at them all at the
123  same time. */
124  if (job->scoop_avail) {
125  /* reading from the scoop buffer */
126  rs_trace("advance over " FMT_SIZE " bytes from scoop", len);
127  assert(len <= job->scoop_avail);
128  job->scoop_avail -= len;
129  job->scoop_next += len;
130  } else {
131  rs_trace("advance over " FMT_SIZE " bytes from input buffer", len);
132  assert(len <= stream->avail_in);
133  stream->avail_in -= len;
134  stream->next_in += len;
135  }
136 }
137 
138 /** Read from scoop without advancing.
139  *
140  * Ask for LEN bytes of input from the stream. If that much data is available,
141  * then return a pointer to it in PTR, advance the stream input pointer over
142  * the data, and return RS_DONE. If there's not enough data, then accept
143  * whatever is there into a buffer, advance over it, and return RS_BLOCKED.
144  *
145  * The data is not actually removed from the input, so this function lets you
146  * do readahead. If you want to keep any of the data, you should also call
147  * rs_scoop_advance() to skip over it. */
148 rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
149 {
150  rs_buffers_t *stream = job->stream;
151  rs_job_check(job);
152 
153  if (!job->scoop_avail && stream->avail_in >= len) {
154  /* The scoop is empty and there's enough data in the input. */
155  *ptr = stream->next_in;
156  rs_trace("got " FMT_SIZE " bytes direct from input", len);
157  return RS_DONE;
158  } else if (job->scoop_avail < len && stream->avail_in) {
159  /* There is not enough data in the scoop. */
160  rs_trace("scoop has less than " FMT_SIZE " bytes, scooping from "
161  FMT_SIZE " input bytes", len, stream->avail_in);
162  rs_scoop_input(job, len);
163  }
164  if (job->scoop_avail >= len) {
165  /* There is enough data in the scoop now. */
166  rs_trace("scoop has at least " FMT_SIZE " bytes, this is enough",
167  job->scoop_avail);
168  *ptr = job->scoop_next;
169  return RS_DONE;
170  } else if (stream->eof_in) {
171  /* Not enough input data and at EOF. */
172  rs_trace("reached end of input stream");
173  return RS_INPUT_ENDED;
174  } else {
175  /* Not enough input data yet. */
176  rs_trace("blocked with insufficient input data");
177  return RS_BLOCKED;
178  }
179 }
180 
181 /** Read LEN bytes if possible, and remove them from the input scoop.
182  *
183  * \param *job An rs_job_t pointer to the job instance.
184  *
185  * \param len The length of the data in the ptr buffer.
186  *
187  * \param **ptr will be updated to point to a read-only buffer holding the
188  * data, if enough is available.
189  *
190  * \return RS_DONE if there was enough data, RS_BLOCKED if there was not enough
191  * data yet, or RS_INPUT_ENDED if there was not enough data and at EOF. */
192 rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
193 {
194  rs_result result;
195 
196  result = rs_scoop_readahead(job, len, ptr);
197  if (result == RS_DONE)
198  rs_scoop_advance(job, len);
199  return result;
200 }
201 
202 /** Read whatever data remains in the input stream.
203  *
204  * \param *job The rs_job_t instance the job instance.
205  *
206  * \param *len will be updated to the length of the available data.
207  *
208  * \param **ptr will point at the available data.
209  *
210  * \return RS_DONE if there was data, RS_INPUT_ENDED if there was no data and
211  * at EOF, RS_BLOCKED if there was no data and not at EOF. */
212 rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
213 {
214  rs_buffers_t *stream = job->stream;
215 
216  *len = job->scoop_avail + stream->avail_in;
217  if (*len)
218  return rs_scoop_read(job, *len, ptr);
219  else if (stream->eof_in)
220  return RS_INPUT_ENDED;
221  else
222  return RS_BLOCKED;
223 }
224 
225 /** Return the total number of bytes available including the scoop and input
226  * buffer. */
228 {
229  return job->scoop_avail + job->stream->avail_in;
230 }
rs_scoop_input
void rs_scoop_input(rs_job_t *job, size_t len)
Try to accept a from the input buffer to get LEN bytes in the scoop.
Definition: scoop.c:67
rs_buffers_s::avail_in
size_t avail_in
Number of bytes available at next_in.
Definition: librsync.h:336
rs_result
rs_result
Return codes from nonblocking rsync operations.
Definition: librsync.h:180
trace.h
RS_DONE
@ RS_DONE
Completed successfully.
Definition: librsync.h:181
librsync.h
rs_job
The contents of this structure are private.
Definition: job.h:26
RS_INPUT_ENDED
@ RS_INPUT_ENDED
Unexpected end of input file, perhaps due to a truncated file or dropped network connection.
Definition: librsync.h:190
rs_buffers_s::next_in
char * next_in
Next input byte.
Definition: librsync.h:328
rs_scoop_read_rest
rs_result rs_scoop_read_rest(rs_job_t *job, size_t *len, void **ptr)
Read whatever data remains in the input stream.
Definition: scoop.c:212
rs_buffers_s::eof_in
int eof_in
True if there is no more data after this.
Definition: librsync.h:339
RS_BLOCKED
@ RS_BLOCKED
Blocked waiting for more data.
Definition: librsync.h:182
rs_job::scoop_buf
rs_byte_t * scoop_buf
Buffer of data in the scoop.
Definition: job.h:77
rs_scoop_readahead
rs_result rs_scoop_readahead(rs_job_t *job, size_t len, void **ptr)
Read from scoop without advancing.
Definition: scoop.c:148
rs_scoop_total_avail
size_t rs_scoop_total_avail(rs_job_t *job)
Return the total number of bytes available including the scoop and input buffer.
Definition: scoop.c:227
rs_scoop_read
rs_result rs_scoop_read(rs_job_t *job, size_t len, void **ptr)
Read LEN bytes if possible, and remove them from the input scoop.
Definition: scoop.c:192
rs_scoop_advance
void rs_scoop_advance(rs_job_t *job, size_t len)
Advance the input cursor forward len bytes.
Definition: scoop.c:117
rs_buffers_s
Description of input and output buffers.
Definition: librsync.h:322