1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-04-19 18:26:32 +02:00

Initial revision

This commit is contained in:
Jim Meyering
1992-11-08 02:50:43 +00:00
parent f33e06711c
commit b25038ce9a
31 changed files with 22940 additions and 0 deletions

339
COPYING Normal file
View File

@@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
675 Mass Ave, Cambridge, MA 02139, USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
Appendix: How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) 19yy <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) 19yy name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.

19
lib/bcopy.c Normal file
View File

@@ -0,0 +1,19 @@
/* bcopy.c -- copy memory.
Copy LENGTH bytes from SOURCE to DEST. Does not null-terminate.
In the public domain.
By David MacKenzie <djm@gnu.ai.mit.edu>. */
void
bcopy (source, dest, length)
     char *source, *dest;
     unsigned length;
{
  /* Copying a region onto itself changes nothing.  */
  if (source == dest)
    return;

  if (source < dest)
    {
      /* The destination lies above the source, so a forward copy could
         overwrite source bytes that have not been read yet.  Work from
         the last byte down to the first instead.  */
      char *from = source + length;
      char *to = dest + length;

      while (length-- != 0)
        *--to = *--from;
    }
  else
    {
      /* The destination lies below the source; copying forward never
         clobbers a source byte before it has been read.  */
      while (length-- != 0)
        *dest++ = *source++;
    }
}

91
lib/linebuffer.c Normal file
View File

@@ -0,0 +1,91 @@
/* linebuffer.c -- read arbitrarily long lines
Copyright (C) 1986, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Richard Stallman. */
#include <stdio.h>
#include "linebuffer.h"
char *xmalloc ();
char *xrealloc ();
void free ();
/* Initialize linebuffer LINEBUFFER for use: mark it as holding no
   text and attach a fresh text area of 200 obtained from xmalloc.  */
void
initbuffer (linebuffer)
     struct linebuffer *linebuffer;
{
  /* Kept as a `long', the type of the `size' member, because xmalloc
     is called without a prototype in scope.  */
  long initial_size = 200;

  linebuffer->buffer = (char *) xmalloc (initial_size);
  linebuffer->size = initial_size;
  linebuffer->length = 0;
}
/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
   Remove any newline.  Does not null terminate.
   Return LINEBUFFER, except at end of file return 0.
   A read error that strikes before any character of the line has been
   read is treated like end of file, so that a caller looping until 0
   is returned always terminates.  On a 0 return LINEBUFFER's length
   is set to 0.  */
struct linebuffer *
readline (linebuffer, stream)
     struct linebuffer *linebuffer;
     FILE *stream;
{
  int c;
  char *buffer = linebuffer->buffer;
  char *p = linebuffer->buffer;
  char *end = buffer + linebuffer->size; /* Sentinel. */

  if (feof (stream))
    {
      linebuffer->length = 0;
      return 0;
    }

  while (1)
    {
      c = getc (stream);

      /* Grow the text area when it is full.  This runs before the
         end-of-line test, so on return there is always at least one
         unused byte after the line.  */
      if (p == end)
        {
          /* Record the fill level as an offset: once xrealloc has run,
             the old block may be gone and pointers into it must not be
             used, not even for arithmetic.  */
          long used = p - buffer;

          linebuffer->size *= 2;
          buffer = (char *) xrealloc (buffer, linebuffer->size);
          p = buffer + used;
          linebuffer->buffer = buffer;
          end = buffer + linebuffer->size;
        }

      if (c == EOF || c == '\n')
        break;
      *p++ = c;
    }

  /* getc returns EOF both at end of file and on a read error.  With
     nothing read either way, report that there are no more lines
     instead of returning an endless supply of empty ones.  */
  if (c == EOF && p == buffer)
    {
      linebuffer->length = 0;
      return 0;
    }

  linebuffer->length = p - linebuffer->buffer;
  return linebuffer;
}
/* Release LINEBUFFER's text area and then LINEBUFFER itself.  Both
   are handed to free, so both must have been allocated with malloc.  */
void
freebuffer (linebuffer)
     struct linebuffer *linebuffer;
{
  char *text = linebuffer->buffer;

  free (text);
  free (linebuffer);
}

42
lib/linebuffer.h Normal file
View File

@@ -0,0 +1,42 @@
/* linebuffer.h -- declarations for reading arbitrarily long lines
Copyright (C) 1986, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* A `struct linebuffer' holds one line of text together with the
   bookkeeping needed to grow its storage.  The text is not
   null-terminated; LENGTH says how much of BUFFER is in use.  */
struct linebuffer
{
  long size;                    /* Bytes allocated for BUFFER.  */
  long length;                  /* Bytes of BUFFER in use.  */
  char *buffer;                 /* The text of the line.  */
};
#ifdef __STDC__

/* ANSI C compilers get full prototypes.  This header includes nothing
   itself, so <stdio.h> must already have been included for `FILE'.  */

/* Prepare LINEBUFFER for use.  */
void initbuffer (struct linebuffer *linebuffer);

/* Read one line of text, however long, from STREAM into LINEBUFFER.
   The newline is dropped and no null terminator is stored.
   Returns LINEBUFFER, or 0 at end of file.  */
struct linebuffer *readline (struct linebuffer *linebuffer, FILE *stream);

/* Free LINEBUFFER's text and LINEBUFFER itself; both must have been
   allocated with malloc.  */
void freebuffer (struct linebuffer *linebuffer);

#else

/* Older compilers only learn the return types.  */
struct linebuffer *readline ();
void initbuffer ();
void freebuffer ();

#endif

145
lib/memchr.c Normal file
View File

@@ -0,0 +1,145 @@
/* Copyright (C) 1991 Free Software Foundation, Inc.
Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
with help from Dan Sahlin (dan@sics.se) and
commentary by Jim Blandy (jimb@ai.mit.edu);
adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
and implemented by Roland McGrath (roland@ai.mit.edu).
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
/* Search no more than N bytes of S for C.
   C is converted to `unsigned char' before comparing.  Return a
   pointer to the first matching byte, or 0 if none of the first N
   bytes of S matches.  */
char *
memchr (s, c, n)
     unsigned char *s;
     int c;
     unsigned n;
{
  unsigned char *char_ptr;
  unsigned long int *longword_ptr;
  unsigned long int longword, low_bits, high_bits, charmask;

  c = (unsigned char) c;

  /* Handle the first few bytes one at a time, until CHAR_PTR is
     aligned on a longword boundary.  The boundary is taken from the
     actual size of a longword, which is not 4 bytes everywhere.  */
  for (char_ptr = s;
       n > 0 && ((unsigned long int) char_ptr % sizeof (longword)) != 0;
       --n, ++char_ptr)
    if (*char_ptr == c)
      return (char *) char_ptr;

  longword_ptr = (unsigned long int *) char_ptr;

  /* LOW_BITS has the least significant bit of every byte set
     (0x01...01) and HIGH_BITS the most significant bit of every byte
     (0x80...80).  Both are derived from an all-ones longword so that
     they cover every byte whatever the width of `unsigned long'.  */
  low_bits = ~(unsigned long int) 0 / 0xff;
  high_bits = low_bits << 7;

  /* Set up a longword, each of whose bytes is C.  */
  charmask = (unsigned long int) c * low_bits;

  /* Instead of testing each byte, test a longword at a time.  XORing
     with CHARMASK turns every byte equal to C into zero, so the
     question becomes whether LONGWORD contains a zero byte.

     Subtracting LOW_BITS borrows out of a byte only if that byte is
     zero (or a lower byte already borrowed, which in turn requires a
     lower zero byte).  A zero byte therefore becomes 0xff, gaining a
     high bit it did not have before; a nonzero byte with no incoming
     borrow merely decreases by one and cannot gain its high bit.
     Hence the expression below is nonzero exactly when some byte of
     LONGWORD is zero: there are no misfires.  */
  while (n >= sizeof (longword))
    {
      longword = *longword_ptr ^ charmask;
      if (((longword - low_bits) & ~longword & high_bits) != 0)
        break;
      ++longword_ptr;
      n -= sizeof (longword);
    }

  /* Finish a byte at a time.  This either pinpoints the match inside
     the longword that tested positive, or scans the tail that is
     shorter than a longword.  */
  char_ptr = (unsigned char *) longword_ptr;
  while (n-- > 0)
    {
      if (*char_ptr == c)
        return (char *) char_ptr;
      ++char_ptr;
    }

  return 0;
}

29
lib/memset.c Normal file
View File

@@ -0,0 +1,29 @@
/* memset.c -- set an area of memory to a given value
Copyright (C) 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Store C, converted to `char', into each of the first LEN bytes of
   STR.  Return STR.  */
char *
memset (str, c, len)
     char *str;
     int c;
     unsigned len;
{
  unsigned i;

  for (i = 0; i < len; i++)
    str[i] = c;

  return str;
}

4870
lib/regex.c Normal file

File diff suppressed because it is too large Load Diff

481
lib/regex.h Normal file
View File

@@ -0,0 +1,481 @@
/* Definitions for data structures and routines for the regular
expression library, version 0.11.
Copyright (C) 1985, 89, 90, 91, 92 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifndef __REGEXP_LIBRARY_H__
#define __REGEXP_LIBRARY_H__
/* POSIX says that <sys/types.h> must be included before <regex.h>. */
/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   each definition is the previous bit shifted left by one; thus, when
   we add or remove a bit, only one other definition need change.  */
/* A set of the syntax bits below, OR'ed together.  */
typedef unsigned reg_syntax_t;
/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
/* If this bit is not set, then + and ? are operators, and \+ and \? are
   literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
/* If this bit is set, then character classes are supported.  They are:
   [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
   [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
/* If this bit is set, then ^ and $ are always anchors (outside bracket
   expressions, of course).
   If this bit is not set, then it depends:
   ^ is an anchor if it is at the beginning of a regular
   expression or after an open-group or an alternation operator;
   $ is an anchor if it is at the end of a regular expression, or
   before a close-group or an alternation operator.
   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
/* If this bit is set, then special characters are always special
   regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
   some contexts; otherwise they are ordinary.  Specifically,
   * + ? and intervals are only special when not after the beginning,
   open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
/* If this bit is set, then *, +, ?, and { cannot be first in an re or
   immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
/* If this bit is set, either \{...\} or {...} defines an
   interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)
/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
/* If this bit is set, then `{...}' defines an interval, and \{ and \}
   are literals.
   If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
/* If this bit is set, then an ending range point collating lower
   than the starting range point, as in [z-a], is invalid.
   If not set, then when the ending range point collates lower than
   the starting range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;
/* Define combinations of the above bits for the standard possibilities.
(The [[[ comments delimit what gets put into the Texinfo file, so
don't delete them!) */
/* [[[begin syntaxes]]] */
/* Emacs syntax: no syntax bits set.  */
#define RE_SYNTAX_EMACS 0

/* awk syntax: unbackslashed ( ) and | are operators (RE_NO_BK_PARENS,
   RE_NO_BK_VBAR) and \<digit> matches <digit> rather than being a
   back-reference (RE_NO_BK_REFS).  */
#define RE_SYNTAX_AWK \
  (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
   | RE_NO_BK_PARENS | RE_NO_BK_REFS \
   | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
   | RE_UNMATCHED_RIGHT_PAREN_ORD)
#define RE_SYNTAX_POSIX_AWK \
  (RE_SYNTAX_POSIX_EXTENDED \
   | RE_BACKSLASH_ESCAPE_IN_LISTS)

#define RE_SYNTAX_GREP \
  (RE_BK_PLUS_QM \
   | RE_CHAR_CLASSES \
   | RE_HAT_LISTS_NOT_NEWLINE \
   | RE_INTERVALS \
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP \
  (RE_CHAR_CLASSES \
   | RE_CONTEXT_INDEP_ANCHORS \
   | RE_CONTEXT_INDEP_OPS \
   | RE_HAT_LISTS_NOT_NEWLINE \
   | RE_NEWLINE_ALT \
   | RE_NO_BK_PARENS \
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP \
  (RE_SYNTAX_EGREP \
   | RE_INTERVALS \
   | RE_NO_BK_BRACES)

/* RE_SYNTAX_POSIX_BASIC is defined further down; that is fine because
   the macro is only expanded where RE_SYNTAX_SED is used.  */
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* The syntax bits shared by the basic and the extended POSIX
   syntaxes.  */
#define _RE_SYNTAX_POSIX_COMMON \
  (RE_CHAR_CLASSES \
   | RE_DOT_NEWLINE \
   | RE_DOT_NOT_NULL \
   | RE_INTERVALS \
   | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC \
  (_RE_SYNTAX_POSIX_COMMON \
   | RE_BK_PLUS_QM)

/* Same as ..._POSIX_BASIC except that RE_LIMITED_OPS takes the place
   of RE_BK_PLUS_QM, i.e., \? \+ \| are not recognized.  This is not
   truly minimal: other operators, such as \`, remain enabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
  (_RE_SYNTAX_POSIX_COMMON \
   | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED \
  (_RE_SYNTAX_POSIX_COMMON \
   | RE_CONTEXT_INDEP_ANCHORS \
   | RE_CONTEXT_INDEP_OPS \
   | RE_NO_BK_BRACES \
   | RE_NO_BK_PARENS \
   | RE_NO_BK_VBAR \
   | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Same as ..._POSIX_EXTENDED except that RE_CONTEXT_INVALID_OPS takes
   the place of RE_CONTEXT_INDEP_OPS, and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
  (_RE_SYNTAX_POSIX_COMMON \
   | RE_CONTEXT_INDEP_ANCHORS \
   | RE_CONTEXT_INVALID_OPS \
   | RE_NO_BK_BRACES \
   | RE_NO_BK_PARENS \
   | RE_NO_BK_REFS \
   | RE_NO_BK_VBAR \
   | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */
/* Upper bound on the repetition count an interval may specify.  Other
   system headers sometimes (wrongly) supply their own RE_DUP_MAX;
   discard any such definition so that ours is the one in effect.  */
#ifdef RE_DUP_MAX
#undef RE_DUP_MAX
#endif
#define RE_DUP_MAX 0x7fff
/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then anchors match at newline characters in
   the string.
   If not set, then newlines get no special treatment from anchors.
   NOTE(review): this comment used to state the opposite; the wording
   here follows the POSIX definition of REG_NEWLINE -- confirm against
   `regcomp' in regex.c.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then (per POSIX) regexec also reports where the match
   and its subexpressions were found, through its `pmatch' argument.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
   the beginning of the string (presumably because it's not the
   beginning of a line).
   If not set, then the beginning-of-line operator does match the
   beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)
/* Error codes reported by the regex routines.  The `re_error_msg'
   table in regex.c must be updated whenever a code is removed,
   changed, or added here.  */
typedef enum
{
  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH = 1,	/* Didn't find a match (for regexec).  */

  /* Error codes POSIX specifies for regcomp, in the order the
     standard lists them.  */
  REG_BADPAT = 2,	/* Invalid pattern.  */
  REG_ECOLLATE = 3,	/* Not implemented.  */
  REG_ECTYPE = 4,	/* Invalid character class name.  */
  REG_EESCAPE = 5,	/* Trailing backslash.  */
  REG_ESUBREG = 6,	/* Invalid back reference.  */
  REG_EBRACK = 7,	/* Unmatched left bracket.  */
  REG_EPAREN = 8,	/* Parenthesis imbalance.  */
  REG_EBRACE = 9,	/* Unmatched \{.  */
  REG_BADBR = 10,	/* Invalid contents of \{\}.  */
  REG_ERANGE = 11,	/* Invalid range end.  */
  REG_ESPACE = 12,	/* Ran out of memory.  */
  REG_BADRPT = 13,	/* No preceding re for repetition op.  */

  /* Codes added by this implementation.  */
  REG_EEND = 14,	/* Premature end.  */
  REG_ESIZE = 15,	/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN = 16	/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;
/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.
   The [[[...]]] comments inside the structure are markers, like the
   ones around the syntax definitions; do not delete them.  */
struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
/* Space that holds the compiled pattern.  It is declared as
   `unsigned char *' because its elements are
   sometimes used as array indexes.  */
unsigned char *buffer;

/* Number of bytes to which `buffer' points.  */
unsigned long allocated;

/* Number of bytes actually used in `buffer'.  */
unsigned long used;

/* Syntax setting with which the pattern was compiled.  */
reg_syntax_t syntax;

/* Pointer to a fastmap, if any, otherwise zero.  re_search uses
   the fastmap, if there is one, to skip over impossible
   starting points for matches.  */
char *fastmap;

/* Either a translate table to apply to all characters before
   comparing them, or zero for no translation.  The translation
   is applied to a pattern when it is compiled and to a string
   when it is matched.  */
char *translate;

/* Number of subexpressions found by the compiler.  */
size_t re_nsub;

/* Zero if this pattern cannot match the empty string, one otherwise.
   Well, in truth it's used only in `re_search_2', to see
   whether or not we should use the fastmap, so we don't set
   this absolutely perfectly; see `re_compile_fastmap' (the
   `duplicate' case).  */
unsigned can_be_null : 1;

/* One of the three REGS_... values defined just below:
   If REGS_UNALLOCATED, allocate space in the `regs' structure
   for `max (RE_NREGS, re_nsub + 1)' groups.
   If REGS_REALLOCATE, reallocate space if necessary.
   If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
unsigned regs_allocated : 2;

/* Set to zero when regex_compile compiles a pattern; set to one
   by re_compile_fastmap when it updates the fastmap, if any.  */
unsigned fastmap_accurate : 1;

/* If set, regexec reports only success or failure and does not
   return anything in pmatch.  */
unsigned no_sub : 1;

/* If set, a beginning-of-line anchor doesn't match at the
   beginning of the string.  */
unsigned not_bol : 1;

/* Similarly for an end-of-line anchor: if set, it doesn't match at
   the end of the string.  */
unsigned not_eol : 1;

/* If true, an anchor at a newline matches.  */
unsigned newline_anchor : 1;

/* [[[end pattern_buffer]]] */
};

/* The POSIX name for the compiled-pattern type.  */
typedef struct re_pattern_buffer regex_t;
/* The one opcode value that search.c (search_buffer) in Emacs relies
   on.  It is defined here as well as in `regex.c'.  */
#define RE_EXACTN_VALUE 1

/* Byte offset within the string; POSIX mandates this type.  */
typedef int regoff_t;


/* Register match data is stored in this structure.  What the
   registers match is described in full in regex.texinfo.  */
struct re_registers
{
  unsigned int num_regs;
  regoff_t *start, *end;
};


/* When `regs_allocated' in the pattern buffer is REGS_UNALLOCATED,
   `re_match_2' reports on at least this many registers the first
   time it is handed a `regs' structure.  */
#ifndef RE_NREGS
#define RE_NREGS 30
#endif


/* Registers as POSIX specifies them.  Besides using different names
   from `re_registers', POSIX uses an array of structures where
   `re_registers' is a structure of arrays.  */
typedef struct
{
  regoff_t rm_so;	/* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;	/* Byte offset from string's start to substring's end.  */
} regmatch_t;
/* Declarations for routines.  */

/* Each routine below is declared once, through _RE_ARGS, rather than
   twice -- with a prototype for ANSI compilers and without one for
   the rest.  The macro keeps the argument list when prototypes are
   available and reduces it to `()' when they are not.  This clutters
   the declarations somewhat, but avoids the duplication.

   A non-ANSI compiler also needs `const' defined away, unless that
   has already been taken care of.  */

#if !__STDC__

#define _RE_ARGS(args) ()

#ifndef const
#define const
#endif

#else /* __STDC__ */

#define _RE_ARGS(args) args

#endif /* __STDC__ */
/* Set the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
extern const char *re_compile_pattern
_RE_ARGS ((const char *pattern, int length,
struct re_pattern_buffer *buffer));

/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));

/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is
   zero; see the `no_sub' field of the pattern buffer).  */
extern int re_search
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
int length, int start, int range, struct re_registers *regs));

/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern int re_search_2
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
int length1, const char *string2, int length2,
int start, int range, struct re_registers *regs, int stop));

/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern int re_match
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
int length, int start, struct re_registers *regs));

/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
extern int re_match_2
_RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
int length1, const char *string2, int length2,
int start, struct re_registers *regs, int stop));

/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers
_RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
unsigned num_regs, regoff_t *starts, regoff_t *ends));

/* 4.2 BSD compatibility.  */
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));

/* POSIX compatibility.  */
extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
extern int regexec
_RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
regmatch_t pmatch[], int eflags));
extern size_t regerror
_RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
size_t errbuf_size));
extern void regfree _RE_ARGS ((regex_t *preg));
#endif /* not __REGEXP_LIBRARY_H__ */
/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/

855
old/textutils/ChangeLog Normal file
View File

@@ -0,0 +1,855 @@
Sat Nov 7 00:26:14 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
* wc.c (wc): If doing only -c, use st_size for regular files.
* fold.c (fold_file): Was folding 1 column too early.
From Eric Backus <ericb@lsid.hp.com>.
* memset.c: New file.
Fri Nov 6 20:14:51 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
* cksum.c: New file.
Tue Oct 13 16:24:06 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
* tac.c (tac_stdin): Handle SIGPIPE.
* sort.c (main): Handle SIGTERM.
* od.c: New file.
* system.h [USG || STDC_HEADERS]: Define bcmp.
Sat Oct 3 20:41:24 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
* sort.c (main): Handle SIGPIPE. From trq@dionysos.thphys.ox.ac.uk.
Tue Sep 29 01:10:05 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
* paste.c (main): Don't write on a string constant.
Mon Aug 24 00:02:45 1992 Jim Meyering (meyering@churchy.gnu.ai.mit.edu)
* tr.c: Minor cleanup. Replaced some assert(0) with abort().
Tue Jul 7 02:14:19 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
* cmp.c, cmp.1: Move to diff distribution.
Fri Jul 3 16:37:59 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
* system.h: Change FOO_MISSING to HAVE_FOO.
Wed May 13 20:05:41 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu)
* pr.c (COLUMN): Add structure member to remember filename.
(main, init_fps, open_file, close_file): Use it.
(close_file): Don't decrement cols_ready_to_print when closing
a file. From cdl@mpl.UCSD.EDU (Carl Lowenstein).
Mon May 11 19:17:33 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu)
* cmp.c: --show-chars -> --print-chars.
* pr.c: Rename some variables.
Sat May 9 18:39:47 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
* system.h: Define DEV_BSIZE if not defined.
Wed Apr 22 02:15:09 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu)
* system.h, tac.c: SIGTYPE -> RETSIGTYPE.
Fri Apr 17 10:42:23 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
* sort.c (main): Don't stop processing args when we hit "-";
treat it like a regular filename.
From ian@airs.com (Ian Lance Taylor).
* pr.c (print_page): Fix off by one line count when ^L is in input.
From Andreas Schwab (schwab@ls5.informatik.uni-dortmund.de).
Mon Apr 6 20:52:29 1992 Jim Meyering (meyering@churchy.gnu.ai.mit.edu)
* tr.c (validate): Change error message so it doesn't mention
actual name of --truncate-set1 option. From David MacKenzie.
Sun Apr 5 14:22:42 1992 Jim Meyering (meyering@hal.gnu.ai.mit.edu)
* tr.c (string2_extend, validate): Give an error message when
translating without --truncate-set1, with empty string2, and
with non-empty string1. "tr 1 ''" produced a failed assertion.
Mon Mar 30 02:20:56 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
* system.h: Change how ST_BLKSIZE is calculated to allow for
non-POSIX systems that don't define BSIZE in sys/param.h.
Sat Mar 28 11:18:01 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
* sum.c (main, bsd_sum_file): Don't print filename if BSD
algorithm is used and only one file was given.
Wed Mar 25 11:34:41 1992 Jim Meyering (meyering@wombat.gnu.ai.mit.edu)
* tr.c (get_spec_stats): Fix assertion to allow ranges like a-a
with starting character equal to ending character. This is
contrary to the POSIX spec, but what is already implemented
in find_closing_delim.
Mon Mar 16 00:15:11 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
* Version 1.3.
* sort.c (numcompare, checkfp): Add parens to placate gcc2.
* sort.c (mergefps): For -u, output the first, not last, of
the lines that compare equal. From Mike Haertel.
Tue Mar 10 10:51:38 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
* tr.c: Remove initial capitals and periods from error messages.
Sun Mar 8 22:03:45 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
* sum.c (main): Add -r option for SYSV compat.
Thu Feb 27 22:26:25 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
* sort.c (compare): If -s given, leave lines in their original order.
(main): Recognize -s.
(usage): Document -s.
From Mike Haertel.
Tue Feb 18 20:29:45 1992 Randall Smith (randy at geech.gnu.ai.mit.edu)
* sort.c (sort): Check for complete parsing of buffer into
lines before nixing temp files.
Mon Feb 17 10:35:58 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* sum.c (sysv_sum_file): Use %lu instead of %u to print a
long. Not that it matters for GNU . . .
* tr.c (unquote, make_printable_str): Use \007 instead of ANSI \a.
(append_normal_char, append_range, append_char_class,
append_repeated_char, append_equiv_class, spec_init):
Initialize `next' field of new `struct List_element'.
From rommel@informatik.tu-muenchen.de (Kai-Uwe Rommel).
Sat Feb 8 17:16:49 1992 David J. MacKenzie (djm at apple-gunkies.gnu.ai.mit.edu)
* join.c (get_line): Renamed from getline to avoid GNU libc conflict.
Sun Feb 2 21:22:01 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* Version 1.2.
* nl.c: Support multiple files and "-" for stdin.
(main): Check for read and write errors.
(nl_file): New function.
Wed Jan 29 10:09:10 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* tr.c (main): -t option was called -b in getopt spec.
(validate): Don't warn that set1 is longer than set2.
* tr.c: Rename --sysv-string2-truncate to --truncate-string1.
Fri Jan 17 16:29:05 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* nl.c: New program from bin-src.
* nl.c (main): Use a struct linebuffer for line_buf.
(process_file): Use readline instead of fgets, to preserve NULs.
(check_section): Use memcmp instead of strncmp.
(proc_text): Print line_buf with fwrite instead of printf.
* nl.c (main): Usage message if too many args given. Check
for error in closing input file. Lengths of section delimiter
strings were 1 too large. Take separator_str into account in
length of print_no_line_fmt.
(build_print_fmt): Allocate space for print_fmt, in case
separator_str is long.
(proc_text): A blank line is one that contains nothing, not
even nonprinting characters.
Fri Jan 17 01:04:22 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* All programs: Document `--' instead of `+' to introduce
long-named options, in usage messages.
* sum.c (bsd_sum_file): Renamed from sum_file.
(sysv_sum_file): New function.
(main): Recognize an option to select between the algorithms.
Sun Jan 5 17:41:18 1992 Jim Meyering (meyering at churchy.gnu.ai.mit.edu)
* pr.c (close_file, print_page): Fixed bug that had the command
yes |head |pr -t printing "yyyyyyyyyy".
* (print_page): Fixed bug that had pr -3 -a printing two too few
trailer lines per page.
* (main): Added restriction that -a and -m are incompatible.
Although the POSIX spec doesn't explicitly say they shouldn't
be used together, it says -a modifies the -column option and
that -column shouldn't be used with -m.
Thu Jan 2 15:23:59 1992 David J. MacKenzie (djm at albert.gnu.ai.mit.edu)
* nl.c: Include regex.h after, not before, sys/types.h.
Thu Jan 2 12:18:10 1992 Tom Lord (lord at geech.gnu.ai.mit.edu)
* sort.c (fillbuf): Return bytes buffered instead of bytes read.
Fri Dec 27 22:53:36 1991 Jim Kingdon (kingdon at geech.gnu.ai.mit.edu)
* sort.c (LINEALLOC): New #define.
(struct lines): New field ``limit''.
(initlines): Set it from new arg ``limit''.
(sort, mergefps, checkfp): Pass new arg to initlines().
(findlines): Don't realloc past lines->limit.
Tue Dec 24 01:24:03 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* tac.c, sort.c, csplit.c, system.h: Change POSIX ifdefs to
HAVE_UNISTD_H and _POSIX_VERSION.
* xwrite.c: Change POSIX ifdef to HAVE_UNISTD_H.
Sat 14 Dec 1991 11:46:42 Jim Meyering (meyering at wombat)
* tr.c: Fixed an inaccurate comment on posix_pedantic.
Thu 12 Dec 1991 21:15:20 Jim Meyering (meyering at hal)
* tr.c: Changed underscores to hyphens in long option name
"sysv_string2_truncate".
Wed Dec 11 13:33:34 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* tac.c (main): Set obscure_syntax to tell re_search to
allocate memory for the group registers.
Fri Dec 6 18:26:27 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* tac.c, sort.c, csplit.c [POSIX]: Use sigaction instead of
signal, which POSIX doesn't have.
* sort.c: Replace inthandler and huphandler with sighandler.
* csplit.c (main): Only handle signals if they were not being
ignored.
* tr.c: POSIX_ME_HARDER -> POSIXLY_CORRECT.
Wed Dec 4 00:47:47 1991 Jim Meyering (meyering at wombat)
* tr.c (unquote): Reformat code so it doesn't go beyond column 80.
* tr.c (squeeze_filter): Comment a little on why it's better
to step through the input by two.
* tr.c (set_initialize): Write a comment describing the function.
* tr.c: Eliminated the variable `portability_warnings' and replaced
references to it by references to `!posix_pedantic'. One of the
uses of portability_warnings had been wrong.
Tue Dec 3 14:03:35 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* tr.c: New program.
Sun Dec 1 15:07:35 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* linebuffer.[ch] (freebuffer): New function (used by cron).
Thu Oct 17 22:30:22 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* system.h, configure, Makefile.in: Don't include memory.h if
STDC_HEADERS, removing need for MEMORY_H_MISSING.
Thu 17 Oct 1991 16:53:07 Jim Meyering (meyering at wombat)
* pr.c (print_page): REALLY fixed `extra newline at EOF' problem.
Somehow, part of my patch didn't make it last time.
Sat Oct 12 12:04:47 1991 David J. MacKenzie (djm at churchy.gnu.ai.mit.edu)
* tail.c (pipe_lines, pipe_bytes): Initialize `first->next'.
* cmp.c (cmp): Print EOF message to stderr, not stdout, for
POSIX 1003.2.11.2.
* sort.c (xfwrite): fwrite never returns < 0, so check if
number written is number we asked to write.
(fillbuf, main): fread never returns < 0, so check ferror instead.
From Rainer Orth.
Tue Oct 8 18:07:08 1991 Jim Meyering (meyering at churchy)
* pr.c (print_page): Really fixed `extra newline at EOF' problem.
* (store_columns): Fixed bug that caused `pr -b -2' to coredump
on files of certain lengths.
Fri Oct 4 22:30:25 1991 Jim Meyering (meyering at churchy)
* pr.c (print_page): Fixed to not add single spurious newline
at EOF when using -t.
Wed Oct 2 01:02:05 1991 David J. MacKenzie (djm at apple-gunkies)
* pr.c (print_page): Don't pad the page if -t given.
* csplit.c (load_buffer), sort.c (mergefps): Use bcopy, not memcpy.
Thu Sep 26 12:35:00 1991 David J. MacKenzie (djm at churchy.gnu.ai.mit.edu)
* Version 1.1.
* configure, system.h: Include memory.h if it works.
* split.c: Allow `b' unit as well as `k' and `m'.
* head.c, tail.c: Replace -b +blocks option with specifying
units (b, k, or m) after the number.
(parse_unit): New function.
* fold.c (main): Check that -w arg is a number.
* cut.c: +delimiter takes an arg.
Mon Sep 16 14:52:38 1991 David J. MacKenzie (djm at churchy.gnu.ai.mit.edu)
* pr.c (close_file): Don't close an already closed file.
Thu Sep 12 00:14:43 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* memchr.c: New file.
* configure: Check if it's needed.
* csplit.c, gcsplit.1: New program.
* pr.c (cleanup): Only free buffers if they were allocated.
* sort.c [!USG && !STDC_HEADERS]: Declare memchr.
Wed Sep 11 20:54:16 1991 Jim Meyering (meyering at churchy)
* pr.c: The following 3 bugs appeared (at least) when printing
a single file with the options `-3 -f'.
* (print_white_space): Single spaces were being replaced
with tabs.
* (print_page): Some lines were getting too much white space
at the beginning because spaces_not_printed wasn't being reset
to 0.
* (read_line): The single space between a truncated column
on its left and the column on its right was omitted. Fixed
so that previous value of input_position is restored before
returning FALSE.
Sat Sep 7 03:22:18 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* configure: Only remove /etc from PATH when it's not part of
a larger name.
Wed Sep 4 17:09:24 1991 David J. MacKenzie (djm at apple-gunkies)
* linebuffer.c (readline): Fix incorrect recalculation of `end'.
* head.c, tail.c: Replace `mode' variables and bitmasks with
separate variables for each option.
Mon Sep 2 04:00:37 1991 David J. MacKenzie (djm at apple-gunkies)
* wc.c: New program.
Sun Sep 1 01:18:38 1991 David J. MacKenzie (djm at apple-gunkies)
* fold.c (fold_file): Read in an int, not a char, for EOF
comparison.
* configure: Check whether st_blksize is missing.
* tac.c (save_stdin): Put copy of pipe input in TMPDIR if
defined, instead of /tmp.
Thu Aug 29 14:48:15 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* xwrite.c [POSIX]: unistd.h might require sys/types.h.
Wed Aug 28 11:57:39 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* paste.c (main): Consider -d "" to be like -d "\0",
for POSIX (if I read it right).
* sort.c, join.c: New programs.
* cut.c (set_field): Allow blanks as well as commas to
separate numbers in ranges.
Sun Aug 25 19:57:40 1991 Jim Meyering (meyering at apple-gunkies)
* pr.c: Failure to open an input file is no longer a fatal error.
A message is printed for each failed open. When printing
in parallel, each failed open results in one fewer output column.
Added POSIX -r option to suppress the message.
* pr.c: Added variables: failed_opens, ignore_failed_opens.
These changes were based in part on work by David MacKenzie.
Sat Aug 24 15:27:39 1991 Jim Meyering (meyering at pogo)
* pr.c: Complain if user gives both -m and -[0-9]+ options.
Wed Aug 21 22:04:57 1991 David J. MacKenzie (djm at apple-gunkies)
* Version 1.0.
Mon Aug 19 00:16:51 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* expand.c: Rename some variables.
(expand): Don't access value off end of array.
* unexpand.c: Rename some variables.
(unexpand): Don't access value off end of array.
Instead of copying tabs verbatim and flushing pending spaces
when one is reached, count them as the proper number of
pending spaces. Instead of changing tabs to single spaces if
the tabstop list is exhausted, print the rest of the line
unchanged (for POSIX).
Sat Aug 17 01:49:41 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* cut.c (cut_file), paste.c (paste_parallel, paste_serial):
Clear EOF and error conditions on stdin so it can be reused.
* expand.c, unexpand.c (parse_tabstops): Allow blanks as well
as commas to separate tabstops, for POSIX.
* expand.c (expand), unexpand.c (unexpand): Don't line-buffer
the output; send it directly to stdout.
* unexpand.c (main): Make -t stupidly imply -a for POSIX.
(unexpand): If a tab stop list was given and we move past its end,
copy the rest of the line verbatim.
* split.c (convint): New function to allow 'm' and 'k' after
byte counts.
(main): Use it. Change -c option to -b for POSIX.
Fri Aug 9 02:47:02 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* pr.c: Protect isdigit with isascii, if required.
Tue Aug 6 21:42:25 1991 David J. MacKenzie (djm at wheat-chex)
Most of the following is from Paul Eggert:
* cat.c (main): If stdin is read, check close at end.
* cmp.c (main): Check for stdin being closed.
Check for close errors on stdin and stdout.
(cmp): Return a value instead of exiting.
* cut.c (cut_file): New function, from code in main.
Check for read errors.
(main): Check for close errors.
* expand.c, unexpand.c (main): Check for close errors.
(next_file): Go to next file if one can't be opened.
Check for close errors.
* head.c (main), tail.c (main): If stdin was read, check for
close errors.
* head.c (head_file), tail.c (tail_file): Check for close errors.
* paste.c (main, paste_parallel, paste_serial), tac.c (main):
Check for close errors. Close stdin if it was read.
* split.c (main): Check for close errors.
* configure, Makefile.in's: Support +srcdir option.
Make config.status. Fix up clean targets.
Wed Jul 31 01:32:59 1991 David J. MacKenzie (djm at hal)
* linebuffer.h (struct linebuffer): Add a field to indicate
the number of valid chars in the line.
* linebuffer.c (initbuffer, readline): Fill it in.
* uniq.c, comm.c: Use it.
* pr.c (main): Check stdin and stdout fclose for errors.
(init_parameters): If there's no room for header and footer,
omit them rather than dying (for POSIX).
(init_header): Take a file descriptor as additional arg.
(init_fps): Change callers. Note when stdin is read.
(open_file): For filename "-" use stdin.
(close_file): Don't close stdin. Check close for errors.
(print_char, char_to_clump): Use isprint instead of explicit
comparisons.
* memcmp.c: New file (needed for comm).
* bcopy.c: New file (needed for fold).
* system.h: Don't define bcopy as memcpy.
* configure: Check for bcopy and memcmp.
* uniq.c (main): Use "-" instead of NULL to mean stdin or
stdout.
(check_file): Use "-" instead of NULL to mean stdin or stdout.
Check readline return instead of for NUL character to
detect eof.
Check fclose for errors.
(find_field): Use linebuffer length, not NULs, to detect end
of line.
(different): New function, replaces compare. Uses memcmp
instead of strncmp.
(writeline): Use fwrite instead of fputs so NULs are preserved.
* comm.c (compare_files): Return an error indication.
Don't take a filename of NULL to mean stdin.
Use memcmp instead of strcmp to allow for NULs.
Check fclose for errors.
(writeline): Use fwrite instead of fputs so NULs are preserved.
* sum.c (sum_file): Take an arg indicating whether to print
the filename, and don't take NULL meaning stdin. Set a flag
when we read stdin. Check fclose return for errors.
(main): If stdin was read, check fclose return for errors.
Use filename of "-" if no args given.
Thu Jul 25 15:17:10 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* fold.c: Rewritten from scratch for POSIX.
Wed Jul 24 01:55:41 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* split.c (line_bytes_split): Use xmalloc instead of alloca.
* system.h: Don't declare alloca.
* tac.c, tail.c: Use SEEK_ instead of L_ for lseek.
* system.h: Define SEEK_ macros if not defined.
* pr.c: Rename variable `truncate' to avoid library function conflict.
Tue Jul 23 13:21:48 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* linebuffer.c, linebuffer.h: New files.
* comm.c, uniq.c (initbuffer, readline): Functions
removed (use versions in linebuffer.c).
Mon Jul 22 13:23:53 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
* sum.c (sumfile): Always print 5 digits for second number, too.
Interpret "-" to mean stdin.
Sat Jul 20 14:24:40 1991 David J. MacKenzie (djm at bleen)
* uniq.c: Use isblank instead of isspace, to support POSIX.2.
* comm.c, pr.c, uniq.c (concat, fatal, error,
pfatal_with_name, xmalloc, xrealloc): Functions removed.
Sat Jul 13 02:04:53 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
* nl.c: Add long-named options. Doc fixes.
Sat Jul 6 02:19:09 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
* expand.c, unexpand.c [STDC_HEADERS]: Include stdlib.h.
* xwrite.c [POSIX]: Include unistd.h.
[STDC_HEADERS]: Don't declare errno.
Sun Jun 30 23:35:16 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
* uniq.c: Add long-named options. Remove marginally useful -z
option (zero padded repeat counts).
Thu Jun 27 16:31:45 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
* tail.c (tail_file), tac.c (save_stdin, tac_file), split.c
(cwrite), head.c (head_file), cat.c (main): Check close return
value for delayed error report due to NFS.
Tue Jun 11 00:12:15 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
* cat.c: Replace "uchar" with "unsigned char", to avoid
problems with various systems' typedefs.
Thu Jun 6 12:54:26 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
* cat.c (cat): Interpret ENOTTY return from FIONREAD ioctl to mean
operation is unsupported, for HP-UX 7.0.
Sun Apr 14 21:49:17 1991 Richard Stallman (rms at mole.gnu.ai.mit.edu)
* sum.c: Always print five digits for first number.
Fri Mar 15 16:16:54 1991 David J. MacKenzie (djm at geech.ai.mit.edu)
* cat.c, cmp.c: Don't use fileno(); not needed.
Thu Jan 10 02:16:55 1991 David J. MacKenzie (djm at albert.ai.mit.edu)
* tac.c, tail.c: Change _POSIX_SOURCE to POSIX.
Thu Dec 27 00:06:45 1990 David J. MacKenzie (djm at egypt)
* cut.c (cut_file_bytes, cut_file_fields): Make inbufp and
outbufp global.
(enlarge_line): Adjust inbufp and outbufp.
Sun Sep 9 16:54:19 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* cat.c: Declare free returning void, not int, so it
doesn't bomb on Xenix.
Mon Sep 3 22:23:57 1990 David J. MacKenzie (djm at coke)
* tac.c: Print error messages before calling cleanup, not after.
Tue Aug 28 18:05:24 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* tac.c (cleanup): Return SIGTYPE, not int.
Tue Aug 7 12:51:18 1990 David J. MacKenzie (djm at apple-gunkies)
* cut.c (main, usage): Add -b and -n options for POSIX.
(set_fields): Don't allow SPC or TAB as number separators.
* paste.c (paste_parallel): If open of any file fails, quit
(for POSIX).
Mon Aug 6 22:14:13 1990 David J. MacKenzie (djm at apple-gunkies)
* pr.c: Add POSIX -F option (same as -f).
* uniq.c (check_file): Allow '-' to mean stdin or stdout.
Mon Aug 6 14:43:30 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
* head.c, tail.c: Change `chars' to `bytes' globally.
(main, usage): Use POSIX.2 draft 10 option syntax.
Sun Aug 5 11:51:12 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
* cat.c (main): Don't delay error messages, so they appear
where expected.
(main, simple_cat, cat): Make errors in input files nonfatal.
Sat Aug 4 10:11:30 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
* cat.c: Remove -c option added for POSIX draft 9, since POSIX
draft 10 removed it.
* tac.c (tac_stdin): Use fstat instead of lseek to determine
whether stdin is seekable, because lseek silently fails on
some special files, like tty's.
tail.c (tail_chars, tail_lines): Use fstat instead of lseek;
don't turn off -f for non-regular files (assume the user knows
what he's doing; it might work for fifo's and sockets).
* paste.c (main): If no files given, use stdin.
Don't let collapse_escapes write on string constant (delim default).
(paste_parallel): Don't close stdin.
* cut.c (main): Use standard input for filename of "-".
* comm.c (compare_files): Allow '-' to mean stdin.
Fri Aug 3 13:38:28 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
* cut.c (enlarge_line): Take an arg giving the required amount
of space. Change callers.
(main): Don't allow -t'<TAB>' without -f.
Make `delim' unsigned to fix sign extension problem in comparison.
Tue Jul 17 12:36:11 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
* pr.c Deleted excess whitespace from ends of lines.
Modified to work with current version of getopt, which
returns 1 instead of 0 for non-options.
Reversed the meaning of the -f option, to be compatible
with real pr.
Sun Jul 8 00:39:31 1990 David J. MacKenzie (djm at apple-gunkies)
* cmp.c (main, usage): Rename -L option to -c and don't have
it imply -l.
(printc): Take an arg to specify number of chars to pad to,
for column alignment.
(cmp): Respect flag_print_chars in default output format.
Align columns for cmp -cl.
Sat Jul 7 17:23:30 1990 David J. MacKenzie (djm at apple-gunkies)
* cmp.c: For +show-chars, have getopt return 'L' so
`flag_print_chars' gets set.
Fri Jun 29 01:04:19 1990 David J. MacKenzie (djm at apple-gunkies)
* tac.c (main): Initialize fastmap and translate fields of
regex before compiling it.
Fri Jun 22 00:38:20 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* tac.c: Change +regexp to +regex for consistency with GNU find.
Wed Jun 20 01:46:09 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* cat.c (cat): If FIONREAD is available, only use it if it is
supported by the filesystem that the file is on.
Sun Jun 3 20:26:19 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* cat.c (main): Add a variable to control whether the check
for input file == output file is made, because no values of
st_dev and st_ino should be assumed to be available for this
purpose. Only do the check for regular files.
* tac.c: Use bcopy instead of memcpy.
Thu May 31 00:55:36 1990 David J. MacKenzie (djm at apple-gunkies)
* head.c: Use longs instead of ints for file offsets, for 16
bit machines.
Tue May 22 00:56:51 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* cmp.c: Change some ints to longs for 16 bit machines.
(bcmp_cnt): Make char-pointer counting slightly simpler.
Sat May 12 01:16:42 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* cat.c (main): Allow input file to be output file for devices
(ttys, etc.). Check whether input file is output file when
reading standard input. Print any error messages for standard
input.
* cmp.c (bcmp_cnt): Handle int comparisons correctly on 16 bit
machines as well as 32 bit ones.
* cmp.c, tail.c: Use longs instead of ints for file offsets.
Fri May 11 02:11:03 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* cmp.c: Fix some exit statuses for POSIX.
Tue May 8 03:41:42 1990 David J. MacKenzie (djm at abyss)
* tac.c: Use regular expressions as the record boundaries.
Give better error messages.
Reformat code and make it more readable.
(main): Use getopt_long to parse options.
(tac_stdin): Do not make a temporary file if standard input
is a file.
(tac_file): New function.
(tac): Take an open file desc as an arg.
(output): Rewrite to use its own efficient buffering.
(xmalloc, xrealloc, xwrite): New functions.
Sun Apr 8 20:33:20 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* head.c, tail.c: Use `error' instead of `fatal_perror' and
`nonfatal_perror'. Remove some unnecessary info from messages.
Wed Mar 21 09:30:18 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
* comm.c (main): Pass the list of files to compare_files as a
char ** instead of a char *.
(compare_files): Make arg a char **.
* uniq.c: Declare some functions as void.
Change global vars `countmode' and `mode' from ints to enums.
(main): Use getopt to parse options and support POSIX options.
Don't use integer_arg to parse numbers, since `-#' can't be
parsed that way using getopt.
(find_field): Use isspace for finding fields boundaries.
Tue Mar 20 14:28:25 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
* comm.c (main): Call usage if given bad option or wrong
number of args. Exit with 0 status normally.
(usage): New function.
Declare some other functions as void.
Wed Mar 14 10:48:40 1990 David J. MacKenzie (djm at rice-chex)
* cmp.c (main, cmp, usage): Replace -q +quick option with -L
+show-chars option to add ASCII representation of bytes to -l format.
Tue Mar 13 00:50:14 1990 David J. MacKenzie (djm at rice-chex)
* cmp.c (cmp): Change EOF message for POSIX compatibility.
For -l format, clear bits > FF.
Mon Mar 5 17:21:00 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* tail.c: Move global `errors' into main instead of having
nonfatal_perror set it.
(tail, tail_chars, tail_file, tail_lines, pipe_chars, pipe_lines):
Return an error status.
(file_lines, start_chars, start_lines): Reverse the meaning of
the return value.
(tail_lines, tail_chars): Account for that reversal.
Mon Mar 5 00:34:36 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* head.c: Move global `errors' into main and have the various
functions return an error status instead of setting it in
nonfatal_perror.
Sat Mar 3 11:27:27 1990 Torbjorn Granlund (tege at echnaton)
* cmp.c (cmp): Call function bcmp_cnt for flag == 0 (i.e. no
options specified), to compare the two blocks and count
newlines simultaneously.
* cmp.c New function: bcmp_cnt.
* cmp.c (main): Test if output is redirected to /dev/null, and
assume `-s' if this is so.
Tue Feb 20 17:09:19 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* cat.c: Change `argbad' from a char to a short, so it will
work on machines with unsigned chars.
Sat Feb 10 02:16:40 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
* cmp.c (cmp): Rename `r' to `first_diff', and `x' to `smaller'.
Remove unnecessary variable `c1'. If -l was given, increase
`char_number' by the number of bytes read, after producing output,
rather than by the offset of the first differing bytes, before
producing output.
Replace if-else-if constructions with case statements for clarity.
(bcmp2): Rename `n' to `nread'.
Wed Dec 20 01:32:06 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
* nl.c (proc_text): Use re_search instead of re_match.
Tue Dec 19 01:26:34 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
* nl.c: Indent. Un-nest statements. Use GNU regexp functions
instead of System V ones. Move function declarations together.
(quit): Remove useless function.
(program_name): New variable for error messages.
(main): Use perror in error message.
(xmalloc): New function to replace myalloc.
(myalloc): Function removed.
Global: use program_name and xmalloc.
Sun Dec 17 00:36:36 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
* uniq.c: Declare some functions.
(main): Initialize infile and outfile. Call usage if given
invalid args. Normally exit with 0 status instead of garbage.
(usage): New function to print usage message and exit.
(check_file): Remove unused variable.
(readline): Compare against EOF, not < 0.
(xmalloc, xrealloc): Return char *, not int.
Ok to return 0 if 0 bytes requested.
(lb1, lb2): Remove unused global vars.
(concat): Remove unused function.
Sat Dec 16 15:15:50 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
* comm.c: Remove unused global variables lb1, lb2.
(main): Remove unneeded variable.
(compare_files): Remove unused arg.
(readline): un-nest assignment. Test against EOF instead of < 0.
(error): Print to stderr, not stdout.
(xmalloc, xrealloc): Return char * instead of int.
Returning 0 is ok if 0 bytes requested (ANSI C).
Local Variables:
mode: indented-text
left-margin: 8
version-control: never
End:

660
src/cat.c Normal file
View File

@@ -0,0 +1,660 @@
/* cat -- concatenate files and print on the standard output.
Copyright (C) 1988, 1990, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Differences from the Unix cat:
* Always unbuffered, -u is ignored.
* 100 times faster with -v -u.
* 20 times faster with -v.
By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman. */
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#ifndef _POSIX_SOURCE
#include <sys/ioctl.h>
#endif
#include "system.h"
/* Larger of H and I. Each argument may be evaluated twice, so do not
pass expressions with side effects. */
#define max(h,i) ((h) > (i) ? (h) : (i))
/* Old-style (unprototyped) declarations of functions used in this file. */
char *stpcpy ();
char *xmalloc ();
void cat ();
void error ();
void next_line_num ();
void simple_cat ();
/* Name under which this program was invoked. */
char *program_name;
/* Name of input file. May be "-". */
char *infile;
/* Descriptor on which input file is open. */
int input_desc;
/* Descriptor on which output file is open. Always is 1. */
int output_desc;
/* Buffer for line numbers: ten leading spaces, the digit '0', a tab
and the terminating NUL. The digits grow leftwards into the spaces
as the number gets longer (see next_line_num). */
char line_buf[13] =
{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0', '\t', '\0'};
/* Position in `line_buf' where printing starts. This will not change
unless the number of lines is more than 999999. */
char *line_num_print = line_buf + 5;
/* Position of the first digit in `line_buf'. */
char *line_num_start = line_buf + 10;
/* Position of the last digit in `line_buf'. */
char *line_num_end = line_buf + 10;
/* Preserves the `cat' function's local `newlines' between invocations. */
int newlines2 = 0;
/* Exit status of the program: starts at 0 and is set to 1 whenever a
non-fatal error condition is reported. */
int exit_stat = 0;
/* Report a usage problem and terminate.
   If REASON is not a null pointer it is printed first, prefixed by the
   program name; the option synopsis always follows.  Both go to stderr
   and the process then exits with status 2.  */
void
usage (reason)
     char *reason;
{
  if (reason)
    fprintf (stderr, "%s: %s\n", program_name, reason);

  fprintf (stderr, "\
Usage: %s [-benstuvAET] [--number] [--number-nonblank] [--squeeze-blank]\n\
[--show-nonprinting] [--show-ends] [--show-tabs] [--show-all]\n\
[file...]\n",
           program_name);

  exit (2);
}
/* Entry point of `cat'.
   Parses the options, records the identity of standard output, then
   copies every file operand (or standard input when there are none, or
   when an operand is "-") to standard output.  `simple_cat' is used
   when no option other than -u was given, `cat' otherwise.  Non-fatal
   problems set `exit_stat' to 1; the program finishes through
   exit (exit_stat).  */
void
main (argc, argv)
     int argc;
     char *argv[];
{
  /* Optimal size of i/o operations of output.  */
  int outsize;

  /* Optimal size of i/o operations of input.  */
  int insize;

  /* Pointer to the input buffer.  */
  unsigned char *inbuf;

  /* Pointer to the output buffer.  */
  unsigned char *outbuf;

  int c;

  /* Index in argv to processed argument.  */
  int argind;

  /* Device number of the output (file or whatever).  Declared with the
     type of `st_dev' so that the value is not truncated before it is
     compared; only meaningful while `check_redirection' is nonzero.  */
  dev_t out_dev = 0;

  /* I-node number of the output.  Same remarks as for `out_dev'.  */
  ino_t out_ino = 0;

  /* Nonzero if the output file should not be the same as any input file.  */
  int check_redirection = 1;

  /* Nonzero if we have ever read standard input.  */
  int have_read_stdin = 0;

  struct stat stat_buf;

  /* Variables that are set according to the specified options.  */
  int numbers = 0;
  int numbers_at_empty_lines = 1;
  int squeeze_empty_lines = 0;
  int mark_line_ends = 0;
  int quote = 0;
  int output_tabs = 1;

  /* Number of options seen, not counting -u.  Zero selects
     `simple_cat'.  */
  int options = 0;

  static struct option long_options[] =
  {
    {"number-nonblank", 0, NULL, 'b'},
    {"number", 0, NULL, 'n'},
    {"squeeze-blank", 0, NULL, 's'},
    {"show-nonprinting", 0, NULL, 'v'},
    {"show-ends", 0, NULL, 'E'},
    {"show-tabs", 0, NULL, 'T'},
    {"show-all", 0, NULL, 'A'},
    {NULL, 0, NULL, 0}
  };

  program_name = argv[0];

  /* Parse command line options.  */

  while ((c = getopt_long (argc, argv, "benstuvAET", long_options, (int *) 0))
         != EOF)
    {
      options++;
      switch (c)
        {
        case 'b':
          numbers = 1;
          numbers_at_empty_lines = 0;
          break;

        case 'e':
          mark_line_ends = 1;
          quote = 1;
          break;

        case 'n':
          numbers = 1;
          break;

        case 's':
          squeeze_empty_lines = 1;
          break;

        case 't':
          output_tabs = 0;
          quote = 1;
          break;

        case 'u':
          /* We provide the -u feature unconditionally.  */
          options--;
          break;

        case 'v':
          quote = 1;
          break;

        case 'A':
          quote = 1;
          mark_line_ends = 1;
          output_tabs = 0;
          break;

        case 'E':
          mark_line_ends = 1;
          break;

        case 'T':
          output_tabs = 0;
          break;

        default:
          usage ((char *) 0);
        }
    }

  output_desc = 1;

  /* Get device, i-node number, and optimal blocksize of output.  */

  if (fstat (output_desc, &stat_buf) < 0)
    error (1, errno, "standard output");

  outsize = ST_BLKSIZE (stat_buf);

  /* Input file can be output file for non-regular files.
     fstat on pipes returns S_IFSOCK on some systems, S_IFIFO
     on others, so the checking should not be done for those types,
     and to allow things like cat < /dev/tty > /dev/tty, checking
     is not done for device files either.  */

  if (S_ISREG (stat_buf.st_mode))
    {
      out_dev = stat_buf.st_dev;
      out_ino = stat_buf.st_ino;
    }
  else
    check_redirection = 0;

  /* Main loop: process each operand in turn, refusing any input file
     that is the same as the output file.  With no operands the loop
     body runs once with the default name "-".  */

  infile = "-";
  argind = optind;

  do
    {
      if (argind < argc)
        infile = argv[argind];

      if (infile[0] == '-' && infile[1] == 0)
        {
          have_read_stdin = 1;
          input_desc = 0;
        }
      else
        {
          input_desc = open (infile, O_RDONLY);
          if (input_desc < 0)
            {
              error (0, errno, "%s", infile);
              exit_stat = 1;
              /* Nothing was opened, so skip the close at `contin'.  */
              continue;
            }
        }

      if (fstat (input_desc, &stat_buf) < 0)
        {
          error (0, errno, "%s", infile);
          exit_stat = 1;
          goto contin;
        }
      insize = ST_BLKSIZE (stat_buf);

      /* Compare the device and i-node numbers of this input file with
         the corresponding values of the (output file associated with)
         stdout, and skip this input file if they coincide.  Input
         files cannot be redirected to themselves.  */

      if (check_redirection
          && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino)
        {
          error (0, 0, "%s: input file is output file", infile);
          exit_stat = 1;
          goto contin;
        }

      /* Select which version of `cat' to use.  If any options (more than -u)
         were specified, use `cat', otherwise use `simple_cat'.  */

      if (options == 0)
        {
          insize = max (insize, outsize);
          inbuf = (unsigned char *) xmalloc (insize);

          simple_cat (inbuf, insize);
        }
      else
        {
          /* One extra byte for the newline sentinel that `cat' stores
             just past the data it has read.  */
          inbuf = (unsigned char *) xmalloc (insize + 1);

          /* Why are (OUTSIZE - 1 + INSIZE * 4 + 13) bytes allocated for
             the output buffer?

             A test whether output needs to be written is done when the input
             buffer empties or when a newline appears in the input.  After
             output is written, at most (OUTSIZE - 1) bytes will remain in the
             buffer.  Now INSIZE bytes of input is read.  Each input character
             may grow by a factor of 4 (by the prepending of M-^).  If all
             characters do, and no newlines appear in this block of input, we
             will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer.
             If the last character in the preceding block of input was a
             newline, a line number may be written (according to the given
             options) as the first thing in the output buffer.  (Done after the
             new input is read, but before processing of the input begins.)  A
             line number requires seldom more than 13 positions.  */

          outbuf = (unsigned char *) xmalloc (outsize - 1 + insize * 4 + 13);

          cat (inbuf, insize, outbuf, outsize, quote,
               output_tabs, numbers, numbers_at_empty_lines, mark_line_ends,
               squeeze_empty_lines);

          free (outbuf);
        }

      free (inbuf);

    contin:
      /* Standard input is closed once, after the loop.  */
      if (strcmp (infile, "-") && close (input_desc) < 0)
        {
          error (0, errno, "%s", infile);
          exit_stat = 1;
        }
    }
  while (++argind < argc);

  if (have_read_stdin && close (0) < 0)
    error (1, errno, "-");
  if (close (1) < 0)
    error (1, errno, "write error");

  exit (exit_stat);
}
/* Plain cat. Copies the file behind `input_desc' to the file behind
`output_desc'. */
void
simple_cat (buf, bufsize)
/* Pointer to the buffer, used by reads and writes. */
unsigned char *buf;
/* Number of characters preferably read or written by each read and write
call. */
int bufsize;
{
/* Actual number of characters read, and therefore written. */
int n_read;
/* Loop until the end of the file. */
for (;;)
{
/* Read a block of input. */
n_read = read (input_desc, buf, bufsize);
if (n_read < 0)
{
error (0, errno, "%s", infile);
exit_stat = 1;
return;
}
/* End of this file? */
if (n_read == 0)
break;
/* Write this block out. */
if (write (output_desc, buf, n_read) != n_read)
error (1, errno, "write error");
}
}
/* Cat the file behind INPUT_DESC to the file behind OUTPUT_DESC.
Called if any option more than -u was specified.
A newline character is always put at the end of the buffer, to make
an explicit test for buffer end unnecessary.
The function returns when a read yields 0 bytes, or after a failed
read or ioctl; every return path first stores the local NEWLINES
state in the global `newlines2', and the error paths also set
`exit_stat' to 1. A write that does not transfer the requested
number of bytes is reported through error (1, ...). */
void
cat (inbuf, insize, outbuf, outsize, quote,
output_tabs, numbers, numbers_at_empty_lines,
mark_line_ends, squeeze_empty_lines)
/* Pointer to the beginning of the input buffer. The sentinel is
stored at INBUF + (bytes read), so the buffer needs room for
INSIZE + 1 bytes. */
unsigned char *inbuf;
/* Number of characters read in each read call. */
int insize;
/* Pointer to the beginning of the output buffer. */
unsigned char *outbuf;
/* Number of characters written by each write call. */
int outsize;
/* Variables that have values according to the specified options. */
/* Nonzero: write control and 8-bit characters in ^X / M-X notation. */
int quote;
/* Nonzero: copy TAB unchanged; zero: write it as ^I. */
int output_tabs;
/* Nonzero: put a line number in front of lines. */
int numbers;
/* Nonzero (together with NUMBERS): number empty lines too. */
int numbers_at_empty_lines;
/* Nonzero: write a `$' in front of every newline. */
int mark_line_ends;
/* Nonzero: drop the second and later of consecutive empty lines. */
int squeeze_empty_lines;
{
/* Last character read from the input buffer. */
unsigned char ch;
/* Pointer to the next character in the input buffer. */
unsigned char *bpin;
/* Pointer to the first non-valid byte in the input buffer, i.e. the
current end of the buffer. */
unsigned char *eob;
/* Pointer to the position where the next character shall be written. */
unsigned char *bpout;
/* Number of characters read by the last read call. */
int n_read;
/* Tracks consecutive newlines in the input. It is set to -1 when the
newline that follows ordinary characters is met in the copy loops
below, and it is incremented for every real (non-sentinel) newline
handled at the top of the main loop; so it is 0 right after the
newline that ends a non-empty line and grows by one for each
further empty line. Initially 0 (taken from `newlines2') to
indicate that we are at the beginning of a new line. The "state"
of the procedure is determined by NEWLINES. */
int newlines = newlines2;
#ifdef FIONREAD
/* If nonzero, use the FIONREAD ioctl, as an optimization.
(On Ultrix, it is not supported on NFS filesystems.) */
int use_fionread = 1;
#endif
/* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
is read immediately. */
eob = inbuf;
bpin = eob + 1;
bpout = outbuf;
for (;;)
{
/* This inner loop runs once per newline (real or sentinel): it flushes
output, refills the input buffer or emits the newline, and fetches
the next character. It is left when that character is not a
newline. */
do
{
/* Write if there are at least OUTSIZE bytes in OUTBUF. */
if (bpout - outbuf >= outsize)
{
unsigned char *wp = outbuf;
/* Write whole OUTSIZE-byte chunks while enough bytes remain. */
do
{
if (write (output_desc, wp, outsize) != outsize)
error (1, errno, "write error");
wp += outsize;
}
while (bpout - wp >= outsize);
/* Move the remaining bytes to the beginning of the
buffer. */
bcopy (wp, outbuf, bpout - wp);
bpout = outbuf + (bpout - wp);
}
/* Is INBUF empty? (BPIN has moved past the sentinel at EOB.) */
if (bpin > eob)
{
#ifdef FIONREAD
int n_to_read = 0;
/* Is there any input to read immediately?
If not, we are about to wait,
so write all buffered output before waiting. */
if (use_fionread
&& ioctl (input_desc, FIONREAD, &n_to_read) < 0)
{
/* Ultrix returns EOPNOTSUPP on NFS;
HP-UX returns ENOTTY on pipes. */
/* In those cases stop using the ioctl for this file; N_TO_READ
stays 0, so the output is flushed before every read. */
if (errno == EOPNOTSUPP || errno == ENOTTY)
use_fionread = 0;
else
{
error (0, errno, "cannot do ioctl on `%s'", infile);
exit_stat = 1;
newlines2 = newlines;
return;
}
}
if (n_to_read == 0)
#endif
{
/* Flush everything buffered so far. Without FIONREAD this
block is unconditional. */
int n_write = bpout - outbuf;
if (write (output_desc, outbuf, n_write) != n_write)
error (1, errno, "write error");
bpout = outbuf;
}
/* Read more input into INBUF. */
n_read = read (input_desc, inbuf, insize);
if (n_read < 0)
{
error (0, errno, "%s", infile);
exit_stat = 1;
newlines2 = newlines;
return;
}
/* Nothing more to read: remember the state and return. */
if (n_read == 0)
{
newlines2 = newlines;
return;
}
/* Update the pointers and insert a sentinel at the buffer
end. */
bpin = inbuf;
eob = bpin + n_read;
*eob = '\n';
}
else
{
/* It was a real (not a sentinel) newline. */
/* Was the last line empty?
(i.e. have two or more consecutive newlines been read?) */
if (++newlines > 0)
{
/* Are multiple adjacent empty lines to be substituted by
single ditto (-s), and this was the second empty line? */
if (squeeze_empty_lines && newlines >= 2)
{
/* Drop this newline: fetch the next character and go straight
to the loop test. */
ch = *bpin++;
continue;
}
/* Are line numbers to be written at empty lines (-n)? */
if (numbers && numbers_at_empty_lines)
{
next_line_num ();
bpout = (unsigned char *) stpcpy (bpout, line_num_print);
}
}
/* Output a dollar sign to mark the line end if requested (-e). */
if (mark_line_ends)
*bpout++ = '$';
/* Output the newline. */
*bpout++ = '\n';
}
ch = *bpin++;
}
while (ch == '\n');
/* Are we at the beginning of a line, and line numbers are requested? */
if (newlines >= 0 && numbers)
{
next_line_num ();
bpout = (unsigned char *) stpcpy (bpout, line_num_print);
}
/* Here CH cannot contain a newline character. */
/* The loops below continue until a newline character is found,
which means that the buffer is empty or that a proper newline
has been found. */
/* If quoting, i.e. at least one of -v, -e, or -t specified,
scan for chars that need conversion. */
if (quote)
for (;;)
{
if (ch >= 32)
{
/* 32..126 are copied unchanged; 127 becomes ^?. */
if (ch < 127)
*bpout++ = ch;
else if (ch == 127)
*bpout++ = '^',
*bpout++ = '?';
else
{
/* 128..255: write `M-' followed by the notation for CH - 128. */
*bpout++ = 'M',
*bpout++ = '-';
if (ch >= 128 + 32)
if (ch < 128 + 127)
*bpout++ = ch - 128;
else
*bpout++ = '^',
*bpout++ = '?';
else
*bpout++ = '^',
*bpout++ = ch - 128 + 64;
}
}
else if (ch == '\t' && output_tabs)
*bpout++ = '\t';
else if (ch == '\n')
{
/* End of the run of ordinary characters (real newline or
sentinel); the top of the main loop tells them apart. */
newlines = -1;
break;
}
else
/* Any other character below 32 is written as ^ plus CH + 64. */
*bpout++ = '^',
*bpout++ = ch + 64;
ch = *bpin++;
}
else
/* Not quoting, neither of -v, -e, or -t specified. */
for (;;)
{
/* TAB is written as ^I when tabs are not to be output as is. */
if (ch == '\t' && !output_tabs)
*bpout++ = '^',
*bpout++ = ch + 64;
else if (ch != '\n')
*bpout++ = ch;
else
{
newlines = -1;
break;
}
ch = *bpin++;
}
}
}
/* Compute the next line number. */
void
next_line_num ()
{
char *endp = line_num_end;
do
{
if ((*endp)++ < '9')
return;
*endp-- = '0';
}
while (endp >= line_num_start);
*--line_num_start = '1';
if (line_num_start < line_num_print)
line_num_print--;
}

274
src/cksum.c Normal file
View File

@@ -0,0 +1,274 @@
/* cksum -- calculate and print POSIX.2 checksums and sizes of files
Copyright (C) 1992 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Q. Frank Xia, qx@math.columbia.edu.
Cosmetic changes and reorganization by David MacKenzie, djm@gnu.ai.mit.edu.
Usage: cksum [file...]
The code segment between "#ifdef CRCTAB" and "#else" is the code
which calculates the "crctab". It is included for those who want to
verify the correctness of the "crctab". To recreate the "crctab",
do the following:
cc -DCRCTAB -o crctab cksum.c
crctab > crctab.h
As Bruce Evans pointed out to me, the crctab in the sample C code
in 4.9.10 Rationale of P1003.2/D11.2 is represented in reversed order.
Namely, 0x01 is represented as 0x80, 0x02 is represented as 0x40, etc.
The generating polynomial is crctab[0x80]=0xedb88320 instead of
crctab[1]=0x04C11DB7. But the code works only for a non-reverse order
crctab. Therefore, the sample implementation is wrong.
This software is compatible with neither the System V nor the BSD
`sum' program. It is supposed to conform to P1003.2/D11.2,
except foreign language interface (4.9.5.3 of P1003.2/D11.2) support.
Any inconsistency with the standard except 4.9.5.3 is a bug. */
#ifdef CRCTAB
#include <stdio.h>
/* An unsigned long with only bit X set.  */
#define BIT(x) ( (unsigned long)1 << (x) )
/* The top bit of a 32-bit value.  */
#define SBIT BIT(31)
/* The generating polynomial is

     G(X) = X^32 + X^26 + X^23 + X^22 + X^16 + X^12 + X^11 + X^10
            + X^8 + X^7 + X^5 + X^4 + X^2 + X^1 + 1

   Bit i of GEN is set if X^i is a summand of G(X), except for X^32.
   The definition carries no trailing semicolon, so GEN can be used
   inside a larger expression as well as in `r[0] = GEN;'.  */
#define GEN (BIT(26)|BIT(23)|BIT(22)|BIT(16)|BIT(12)|BIT(11)|BIT(10)\
             |BIT(8) |BIT(7) |BIT(5) |BIT(4) |BIT(2) |BIT(1) |BIT(0))
/* Table filled in by fill_r and combined by remainder.  */
unsigned long r[8];
/* Fill the table `r'.  r[0] is GEN; each following entry is the
   previous one shifted left by one bit, XORed with r[0] whenever the
   previous entry had SBIT set.  */
void
fill_r ()
{
  int k = 0;

  r[0] = GEN;
  while (++k < 8)
    {
      unsigned long prev = r[k - 1];
      unsigned long next = prev << 1;

      if (prev & SBIT)
        next ^= r[0];
      r[k] = next;
    }
}
/* Return the XOR of the entries r[i] for every bit i (0..7) that is
   set in M, reduced to its low 32 bits.  */
unsigned long
remainder (m)
     int m;
{
  unsigned long acc = 0;
  int bit = 0;

  while (bit < 8)
    {
      if (BIT (bit) & m)
        acc ^= r[bit];
      bit++;
    }

  /* Make it run on 64-bit machine.  */
  return acc & 0xFFFFFFFF;
}
/* Print on standard output a C definition of the 256-entry `crctab'
   array: the literal 0x0 for entry 0, then 51 rows of five entries
   each, computed by `remainder' for the indexes 1 to 255.

   The values are `unsigned long', so they are printed with the `l'
   length modifier; plain %X expects an `unsigned int' argument.
   Returns 0 from main instead of calling exit, which has no
   declaration in this branch of the file.  */
int
main ()
{
  int i;

  fill_r ();
  printf ("unsigned long crctab[256] = {\n 0x0");
  for (i = 0; i < 51; i++)
    {
      printf (",\n 0x%08lX, 0x%08lX, 0x%08lX, 0x%08lX, 0x%08lX",
              remainder (i * 5 + 1), remainder (i * 5 + 2),
              remainder (i * 5 + 3), remainder (i * 5 + 4),
              remainder (i * 5 + 5));
    }
  printf ("\n};\n");
  return 0;
}
#else /* !CRCTAB */
#include <stdio.h>
#include <sys/types.h>
#include "system.h"
/* Number of bytes to read at once. */
#define BUFLEN (1 << 16)
/* CRC lookup table, indexed by a byte value (see its use in cksum).
The values are those printed by the table generator that this file
contains under "#ifdef CRCTAB"; do not edit them by hand. */
unsigned long crctab[256] =
{
0x0,
0x04C11DB7, 0x09823B6E, 0x0D4326D9, 0x130476DC, 0x17C56B6B,
0x1A864DB2, 0x1E475005, 0x2608EDB8, 0x22C9F00F, 0x2F8AD6D6,
0x2B4BCB61, 0x350C9B64, 0x31CD86D3, 0x3C8EA00A, 0x384FBDBD,
0x4C11DB70, 0x48D0C6C7, 0x4593E01E, 0x4152FDA9, 0x5F15ADAC,
0x5BD4B01B, 0x569796C2, 0x52568B75, 0x6A1936C8, 0x6ED82B7F,
0x639B0DA6, 0x675A1011, 0x791D4014, 0x7DDC5DA3, 0x709F7B7A,
0x745E66CD, 0x9823B6E0, 0x9CE2AB57, 0x91A18D8E, 0x95609039,
0x8B27C03C, 0x8FE6DD8B, 0x82A5FB52, 0x8664E6E5, 0xBE2B5B58,
0xBAEA46EF, 0xB7A96036, 0xB3687D81, 0xAD2F2D84, 0xA9EE3033,
0xA4AD16EA, 0xA06C0B5D, 0xD4326D90, 0xD0F37027, 0xDDB056FE,
0xD9714B49, 0xC7361B4C, 0xC3F706FB, 0xCEB42022, 0xCA753D95,
0xF23A8028, 0xF6FB9D9F, 0xFBB8BB46, 0xFF79A6F1, 0xE13EF6F4,
0xE5FFEB43, 0xE8BCCD9A, 0xEC7DD02D, 0x34867077, 0x30476DC0,
0x3D044B19, 0x39C556AE, 0x278206AB, 0x23431B1C, 0x2E003DC5,
0x2AC12072, 0x128E9DCF, 0x164F8078, 0x1B0CA6A1, 0x1FCDBB16,
0x018AEB13, 0x054BF6A4, 0x0808D07D, 0x0CC9CDCA, 0x7897AB07,
0x7C56B6B0, 0x71159069, 0x75D48DDE, 0x6B93DDDB, 0x6F52C06C,
0x6211E6B5, 0x66D0FB02, 0x5E9F46BF, 0x5A5E5B08, 0x571D7DD1,
0x53DC6066, 0x4D9B3063, 0x495A2DD4, 0x44190B0D, 0x40D816BA,
0xACA5C697, 0xA864DB20, 0xA527FDF9, 0xA1E6E04E, 0xBFA1B04B,
0xBB60ADFC, 0xB6238B25, 0xB2E29692, 0x8AAD2B2F, 0x8E6C3698,
0x832F1041, 0x87EE0DF6, 0x99A95DF3, 0x9D684044, 0x902B669D,
0x94EA7B2A, 0xE0B41DE7, 0xE4750050, 0xE9362689, 0xEDF73B3E,
0xF3B06B3B, 0xF771768C, 0xFA325055, 0xFEF34DE2, 0xC6BCF05F,
0xC27DEDE8, 0xCF3ECB31, 0xCBFFD686, 0xD5B88683, 0xD1799B34,
0xDC3ABDED, 0xD8FBA05A, 0x690CE0EE, 0x6DCDFD59, 0x608EDB80,
0x644FC637, 0x7A089632, 0x7EC98B85, 0x738AAD5C, 0x774BB0EB,
0x4F040D56, 0x4BC510E1, 0x46863638, 0x42472B8F, 0x5C007B8A,
0x58C1663D, 0x558240E4, 0x51435D53, 0x251D3B9E, 0x21DC2629,
0x2C9F00F0, 0x285E1D47, 0x36194D42, 0x32D850F5, 0x3F9B762C,
0x3B5A6B9B, 0x0315D626, 0x07D4CB91, 0x0A97ED48, 0x0E56F0FF,
0x1011A0FA, 0x14D0BD4D, 0x19939B94, 0x1D528623, 0xF12F560E,
0xF5EE4BB9, 0xF8AD6D60, 0xFC6C70D7, 0xE22B20D2, 0xE6EA3D65,
0xEBA91BBC, 0xEF68060B, 0xD727BBB6, 0xD3E6A601, 0xDEA580D8,
0xDA649D6F, 0xC423CD6A, 0xC0E2D0DD, 0xCDA1F604, 0xC960EBB3,
0xBD3E8D7E, 0xB9FF90C9, 0xB4BCB610, 0xB07DABA7, 0xAE3AFBA2,
0xAAFBE615, 0xA7B8C0CC, 0xA379DD7B, 0x9B3660C6, 0x9FF77D71,
0x92B45BA8, 0x9675461F, 0x8832161A, 0x8CF30BAD, 0x81B02D74,
0x857130C3, 0x5D8A9099, 0x594B8D2E, 0x5408ABF7, 0x50C9B640,
0x4E8EE645, 0x4A4FFBF2, 0x470CDD2B, 0x43CDC09C, 0x7B827D21,
0x7F436096, 0x7200464F, 0x76C15BF8, 0x68860BFD, 0x6C47164A,
0x61043093, 0x65C52D24, 0x119B4BE9, 0x155A565E, 0x18197087,
0x1CD86D30, 0x029F3D35, 0x065E2082, 0x0B1D065B, 0x0FDC1BEC,
0x3793A651, 0x3352BBE6, 0x3E119D3F, 0x3AD08088, 0x2497D08D,
0x2056CD3A, 0x2D15EBE3, 0x29D4F654, 0xC5A92679, 0xC1683BCE,
0xCC2B1D17, 0xC8EA00A0, 0xD6AD50A5, 0xD26C4D12, 0xDF2F6BCB,
0xDBEE767C, 0xE3A1CBC1, 0xE760D676, 0xEA23F0AF, 0xEEE2ED18,
0xF0A5BD1D, 0xF464A0AA, 0xF9278673, 0xFDE69BC4, 0x89B8FD09,
0x8D79E0BE, 0x803AC667, 0x84FBDBD0, 0x9ABC8BD5, 0x9E7D9662,
0x933EB0BB, 0x97FFAD0C, 0xAFB010B1, 0xAB710D06, 0xA6322BDF,
0xA2F33668, 0xBCB4666D, 0xB8757BDA, 0xB5365D03, 0xB1F740B4
};
/* The name this program was run with. */
char *program_name;
/* Nonzero if any of the files read were the standard input.
Set by cksum; main uses it to decide whether to close stdin. */
int have_read_stdin;
/* Calculate and print the checksum and length in bytes
   of file FILE, or of the standard input if FILE is "-".
   If PRINT_NAME is nonzero, print FILE next to the checksum and size.
   Return 0 if successful, -1 if an error occurs.

   The file is read in chunks of BUFLEN bytes.  Every byte is folded
   into the CRC through `crctab', then the bytes of the total length
   are folded in, low-order byte first, and the result is complemented
   and reduced to 32 bits.  Standard input is never closed here; the
   global `have_read_stdin' records that it was used.  */

int
cksum (file, print_name)
     char *file;
     int print_name;
{
  unsigned char buf[BUFLEN];
  unsigned long crc = 0;
  long length = 0;
  long chunk;
  long rest;
  long i;
  FILE *stream;
  int is_stdin = !strcmp (file, "-");

  if (is_stdin)
    {
      stream = stdin;
      have_read_stdin = 1;
    }
  else
    {
      stream = fopen (file, "r");
      if (stream == NULL)
        {
          error (0, errno, "%s", file);
          return -1;
        }
    }

  for (;;)
    {
      chunk = fread (buf, 1, BUFLEN, stream);
      if (chunk <= 0)
        break;

      length += chunk;
      for (i = 0; i < chunk; i++)
        crc = (crc << 8) ^ crctab[((crc >> 24) ^ buf[i]) & 0xFF];
    }

  if (ferror (stream))
    {
      error (0, errno, "%s", file);
      if (!is_stdin)
        fclose (stream);
      return -1;
    }

  if (!is_stdin && fclose (stream) == EOF)
    {
      error (0, errno, "%s", file);
      return -1;
    }

  /* Fold in the length, one byte at a time, lowest byte first.  */
  for (rest = length; rest > 0; rest >>= 8)
    crc = (crc << 8) ^ crctab[((crc >> 24) ^ rest) & 0xFF];

  crc = ~crc & 0xFFFFFFFF;

  printf ("%10lu %8ld", crc, length);
  if (print_name)
    printf (" %s", file);
  putchar ('\n');

  return 0;
}
/* Entry point of `cksum'.  With no arguments, checksum the standard
   input without printing a name; otherwise checksum every argument in
   turn and print its name.  The exit status is 1 if any call to cksum
   failed, 0 otherwise.  */
void
main (argc, argv)
     int argc;
     char **argv;
{
  int failed = 0;
  int i;

  program_name = argv[0];
  have_read_stdin = 0;

  if (argc != 1)
    {
      for (i = 1; i < argc; i++)
        {
          if (cksum (argv[i], 1) < 0)
            failed = 1;
        }
    }
  else if (cksum ("-", 0) < 0)
    failed = 1;

  /* Standard input is closed here, once, if any call used it.  */
  if (have_read_stdin && fclose (stdin) == EOF)
    error (1, errno, "-");

  exit (failed);
}
#endif /* !CRCTAB */

221
src/comm.c Normal file
View File

@@ -0,0 +1,221 @@
/* comm -- compare two sorted files line by line.
Copyright (C) 1986, 1990, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Richard Stallman and David MacKenzie. */
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
#include "linebuffer.h"
#define min(x, y) ((x) < (y) ? (x) : (y))
/* If nonzero, print lines that are found only in file 1. */
int only_file_1;
/* If nonzero, print lines that are found only in file 2. */
int only_file_2;
/* If nonzero, print lines that are found in both files. */
int both;
/* The name this program was run with. */
char *program_name;
int compare_files ();
void error ();
void writeline ();
void usage ();
/* Parse the column-suppressing options -1, -2 and -3, insist on
   exactly two file operands, and exit with the status returned by
   `compare_files'.  */

void
main (argc, argv)
     int argc;
     char *argv[];
{
  int c;

  program_name = argv[0];

  /* Every column is printed unless its option turns it off.  */
  only_file_1 = only_file_2 = both = 1;

  while ((c = getopt (argc, argv, "123")) != EOF)
    {
      if (c == '1')
        only_file_1 = 0;
      else if (c == '2')
        only_file_2 = 0;
      else if (c == '3')
        both = 0;
      else
        usage ();
    }

  if (argc != optind + 2)
    usage ();

  exit (compare_files (&argv[optind]));
}
/* Merge the files named INFILES[0] and INFILES[1], either of which
   may be "-" for the standard input, and write each line to the
   standard output through `writeline', classified as found only in
   the first file, only in the second, or in both.
   Both inputs are assumed to be sorted.
   Return 0 on success, 1 if a file cannot be opened or an input or
   output error is detected.  */

int
compare_files (infiles)
     char **infiles;
{
  /* Line storage, one buffer per input file.  */
  struct linebuffer lb1[2];

  /* thisline[i] is the pending line of file i, or NULL once `readline'
     has reported that file i has no more lines.  */
  struct linebuffer *thisline[2];

  /* streams[i] is the input stream of file i.  */
  FILE *streams[2];

  int i, ret = 0;

  /* Open both inputs and fetch the first line of each.  */
  for (i = 0; i < 2; i++)
    {
      initbuffer (&lb1[i]);

      if (strcmp (infiles[i], "-") == 0)
        streams[i] = stdin;
      else
        streams[i] = fopen (infiles[i], "r");

      if (streams[i] == NULL)
        {
          error (0, errno, "%s", infiles[i]);
          return 1;
        }

      thisline[i] = readline (&lb1[i], streams[i]);
    }

  while (thisline[0] != NULL || thisline[1] != NULL)
    {
      int order;

      /* ORDER is negative when file 0 holds the lesser line, positive
         when file 1 does, and zero when the two lines are equal.
         An exhausted file always loses.  */
      if (thisline[0] == NULL)
        order = 1;
      else if (thisline[1] == NULL)
        order = -1;
      else
        {
          struct linebuffer *a = thisline[0];
          struct linebuffer *b = thisline[1];

          /* Compare the common prefix; on a tie the shorter line sorts
             first.  (bcmp would not do: it only reports equality.)  */
          order = memcmp (a->buffer, b->buffer, min (a->length, b->length));
          if (order == 0)
            order = a->length - b->length;
        }

      /* Emit the lesser line in its column.  */
      if (order < 0)
        writeline (thisline[0], stdout, 1);
      else if (order > 0)
        writeline (thisline[1], stdout, 2);
      else
        writeline (thisline[1], stdout, 3);

      /* Advance whichever file supplied the line; both on a match.  */
      if (order >= 0)
        thisline[1] = readline (thisline[1], streams[1]);
      if (order <= 0)
        thisline[0] = readline (thisline[0], streams[0]);
    }

  /* Release the line storage and close the inputs.  */
  for (i = 0; i < 2; i++)
    {
      free (lb1[i].buffer);
      if (ferror (streams[i]) || fclose (streams[i]) == EOF)
        {
          error (0, errno, "%s", infiles[i]);
          ret = 1;
        }
    }

  if (ferror (stdout) || fclose (stdout) == EOF)
    {
      error (0, errno, "write error");
      ret = 1;
    }

  return ret;
}
/* Write the line held in LINE to STREAM, followed by a newline,
   unless the option flags suppress its column.
   CLASS selects the column: 1 for a line found only in file 1,
   2 for a line found only in file 2, 3 for a line found in both.
   A line is preceded by one tab for each lower-numbered column that
   is being printed.  */

void
writeline (line, stream, class)
     struct linebuffer *line;
     FILE *stream;
     int class;
{
  /* Number of tabs to print before the line.  */
  int tabs = 0;

  if (class == 1)
    {
      if (!only_file_1)
        return;
    }
  else if (class == 2)
    {
      if (!only_file_2)
        return;
      /* Step over column 1 if it is being printed.  */
      if (only_file_1)
        tabs = 1;
    }
  else if (class == 3)
    {
      if (!both)
        return;
      /* Step over columns 1 and 2, each only if it is being printed.  */
      if (only_file_1)
        tabs++;
      if (only_file_2)
        tabs++;
    }

  while (tabs-- > 0)
    putc ('\t', stream);

  fwrite (line->buffer, sizeof (char), line->length, stream);
  putc ('\n', stream);
}
/* Print a one-line usage summary on the standard error and exit with
   status 1.  */

void
usage ()
{
  fprintf (stderr,
           "Usage: %s [-123] file1 file2\n",
           program_name);
  exit (1);
}

1308
src/csplit.c Normal file

File diff suppressed because it is too large Load Diff

586
src/cut.c Normal file
View File

@@ -0,0 +1,586 @@
/* cut - remove parts of lines of files
Copyright (C) 1984 by David M. Ihnat
This program is a total rewrite of the Bell Laboratories Unix(Tm)
command of the same name, as of System V. It contains no proprietary
code, and therefore may be used without violation of any proprietary
agreements whatsoever. However, you will notice that the program is
copyrighted by me. This is to assure the program does *not* fall
into the public domain. Thus, I may specify just what I am now:
This program may be freely copied and distributed, provided this notice
remains; it may not be sold for profit without express written consent of
the author.
Please note that I recreated the behavior of the Unix(Tm) 'cut' command
as faithfully as possible; however, I haven't run a full set of regression
tests. Thus, the user of this program accepts full responsibility for any
effects or loss; in particular, the author is not responsible for any losses,
explicit or incidental, that may be incurred through use of this program.
I ask that any bugs (and, if possible, fixes) be reported to me when
possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
POSIX changes, bug fixes, long-named options, and cleanup
by David MacKenzie <djm@ai.mit.edu>.
Options:
--bytes=byte-list
-b byte-list Print only the bytes in positions listed
in BYTE-LIST.
Tabs and backspaces are treated like any
other character; they take up 1 byte.
--characters=character-list
-c character-list Print only characters in positions listed
in CHARACTER-LIST.
The same as -b for now, but
internationalization will change that.
Tabs and backspaces are treated like any
other character; they take up 1 character.
--fields=field-list
-f field-list Print only the fields listed in FIELD-LIST.
Fields are separated by a TAB by default.
--delimiter=delim
-d delim For -f, fields are separated by the first
character in DELIM instead of TAB.
-n Do not split multibyte chars (no-op for now).
--only-delimited
-s For -f, do not print lines that do not contain
the field separator character.
The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers
or ranges separated by commas. The first byte, character, and field
are numbered 1.
A FILE of `-' means standard input. */
#define _GNU_SOURCE
#include <ctype.h>
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
#ifdef isascii
#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
#else
#define ISDIGIT(c) (isdigit ((c)))
#endif
char *xmalloc ();
char *xrealloc ();
int set_fields ();
int cut_file ();
void cut_stream ();
void cut_bytes ();
void cut_fields ();
void enlarge_line ();
void error ();
void invalid_list ();
void usage ();
/* The number of elements allocated for the input line
and the byte or field number.
Enlarged as necessary. */
int line_size;
/* Processed output buffer. */
char *outbuf;
/* Where to save next char to output. */
char *outbufptr;
/* Raw line buffer for field mode. */
char *inbuf;
/* Where to save next input char. */
char *inbufptr;
/* What can be done about a byte or field. */
enum field_action
{
field_omit,
field_output
};
/* In byte mode, which bytes to output.
In field mode, which `delim'-separated fields to output.
Both bytes and fields are numbered starting with 1,
so the first element of `fields' is unused. */
enum field_action *fields;
enum operating_mode
{
undefined_mode,
/* Output characters that are in the given bytes. */
byte_mode,
/* Output the given delimiter-separated fields. */
field_mode
};
enum operating_mode operating_mode;
/* If nonzero,
for field mode, do not output lines containing no delimiter characters. */
int delimited_lines_only;
/* The delimiter character for field mode. */
unsigned char delim;
/* Nonzero if we have ever read standard input. */
int have_read_stdin;
/* The name this program was run with. */
char *program_name;
struct option longopts[] =
{
{"bytes", 1, 0, 'b'},
{"characters", 1, 0, 'c'},
{"fields", 1, 0, 'f'},
{"delimiter", 1, 0, 'd'},
{"only-delimited", 0, 0, 's'},
{0, 0, 0, 0}
};
/* Parse the options, build the byte or field selection table, cut
   every file operand (or the standard input when there is none) and
   exit with a nonzero status if any file failed.  */

void
main (argc, argv)
     int argc;
     char **argv;
{
  int optc, exit_status = 0;
  int i;

  program_name = argv[0];

  operating_mode = undefined_mode;
  delimited_lines_only = 0;
  delim = '\0';
  have_read_stdin = 0;

  /* Start with room for 512 positions; nothing is selected yet.  */
  line_size = 512;
  fields = (enum field_action *)
    xmalloc (line_size * sizeof (enum field_action));
  outbuf = (char *) xmalloc (line_size);
  inbuf = (char *) xmalloc (line_size);
  for (i = 0; i < line_size; i++)
    fields[i] = field_omit;

  while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0))
         != EOF)
    switch (optc)
      {
      case 'b':
      case 'c':
      case 'f':
        /* A list option selects the operating mode, so only one of
           them may be given.  -b and -c both select byte mode.  */
        if (operating_mode != undefined_mode)
          usage ();
        operating_mode = optc == 'f' ? field_mode : byte_mode;
        if (set_fields (optarg) == 0)
          error (2, 0, "no fields given");
        break;

      case 'd':
        /* The delimiter must be exactly one character long.  */
        if (optarg[0] == '\0')
          error (2, 0, "no delimiter given");
        if (optarg[1] != '\0')
          error (2, 0, "delimiter must be a single character");
        delim = optarg[0];
        break;

      case 'n':
        /* Accepted and ignored.  */
        break;

      case 's':
        delimited_lines_only++;
        break;

      default:
        usage ();
      }

  if (operating_mode == undefined_mode)
    usage ();

  /* -d and -s are meaningful only together with -f.  */
  if (operating_mode != field_mode
      && (delimited_lines_only || delim != '\0'))
    usage ();

  if (delim == '\0')
    delim = '\t';

  if (optind == argc)
    exit_status |= cut_file ("-");
  else
    while (optind < argc)
      {
        exit_status |= cut_file (argv[optind]);
        optind++;
      }

  if (have_read_stdin && fclose (stdin) == EOF)
    {
      error (0, errno, "-");
      exit_status = 1;
    }
  if (ferror (stdout) || fclose (stdout) == EOF)
    error (1, 0, "write error");

  exit (exit_status);
}
/* Select for printing the positions in `fields' that are listed in
   byte or field specification FIELDSTR.  FIELDSTR should be
   composed of one or more numbers or ranges of numbers, separated by
   blanks or commas.  Incomplete ranges may be given: `-m' means
   `1-m'; `n-' means `n' through end of line or last field.

   `fields' is enlarged as needed to hold the highest number named.
   A malformed list, or a range whose end precedes its start, is
   reported through `invalid_list'.

   Return the number of fields selected.  */

int
set_fields (fieldstr)
     char *fieldstr;
{
  int initial = 1;              /* Value of first number in a range.  */
  int dash_found = 0;           /* Nonzero if a '-' is found in this field.  */
  int value = 0;                /* If nonzero, a number being accumulated.  */
  int fields_selected = 0;      /* Number of fields selected so far.  */
  /* If nonzero, index of first field in a range that goes to end of line.  */
  int eol_range_start = 0;

  for (;;)
    {
      if (*fieldstr == '-')
        {
          /* Starting a range.  */
          if (dash_found)
            invalid_list ();
          dash_found++;
          fieldstr++;

          if (value)
            {
              if (value >= line_size)
                enlarge_line (value);
              initial = value;
              value = 0;
            }
          else
            initial = 1;
        }
      else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
        {
          /* Ending the string, or this field/byte sublist.  */
          if (dash_found)
            {
              dash_found = 0;

              /* A range.  Possibilities: -n, m-n, n-.
                 In any case, `initial' contains the start of the range.  */
              if (value == 0)
                {
                  /* `n-'.  From `initial' to end of line.  If an
                     earlier `m-' range is already recorded, keep
                     whichever of the two starts first; overwriting
                     it unconditionally would make a list such as
                     `3-,5-' lose positions 3 and 4.  */
                  if (eol_range_start == 0 || initial < eol_range_start)
                    eol_range_start = initial;
                  fields_selected++;
                }
              else
                {
                  /* `m-n' or `-n' (1-n).  */
                  if (value < initial)
                    invalid_list ();

                  if (value >= line_size)
                    enlarge_line (value);

                  /* Is there already a range going to end of line?  */
                  if (eol_range_start != 0)
                    {
                      /* Yes.  Is the new sequence already contained
                         in the old one?  If so, no processing is
                         necessary.  */
                      if (initial < eol_range_start)
                        {
                          /* No, the new sequence starts before the
                             old.  Does the old range going to end of
                             line extend into the new range?  */
                          if (eol_range_start < value)
                            /* Yes.  Simply move the end of line marker.  */
                            eol_range_start = initial;
                          else
                            {
                              /* No.  A simple range, before and disjoint
                                 from the range going to end of line.
                                 Fill it.  */
                              for (; initial <= value; initial++)
                                fields[initial] = field_output;
                            }

                          /* In any case, some fields were selected.  */
                          fields_selected++;
                        }
                    }
                  else
                    {
                      /* There is no range going to end of line.  */
                      for (; initial <= value; initial++)
                        fields[initial] = field_output;
                      fields_selected++;
                    }
                  value = 0;
                }
            }
          else if (value != 0)
            {
              /* A simple field number, not a range.  */
              if (value >= line_size)
                enlarge_line (value);
              fields[value] = field_output;
              value = 0;
              fields_selected++;
            }

          if (*fieldstr == '\0')
            {
              /* If there was a range going to end of line, fill the
                 array from the end of line point.  */
              if (eol_range_start)
                for (initial = eol_range_start; initial < line_size; initial++)
                  fields[initial] = field_output;

              return fields_selected;
            }

          fieldstr++;
        }
      else if (ISDIGIT (*fieldstr))
        {
          value = 10 * value + *fieldstr - '0';
          fieldstr++;
        }
      else
        invalid_list ();
    }
}
/* Cut the contents of FILE, or of the standard input if FILE is "-",
   to the standard output.
   The standard input is never closed here; its error and end-of-file
   indicators are cleared instead, and `have_read_stdin' is set so the
   caller can close it at the end.
   Return 0 if successful, 1 if FILE cannot be opened, read or closed.  */

int
cut_file (file)
     char *file;
{
  FILE *stream;

  if (!strcmp (file, "-"))
    {
      have_read_stdin = 1;
      stream = stdin;
    }
  else
    {
      stream = fopen (file, "r");
      if (stream == NULL)
        {
          error (0, errno, "%s", file);
          return 1;
        }
    }

  cut_stream (stream);

  if (ferror (stream))
    {
      /* Report first, so that fclose cannot disturb errno.  */
      error (0, errno, "%s", file);
      /* Close a stream we opened ourselves even after a read error;
         otherwise every unreadable operand leaks a FILE.  */
      if (strcmp (file, "-"))
        fclose (stream);
      return 1;
    }

  if (!strcmp (file, "-"))
    clearerr (stream);          /* Also clear EOF.  */
  else if (fclose (stream) == EOF)
    {
      error (0, errno, "%s", file);
      return 1;
    }

  return 0;
}
/* Cut STREAM to the standard output with the routine that matches
   `operating_mode': `cut_bytes' in byte mode, `cut_fields' otherwise.  */

void
cut_stream (stream)
     FILE *stream;
{
  switch (operating_mode)
    {
    case byte_mode:
      cut_bytes (stream);
      break;

    default:
      cut_fields (stream);
      break;
    }
}
/* Copy STREAM to the standard output one line at a time, keeping only
   the bytes whose position on the line is marked `field_output' in
   `fields'.  Newlines are always kept.  */

void
cut_bytes (stream)
     FILE *stream;
{
  register int c;               /* The byte just read.  */
  int at_eof = 0;               /* Nonzero once EOF has been seen.  */

  while (!at_eof)
    {
      /* 1-based position on the line of the byte just read.  */
      int char_count = 0;

      /* Collect the selected bytes of one line in `outbuf'.  */
      outbufptr = outbuf;

      for (;;)
        {
          c = getc (stream);
          if (c == EOF)
            {
              at_eof = 1;
              break;
            }

          /* Grow the tables before the position can run off their end.  */
          char_count++;
          if (char_count == line_size - 1)
            enlarge_line (char_count);

          if (c == '\n')
            {
              *outbufptr++ = c;
              break;
            }

          if (fields[char_count] == field_output)
            *outbufptr++ = c;
        }

      /* Write what was collected, unless nothing at all was read.  */
      if (char_count != 0)
        fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
    }
}
/* Print the file open for reading on stream STREAM
with the fields marked `field_omit' in `fields' removed from each line.
All characters are initially stowed in the raw input buffer, until
at least one field has been found.
Fields are separated by the character `delim'.  A line on which no
character of a selected field was read is printed unchanged from the
raw buffer, unless `delimited_lines_only' is set.  */
void
cut_fields (stream)
FILE *stream;
{
register int c; /* Each character from the file. */
int doneflag = 0; /* Nonzero if EOF reached. */
int char_count; /* Number of chars in line before any delim. */
int fieldfound; /* Nonzero if any fields to print found. */
int curr_field; /* Current index in `fields'. */
while (doneflag == 0)
{
/* Start a new line: field 1 is current and both buffers are empty. */
char_count = 0;
fieldfound = 0;
curr_field = 1;
outbufptr = outbuf;
inbufptr = inbuf;
do
{
c = getc (stream);
if (c == EOF)
{
doneflag++;
break;
}
if (fields[curr_field] == field_output && c != '\n')
{
/* Working on a field. It, and its terminating
delimiter, go only into the processed buffer. */
fieldfound = 1;
/* Grow the buffers before the processed buffer fills up. */
if (outbufptr - outbuf == line_size - 2)
enlarge_line (outbufptr - outbuf);
*outbufptr++ = c;
}
else if (fieldfound == 0)
{
/* No selected field has been seen yet; keep the raw text (the
newline included) in case the whole line must be printed. */
if (++char_count == line_size - 1)
enlarge_line (char_count);
*inbufptr++ = c;
}
/* A delimiter ends the current field; grow `fields' before
curr_field can index past its end. */
if (c == delim && ++curr_field == line_size - 1)
enlarge_line (curr_field);
}
while (c != '\n');
if (fieldfound)
{
/* Something was found. Print it. */
if (outbufptr[-1] == delim)
--outbufptr; /* Suppress trailing delimiter. */
fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
/* The newline was not copied into the processed buffer above, so
write it here; a final line ended by EOF gets none. */
if (c == '\n')
putc (c, stdout);
}
else if (!delimited_lines_only && char_count)
/* A line with some characters, no delimiters, and no
suppression. Print it. */
/* NOTE(review): fieldfound also stays 0 when the line does contain
delimiters but no character of a selected field was read, so such a
line is printed whole here as well -- confirm this is intended. */
fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout);
}
}
/* Extend `fields', `outbuf' and `inbuf' to accommodate at least
   NEW_SIZE elements (256 more are added as room to grow), mark the
   newly added entries of `fields' as `field_omit', and update
   `line_size'.  The cursors `outbufptr' and `inbufptr' keep their
   offsets within the buffers, which may have moved.  */

void
enlarge_line (new_size)
     int new_size;
{
  /* Record the cursor offsets before reallocating: once a buffer has
     been handed to realloc its old address may no longer be used, not
     even in pointer arithmetic.  The cursors are still null when this
     is called while the option list is being parsed.  */
  long out_used = outbufptr ? outbufptr - outbuf : 0;
  long in_used = inbufptr ? inbufptr - inbuf : 0;
  int i;

  new_size += 256;              /* Leave some room to grow.  */

  fields = (enum field_action *)
    xrealloc (fields, new_size * sizeof (enum field_action));

  outbuf = (char *) xrealloc (outbuf, new_size);
  if (outbufptr)
    outbufptr = outbuf + out_used;

  inbuf = (char *) xrealloc (inbuf, new_size);
  if (inbufptr)
    inbufptr = inbuf + in_used;

  for (i = line_size; i < new_size; i++)
    fields[i] = field_omit;

  line_size = new_size;
}
/* Report a malformed byte or field list through `error', with
   status 2.  */

void
invalid_list ()
{
  error (2, 0,
         "invalid byte or field list");
}
/* Print the usage summary on the standard error and exit with
   status 2.  */

void
usage ()
{
  fprintf (stderr,
           "Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n"
           "%s {-c character-list,--characters=character-list} [file...]\n"
           "%s {-f field-list,--fields=field-list} [-d delim] [-s]\n"
           "[--delimiter=delim] [--only-delimited] [file...]\n",
           program_name, program_name, program_name);
  exit (2);
}

377
src/expand.c Normal file
View File

@@ -0,0 +1,377 @@
/* expand - convert tabs to spaces
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* By default, convert all tabs to spaces.
Preserves backspace characters in the output; they decrement the
column count for tab calculations.
The default action is equivalent to -8.
Options:
--tabs=tab1[,tab2[,...]]
-t tab1[,tab2[,...]]
-tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
spaces apart instead of the default 8. Otherwise,
set the tabs at columns tab1, tab2, etc. (numbered from
0); replace any tabs beyond the tabstops given with
single spaces.
--initial
-i Only convert initial tabs on each line to spaces.
David MacKenzie <djm@ai.mit.edu> */
#define _GNU_SOURCE
#include <ctype.h>
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
#ifdef isascii
#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
#else
#define ISDIGIT(c) (isdigit ((c)))
#endif
/* The number of bytes added at a time to the amount of memory
allocated for the output line. */
#define OUTPUT_BLOCK 256
/* The number of bytes added at a time to the amount of memory
allocated for the list of tabstops. */
#define TABLIST_BLOCK 256
char *xmalloc ();
char *xrealloc ();
void error ();
FILE *next_file ();
void add_tabstop ();
void expand ();
void parse_tabstops ();
void usage ();
void validate_tabstops ();
/* If nonzero, convert blanks even after nonblank characters have been
read on the line. */
int convert_entire_line;
/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
int tab_size;
/* Array of the explicit column numbers of the tab stops;
after `tab_list' is exhausted, each additional tab is replaced
by a space. The first column is column 0. */
int *tab_list;
/* The index of the first invalid element of `tab_list',
where the next element can be added. */
int first_free_tab;
/* Null-terminated array of input filenames. */
char **file_list;
/* Default for `file_list' if no files are given on the command line. */
char *stdin_argv[] =
{
"-", NULL
};
/* Nonzero if we have ever read standard input. */
int have_read_stdin;
/* Status to return to the system. */
int exit_status;
/* The name this program was run with. */
char *program_name;
struct option longopts[] =
{
{"tabs", 1, NULL, 't'},
{"initial", 0, NULL, 'i'},
{NULL, 0, NULL, 0}
};
/* Parse the options, collecting tab stops given either as -t LIST or
   as bare digits and commas among the options, choose between a
   uniform tab size and an explicit list of stops, then expand every
   input file to the standard output.  */

void
main (argc, argv)
     int argc;
     char **argv;
{
  int tabval = -1;              /* Tab stop being accumulated, or -1.  */
  int c;                        /* Option character.  */

  program_name = argv[0];
  have_read_stdin = 0;
  exit_status = 0;
  convert_entire_line = 1;
  tab_list = NULL;
  first_free_tab = 0;

  for (;;)
    {
      c = getopt_long (argc, argv, "it:,0123456789", longopts, (int *) 0);
      if (c == EOF)
        break;

      switch (c)
        {
        case 't':
          parse_tabstops (optarg);
          break;

        case ',':
          /* A comma ends the tab stop accumulated so far.  */
          add_tabstop (tabval);
          tabval = -1;
          break;

        case '?':
          usage ();
        case 'i':
          convert_entire_line = 0;
          break;

        default:
          /* A digit: append it to the tab stop being accumulated.  */
          if (tabval == -1)
            tabval = 0;
          tabval = tabval * 10 + c - '0';
          break;
        }
    }

  /* Store a tab stop that was still being accumulated.  */
  add_tabstop (tabval);

  validate_tabstops (tab_list, first_free_tab);

  switch (first_free_tab)
    {
    case 0:
      tab_size = 8;             /* No stops given: every 8 columns.  */
      break;
    case 1:
      tab_size = tab_list[0];   /* One stop: tabs that far apart.  */
      break;
    default:
      tab_size = 0;             /* Several stops: use `tab_list'.  */
      break;
    }

  file_list = (optind == argc) ? stdin_argv : &argv[optind];

  expand ();

  if (have_read_stdin && fclose (stdin) == EOF)
    error (1, errno, "-");
  if (ferror (stdout) || fclose (stdout) == EOF)
    error (1, 0, "write error");

  exit (exit_status);
}
/* Parse STOPS, a list of decimal tab stops separated by commas or
   blanks, and append each one to the list of tab stops.
   Any other character is a fatal error.  */

void
parse_tabstops (stops)
     char *stops;
{
  int tabval = -1;              /* Number being accumulated, or -1.  */
  char *p = stops;

  while (*p != '\0')
    {
      if (*p == ',' || isblank (*p))
        {
          /* A separator ends the number read so far, if any.  */
          add_tabstop (tabval);
          tabval = -1;
        }
      else if (ISDIGIT (*p))
        tabval = (tabval == -1 ? 0 : tabval) * 10 + *p - '0';
      else
        error (1, 0, "tab size contains an invalid character");

      p++;
    }

  /* The last number has no separator after it.  */
  add_tabstop (tabval);
}
/* Add tab stop TABVAL to the end of `tab_list', except
   if TABVAL is -1, do nothing.
   `tab_list' grows by TABLIST_BLOCK entries whenever it is full.  */

void
add_tabstop (tabval)
     int tabval;
{
  if (tabval == -1)
    return;

  /* The size handed to xrealloc is in bytes, so the entry count must
     be scaled by the entry size; asking for just the count leaves room
     for only a fraction of the entries and later stores overrun the
     allocation.  */
  if (first_free_tab % TABLIST_BLOCK == 0)
    tab_list = (int *) xrealloc (tab_list,
                                 (first_free_tab + TABLIST_BLOCK)
                                 * sizeof (tab_list[0]));

  tab_list[first_free_tab++] = tabval;
}
/* Verify that the ENTRIES tab stops in TABS are all nonzero and
   strictly ascending.  Report the first violation as a fatal error.  */

void
validate_tabstops (tabs, entries)
     int *tabs;
     int entries;
{
  int prev_tab = 0;
  int *stop = tabs;
  int *end = tabs + entries;

  while (stop < end)
    {
      if (*stop == 0)
        error (1, 0, "tab size cannot be 0");
      if (*stop <= prev_tab)
        error (1, 0, "tab sizes must be ascending");
      prev_tab = *stop;
      stop++;
    }
}
/* Change tabs to spaces, writing to stdout.
   Read each file in `file_list', in order.

   With a uniform `tab_size', a tab advances to the next multiple of
   it.  Otherwise a tab advances to the next stop in `tab_list', or by
   a single column once the stops are used up.  A backspace moves the
   column back by one.  When `convert_entire_line' is zero, tabs that
   follow the first character other than a tab or backspace on a line
   are copied unchanged.  */

void
expand ()
{
  FILE *fp;                     /* Input stream.  */
  int c;                        /* Each input character.  */
  int tab_index = 0;            /* Index in `tab_list' of next tabstop.  */
  int column = 0;               /* Column on screen of the next char.  */
  int next_tab_column;          /* Column the next tab stop is on.  */
  int convert = 1;              /* If nonzero, perform translations.  */

  fp = next_file ((FILE *) NULL);
  /* `next_file' returns NULL when none of the inputs can be opened;
     reading from a null stream would crash.  */
  if (fp == NULL)
    return;

  for (;;)
    {
      c = getc (fp);
      if (c == EOF)
        {
          fp = next_file (fp);
          if (fp == NULL)
            break;              /* No more files.  */
          else
            continue;
        }

      if (c == '\n')
        {
          /* A new line starts again at the first column and stop.  */
          putchar (c);
          tab_index = 0;
          column = 0;
          convert = 1;
        }
      else if (c == '\t' && convert)
        {
          if (tab_size == 0)
            {
              /* Do not let tab_index == first_free_tab;
                 stop when it is 1 less.  */
              while (tab_index < first_free_tab - 1
                     && column >= tab_list[tab_index])
                tab_index++;
              next_tab_column = tab_list[tab_index];
              if (tab_index < first_free_tab - 1)
                tab_index++;
              if (column >= next_tab_column)
                next_tab_column = column + 1;   /* Ran out of tab stops.  */
            }
          else
            {
              next_tab_column = column + tab_size - column % tab_size;
            }

          while (column < next_tab_column)
            {
              putchar (' ');
              ++column;
            }
        }
      else
        {
          if (convert)
            {
              if (c == '\b')
                {
                  if (column > 0)
                    --column;
                }
              else
                {
                  ++column;
                  /* With -i, conversion stops for the rest of the line
                     at the first character that is not a tab or a
                     backspace.  */
                  if (convert_entire_line == 0)
                    convert = 0;
                }
            }
          putchar (c);
        }
    }
}
/* Finish with the stream FP, if it is non-NULL, and return a stream
   open for reading on the next file named in `file_list'.
   Finishing means reporting a pending read error and closing FP; the
   standard input is not closed, only its error and end-of-file
   indicators are cleared.
   A file name of `-' stands for the standard input.  Files that
   cannot be opened are reported and skipped.
   Return NULL when `file_list' is exhausted.  */

FILE *
next_file (fp)
     FILE *fp;
{
  static char *prev_file;       /* Name of the file FP was opened on.  */
  char *file;

  if (fp != NULL)
    {
      if (ferror (fp))
        {
          error (0, errno, "%s", prev_file);
          exit_status = 1;
        }

      if (fp == stdin)
        clearerr (fp);          /* Also clear EOF.  */
      else if (fclose (fp) == EOF)
        {
          error (0, errno, "%s", prev_file);
          exit_status = 1;
        }
    }

  for (file = *file_list++; file != NULL; file = *file_list++)
    {
      if (file[0] == '-' && file[1] == '\0')
        {
          have_read_stdin = 1;
          prev_file = file;
          return stdin;
        }

      fp = fopen (file, "r");
      if (fp == NULL)
        {
          /* Report the failure and go on to the next name.  */
          error (0, errno, "%s", file);
          exit_status = 1;
          continue;
        }

      prev_file = file;
      return fp;
    }

  return NULL;
}
/* Print the usage summary on the standard error and exit with
   status 1.  */

void
usage ()
{
  fprintf (stderr,
           "Usage: %s [-tab1[,tab2[,...]]] [-t tab1[,tab2[,...]]] [-i]\n"
           "[--tabs=tab1[,tab2[,...]]] [--initial] [file...]\n",
           program_name);
  exit (1);
}

250
src/fold.c Normal file
View File

@@ -0,0 +1,250 @@
/* fold -- wrap each input line to fit in specified width.
Copyright (C) 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by David MacKenzie. */
#define _GNU_SOURCE
#include <ctype.h>
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
char *xrealloc ();
int adjust_column ();
int fold_file ();
void error ();
/* If nonzero, try to break on whitespace. */
int break_spaces;
/* If nonzero, count bytes, not column positions. */
int count_bytes;
/* If nonzero, at least one of the files we read was standard input. */
int have_read_stdin;
/* The name this program was run with. */
char *program_name;
struct option longopts[] =
{
{"bytes", 0, NULL, 'b'},
{"spaces", 0, NULL, 's'},
{"width", 1, NULL, 'w'},
{NULL, 0, NULL, 0}
};
/* Parse the options, then fold every file operand (or the standard
   input when there is none) to the standard output.  Exit with a
   nonzero status if any file failed.  */

void
main (argc, argv)
     int argc;
     char **argv;
{
  int width = 80;               /* Maximum line length.  */
  int i;
  int optc;
  int errs = 0;

  program_name = argv[0];
  break_spaces = 0;
  count_bytes = 0;
  have_read_stdin = 0;

  for (;;)
    {
      optc = getopt_long (argc, argv, "bsw:", longopts, (int *) 0);
      if (optc == EOF)
        break;

      if (optc == 'b')
        /* Count bytes rather than columns.  */
        count_bytes = 1;
      else if (optc == 's')
        /* Break at word boundaries.  */
        break_spaces = 1;
      else if (optc == 'w')
        {
          /* Line width.  */
          width = atoi (optarg);
          if (width < 1)
            error (1, 0, "%s: invalid line width", optarg);
        }
      else
        {
          fprintf (stderr,
                   "Usage: %s [-bs] [-w width] [--bytes] [--spaces]"
                   " [--width=width] [file...]\n",
                   argv[0]);
          exit (1);
        }
    }

  i = optind;
  if (i == argc)
    errs |= fold_file ("-", width);
  while (i < argc)
    errs |= fold_file (argv[i++], width);

  if (have_read_stdin && fclose (stdin) == EOF)
    error (1, errno, "-");
  if (fclose (stdout) == EOF)
    error (1, errno, "write error");

  exit (errs);
}
/* Fold file FILENAME, or standard input if FILENAME is "-",
   to stdout, with maximum line length WIDTH.
   When `break_spaces' is set, a line that is too long is broken after
   its last blank if it has one.
   A single character that is wider than WIDTH by itself (a tab when
   WIDTH is less than 8, for instance) is given an output line of its
   own instead of being retried forever.
   Return 0 if successful, 1 if an error occurs.  */

int
fold_file (filename, width)
     char *filename;
     int width;
{
  FILE *istream;
  register int c;
  int column = 0;               /* Screen column where next char will go.  */
  int offset_out = 0;           /* Index in `line_out' for next char.  */
  /* The output line buffer is kept, and reused, across calls.  */
  static char *line_out = NULL;
  static size_t allocated_out = 0;

  if (!strcmp (filename, "-"))
    {
      istream = stdin;
      have_read_stdin = 1;
    }
  else
    istream = fopen (filename, "r");

  if (istream == NULL)
    {
      error (0, errno, "%s", filename);
      return 1;
    }

  while ((c = getc (istream)) != EOF)
    {
      /* Keep room for this character and for a newline after it.  */
      if (offset_out + 1 >= allocated_out)
        {
          allocated_out += 1024;
          line_out = xrealloc (line_out, allocated_out);
        }

      if (c == '\n')
        {
          line_out[offset_out++] = c;
          fwrite (line_out, sizeof (char), offset_out, stdout);
          column = offset_out = 0;
          continue;
        }

    rescan:
      column = adjust_column (column, c);

      if (column > width)
        {
          /* This character would make the line too long.
             Print the line plus a newline, and make this character
             start the next line.  */
          if (break_spaces)
            {
              /* Look for the last blank.  */
              int logical_end;

              for (logical_end = offset_out - 1; logical_end >= 0;
                   logical_end--)
                if (isblank (line_out[logical_end]))
                  break;
              if (logical_end >= 0)
                {
                  int i;

                  /* Found a blank.  Don't output the part after it.  */
                  logical_end++;
                  fwrite (line_out, sizeof (char), logical_end, stdout);
                  putchar ('\n');
                  /* Move the remainder to the beginning of the next line.
                     The areas being copied here might overlap.  */
                  bcopy (line_out + logical_end, line_out,
                         offset_out - logical_end);
                  offset_out -= logical_end;
                  for (column = i = 0; i < offset_out; i++)
                    column = adjust_column (column, line_out[i]);
                  goto rescan;
                }
            }

          /* Nothing is pending, so this character alone is too wide.
             Breaking the line again could never make it fit; accept
             the character so that the loop makes progress.  */
          if (offset_out == 0)
            {
              line_out[offset_out++] = c;
              continue;
            }

          line_out[offset_out++] = '\n';
          fwrite (line_out, sizeof (char), offset_out, stdout);
          column = offset_out = 0;
          goto rescan;
        }

      line_out[offset_out++] = c;
    }

  /* Flush a final line that has no terminating newline.  */
  if (offset_out)
    fwrite (line_out, sizeof (char), offset_out, stdout);

  if (ferror (istream))
    {
      error (0, errno, "%s", filename);
      if (strcmp (filename, "-"))
        fclose (istream);
      return 1;
    }
  /* The standard input is closed once, by the caller.  */
  if (strcmp (filename, "-") && fclose (istream) == EOF)
    {
      error (0, errno, "%s", filename);
      return 1;
    }

  if (ferror (stdout))
    {
      error (0, errno, "write error");
      return 1;
    }

  return 0;
}
/* Return the column the cursor is in after character C is printed
   at column COLUMN.  Columns are numbered from 0.
   When `count_bytes' is set every character advances one column.
   Otherwise a backspace moves back one column (never below 0), a
   carriage return moves to column 0, a tab moves to the next multiple
   of 8, and anything else advances one column.  */

int
adjust_column (column, c)
     int column;
     char c;
{
  if (count_bytes)
    return column + 1;

  switch (c)
    {
    case '\b':
      return column > 0 ? column - 1 : column;

    case '\r':
      return 0;

    case '\t':
      return column + 8 - column % 8;

    default:
      /* Whether or not the character is printable.  */
      return column + 1;
    }
}

380
src/head.c Normal file
View File

@@ -0,0 +1,380 @@
/* head -- output first part of file(s)
Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Options:
-b Print first N 512-byte blocks.
-c, --bytes=N[bkm] Print first N bytes
[or 512-byte blocks, kilobytes, or megabytes].
-k Print first N kilobytes.
-N, -l, -n, --lines=N Print first N lines.
-m Print first N megabytes.
-q, --quiet, --silent Never print filename headers.
-v, --verbose Always print filename headers.
Reads from standard input if no files are given or when a filename of
``-'' is encountered.
By default, filename headers are printed only if more than one file
is given.
By default, prints the first 10 lines (head -n 10).
David MacKenzie <djm@ai.mit.edu> */
#include <stdio.h>
#include <getopt.h>
#include <ctype.h>
#include <sys/types.h>
#include "system.h"
#ifdef isascii
#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
#else
#define ISDIGIT(c) (isdigit ((c)))
#endif
/* Number of lines/chars/blocks to head. */
#define DEFAULT_NUMBER 10
/* Size of atomic reads. */
#define BUFSIZE (512 * 8)
/* Number of bytes per item we are printing.
If 0, head in lines. */
int unit_size;
/* If nonzero, print filename headers. */
int print_headers;
/* When to print the filename banners. */
enum header_mode
{
multiple_files, always, never
};
int head ();
int head_bytes ();
int head_file ();
int head_lines ();
long atou ();
void error ();
void parse_unit ();
void usage ();
void write_header ();
void xwrite ();
/* The name this program was run with. */
char *program_name;
/* Have we ever read standard input? */
int have_read_stdin;
/* Long options passed to getopt_long in main. Each entry maps a long
name to the short option character in its last field; the second
field is 1 for options that take an argument and 0 otherwise.
`--quiet' and `--silent' are synonyms for -q. */
struct option long_options[] =
{
{"bytes", 1, NULL, 'c'},
{"lines", 1, NULL, 'n'},
{"quiet", 0, NULL, 'q'},
{"silent", 0, NULL, 'q'},
{"verbose", 0, NULL, 'v'},
{NULL, 0, NULL, 0}
};
/* Parse the options (the old `-N[bcklmqv]' form first, then the
getopt_long form), print the head of every file operand, or of
standard input when there are none, and exit with the OR of the
per-file statuses. */
void
main (argc, argv)
int argc;
char **argv;
{
enum header_mode header_mode = multiple_files;
int exit_status = 0;
long number = -1; /* Number of items to print (-1 if undef.). */
int c; /* Option character. */
program_name = argv[0];
have_read_stdin = 0;
unit_size = 0;
print_headers = 0;
if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
{
/* Old option syntax; a dash, one or more digits, and zero or
more option letters. Move past the number. */
for (number = 0, ++argv[1]; ISDIGIT (*argv[1]); ++argv[1])
number = number * 10 + *argv[1] - '0';
/* Parse any appended option letters. */
while (*argv[1])
{
switch (*argv[1])
{
case 'b':
unit_size = 512;
break;
case 'c':
unit_size = 1;
break;
case 'k':
unit_size = 1024;
break;
case 'l':
unit_size = 0;
break;
case 'm':
unit_size = 1048576;
break;
case 'q':
header_mode = never;
break;
case 'v':
header_mode = always;
break;
default:
error (0, 0, "unrecognized option `-%c'", *argv[1]);
usage ();
}
++argv[1];
}
/* Make the options we just parsed invisible to getopt. */
argv[1] = argv[0];
argv++;
argc--;
}
while ((c = getopt_long (argc, argv, "c:n:qv", long_options, (int *) 0))
!= EOF)
{
switch (c)
{
case 'c':
/* Count bytes by default; parse_unit raises unit_size and strips
the letter when the argument ends in b, k or m. */
unit_size = 1;
parse_unit (optarg);
goto getnum;
case 'n':
unit_size = 0;
/* Shared by -c and -n: convert the (possibly trimmed) argument. */
getnum:
number = atou (optarg);
if (number == -1)
error (1, 0, "invalid number `%s'", optarg);
break;
case 'q':
header_mode = never;
break;
case 'v':
header_mode = always;
break;
default:
usage ();
}
}
if (number == -1)
number = DEFAULT_NUMBER;
/* Convert a count of blocks, kilobytes or megabytes into bytes. */
if (unit_size > 1)
number *= unit_size;
/* Banners are printed when forced with -v, or by default when at
least two file operands remain. */
if (header_mode == always
|| (header_mode == multiple_files && optind < argc - 1))
print_headers = 1;
/* No file operands: read standard input. */
if (optind == argc)
exit_status |= head_file ("-", number);
for (; optind < argc; ++optind)
exit_status |= head_file (argv[optind], number);
if (have_read_stdin && close (0) < 0)
error (1, errno, "-");
/* A failure to close standard output is reported as a write error. */
if (close (1) < 0)
error (1, errno, "write error");
exit (exit_status);
}
/* Print the first NUMBER items of the file named FILENAME, where `-'
   means standard input.  A banner is written first when `print_headers'
   is set.  Return 0 on success, 1 if the file could not be opened,
   read or closed.  */
int
head_file (filename, number)
     char *filename;
     long number;
{
  int desc;
  int status;

  if (strcmp (filename, "-") == 0)
    {
      have_read_stdin = 1;
      if (print_headers)
        write_header ("standard input");
      return head ("standard input", 0, number);
    }

  desc = open (filename, O_RDONLY);
  if (desc < 0)
    {
      error (0, errno, "%s", filename);
      return 1;
    }

  if (print_headers)
    write_header (filename);
  status = head (filename, desc, number);
  if (close (desc) != 0)
    {
      error (0, errno, "%s", filename);
      return 1;
    }
  return status;
}
/* Write the banner `==> FILENAME <==' to standard output.  Every banner
   after the first one is preceded by an empty line.  */
void
write_header (filename)
     char *filename;
{
  static int need_blank_line = 0;

  if (need_blank_line)
    xwrite (1, "\n==> ", 5);
  else
    {
      need_blank_line = 1;
      xwrite (1, "==> ", 4);
    }
  xwrite (1, filename, strlen (filename));
  xwrite (1, " <==\n", 5);
}
/* Print the first NUMBER items read from descriptor FD.  Items are
   bytes when `unit_size' is nonzero and lines otherwise.  FILENAME is
   only used in error messages.  Return the status of the helper.  */
int
head (filename, fd, number)
     char *filename;
     int fd;
     long number;
{
  return unit_size
    ? head_bytes (filename, fd, number)
    : head_lines (filename, fd, number);
}
/* Copy up to BYTES_TO_WRITE bytes from descriptor FD to standard
   output, stopping early at end of file.  Return 0 on success, or 1
   after reporting a read error against FILENAME.  */
int
head_bytes (filename, fd, bytes_to_write)
     char *filename;
     int fd;
     long bytes_to_write;
{
  char buffer[BUFSIZE];
  int got;

  for (; bytes_to_write != 0; bytes_to_write -= got)
    {
      got = read (fd, buffer, BUFSIZE);
      if (got == -1)
        {
          error (0, errno, "%s", filename);
          return 1;
        }
      if (got == 0)
        return 0;
      /* Do not emit more of the final buffer than was asked for.  */
      if (got > bytes_to_write)
        got = bytes_to_write;
      xwrite (1, buffer, got);
    }
  return 0;
}
/* Copy up to LINES_TO_WRITE newline-terminated lines from descriptor FD
   to standard output, stopping early at end of file.  Return 0 on
   success, or 1 after reporting a read error against FILENAME.  */
int
head_lines (filename, fd, lines_to_write)
     char *filename;
     int fd;
     long lines_to_write;
{
  char buffer[BUFSIZE];
  int nread;
  int keep;			/* Bytes of `buffer' to output.  */

  while (lines_to_write != 0)
    {
      nread = read (fd, buffer, BUFSIZE);
      if (nread == -1)
        {
          error (0, errno, "%s", filename);
          return 1;
        }
      if (nread == 0)
        break;

      /* Scan forward until the buffer ends or the last wanted newline
         has been included.  */
      for (keep = 0; keep < nread;)
        {
          char ch = buffer[keep];

          ++keep;
          if (ch == '\n')
            {
              --lines_to_write;
              if (lines_to_write == 0)
                break;
            }
        }
      xwrite (1, buffer, keep);
    }
  return 0;
}
/* If STR ends in one of the multiplier letters `b', `k' or `m', set
   `unit_size' to 512, 1024 or 1048576 respectively and remove the
   letter from STR.  Otherwise leave both STR and `unit_size' alone.  */
void
parse_unit (str)
     char *str;
{
  char *last;
  int multiplier;

  if (*str == '\0')
    return;

  last = str + strlen (str) - 1;
  if (*last == 'b')
    multiplier = 512;
  else if (*last == 'k')
    multiplier = 1024;
  else if (*last == 'm')
    multiplier = 1048576;
  else
    return;

  unit_size = multiplier;
  *last = '\0';
}
/* Convert STR, a nonempty string of ASCII decimal digits, into a
   nonnegative long.
   Return -1 if STR is empty, contains any character that is not a
   digit, or denotes a value too large to be held in a long.  */
long
atou (str)
     char *str;
{
  /* Largest value of a long, computed here so that no extra header
     is needed.  */
  long limit = (long) (~(unsigned long) 0 >> 1);
  long value = 0;

  /* An empty string (e.g. what is left of `-c k' once the unit letter
     has been stripped) is not a number.  */
  if (*str == '\0')
    return -1;

  for (; *str; ++str)
    {
      int digit;

      if (*str < '0' || *str > '9')
        return -1;
      digit = *str - '0';
      /* Refuse to overflow rather than wrapping to a bogus count.  */
      if (value > (limit - digit) / 10)
        return -1;
      value = value * 10 + digit;
    }
  return value;
}
/* Print a summary of the accepted command line forms on standard
error and exit with status 1. Does not return. */
void
usage ()
{
fprintf (stderr, "\
Usage: %s [-c N[bkm]] [-n N] [-qv] [--bytes=N[bkm]] [--lines=N]\n\
[--quiet] [--silent] [--verbose] [file...]\n\
%s [-Nbcklmqv] [file...]\n", program_name, program_name);
exit (1);
}

690
src/join.c Normal file
View File

@@ -0,0 +1,690 @@
/* join - join lines of two files on a common field
Copyright (C) 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Written by Mike Haertel, mike@gnu.ai.mit.edu. */
#define _GNU_SOURCE
#include <ctype.h>
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
#include <stdio.h>
#include <sys/types.h>
#include <getopt.h>
#include "system.h"
#ifdef isascii
#define ISSPACE(c) (isascii(c) && isspace(c))
#define ISDIGIT(c) (isascii(c) && isdigit(c))
#else
#define ISSPACE(c) isspace(c)
#define ISDIGIT(c) isdigit(c)
#endif
char *xmalloc ();
char *xrealloc ();
void error ();
static void usage ();
#define min(A, B) ((A) < (B) ? (A) : (B))
/* An element of the singly linked list describing the format of each
output line. Elements are created by add_field and printed in
list order by prjoin. */
struct outlist
{
int file; /* File to take field from (1 or 2). */
int field; /* Field number to print, counted from 1. */
struct outlist *next; /* Next element, or NULL at the end of the list. */
};
/* A field of a line. The field's length is `lim - beg'; xfields
sets both pointers to positions inside the owning line's text. */
struct field
{
char *beg; /* First character in field. */
char *lim; /* Character after last character in field. */
};
/* A line read from an input file. Newlines are not stored.
get_line allocates the text and xfields allocates `fields';
freeline releases both. */
struct line
{
char *beg; /* First character in line. */
char *lim; /* Character after last character in line. */
int nfields; /* Number of elements in `fields'. */
struct field *fields; /* The fields of the line, in input order. */
};
/* One or more consecutive lines read from a file that all have the
same join field value. Set up by initseq, extended by getseq and
released by delseq. */
struct seq
{
int count; /* Elements used in `lines'. */
int alloc; /* Elements allocated in `lines'. */
struct line *lines; /* Array of `alloc' lines; getseq doubles it when full. */
};
/* If nonzero, print unpairable lines in file 1 or 2. */
static int print_unpairables_1, print_unpairables_2;
/* If nonzero, print pairable lines. */
static int print_pairables;
/* Empty output field filler. */
static char *empty_filler;
/* Field to join on. */
static int join_field_1, join_field_2;
/* List of fields to print. */
struct outlist *outlist;
/* Last element in `outlist', where a new element can be added. */
struct outlist *outlist_end;
/* Tab character separating fields; if this is NUL fields are separated
by any nonempty string of white space, otherwise by exactly one
tab character. */
static char tab;
/* The name this program was run with. */
char *program_name;
/* Fill in the `fields' structure in LINE. */
static void
xfields (line)
struct line *line;
{
static int nfields = 2;
int i;
register char *ptr, *lim;
line->fields = (struct field *) xmalloc (nfields * sizeof (struct field));
ptr = line->beg;
lim = line->lim;
for (i = 0; ptr < lim; ++i)
{
if (i == nfields)
{
nfields *= 2;
line->fields = (struct field *)
xrealloc ((char *) line->fields, nfields * sizeof (struct field));
}
if (tab)
{
line->fields[i].beg = ptr;
while (ptr < lim && *ptr != tab)
++ptr;
line->fields[i].lim = ptr;
if (ptr < lim)
++ptr;
}
else
{
line->fields[i].beg = ptr;
while (ptr < lim && !ISSPACE (*ptr))
++ptr;
line->fields[i].lim = ptr;
while (ptr < lim && ISSPACE (*ptr))
++ptr;
}
}
line->nfields = i;
}
/* Read one line from FP, store its text (without the newline) in LINE
   and split it into fields with xfields.
   Return 0 if no line could be read because of end of file,
   1 otherwise.  */
static int
get_line (fp, line)
     FILE *fp;
     struct line *line;
{
  /* Initial buffer size; kept across calls and doubled whenever a
     line turns out to be longer.  */
  static int capacity = 80;
  char *text;
  int len = 0;
  int ch;

  if (feof (fp))
    return 0;

  text = xmalloc (capacity);
  for (;;)
    {
      ch = getc (fp);
      if (ch == EOF || ch == '\n')
        break;
      if (len == capacity)
        {
          capacity *= 2;
          text = xrealloc (text, capacity);
        }
      text[len++] = ch;
    }

  /* End of file before any character was read: there is no line.  */
  if (len == 0 && ch == EOF)
    {
      free (text);
      return 0;
    }

  line->beg = text;
  line->lim = text + len;
  xfields (line);
  return 1;
}
/* Release the field array and the text owned by LINE.  */
static void
freeline (line)
     struct line *line;
{
  char *text = line->beg;

  free ((char *) line->fields);
  free (text);
}
/* Make SEQ an empty sequence with room for one line.  */
static void
initseq (seq)
     struct seq *seq;
{
  seq->lines = (struct line *) xmalloc (sizeof (struct line));
  seq->alloc = 1;
  seq->count = 0;
}
/* Read a line from FP into the next free element of SEQ, doubling the
   array first if it is full.  Return 1 if a line was added, 0 at end
   of file (SEQ->count is then left unchanged).  */
static int
getseq (fp, seq)
     FILE *fp;
     struct seq *seq;
{
  struct line *slot;

  if (seq->alloc == seq->count)
    {
      seq->alloc *= 2;
      seq->lines = (struct line *)
        xrealloc ((char *) seq->lines, seq->alloc * sizeof (struct line));
    }

  slot = seq->lines + seq->count;
  if (!get_line (fp, slot))
    return 0;

  seq->count++;
  return 1;
}
/* Release the line array of SEQ. The lines stored in the array are
not freed here; callers release them with freeline. */
static void
delseq (seq)
struct seq *seq;
{
free ((char *) seq->lines);
}
/* Return <0 if the join field in LINE1 compares less than the one in LINE2;
>0 if it compares greater; 0 if it compares equal. */
static int
keycmp (line1, line2)
struct line *line1;
struct line *line2;
{
char *beg1, *beg2; /* Start of field to compare in each file. */
int len1, len2; /* Length of fields to compare. */
int diff;
if (join_field_1 < line1->nfields)
{
beg1 = line1->fields[join_field_1].beg;
len1 = line1->fields[join_field_1].lim
- line1->fields[join_field_1].beg;
}
else
{
beg1 = NULL;
len1 = 0;
}
if (join_field_2 < line2->nfields)
{
beg2 = line2->fields[join_field_2].beg;
len2 = line2->fields[join_field_2].lim
- line2->fields[join_field_2].beg;
}
else
{
beg2 = NULL;
len2 = 0;
}
if (len1 == 0)
return len2 == 0 ? 0 : -1;
if (len2 == 0)
return 1;
diff = memcmp (beg1, beg2, min (len1, len2));
if (diff)
return diff;
return len1 - len2;
}
/* Print field N of LINE if it exists and is nonempty, otherwise
`empty_filler' if it is nonempty. */
static void
prfield (n, line)
int n;
struct line *line;
{
int len;
if (n < line->nfields)
{
len = line->fields[n].lim - line->fields[n].beg;
if (len)
fwrite (line->fields[n].beg, 1, len, stdout);
else if (empty_filler)
fputs (empty_filler, stdout);
}
else if (empty_filler)
fputs (empty_filler, stdout);
}
/* Print LINE on standard output, with its fields separated by `tab'
   (or by a single space when `tab' is NUL) and followed by a newline.
   A line without any fields, i.e. an empty input line, is printed as
   an empty line instead of being dropped from the output.  */
static void
prline (line)
     struct line *line;
{
  int i;

  for (i = 0; i < line->nfields; ++i)
    {
      if (i > 0)
        putchar (tab ? tab : ' ');
      prfield (i, line);
    }
  /* Emitted unconditionally so that a field-less line still produces
     its newline.  */
  putchar ('\n');
}
/* Print the join of LINE1 and LINE2 as one output line.
   If an output list was given with -o, print exactly the listed
   fields in list order.  Otherwise print the join field of LINE1,
   then the remaining fields of LINE1, then the fields of LINE2 other
   than its join field.  Fields are separated by `tab', or by a space
   when `tab' is NUL.  */
static void
prjoin (line1, line2)
     struct line *line1;
     struct line *line2;
{
  int sep = tab ? tab : ' ';
  struct outlist *o;
  int i;

  if (outlist)
    {
      for (o = outlist; o; o = o->next)
        {
          if (o != outlist)
            putchar (sep);
          prfield (o->field - 1, o->file == 1 ? line1 : line2);
        }
      putchar ('\n');
      return;
    }

  prfield (join_field_1, line1);

  for (i = 0; i < line1->nfields; ++i)
    if (i != join_field_1)
      {
        putchar (sep);
        prfield (i, line1);
      }

  for (i = 0; i < line2->nfields; ++i)
    if (i != join_field_2)
      {
        putchar (sep);
        prfield (i, line2);
      }

  putchar ('\n');
}
/* Print the join of the files in FP1 and FP2.
Each file has a `struct seq' whose element 0 is the current line.
The main loop compares the two current lines with keycmp: the file
with the smaller key advances by one line (printing the old line
if unpairable lines were requested for that file); on equal keys
all lines sharing that key are collected from both files and every
combination is printed with prjoin. */
static void
join (fp1, fp2)
FILE *fp1;
FILE *fp2;
{
struct seq seq1, seq2;
struct line line;
int diff, i, j, eof1, eof2;
/* Read the first line of each file. */
initseq (&seq1);
getseq (fp1, &seq1);
initseq (&seq2);
getseq (fp2, &seq2);
while (seq1.count && seq2.count)
{
diff = keycmp (&seq1.lines[0], &seq2.lines[0]);
if (diff < 0)
{
/* The line from file 1 has no partner: drop it and read the next. */
if (print_unpairables_1)
prline (&seq1.lines[0]);
freeline (&seq1.lines[0]);
seq1.count = 0;
getseq (fp1, &seq1);
continue;
}
if (diff > 0)
{
/* The line from file 2 has no partner: drop it and read the next. */
if (print_unpairables_2)
prline (&seq2.lines[0]);
freeline (&seq2.lines[0]);
seq2.count = 0;
getseq (fp2, &seq2);
continue;
}
/* Keep reading lines from file1 as long as they continue to
match the current line from file2. */
eof1 = 0;
do
if (!getseq (fp1, &seq1))
{
/* No look-ahead line was stored at end of file. Bump the count
anyway so that `count - 1' is the number of matching lines,
exactly as when the loop stops on a non-matching line. */
eof1 = 1;
++seq1.count;
break;
}
while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0]));
/* Keep reading lines from file2 as long as they continue to
match the current line from file1. */
eof2 = 0;
do
if (!getseq (fp2, &seq2))
{
/* Same end-of-file adjustment as for file 1 above. */
eof2 = 1;
++seq2.count;
break;
}
while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1]));
/* The last element of each sequence is the look-ahead line (or
nothing at end of file), so only the first `count - 1' lines
take part in the join. */
if (print_pairables)
{
for (i = 0; i < seq1.count - 1; ++i)
for (j = 0; j < seq2.count - 1; ++j)
prjoin (&seq1.lines[i], &seq2.lines[j]);
}
for (i = 0; i < seq1.count - 1; ++i)
freeline (&seq1.lines[i]);
/* Carry the look-ahead line over as the new current line. */
if (!eof1)
{
seq1.lines[0] = seq1.lines[seq1.count - 1];
seq1.count = 1;
}
else
seq1.count = 0;
for (i = 0; i < seq2.count - 1; ++i)
freeline (&seq2.lines[i]);
if (!eof2)
{
seq2.lines[0] = seq2.lines[seq2.count - 1];
seq2.count = 1;
}
else
seq2.count = 0;
}
/* One file is exhausted. If unpairable lines were requested for the
other file, print its current line and everything that follows. */
if (print_unpairables_1 && seq1.count)
{
prline (&seq1.lines[0]);
freeline (&seq1.lines[0]);
while (get_line (fp1, &line))
{
prline (&line);
freeline (&line);
}
}
if (print_unpairables_2 && seq2.count)
{
prline (&seq2.lines[0]);
freeline (&seq2.lines[0]);
while (get_line (fp2, &line))
{
prline (&line);
freeline (&line);
}
}
delseq (&seq1);
delseq (&seq2);
}
/* Append a spec for field FIELD (counted from 1) of file FILE (1 or 2)
   to the end of `outlist', so that specs keep the order in which they
   were given.  Return 1 if the spec was added, 0 if either argument is
   out of range.  */
static int
add_field (file, field)
     int file;
     int field;
{
  struct outlist *node;

  if (field < 1 || (file != 1 && file != 2))
    return 0;

  node = (struct outlist *) xmalloc (sizeof (struct outlist));
  node->next = NULL;
  node->field = field;
  node->file = file;

  if (outlist != NULL)
    outlist_end->next = node;
  else
    outlist = node;
  outlist_end = node;
  return 1;
}
/* Add the field specs in STR to `outlist'.  Specs have the form
   FILE.FIELD and are separated by commas or blanks.
   Return the number of specs added; return 0 as soon as a character
   that cannot be part of a spec list is found.  */
static int
add_field_list (str)
     char *str;
{
  int count = 0;
  int file = -1, field = -1;
  int *target = &file;		/* Number that digits accumulate into.  */
  char c;

  while ((c = *str++) != '\0')
    {
      if (c == ',' || isblank (c))
        {
          /* End of one spec: record it and start afresh.  */
          count += add_field (file, field);
          file = field = -1;
          target = &file;
        }
      else if (c == '.')
        target = &field;
      else if (ISDIGIT (c))
        {
          if (*target == -1)
            *target = 0;
          *target = *target * 10 + c - '0';
        }
      else
        return 0;
    }

  /* The last spec has no separator after it.  */
  return count + add_field (file, field);
}
/* When using getopt_long_only, no long option can start with
a character that is a short option.
These entries let main accept `-j FIELD', `-j1 FIELD' and
`-j2 FIELD'; they are reported to main as 'j', '1' and '2'. */
static struct option longopts[] =
{
{"j", 1, NULL, 'j'},
{"j1", 1, NULL, '1'},
{"j2", 1, NULL, '2'},
{NULL, 0, NULL, 0}
};
/* Parse the command line, open the two input files (`-' means standard
   input), print their join on standard output and exit.
   Invalid option arguments exit with status 2, a bad command line form
   goes through usage, and file or write errors exit with status 1.  */
void
main (argc, argv)
     int argc;
     char *argv[];
{
  char *names[2];
  FILE *fp1, *fp2;
  int optc, prev_optc = 0, nfiles, val;

  program_name = argv[0];
  nfiles = 0;
  print_pairables = 1;

  /* Non-option arguments are delivered in order as option code 1
     (see `case 1' below), which is what allows a field list given to
     -o to continue into the following arguments.  */
  while ((optc = getopt_long_only (argc, argv, "-a:e:1:2:o:t:v:", longopts,
                                   (int *) 0)) != EOF)
    {
      switch (optc)
        {
        case 'a':
          val = atoi (optarg);
          if (val == 1)
            print_unpairables_1 = 1;
          else if (val == 2)
            print_unpairables_2 = 1;
          else
            error (2, 0, "invalid file number for `-a'");
          break;

        case 'e':
          empty_filler = optarg;
          break;

        case '1':
          val = atoi (optarg);
          if (val <= 0)
            error (2, 0, "invalid field number for `-1'");
          join_field_1 = val - 1;
          break;

        case '2':
          val = atoi (optarg);
          if (val <= 0)
            error (2, 0, "invalid field number for `-2'");
          join_field_2 = val - 1;
          break;

        case 'j':
          val = atoi (optarg);
          if (val <= 0)
            error (2, 0, "invalid field number for `-j'");
          join_field_1 = join_field_2 = val - 1;
          break;

        case 'o':
          if (add_field_list (optarg) == 0)
            error (2, 0, "invalid field list for `-o'");
          break;

        case 't':
          tab = *optarg;
          break;

        case 'v':
          /* Like -a, but the paired lines are suppressed as well.  */
          val = atoi (optarg);
          if (val == 1)
            print_unpairables_1 = 1;
          else if (val == 2)
            print_unpairables_2 = 1;
          else
            error (2, 0, "invalid file number for `-v'");
          print_pairables = 0;
          break;

        case 1:		/* Non-option argument.  */
          if (prev_optc == 'o')
            {
              /* Might be continuation of args to -o.  */
              if (add_field_list (optarg) > 0)
                continue;	/* Don't change `prev_optc'.  */
            }
          if (nfiles > 1)
            usage ();
          names[nfiles++] = optarg;
          break;

        case '?':
          usage ();
        }
      prev_optc = optc;
    }

  if (nfiles != 2)
    usage ();

  fp1 = strcmp (names[0], "-") ? fopen (names[0], "r") : stdin;
  if (!fp1)
    error (1, errno, "%s", names[0]);
  fp2 = strcmp (names[1], "-") ? fopen (names[1], "r") : stdin;
  if (!fp2)
    error (1, errno, "%s", names[1]);
  /* This is a usage problem, not a failed system call, so no errno
     text must be appended to the message.  */
  if (fp1 == fp2)
    error (1, 0, "both files cannot be standard input");

  join (fp1, fp2);

  if ((fp1 == stdin || fp2 == stdin) && fclose (stdin) == EOF)
    error (1, errno, "-");
  if (ferror (stdout) || fclose (stdout) == EOF)
    error (1, 0, "write error");

  exit (0);
}
/* Print a summary of the accepted command line form on standard
error and exit with status 1. Does not return. */
static void
usage ()
{
fprintf (stderr, "\
Usage: %s [-a 1|2] [-v 1|2] [-e empty-string] [-o field-list...] [-t char]\n\
[-j[1|2] field] [-1 field] [-2 field] file1 file2\n",
program_name);
exit (1);
}

546
src/nl.c Normal file
View File

@@ -0,0 +1,546 @@
/* nl -- number lines of files
Copyright (C) 1989, 1992 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Scott Bartram (nancy!scott@uunet.uu.net)
Revised by David MacKenzie (djm@ai.mit.edu) */
#include <stdio.h>
#include <sys/types.h>
#include <getopt.h>
#include <regex.h>
#include "linebuffer.h"
#include "system.h"
#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif
/* Line-number formats, selected in main by the argument of -n:
`rn', `rz' and `ln' respectively. build_print_fmt turns the chosen
value into a printf format. */
enum number_format
{
FORMAT_RIGHT_NOLZ, /* Right justified, no leading zeroes. */
FORMAT_RIGHT_LZ, /* Right justified, leading zeroes. */
FORMAT_LEFT /* Left justified, no leading zeroes. */
};
/* Default section delimiter characters. */
#define DEFAULT_SECTION_DELIMITERS "\\:"
/* Types of input lines: either one of the section delimiters,
or text to output. check_section classifies each input line as one
of these, and process_file dispatches on the result. */
enum section
{
Header, Body, Footer, Text
};
/* Format of body lines (-b). */
char *body_type = "t";
/* Format of header lines (-h). */
char *header_type = "n";
/* Format of footer lines (-f). */
char *footer_type = "n";
/* Format currently being used (body, header, or footer). */
char *current_type;
/* Regex for body lines to number (-bp). */
struct re_pattern_buffer body_regex;
/* Regex for header lines to number (-hp). */
struct re_pattern_buffer header_regex;
/* Regex for footer lines to number (-fp). */
struct re_pattern_buffer footer_regex;
/* Pointer to current regex, if any. */
struct re_pattern_buffer *current_regex = NULL;
/* Separator string to print after line number (-s). */
char *separator_str = "\t";
/* Input section delimiter string (-d). */
char *section_del = DEFAULT_SECTION_DELIMITERS;
/* Header delimiter string. */
char *header_del = NULL;
/* Header section delimiter length. */
int header_del_len;
/* Body delimiter string. */
char *body_del = NULL;
/* Body section delimiter length. */
int body_del_len;
/* Footer delimiter string. */
char *footer_del = NULL;
/* Footer section delimiter length. */
int footer_del_len;
/* Input buffer. */
struct linebuffer line_buf;
/* printf format string for line number. */
char *print_fmt;
/* printf format string for unnumbered lines. */
char *print_no_line_fmt = NULL;
/* Starting line number on each page (-v). */
int page_start = 1;
/* Line number increment (-i). */
int page_incr = 1;
/* If TRUE, reset line number at start of each page (-p). */
int reset_numbers = TRUE;
/* Number of blank lines to consider to be one line for numbering (-l). */
int blank_join = 1;
/* Width of line numbers (-w). */
int lineno_width = 6;
/* Line number format (-n). */
enum number_format lineno_format = FORMAT_RIGHT_NOLZ;
/* Current print line number. */
int line_no;
/* The name this program was run with. */
char *program_name;
/* Nonzero if we have ever read standard input. */
int have_read_stdin;
enum section check_section ();
char *xmalloc ();
char *xrealloc ();
int build_type_arg ();
int nl_file ();
void usage ();
void process_file ();
void proc_header ();
void proc_body ();
void proc_footer ();
void proc_text ();
void print_lineno ();
void build_print_fmt ();
void error ();
/* Long options passed to getopt_long in main. Each entry maps a long
name to the short option character in its last field; the second
field is 1 for options that take an argument and 0 otherwise. */
struct option longopts[] =
{
{"header-numbering", 1, NULL, 'h'},
{"body-numbering", 1, NULL, 'b'},
{"footer-numbering", 1, NULL, 'f'},
{"first-page", 1, NULL, 'v'},
{"page-increment", 1, NULL, 'i'},
{"no-renumber", 0, NULL, 'p'},
{"join-blank-lines", 1, NULL, 'l'},
{"number-separator", 1, NULL, 's'},
{"number-width", 1, NULL, 'w'},
{"number-format", 1, NULL, 'n'},
{"section-delimiter", 1, NULL, 'd'},
{NULL, 0, NULL, 0}
};
/* Parse the options, build the section delimiter strings and the
output formats, then number every file operand (or standard input
when there are none). Exits with the OR of the per-file statuses,
or with 1 if standard input or standard output cannot be closed
cleanly. */
void
main (argc, argv)
int argc;
char **argv;
{
int c, exit_status = 0;
program_name = argv[0];
have_read_stdin = 0;
while ((c = getopt_long (argc, argv, "h:b:f:v:i:pl:s:w:n:d:", longopts,
(int *) 0)) != EOF)
{
switch (c)
{
case 'h':
if (build_type_arg (&header_type, &header_regex) != TRUE)
usage ();
break;
case 'b':
if (build_type_arg (&body_type, &body_regex) != TRUE)
usage ();
break;
case 'f':
if (build_type_arg (&footer_type, &footer_regex) != TRUE)
usage ();
break;
case 'v':
page_start = atoi (optarg);
break;
case 'i':
/* Increments below 1 are silently replaced by 1. */
page_incr = atoi (optarg);
if (page_incr < 1)
page_incr = 1;
break;
case 'p':
reset_numbers = FALSE;
break;
case 'l':
blank_join = atoi (optarg);
break;
case 's':
separator_str = optarg;
break;
case 'w':
/* Widths below 1 are silently replaced by 1. */
lineno_width = atoi (optarg);
if (lineno_width < 1)
lineno_width = 1;
break;
case 'n':
/* Only the first two characters of the argument are examined. */
switch (*optarg)
{
case 'l':
if (optarg[1] == 'n')
lineno_format = FORMAT_LEFT;
else
usage ();
break;
case 'r':
switch (optarg[1])
{
case 'n':
lineno_format = FORMAT_RIGHT_NOLZ;
break;
case 'z':
lineno_format = FORMAT_RIGHT_LZ;
break;
default:
usage ();
break;
}
break;
default:
usage ();
break;
}
break;
case 'd':
section_del = optarg;
break;
default:
usage ();
break;
}
}
/* Initialize the section delimiters: the header delimiter is three
copies of `section_del', the body delimiter two, the footer one. */
c = strlen (section_del);
header_del_len = c * 3;
header_del = xmalloc (header_del_len + 1);
strcat (strcat (strcpy (header_del, section_del), section_del), section_del);
body_del_len = c * 2;
body_del = xmalloc (body_del_len + 1);
strcat (strcpy (body_del, section_del), section_del);
footer_del_len = c;
footer_del = xmalloc (footer_del_len + 1);
strcpy (footer_del, section_del);
/* Initialize the input buffer. */
initbuffer (&line_buf);
/* Initialize the printf format for unnumbered lines: a run of spaces
as wide as a line number plus the separator. */
c = strlen (separator_str);
print_no_line_fmt = xmalloc (lineno_width + c + 1);
memset (print_no_line_fmt, ' ', lineno_width + c);
print_no_line_fmt[lineno_width + c] = '\0';
/* Input starts out in a body section. */
line_no = page_start;
current_type = body_type;
current_regex = &body_regex;
build_print_fmt ();
/* Main processing. */
if (optind == argc)
exit_status |= nl_file ("-");
else
for (; optind < argc; optind++)
exit_status |= nl_file (argv[optind]);
if (have_read_stdin && fclose (stdin) == EOF)
{
error (0, errno, "-");
exit_status = 1;
}
if (ferror (stdout) || fclose (stdout) == EOF)
error (1, 0, "write error");
exit (exit_status);
}
/* Process file FILE to standard output; `-' means standard input.
   Return 0 if successful, 1 if the file could not be opened, read or
   closed.  A stream opened here is closed on every path, including
   after a read error; standard input is left open for main to close.  */
int
nl_file (file)
     char *file;
{
  FILE *stream;
  int is_stdin = !strcmp (file, "-");

  if (is_stdin)
    {
      have_read_stdin = 1;
      stream = stdin;
    }
  else
    {
      stream = fopen (file, "r");
      if (stream == NULL)
        {
          error (0, errno, "%s", file);
          return 1;
        }
    }

  process_file (stream);

  if (ferror (stream))
    {
      error (0, errno, "%s", file);
      /* The failure has already been reported; just release the
         stream so that it is not leaked.  */
      if (!is_stdin)
        fclose (stream);
      return 1;
    }

  if (is_stdin)
    clearerr (stream);		/* Also clear EOF.  */
  else if (fclose (stream) == EOF)
    {
      error (0, errno, "%s", file);
      return 1;
    }
  return 0;
}
/* Read and process the file pointed to by FP. */
void
process_file (fp)
FILE *fp;
{
while (readline (&line_buf, fp))
{
switch ((int) check_section ())
{
case Header:
proc_header ();
break;
case Body:
proc_body ();
break;
case Footer:
proc_footer ();
break;
case Text:
proc_text ();
break;
}
}
}
/* Return the type of line in `line_buf': Header, Body or Footer if the
   whole line is exactly the corresponding delimiter string, and Text
   otherwise.
   A section delimiter shorter than two characters never matches, so
   such a delimiter simply turns section handling off.  */
enum section
check_section ()
{
  /* Quick rejection of ordinary text: every delimiter line starts with
     the first two characters of `section_del'.  The length test on the
     delimiter comes first so that the two-byte comparison never reads
     past the end of a shorter `section_del'.  */
  if (footer_del_len < 2
      || line_buf.length < 2
      || memcmp (line_buf.buffer, section_del, 2))
    return Text;

  if (line_buf.length == header_del_len
      && !memcmp (line_buf.buffer, header_del, header_del_len))
    return Header;
  if (line_buf.length == body_del_len
      && !memcmp (line_buf.buffer, body_del, body_del_len))
    return Body;
  if (line_buf.length == footer_del_len
      && !memcmp (line_buf.buffer, footer_del, footer_del_len))
    return Footer;
  return Text;
}
/* Switch to a header section: restart the line numbering when
   `reset_numbers' is set, select the header numbering style and
   regex, and output an empty line in place of the delimiter line.  */
void
proc_header ()
{
  if (reset_numbers)
    line_no = page_start;
  current_regex = &header_regex;
  current_type = header_type;
  putchar ('\n');
}
/* Switch to a body section: select the body numbering style and regex
   and output an empty line in place of the delimiter line.  */
void
proc_body ()
{
  current_regex = &body_regex;
  current_type = body_type;
  putchar ('\n');
}
/* Switch to a footer section: select the footer numbering style and
   regex and output an empty line in place of the delimiter line.  */
void
proc_footer ()
{
  current_regex = &footer_regex;
  current_type = footer_type;
  putchar ('\n');
}
/* Process a regular text line in `line_buf': print either a line number
   or the blank filler in front of it, according to the numbering style
   of the current section, then the line itself and a newline.
   Styles: `a' numbers all lines (counting `blank_join' consecutive
   empty lines as one when that is greater than 1), `t' numbers only
   nonempty lines, `n' numbers nothing, and `p' numbers the lines in
   which `current_regex' matches.  */
void
proc_text ()
{
  static int blank_lines = 0;	/* Consecutive blank lines so far.  */

  switch (*current_type)
    {
    case 'a':
      if (blank_join > 1)
        {
          if (line_buf.length || ++blank_lines == blank_join)
            {
              print_lineno ();
              blank_lines = 0;
            }
          else
            fputs (print_no_line_fmt, stdout);
        }
      else
        print_lineno ();
      break;

    case 't':
      if (line_buf.length)
        print_lineno ();
      else
        fputs (print_no_line_fmt, stdout);
      break;

    case 'n':
      fputs (print_no_line_fmt, stdout);
      break;

    case 'p':
      if (re_search (current_regex, line_buf.buffer, line_buf.length,
                     0, line_buf.length, (struct re_registers *) 0) < 0)
        fputs (print_no_line_fmt, stdout);
      else
        print_lineno ();
      break;
    }

  /* The filler is plain text, so it is written with fputs above rather
     than being handed to printf as a format string.  */
  fwrite (line_buf.buffer, sizeof (char), line_buf.length, stdout);
  putchar ('\n');
}
/* Print the current line number using `print_fmt', then advance it by
   `page_incr' for the next numbered line.  */
void
print_lineno ()
{
  int shown = line_no;

  line_no = shown + page_incr;
  printf (print_fmt, shown);
}
/* Build `print_fmt', the printf format used for numbered lines, from
   `lineno_format', `lineno_width' and `separator_str'.
   The result is a single `%d' conversion with the chosen flag and
   width, followed by the separator.  Any `%' in the separator is
   doubled so that printf outputs it literally instead of treating it
   as the start of another conversion.  */
void
build_print_fmt ()
{
  char *src;
  char *dst;

  /* Room for '%', one flag character, the decimal digits of an int
     (at most 3 per byte) plus a sign, 'd' and the terminating NUL;
     each separator character may need two bytes.  */
  print_fmt = xmalloc (2 * strlen (separator_str) + sizeof (int) * 3 + 5);

  switch (lineno_format)
    {
    case FORMAT_RIGHT_NOLZ:
      sprintf (print_fmt, "%%%dd", lineno_width);
      break;
    case FORMAT_RIGHT_LZ:
      sprintf (print_fmt, "%%0%dd", lineno_width);
      break;
    case FORMAT_LEFT:
      sprintf (print_fmt, "%%-%dd", lineno_width);
      break;
    }

  /* Append the separator, escaping percent signs.  */
  dst = print_fmt + strlen (print_fmt);
  for (src = separator_str; *src; ++src)
    {
      if (*src == '%')
        *dst++ = '%';
      *dst++ = *src;
    }
  *dst = '\0';
}
/* Set the numbering style *TYPEP from `optarg'.  The styles `a', `t'
   and `n' are stored as they are.  For `p' the rest of `optarg' is a
   regular expression, which is compiled into REGEXP; a pattern that
   does not compile is a fatal error.
   Return TRUE if the style letter was recognized, FALSE otherwise.  */
int
build_type_arg (typep, regexp)
     char **typep;
     struct re_pattern_buffer *regexp;
{
  char *errmsg;
  int patlen;
  char kind = *optarg;

  if (kind == 'a' || kind == 't' || kind == 'n')
    {
      *typep = optarg;
      return TRUE;
    }
  if (kind != 'p')
    return FALSE;

  /* The style keeps pointing at the `p'; the pattern starts right
     after it.  */
  *typep = optarg;
  ++optarg;
  patlen = strlen (optarg);

  regexp->allocated = patlen * 2;
  regexp->buffer = (unsigned char *) xmalloc (regexp->allocated);
  regexp->translate = NULL;
  regexp->fastmap = xmalloc (256);
  regexp->fastmap_accurate = 0;

  errmsg = re_compile_pattern (optarg, patlen, regexp);
  if (errmsg)
    error (1, 0, "%s", errmsg);
  return TRUE;
}
/* Print a usage message listing every short and long option on
standard error and quit with exit status 2. Does not return. */
void
usage ()
{
fprintf (stderr, "\
Usage: %s [-h header-style] [-b body-style] [-f footer-style] [-p] [-d cc]\n\
[-v start-number] [-i increment] [-l lines] [-s line-separator]\n\
[-w line-no-width] [-n {ln,rn,rz}] [--header-numbering=style]\n\
[--body-numbering=style] [--footer-numbering=style]\n\
[--first-page=number] [--page-increment=number] [--no-renumber]\n\
[--join-blank-lines=number] [--number-separator=string]\n\
[--number-width=number] [--number-format={ln,rn,rz}]\n\
[--section-delimiter=cc] [file...]\n",
program_name);
exit (2);
}

1697
src/od.c Normal file

File diff suppressed because it is too large Load Diff

458
src/paste.c Normal file
View File

@@ -0,0 +1,458 @@
/* paste - merge lines of files
Copyright (C) 1984 by David M. Ihnat
This program is a total rewrite of the Bell Laboratories Unix(Tm)
command of the same name, as of System V. It contains no proprietary
code, and therefore may be used without violation of any proprietary
agreements whatsoever. However, you will notice that the program is
copyrighted by me. This is to assure the program does *not* fall
into the public domain. Thus, I may specify just what I am now:
This program may be freely copied and distributed, provided this notice
remains; it may not be sold for profit without express written consent of
the author.
Please note that I recreated the behavior of the Unix(Tm) 'paste' command
as faithfully as possible, with minor exceptions; however,
I haven't run a full set of regression tests. Thus, the user of
this program accepts full responsibility for any effects or loss;
in particular, the author is not responsible for any losses,
explicit or incidental, that may be incurred through use of this program.
I ask that any bugs (and, if possible, fixes) be reported to me when
possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
The list of valid escape sequences has been expanded over the Unix
version, to include \b, \f, \r, and \v.
POSIX changes, bug fixes, long-named options, and cleanup
by David MacKenzie <djm@ai.mit.edu>.
Options:
--serial
-s Paste one file at a time rather than
one line from each file.
--delimiters=delim-list
-d delim-list Consecutively use the characters in
DELIM-LIST instead of tab to separate
merged lines. When DELIM-LIST is exhausted,
start again at its beginning.
A FILE of `-' means standard input.
If no FILEs are given, standard input is used. */
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
char *collapse_escapes ();
char *xmalloc ();
char *xrealloc ();
int paste_parallel ();
int paste_serial ();
void error ();
void usage ();
/* Indicates that no delimiter should be added in the current position. */
#define EMPTY_DELIM '\0'
/* Element marking a file that has reached EOF and been closed. */
#define CLOSED ((FILE *) -1)
/* Element marking end of list of open files. */
#define ENDLIST ((FILE *) -2)
/* Name this program was run with. */
char *program_name;
/* If nonzero, we have read standard input at some point. */
int have_read_stdin;
/* If nonzero, merge subsequent lines of each file rather than
corresponding lines from each file in parallel. */
int serial_merge;
/* The delimiters between lines of input files (used cyclically). */
char *delims;
/* A pointer to the character after the end of `delims'. */
char *delim_end;
/* Long options passed to getopt_long in main: `--serial' is the long
form of -s and takes no argument, `--delimiters' is the long form
of -d and takes one. */
struct option longopts[] =
{
{"serial", 0, 0, 's'},
{"delimiters", 1, 0, 'd'},
{0, 0, 0, 0}
};
/* Parse the options, expand the escapes in the delimiter list, paste
the file operands (standard input when there are none) either in
parallel or serially, and exit with the status of the paste. */
void
main (argc, argv)
int argc;
char **argv;
{
int optc, exit_status;
/* Writable arrays, because collapse_escapes rewrites the delimiter
list in place. */
char default_delims[2], zero_delims[3];
program_name = argv[0];
have_read_stdin = 0;
serial_merge = 0;
delims = default_delims;
strcpy (delims, "\t");
/* The two characters backslash and `0', which collapse_escapes turns
into EMPTY_DELIM. */
strcpy (zero_delims, "\\0");
while ((optc = getopt_long (argc, argv, "d:s", longopts, (int *) 0))
!= EOF)
{
switch (optc)
{
case 'd':
/* Delimiter character(s). An empty list is treated as `\0',
i.e. no delimiter at all. */
if (optarg[0] == '\0')
optarg = zero_delims;
delims = optarg;
break;
case 's':
serial_merge++;
break;
default:
usage ();
}
}
/* No file operands: supply `-' by storing it in the argv[argc] slot
and counting it as an argument. */
if (optind == argc)
argv[argc++] = "-";
delim_end = collapse_escapes (delims);
if (!serial_merge)
exit_status = paste_parallel (argc - optind, &argv[optind]);
else
exit_status = paste_serial (argc - optind, &argv[optind]);
if (have_read_stdin && fclose (stdin) == EOF)
error (1, errno, "-");
if (ferror (stdout) || fclose (stdout) == EOF)
error (1, errno, "write error");
exit (exit_status);
}
/* Replace backslash representations of special characters in
   STRPTR with their actual values, rewriting the string in place.
   Recognized escapes are \0 (no delimiter), \b, \f, \n, \r, \t and \v;
   a backslash before any other character stands for that character.
   A backslash that is the last character of the string has nothing to
   escape and is kept as an ordinary backslash.
   Return a pointer to the character after the new end of STRPTR.
   No terminating NUL is stored at that position.  */
char *
collapse_escapes (strptr)
     char *strptr;
{
  register char *strout;

  strout = strptr;		/* Output never gets ahead of input.  */
  while (*strptr)
    {
      if (*strptr != '\\')	/* Not an escape: just transfer it.  */
        {
          *strout++ = *strptr++;
          continue;
        }

      /* A trailing backslash: keep it and stop here, instead of
         stepping over the terminating NUL and scanning on past the
         end of the string.  */
      if (strptr[1] == '\0')
        {
          *strout++ = *strptr++;
          break;
        }

      switch (*++strptr)
        {
        case '0':
          /* The NUL character, which is the value of EMPTY_DELIM:
             no delimiter in this position.  */
          *strout++ = '\0';
          break;
        case 'b':
          *strout++ = '\b';
          break;
        case 'f':
          *strout++ = '\f';
          break;
        case 'n':
          *strout++ = '\n';
          break;
        case 'r':
          *strout++ = '\r';
          break;
        case 't':
          *strout++ = '\t';
          break;
        case 'v':
          *strout++ = '\v';
          break;
        default:
          *strout++ = *strptr;
          break;
        }
      strptr++;
    }
  return strout;
}
/* Perform column paste on the NFILES files named in FNAMPTR:
   each output line is made of the corresponding input line of every
   file, joined by the delimiters in `delims' (used cyclically and
   restarted for every output line).  A file name of "-" means
   standard input.

   Files that run out of lines early contribute empty fields.  The
   last line of a file is used even when it lacks a trailing newline;
   every output line is newline-terminated.

   A file that cannot be opened is reported through `error' with
   status 1.  Return 0 if no errors, 1 if one or more files could not
   be read or closed.  */
int
paste_parallel (nfiles, fnamptr)
     int nfiles;
     char **fnamptr;
{
  int errors = 0;               /* 1 if open or read errors occur. */
  /* Number of files for which space is allocated in `delbuf' and `fileptr'.
     Enlarged as necessary. */
  int file_list_size = 12;
  int chr;                      /* Input character. */
  int line_length;              /* Number of chars in line. */
  int somedone;                 /* 0 if all files empty for this line. */
  /* If all files are just ready to be closed, or will be on this
     round, the string of delimiters must be preserved.
     delbuf[0] through delbuf[file_list_size]
     store the delimiters for closed files. */
  char *delbuf;
  int delims_saved;             /* Number of delims saved in `delbuf'. */
  register char *delimptr;      /* Cycling pointer into `delims'. */
  FILE **fileptr;               /* Streams open to the files to process. */
  int files_open;               /* Number of files still open to process. */
  int i;                        /* Loop index. */
  int opened_stdin = 0;         /* Nonzero if any fopen got fd 0. */

  delbuf = (char *) xmalloc (file_list_size + 2);
  fileptr = (FILE **) xmalloc ((file_list_size + 1) * sizeof (FILE *));

  /* Attempt to open all files.  This could be expanded to an infinite
     number of files, but at the (considerable) expense of remembering
     each file and its current offset, then opening/reading/closing.  */
  for (files_open = 0; files_open < nfiles; ++files_open)
    {
      if (files_open == file_list_size - 2)
        {
          file_list_size += 12;
          delbuf = (char *) xrealloc (delbuf, file_list_size + 2);
          fileptr = (FILE **) xrealloc (fileptr, (file_list_size + 1)
                                        * sizeof (FILE *));
        }
      if (!strcmp (fnamptr[files_open], "-"))
        {
          have_read_stdin = 1;
          fileptr[files_open] = stdin;
        }
      else
        {
          fileptr[files_open] = fopen (fnamptr[files_open], "r");
          if (fileptr[files_open] == NULL)
            error (1, errno, "%s", fnamptr[files_open]);
          else if (fileno (fileptr[files_open]) == 0)
            opened_stdin = 1;
        }
    }

  fileptr[files_open] = ENDLIST;

  /* If fopen handed out descriptor 0, standard input was closed when
     we started, so a "-" operand cannot be honoured. */
  if (opened_stdin && have_read_stdin)
    error (1, 0, "standard input is closed");

  /* Read a line from each file and output it to stdout separated by a
     delimiter, until we go through the loop without successfully
     reading from any of the files. */
  while (files_open)
    {
      /* Set up for the next line. */
      somedone = 0;
      delimptr = delims;
      delims_saved = 0;

      for (i = 0; fileptr[i] != ENDLIST && files_open; i++)
        {
          line_length = 0;      /* Clear so we can easily detect EOF. */
          if (fileptr[i] != CLOSED)
            {
              chr = getc (fileptr[i]);
              /* This file has data, so the delimiters held back for
                 the exhausted files before it are now needed. */
              if (chr != EOF && delims_saved)
                {
                  fwrite (delbuf, sizeof (char), delims_saved, stdout);
                  delims_saved = 0;
                }

              /* Copy the line, leaving its terminator (a newline, or
                 EOF for an unterminated last line) in `chr'. */
              while (chr != EOF)
                {
                  line_length++;
                  if (chr == '\n')
                    break;
                  putc (chr, stdout);
                  chr = getc (fileptr[i]);
                }
            }

          if (line_length == 0)
            {
              /* EOF, read error, or closed file.
                 If an EOF or error, close the file and mark it in the list. */
              if (fileptr[i] != CLOSED)
                {
                  if (ferror (fileptr[i]))
                    {
                      error (0, errno, "%s", fnamptr[i]);
                      errors = 1;
                    }
                  if (fileptr[i] == stdin)
                    clearerr (fileptr[i]);      /* Also clear EOF. */
                  else if (fclose (fileptr[i]) == EOF)
                    {
                      error (0, errno, "%s", fnamptr[i]);
                      errors = 1;
                    }

                  fileptr[i] = CLOSED;
                  files_open--;
                }

              if (fileptr[i + 1] == ENDLIST)
                {
                  /* End of this output line.
                     Is this the end of the whole thing? */
                  if (somedone)
                    {
                      /* No.  Some files were not closed for this line. */
                      if (delims_saved)
                        {
                          fwrite (delbuf, sizeof (char), delims_saved, stdout);
                          delims_saved = 0;
                        }
                      putc ('\n', stdout);
                    }
                  continue;     /* Next read of files, or exit. */
                }
              else
                {
                  /* Closed file; add delimiter to `delbuf'. */
                  if (*delimptr != EMPTY_DELIM)
                    delbuf[delims_saved++] = *delimptr;
                  if (++delimptr == delim_end)
                    delimptr = delims;
                }
            }
          else
            {
              /* Some data read.  Here `chr' is either the newline
                 that ended the line or EOF (the file's last line had
                 no newline).  It must never be written as-is: EOF is
                 not a character, and the newline is replaced by a
                 delimiter except after the last file. */
              somedone++;

              if (fileptr[i + 1] != ENDLIST)
                {
                  if (*delimptr != EMPTY_DELIM)
                    putc (*delimptr, stdout);
                  if (++delimptr == delim_end)
                    delimptr = delims;
                }
              else
                /* Last file: always terminate the output line, even
                   when the input line was unterminated. */
                putc ('\n', stdout);
            }
        }
    }
  return errors;
}
/* Perform serial paste on the NFILES files named in FNAMPTR.
Return 0 if no errors, 1 if one or more files could not be
opened or read. */
int
paste_serial (nfiles, fnamptr)
int nfiles;
char **fnamptr;
{
int errors = 0; /* 1 if open or read errors occur. */
register int charnew, charold; /* Current and previous char read. */
register char *delimptr; /* Current delimiter char. */
register FILE *fileptr; /* Open for reading current file. */
for (; nfiles; nfiles--, fnamptr++)
{
if (!strcmp (*fnamptr, "-"))
{
have_read_stdin = 1;
fileptr = stdin;
}
else
{
fileptr = fopen (*fnamptr, "r");
if (fileptr == NULL)
{
error (0, errno, "%s", *fnamptr);
errors = 1;
continue;
}
}
delimptr = delims; /* Set up for delimiter string. */
charold = getc (fileptr);
if (charold != EOF)
{
/* `charold' is set up. Hit it!
Keep reading characters, stashing them in `charnew';
output `charold', converting to the appropriate delimiter
character if needed. After the EOF, output `charold'
if it's a newline; otherwise, output it and then a newline. */
while ((charnew = getc (fileptr)) != EOF)
{
/* Process the old character. */
if (charold == '\n')
{
if (*delimptr != EMPTY_DELIM)
putc (*delimptr, stdout);
if (++delimptr == delim_end)
delimptr = delims;
}
else
putc (charold, stdout);
charold = charnew;
}
/* Hit EOF. Process that last character. */
putc (charold, stdout);
}
if (charold != '\n')
putc ('\n', stdout);
if (ferror (fileptr))
{
error (0, errno, "%s", *fnamptr);
errors = 1;
}
if (fileptr == stdin)
clearerr (fileptr); /* Also clear EOF. */
else if (fclose (fileptr) == EOF)
{
error (0, errno, "%s", *fnamptr);
errors = 1;
}
}
return errors;
}
/* Print a usage summary for `paste' on standard error and exit
   with status 1.  */
void
usage ()
{
  fprintf (stderr,
           "Usage: %s [-s] [-d delim-list] [--serial] [--delimiters=delim-list]\n"
           "[file...]\n",
           program_name);
  exit (1);
}

1844
src/pr.c Normal file

File diff suppressed because it is too large Load Diff

1746
src/sort.c Normal file

File diff suppressed because it is too large Load Diff

532
src/split.c Normal file
View File

@@ -0,0 +1,532 @@
/* split.c -- split a file into pieces.
Copyright (C) 1988, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* By tege@sics.se, with rms.
To do:
* Implement -t CHAR or -t REGEX to specify break characters other
than newline. */
#include <stdio.h>
#include <getopt.h>
#include <ctype.h>
#include <limits.h>
#include <sys/types.h>
#include "system.h"
char *xmalloc ();
void error ();
int convint ();
int isdigits ();
int stdread ();
void line_bytes_split ();
void bytes_split ();
void cwrite ();
void lines_split ();
void next_file_name ();
/* Name under which this program was invoked. */
char *program_name;
/* Base name of output files. */
char *outfile;
/* Pointer to the end of the prefix in OUTFILE.
Suffixes are inserted here. */
char *outfile_mid;
/* Pointer to the end of OUTFILE. */
char *outfile_end;
/* Status for outfile name generation. */
unsigned outfile_count = -1;
unsigned outfile_name_limit = 25 * 26;
unsigned outfile_name_generation = 1;
/* Name of input file. May be "-". */
char *infile;
/* Descriptor on which input file is open. */
int input_desc;
/* Descriptor on which output file is open. */
int output_desc;
/* Print REASON (when it is not a null pointer) prefixed by the
   program name, then a usage summary for `split', on standard error;
   exit with status 2.  */
void
usage (reason)
     char *reason;
{
  if (reason)
    fprintf (stderr, "%s: %s\n", program_name, reason);

  fprintf (stderr,
           "Usage: %s [-lines] [-l lines] [-b bytes[bkm]] [-C bytes[bkm]]\n"
           "[--lines=lines] [--bytes=bytes[bkm]] [--line-bytes=bytes[bkm]]\n"
           "[infile [outfile-prefix]]\n",
           program_name);
  exit (2);
}
struct option longopts[] =
{
{"bytes", 1, NULL, 'b'},
{"lines", 1, NULL, 'l'},
{"line-bytes", 1, NULL, 'C'},
{NULL, 0, NULL, 0}
};
/* Entry point for `split'.
   Work out how to split (by lines, by bytes, or by bytes on line
   boundaries) and the piece size from the options, pick up the
   optional input file and output prefix operands, prepare the output
   name buffer and an I/O buffer, and hand over to the matching
   splitting routine.  Exit with status 0 on success.  */
void
main (argc, argv)
     int argc;
     char *argv[];
{
  enum
    {
      type_undef, type_bytes, type_byteslines, type_lines, type_digits
    } split_type = type_undef;
  struct stat stat_buf;
  char *outbase = "x";          /* Prefix of the output file names. */
  char *buf;                    /* File i/o buffer. */
  int in_blk_size;              /* Optimal block size of input file device. */
  int digits_optind = 0;        /* ARGV index of the -DIGITS option seen. */
  int accum = 0;                /* Piece size given on the command line. */
  int c;

  program_name = argv[0];
  infile = "-";

  /* Parse command line options. */
  for (;;)
    {
      /* This is the argv-index of the option we will read next. */
      int this_optind = optind ? optind : 1;

      c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
      if (c == EOF)
        break;

      if (c == 'b' || c == 'C')
        {
          if (split_type != type_undef)
            usage ("cannot split in more than one way");
          split_type = (c == 'b') ? type_bytes : type_byteslines;
          if (convint (optarg, &accum) == -1)
            usage ("invalid number of bytes");
        }
      else if (c == 'l')
        {
          if (split_type != type_undef)
            usage ("cannot split in more than one way");
          split_type = type_lines;
          if (!isdigits (optarg))
            usage ("invalid number of lines");
          accum = atoi (optarg);
        }
      else if (c >= '0' && c <= '9')
        {
          /* Old-style -DIGITS line count, delivered one digit at a
             time; digits from a different ARGV element start a new
             number that replaces the earlier one. */
          if (split_type != type_undef && split_type != type_digits)
            usage ("cannot split in more than one way");
          if (digits_optind != 0 && digits_optind != this_optind)
            accum = 0;
          digits_optind = this_optind;
          split_type = type_digits;
          accum = accum * 10 + c - '0';
        }
      else
        usage ((char *) 0);
    }

  /* With no splitting option at all, split every 1000 lines. */
  if (split_type == type_undef)
    {
      accum = 1000;
      split_type = type_lines;
    }

  if (accum < 1)
    usage ("invalid number");

  /* Collect the optional operands: input file, then output prefix. */
  if (optind < argc)
    infile = argv[optind++];
  if (optind < argc)
    outbase = argv[optind++];
  if (optind < argc)
    usage ("too many arguments");

  /* Open the input file; "-" stands for standard input. */
  if (strcmp (infile, "-") == 0)
    input_desc = 0;
  else
    {
      input_desc = open (infile, O_RDONLY);
      if (input_desc < 0)
        error (1, errno, "%s", infile);
    }

  /* No output file is open now. */
  output_desc = -1;

  /* Copy the output file prefix so we can add suffixes to it.
     26**29 is certainly enough output files!  */
  outfile = xmalloc (strlen (outbase) + 30);
  strcpy (outfile, outbase);
  outfile_mid = outfile + strlen (outfile);
  outfile_end = outfile_mid + 2;
  bzero (outfile_mid, 30);
  outfile_mid[0] = 'a';
  /* The first call to next_file_name bumps this to 'a'. */
  outfile_mid[1] = 'a' - 1;

  /* Get the optimal block size of input device and make a buffer
     (one byte larger, as lines_split stores a sentinel after the data). */
  if (fstat (input_desc, &stat_buf) < 0)
    error (1, errno, "%s", infile);
  in_blk_size = ST_BLKSIZE (stat_buf);
  buf = xmalloc (in_blk_size + 1);

  if (split_type == type_bytes)
    bytes_split (accum, buf, in_blk_size);
  else if (split_type == type_byteslines)
    line_bytes_split (accum);
  else if (split_type == type_lines || split_type == type_digits)
    lines_split (accum, buf, in_blk_size);

  if (close (input_desc) < 0)
    error (1, errno, "%s", infile);
  if (output_desc >= 0 && close (output_desc) < 0)
    error (1, errno, "%s", outfile);

  exit (0);
}
/* Return nonzero if the string STR is non-empty and composed entirely
   of decimal digits; return 0 otherwise, including when STR is a null
   pointer or the empty string.  */
int
isdigits (str)
     char *str;
{
  /* Examine the bytes as unsigned char: passing a negative `char'
     value (any byte above 0x7f where char is signed) to isdigit is
     undefined. */
  unsigned char *p = (unsigned char *) str;

  if (p == 0 || *p == '\0')
    return 0;

  for (; *p; p++)
    if (!isdigit (*p))
      return 0;

  return 1;
}
/* Put the value of the number in STR into *VAL.
   STR can specify a positive decimal integer, optionally ending in
   `b' to mean 512-byte blocks, `k' to mean kilo (1024) or `m' to
   mean mega (1048576).  A suffix is only recognized when at least
   one other character precedes it.
   Return 0 if STR is valid, -1 if not; STR is invalid when it is
   empty, contains a non-digit, or denotes a value that does not fit
   in an `int'.  *VAL is left untouched on failure and STR is never
   modified.  */
int
convint (str, val)
     char *str;
     int *val;
{
  int multiplier = 1;
  int arglen = strlen (str);
  int ndigits = arglen;         /* Length of STR without the suffix. */
  int value = 0;
  int i;

  if (arglen > 1)
    {
      switch (str[arglen - 1])
        {
        case 'b':
          multiplier = 512;
          ndigits--;
          break;

        case 'k':
          multiplier = 1024;
          ndigits--;
          break;

        case 'm':
          multiplier = 1048576;
          ndigits--;
          break;
        }
    }

  if (ndigits == 0)
    return -1;

  for (i = 0; i < ndigits; i++)
    {
      int digit;

      if (!isdigit ((unsigned char) str[i]))
        return -1;
      digit = str[i] - '0';
      /* Refuse to accumulate past INT_MAX rather than overflow. */
      if (value > (INT_MAX - digit) / 10)
        return -1;
      value = value * 10 + digit;
    }

  /* The scaled result must fit as well. */
  if (value > INT_MAX / multiplier)
    return -1;

  *val = value * multiplier;
  return 0;
}
/* Split the input into output files of exactly NCHARS bytes each
   (the last one may be shorter).
   Use buffer BUF, whose size is BUFSIZE, for reading.  */
void
bytes_split (nchars, buf, bufsize)
     int nchars;
     char *buf;
     int bufsize;
{
  int got;                      /* Bytes delivered by the latest read. */
  int avail;                    /* Bytes of BUF not yet written out. */
  int room = nchars;            /* Bytes the current piece can still take. */
  int start_new = 1;            /* Nonzero if the next write opens a file. */
  char *cursor;                 /* First unwritten byte in BUF. */

  do
    {
      got = stdread (buf, bufsize);
      if (got < 0)
        error (1, errno, "%s", infile);

      cursor = buf;
      avail = got;

      /* Write out every piece that this bufferful completes. */
      while (avail >= room)
        {
          cwrite (start_new, cursor, room);
          cursor += room;
          avail -= room;
          room = nchars;
          start_new = 1;
        }

      /* Whatever is left only partly fills the current piece.
         Never issue a zero-byte write: it could create an empty file. */
      if (avail != 0)
        {
          cwrite (start_new, cursor, avail);
          room -= avail;
          start_new = 0;
        }
    }
  while (got == bufsize);       /* A short read means end of input. */
}
/* Split the input into output files of exactly NLINES lines each
   (the last one may be shorter).
   Use buffer BUF for reading; BUFSIZE bytes are read at a time and
   one further byte, BUF[BUFSIZE], must be writable because a sentinel
   newline is stored right after the data.  */
void
lines_split (nlines, buf, bufsize)
     int nlines;
     char *buf;
     int bufsize;
{
  int got;                      /* Bytes delivered by the latest read. */
  int lines_seen = 0;           /* Complete lines in the current piece. */
  int start_new = 1;            /* Nonzero if the next write opens a file. */
  char *scan;                   /* Scanning position in BUF. */
  char *pending;                /* First byte of BUF not yet written. */
  char *limit;                  /* One past the last byte read. */

  do
    {
      got = stdread (buf, bufsize);
      if (got < 0)
        error (1, errno, "%s", infile);

      scan = buf;
      pending = buf;
      limit = buf + got;
      /* Sentinel: guarantees the newline search below terminates. */
      *limit = '\n';

      for (;;)
        {
          /* Move just past the next newline. */
          while (*scan != '\n')
            scan++;
          scan++;

          if (scan > limit)
            {
              /* That was the sentinel: the buffer is used up.  Flush
                 the unfinished tail, but never write 0 bytes. */
              if (limit != pending)
                {
                  cwrite (start_new, pending, limit - pending);
                  start_new = 0;
                }
              break;
            }

          lines_seen++;
          if (lines_seen >= nlines)
            {
              /* The piece is full: write it and begin another. */
              cwrite (start_new, pending, scan - pending);
              pending = scan;
              lines_seen = 0;
              start_new = 1;
            }
        }
    }
  while (got == bufsize);       /* A short read means end of input. */
}
/* Split into pieces that are as large as possible while still not more
   than NCHARS bytes, and are split on line boundaries except
   where lines longer than NCHARS bytes occur.
   Every piece goes to a new output file.  No output file is created
   for a piece of zero bytes, so empty input, or input that ends
   exactly at the end of a piece, leaves no trailing empty file.  */
void
line_bytes_split (nchars)
     int nchars;
{
  int n_read;
  char *bp;
  int eof = 0;
  int n_buffered = 0;
  char *buf = (char *) xmalloc (nchars);

  do
    {
      /* Fill up the full buffer size from the input file.  */
      n_read = stdread (buf + n_buffered, nchars - n_buffered);
      if (n_read < 0)
        error (1, errno, "%s", infile);

      n_buffered += n_read;
      /* A buffer that could not be filled means the input is exhausted. */
      if (n_buffered != nchars)
        eof = 1;

      /* Find where to end this chunk.  When the buffer is full, back
         up to just after the last newline; at end of input everything
         that remains goes out. */
      bp = buf + n_buffered;
      if (n_buffered == nchars)
        {
          while (bp > buf && bp[-1] != '\n')
            bp--;
        }

      /* If chunk has no newlines, use all the chunk.  */
      if (bp == buf)
        bp = buf + n_buffered;

      /* Output the chars as one output file, unless there are none:
         cwrite would still open (and thus create) a new file. */
      if (bp != buf)
        cwrite (1, buf, bp - buf);

      /* Discard the chars we just output; move rest of chunk
         down to be the start of the next chunk.  */
      n_buffered -= bp - buf;
      if (n_buffered > 0)
        bcopy (bp, buf, n_buffered);
    }
  while (!eof);
  free (buf);
}
/* Write BYTES bytes at BP to an output file.
   If NEW_FILE_FLAG is nonzero, close the output file in use (if any)
   and open the next one, named by next_file_name, first.
   Otherwise add to the same output file already in use.
   Short writes are continued until all BYTES bytes have been
   written; any failure is reported through `error' with status 1.  */
void
cwrite (new_file_flag, bp, bytes)
     int new_file_flag;
     char *bp;
     int bytes;
{
  if (new_file_flag)
    {
      if (output_desc >= 0 && close (output_desc) < 0)
        error (1, errno, "%s", outfile);
      next_file_name ();
      output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
      if (output_desc < 0)
        error (1, errno, "%s", outfile);
    }

  /* write may accept fewer bytes than requested (e.g. on pipes or a
     nearly full device); keep going with the remainder instead of
     silently dropping it. */
  while (bytes > 0)
    {
      int n_written = write (output_desc, bp, bytes);

      if (n_written <= 0)
        {
          /* Failure, or no progress at all; do not spin. */
          error (1, errno, "%s", outfile);
          return;
        }
      bp += n_written;
      bytes -= n_written;
    }
}
/* Read NCHARS bytes from the input file (descriptor `input_desc')
   into BUF, repeating short reads until NCHARS bytes have arrived or
   end of file is reached.
   Return the number of bytes successfully read, or -1 if a read
   fails.  If the result is less than NCHARS, do not call `stdread'
   again.  */
int
stdread (buf, nchars)
     char *buf;
     int nchars;
{
  int total = 0;                /* Bytes stored in BUF so far. */

  while (total != nchars)
    {
      int got = read (input_desc, buf + total, nchars - total);

      if (got < 0)
        return -1;
      if (got == 0)             /* End of file. */
        break;
      total += got;
    }

  return total;
}
/* Compute the next sequential output file name suffix and store it
   into the string `outfile' at the position pointed to by `outfile_mid'.

   Within one "generation" the suffix is advanced like an odometer
   over the letters a..z, starting from its last character.  Once
   `outfile_name_limit' names have been produced, the generation is
   closed off with a `z', the suffix restarts as a longer run of `a's
   after it, and the limit is multiplied by 26.  */
void
next_file_name ()
{
  char *pos;
  unsigned k;

  if (++outfile_count >= outfile_name_limit)
    {
      /* This generation is used up; start the next, longer one. */
      outfile_count = 0;
      outfile_name_limit *= 26;
      outfile_name_generation++;

      *outfile_mid = 'z';
      outfile_mid++;
      for (k = 0; k <= outfile_name_generation; k++)
        outfile_mid[k] = 'a';
      outfile_end += 2;
      return;
    }

  /* Advance the suffix: every trailing `z' wraps to `a' and carries
     into the character before it. */
  pos = outfile_end - 1;
  while (*pos == 'z')
    {
      *pos = 'a';
      pos--;
    }
  *pos = *pos + 1;
}

217
src/sum.c Normal file
View File

@@ -0,0 +1,217 @@
/* sum -- checksum and count the blocks in a file
Copyright (C) 1986, 1989, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Like BSD sum or SysV sum -r, except like SysV sum if -s option is given. */
/* Written by Kayvan Aghaiepour and David MacKenzie. */
#include <stdio.h>
#include <sys/types.h>
#include <getopt.h>
#include "system.h"
int bsd_sum_file ();
int sysv_sum_file ();
void error ();
/* The name this program was run with. */
char *program_name;
/* Nonzero if any of the files read were the standard input. */
int have_read_stdin;
/* Rotate the 16-bit quantity held in integer variable C right by one
   bit: the low bit moves to bit 15 (0x8000).  C must be a modifiable
   lvalue whose value is already confined to 16 bits; it is evaluated
   more than once.  Expands to a single statement, so it is safe in
   unbraced if/else bodies; follow it with a semicolon.  */
#define ROTATE_RIGHT(c) \
  do { if ((c) & 01) (c) = ((c) >> 1) + 0x8000; else (c) >>= 1; } while (0)
struct option longopts[] =
{
{"sysv", 0, NULL, 's'},
{NULL, 0, NULL, 0}
};
/* Entry point for `sum'.
   Choose the checksum routine from the options (-r: BSD algorithm,
   the default; -s/--sysv: System V algorithm), then checksum each
   file operand, or standard input when there are none.
   Exit with status 1 if any file failed, 0 otherwise.  */
void
main (argc, argv)
     int argc;
     char **argv;
{
  int (*sum_func) () = bsd_sum_file;    /* Checksum routine to apply. */
  int files_given;                      /* Number of file operands. */
  int errors = 0;
  int optc;

  program_name = argv[0];
  have_read_stdin = 0;

  for (;;)
    {
      optc = getopt_long (argc, argv, "rs", longopts, (int *) 0);
      if (optc == -1)
        break;

      if (optc == 'r')          /* For SysV compatibility. */
        sum_func = bsd_sum_file;
      else if (optc == 's')
        sum_func = sysv_sum_file;
      else if (optc == '?')
        {
          fprintf (stderr, "Usage: %s [-rs] [--sysv] [file...]\n", argv[0]);
          exit (1);
        }
    }

  /* The operand count is also handed to the checksum routine, which
     uses it to decide whether to print the file name. */
  files_given = argc - optind;
  if (files_given == 0)
    {
      if ((*sum_func) ("-", files_given) < 0)
        errors = 1;
    }
  else
    {
      while (optind < argc)
        {
          if ((*sum_func) (argv[optind], files_given) < 0)
            errors = 1;
          optind++;
        }
    }

  if (have_read_stdin && fclose (stdin) == EOF)
    error (1, errno, "-");
  exit (errors);
}
/* Calculate and print the rotated (BSD-style) 16-bit checksum and
   the size in 1K blocks, rounded up, of file FILE, or of the standard
   input if FILE is "-".
   If PRINT_NAME is >1, print FILE next to the checksum and size.
   Return 0 if successful, -1 if an error occurs (in which case
   nothing is printed on standard output).  */
int
bsd_sum_file (file, print_name)
     char *file;
     int print_name;
{
  register FILE *fp;
  register unsigned long checksum = 0;  /* The checksum mod 2^16. */
  register long total_bytes = 0;        /* The number of bytes. */
  register int ch;                      /* Each character read. */
  int from_stdin = !strcmp (file, "-");

  if (from_stdin)
    {
      have_read_stdin = 1;
      fp = stdin;
    }
  else
    {
      fp = fopen (file, "r");
      if (!fp)
        {
          error (0, errno, "%s", file);
          return -1;
        }
    }

  /* For every byte: rotate the running sum right by one bit, add the
     byte, and cut the result back to 16 bits. */
  for (ch = getc (fp); ch != EOF; ch = getc (fp))
    {
      total_bytes++;
      ROTATE_RIGHT (checksum);
      checksum = (checksum + ch) & 0xffff;
    }

  if (ferror (fp))
    {
      error (0, errno, "%s", file);
      /* Standard input is left open for the caller to close. */
      if (!from_stdin)
        fclose (fp);
      return -1;
    }

  if (!from_stdin && fclose (fp) == EOF)
    {
      error (0, errno, "%s", file);
      return -1;
    }

  printf ("%05lu %5ld", checksum, (total_bytes + 1024 - 1) / 1024);
  if (print_name > 1)
    printf (" %s", file);
  putchar ('\n');

  return 0;
}
/* Calculate and print the System V style checksum (the sum of all
   bytes, reduced modulo 0xffff) and the size in 512-byte blocks,
   rounded up, of file FILE, or of the standard input if FILE is "-".
   If PRINT_NAME is >0, print FILE next to the checksum and size.
   Return 0 if successful, -1 if an error occurs (in which case
   nothing is printed on standard output).  */
int
sysv_sum_file (file, print_name)
     char *file;
     int print_name;
{
  unsigned char buf[8192];
  register unsigned long checksum = 0;
  register int bytes_read;
  long total_bytes = 0;
  int from_stdin = !strcmp (file, "-");
  int fd;

  if (from_stdin)
    {
      have_read_stdin = 1;
      fd = 0;
    }
  else if ((fd = open (file, O_RDONLY)) == -1)
    {
      error (0, errno, "%s", file);
      return -1;
    }

  for (;;)
    {
      register unsigned char *p;
      register unsigned char *end;

      bytes_read = read (fd, buf, sizeof buf);
      if (bytes_read <= 0)
        break;

      end = buf + bytes_read;
      for (p = buf; p < end; p++)
        checksum += *p;
      total_bytes += bytes_read;
    }

  if (bytes_read < 0)
    {
      error (0, errno, "%s", file);
      /* Standard input is left open for the caller to close. */
      if (!from_stdin)
        close (fd);
      return -1;
    }

  if (!from_stdin && close (fd) == -1)
    {
      error (0, errno, "%s", file);
      return -1;
    }

  printf ("%lu %ld", checksum % 0xffff, (total_bytes + 512 - 1) / 512);
  if (print_name)
    printf (" %s", file);
  putchar ('\n');

  return 0;
}

628
src/tac.c Normal file
View File

@@ -0,0 +1,628 @@
/* tac - concatenate and print files in reverse
Copyright (C) 1988, 1989, 1990, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Jay Lepreau (lepreau@cs.utah.edu).
GNU enhancements by David MacKenzie (djm@ai.mit.edu). */
/* Copy each FILE, or the standard input if none are given or when a
FILE name of "-" is encountered, to the standard output with the
order of the records reversed. The records are separated by
instances of a string, or a newline if none is given. By default, the
separator string is attached to the end of the record that it
follows in the file.
Options:
-b, --before The separator is attached to the beginning
of the record that it precedes in the file.
-r, --regex The separator is a regular expression.
-s, --separator=separator Use SEPARATOR as the record separator.
To reverse a file byte by byte, use (in bash, ksh, or sh):
tac -r -s '.\|
' file */
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include <signal.h>
#include <regex.h>
#include "system.h"
#ifndef STDC_HEADERS
char *malloc ();
char *realloc ();
#endif
/* The number of bytes per atomic read. */
#define INITIAL_READSIZE 8192
/* The number of bytes per atomic write. */
#define WRITESIZE 8192
char *mktemp ();
RETSIGTYPE cleanup ();
int tac ();
int tac_file ();
int tac_stdin ();
char *xmalloc ();
char *xrealloc ();
void output ();
void error ();
void save_stdin ();
void xwrite ();
/* The name this program was run with. */
char *program_name;
/* The string that separates the records of the file. */
char *separator;
/* If nonzero, print `separator' along with the record preceding it
in the file; otherwise with the record following it. */
int separator_ends_record;
/* 0 if `separator' is to be matched as a regular expression;
otherwise, the length of `separator', used as a sentinel to
stop the search. */
int sentinel_length;
/* The length of a match with `separator'. If `sentinel_length' is 0,
`match_length' is computed every time a match succeeds;
otherwise, it is simply the length of `separator'. */
int match_length;
/* The input buffer. */
char *buffer;
/* The number of bytes to read at once into `buffer'. */
unsigned read_size;
/* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
The extra 2 bytes allow `past_end' to have a value beyond the
end of `buffer' and `match_start' to run off the front of `buffer'. */
unsigned buffer_size;
/* The compiled regular expression representing `separator'. */
static struct re_pattern_buffer compiled_separator;
struct option longopts[] =
{
{"before", 0, &separator_ends_record, 0},
{"regex", 0, &sentinel_length, 0},
{"separator", 1, NULL, 's'},
{NULL, 0, NULL, 0}
};
/* Entry point for `tac'.
   Parse the options, compile the separator when it is a regular
   expression, allocate the shared input buffer, and then print each
   file operand (or the standard input, for "-" or when there are no
   operands) with its records reversed.
   Exit with the accumulated error status.  */
void
main (argc, argv)
     int argc;
     char **argv;
{
  char *regex_error;            /* Return value from re_compile_pattern. */
  int used_stdin = 0;           /* Nonzero once standard input was read. */
  int errors = 0;
  int optc;

  program_name = argv[0];
  separator = "\n";
  separator_ends_record = 1;
  sentinel_length = 1;

  for (;;)
    {
      optc = getopt_long (argc, argv, "brs:", longopts, (int *) 0);
      if (optc == EOF)
        break;

      if (optc == 0)
        /* A long option that getopt_long applied through its flag
           pointer; nothing more to do. */
        ;
      else if (optc == 'b')
        separator_ends_record = 0;
      else if (optc == 'r')
        sentinel_length = 0;
      else if (optc == 's')
        {
          separator = optarg;
          if (*separator == 0)
            error (1, 0, "separator cannot be empty");
        }
      else
        {
          fprintf (stderr,
                   "Usage: %s [-br] [-s separator] [--before] [--regex] [--separator=separator]\n"
                   "[file...]\n",
                   program_name);
          exit (1);
        }
    }

  if (sentinel_length != 0)
    /* Fixed-string separator: its length doubles as the sentinel
       length and as the (constant) match length. */
    match_length = sentinel_length = strlen (separator);
  else
    {
      /* Regular-expression separator: compile it once up front. */
      compiled_separator.allocated = 100;
      compiled_separator.buffer = (unsigned char *)
        xmalloc (compiled_separator.allocated);
      compiled_separator.fastmap = xmalloc (256);
      compiled_separator.translate = 0;
      regex_error = re_compile_pattern (separator, strlen (separator),
                                        &compiled_separator);
      if (regex_error)
        error (1, 0, "%s", regex_error);
    }

  /* A precaution that will probably never be needed: keep the read
     size larger than twice the separator length. */
  read_size = INITIAL_READSIZE;
  while (sentinel_length * 2 >= read_size)
    read_size *= 2;
  buffer_size = read_size * 2 + sentinel_length + 2;
  buffer = xmalloc (buffer_size);

  /* Reserve room in front of the data area: a copy of the separator
     for fixed-string searches, a single byte for regex searches.
     `buffer' then points just past that room. */
  if (sentinel_length)
    {
      strcpy (buffer, separator);
      buffer += sentinel_length;
    }
  else
    ++buffer;

  if (optind == argc)
    {
      used_stdin = 1;
      errors = tac_stdin ();
    }
  else
    {
      while (optind < argc)
        {
          if (strcmp (argv[optind], "-") != 0)
            errors |= tac_file (argv[optind]);
          else
            {
              used_stdin = 1;
              errors |= tac_stdin ();
            }
          ++optind;
        }
    }

  /* Flush the output buffer. */
  output ((char *) NULL, (char *) NULL);

  if (used_stdin && close (0) < 0)
    error (1, errno, "-");
  if (close (1) < 0)
    error (1, errno, "write error");
  exit (errors);
}
/* The name of a temporary file containing a copy of pipe input. */
char *tempfile;
/* Print the standard input in reverse.
   When standard input is a regular file it is reversed directly;
   otherwise (e.g. a pipe) it is first copied to the temporary file
   `tempfile', which is reversed and then removed.  While the
   temporary file may exist, SIGINT, SIGHUP, SIGPIPE and SIGTERM are
   routed to `cleanup' unless they were being ignored; the previous
   handlers are put back before returning.
   Return 1 if standard input cannot be examined with fstat;
   otherwise return the status from `tac' or `tac_file' (0 if ok,
   1 if an error occurs). */
int
tac_stdin ()
{
/* Previous values of signal handlers. */
RETSIGTYPE (*sigint) (), (*sighup) (), (*sigpipe) (), (*sigterm) ();
int errors;
struct stat stats;
#ifdef _POSIX_VERSION
struct sigaction oldact, newact;
#endif /* _POSIX_VERSION */
/* No tempfile is needed for "tac < file".
Use fstat instead of checking for errno == ESPIPE because
lseek doesn't work on some special files but doesn't return an
error, either. */
if (fstat (0, &stats))
{
error (0, errno, "standard input");
return 1;
}
if (S_ISREG (stats.st_mode))
return tac (0, "standard input");
#ifdef _POSIX_VERSION
/* For each signal: fetch the current disposition without changing
   it, remember its handler, and install `cleanup' (with an empty
   mask and no flags) only if the signal was not being ignored. */
newact.sa_handler = cleanup;
sigemptyset (&newact.sa_mask);
newact.sa_flags = 0;
sigaction (SIGINT, NULL, &oldact);
sigint = oldact.sa_handler;
if (sigint != SIG_IGN)
sigaction (SIGINT, &newact, NULL);
sigaction (SIGHUP, NULL, &oldact);
sighup = oldact.sa_handler;
if (sighup != SIG_IGN)
sigaction (SIGHUP, &newact, NULL);
sigaction (SIGPIPE, NULL, &oldact);
sigpipe = oldact.sa_handler;
if (sigpipe != SIG_IGN)
sigaction (SIGPIPE, &newact, NULL);
sigaction (SIGTERM, NULL, &oldact);
sigterm = oldact.sa_handler;
if (sigterm != SIG_IGN)
sigaction (SIGTERM, &newact, NULL);
#else /* !_POSIX_VERSION */
/* Without sigaction the old handler can only be learned by
   installing a new one: set each signal to SIG_IGN, keep what
   `signal' returns, and switch to `cleanup' unless the signal was
   already ignored. */
sigint = signal (SIGINT, SIG_IGN);
if (sigint != SIG_IGN)
signal (SIGINT, cleanup);
sighup = signal (SIGHUP, SIG_IGN);
if (sighup != SIG_IGN)
signal (SIGHUP, cleanup);
sigpipe = signal (SIGPIPE, SIG_IGN);
if (sigpipe != SIG_IGN)
signal (SIGPIPE, cleanup);
sigterm = signal (SIGTERM, SIG_IGN);
if (sigterm != SIG_IGN)
signal (SIGTERM, cleanup);
#endif /* _POSIX_VERSION */
/* Copy standard input to `tempfile', reverse that file, and remove
   it again. */
save_stdin ();
errors = tac_file (tempfile);
unlink (tempfile);
/* Put the remembered handlers back. */
#ifdef _POSIX_VERSION
/* NOTE(review): only the handler is restored; `newact' still carries
   the empty mask and zero flags set above, so any mask or flags the
   original dispositions had are not reinstated -- confirm this is
   acceptable. */
newact.sa_handler = sigint;
sigaction (SIGINT, &newact, NULL);
newact.sa_handler = sighup;
sigaction (SIGHUP, &newact, NULL);
newact.sa_handler = sigterm;
sigaction (SIGTERM, &newact, NULL);
newact.sa_handler = sigpipe;
sigaction (SIGPIPE, &newact, NULL);
#else /* !_POSIX_VERSION */
signal (SIGINT, sigint);
signal (SIGHUP, sighup);
signal (SIGTERM, sigterm);
signal (SIGPIPE, sigpipe);
#endif /* _POSIX_VERSION */
return errors;
}
/* Make a copy of the standard input in a newly created temporary
   file and leave its name in `tempfile'.
   The file is created in the directory named by the environment
   variable TMPDIR, or in /tmp if that is unset.  It is created
   atomically with mkstemp, so a file or symbolic link planted under a
   guessed name cannot be opened or overwritten by mistake.
   Any failure is reported and then handed to `cleanup' (presumably
   this removes `tempfile' and does not return -- confirm against its
   definition).  */
void
save_stdin ()
{
  static char *template = NULL;
  static char *tempdir;
  int mkstemp ();
  int fd;
  int bytes_read;
  int read_errno = 0;           /* errno of a failed read, if any. */

  if (template == NULL)
    {
      tempdir = getenv ("TMPDIR");
      if (tempdir == NULL)
        tempdir = "/tmp";
      /* Room for "/tacXXXXXX" (10 characters) and the null. */
      template = xmalloc (strlen (tempdir) + 11);
    }
  sprintf (template, "%s/tacXXXXXX", tempdir);

  /* Publish the name before the file can exist so that `cleanup'
     always sees it; mkstemp fills in the X's in place and creates the
     file with O_EXCL and mode 0600. */
  tempfile = template;
  fd = mkstemp (template);
  if (fd == -1)
    {
      error (0, errno, "%s", tempfile);
      cleanup ();
    }

  while ((bytes_read = read (0, buffer, read_size)) > 0)
    if (write (fd, buffer, bytes_read) != bytes_read)
      {
        error (0, errno, "%s", tempfile);
        cleanup ();
      }
  /* Remember why the read failed before close can change errno. */
  if (bytes_read == -1)
    read_errno = errno;

  if (close (fd) < 0)
    {
      error (0, errno, "%s", tempfile);
      cleanup ();
    }
  if (bytes_read == -1)
    {
      error (0, read_errno, "read error");
      cleanup ();
    }
}
/* Print FILE in reverse: open it for reading, let `tac' do the work,
   and close it again.
   Return 0 if ok, 1 if an error occurs (including failure to open or
   close FILE).  */
int
tac_file (file)
     char *file;
{
  int status;
  int fd = open (file, 0);      /* Flags 0: conventionally read-only. */

  if (fd == -1)
    {
      error (0, errno, "%s", file);
      return 1;
    }

  status = tac (fd, file);

  if (close (fd) >= 0)
    return status;

  error (0, errno, "%s", file);
  return 1;
}
/* Print in reverse the file open on descriptor FD for reading FILE.
Return 0 if ok, 1 if an error occurs. */
int
tac (fd, file)
int fd;
char *file;
{
/* Pointer to the location in `buffer' where the search for
the next separator will begin. */
char *match_start;
/* Pointer to one past the rightmost character in `buffer' that
has not been printed yet. */
char *past_end;
unsigned saved_record_size; /* Length of the record growing in `buffer'. */
off_t file_pos; /* Offset in the file of the next read. */
/* Nonzero if `output' has not been called yet for any file.
Only used when the separator is attached to the preceding record. */
int first_time = 1;
char first_char = *separator; /* Speed optimization, non-regexp. */
char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
struct re_registers regs;
/* Find the size of the input file. */
file_pos = lseek (fd, (off_t) 0, SEEK_END);
if (file_pos < 1)
return 0; /* It's an empty file. */
/* Arrange for the first read to lop off enough to leave the rest of the
file a multiple of `read_size'. Since `read_size' can change, this may
not always hold during the program run, but since it usually will, leave
it here for i/o efficiency (page/sector boundaries and all that).
Note: the efficiency gain has not been verified. */
saved_record_size = file_pos % read_size;
if (saved_record_size == 0)
saved_record_size = read_size;
file_pos -= saved_record_size;
/* `file_pos' now points to the start of the last (probably partial) block
in the input file. */
lseek (fd, file_pos, SEEK_SET);
if (read (fd, buffer, saved_record_size) != saved_record_size)
{
error (0, 1, "%s", file);
return 1;
}
match_start = past_end = buffer + saved_record_size;
/* For non-regexp search, move past impossible positions for a match. */
if (sentinel_length)
match_start -= match_length1;
for (;;)
{
/* Search backward from `match_start' - 1 to `buffer' for a match
with `separator'; for speed, use strncmp if `separator' contains no
metacharacters.
If the match succeeds, set `match_start' to point to the start of
the match and `match_length' to the length of the match.
Otherwise, make `match_start' < `buffer'. */
if (sentinel_length == 0)
{
int i = match_start - buffer;
int ret;
ret = re_search (&compiled_separator, buffer, i, i - 1, -i, &regs);
if (ret == -1)
match_start = buffer - 1;
else if (ret == -2)
{
error (0, 0, "error in regular expression search");
cleanup ();
}
else
{
match_start = buffer + regs.start[0];
match_length = regs.end[0] - regs.start[0];
}
}
else
{
/* `match_length' is constant for non-regexp boundaries. */
while (*--match_start != first_char
|| (match_length1 && strncmp (match_start + 1, separator1,
match_length1)))
/* Do nothing. */ ;
}
/* Check whether we backed off the front of `buffer' without finding
a match for `separator'. */
if (match_start < buffer)
{
if (file_pos == 0)
{
/* Hit the beginning of the file; print the remaining record. */
output (buffer, past_end);
return 0;
}
saved_record_size = past_end - buffer;
if (saved_record_size > read_size)
{
/* `buffer_size' is about twice `read_size', so since
we want to read in another `read_size' bytes before
the data already in `buffer', we need to increase
`buffer_size'. */
char *newbuffer;
int offset = sentinel_length ? sentinel_length : 1;
read_size *= 2;
buffer_size = read_size * 2 + sentinel_length + 2;
newbuffer = xrealloc (buffer - offset, buffer_size) + offset;
/* Adjust the pointers for the new buffer location. */
match_start += newbuffer - buffer;
past_end += newbuffer - buffer;
buffer = newbuffer;
}
/* Back up to the start of the next bufferfull of the file. */
if (file_pos >= read_size)
file_pos -= read_size;
else
{
read_size = file_pos;
file_pos = 0;
}
lseek (fd, file_pos, SEEK_SET);
/* Shift the pending record data right to make room for the new. */
bcopy (buffer, buffer + read_size, saved_record_size);
past_end = buffer + read_size + saved_record_size;
/* For non-regexp searches, avoid unnecessary scanning. */
if (sentinel_length)
match_start = buffer + read_size;
else
match_start = past_end;
if (read (fd, buffer, read_size) != read_size)
{
error (0, errno, "%s", file);
return 1;
}
}
else
{
/* Found a match of `separator'. */
if (separator_ends_record)
{
char *match_end = match_start + match_length;
/* If this match of `separator' isn't at the end of the
file, print the record. */
if (first_time == 0 || match_end != past_end)
output (match_end, past_end);
past_end = match_end;
first_time = 0;
}
else
{
output (match_start, past_end);
past_end = match_start;
}
match_start -= match_length - 1;
}
}
}
/* Queue the characters from START to PAST_END - 1 for printing.
   The characters are collected in a static staging buffer of WRITESIZE
   bytes, which is written to file descriptor 1 every time it fills up.
   If START is NULL, just flush the staging buffer; PAST_END is then
   not examined.  */
void
output (start, past_end)
     char *start;
     char *past_end;
{
  static char buffer[WRITESIZE];
  static int bytes_in_buffer = 0;
  int bytes_to_add;
  int bytes_available;

  if (start == 0)
    {
      xwrite (1, buffer, bytes_in_buffer);
      bytes_in_buffer = 0;
      return;
    }

  /* Form the pointer difference only once START is known to be a real
     pointer; subtracting a null pointer is undefined.  */
  bytes_to_add = past_end - start;
  bytes_available = WRITESIZE - bytes_in_buffer;

  /* Write out as many full buffers as possible.  */
  while (bytes_to_add >= bytes_available)
    {
      bcopy (start, buffer + bytes_in_buffer, bytes_available);
      bytes_to_add -= bytes_available;
      start += bytes_available;
      xwrite (1, buffer, WRITESIZE);
      bytes_in_buffer = 0;
      bytes_available = WRITESIZE;
    }

  /* Keep the remainder (possibly nothing) for a later call.  */
  bcopy (start, buffer + bytes_in_buffer, bytes_to_add);
  bytes_in_buffer += bytes_to_add;
}
/* Remove the file named by `tempfile' and terminate the program with
exit status 1. This function does not return; the error paths of
`xwrite', `xmalloc' and `xrealloc' rely on that. */
RETSIGTYPE
cleanup ()
{
unlink (tempfile);
exit (1);
}
/* Write SIZE bytes from BUFFER to file descriptor DESC.
   A `write' that succeeds but transfers fewer bytes than requested is
   continued from where it stopped instead of being treated as a failure.
   If `write' fails, or makes no progress at all, print "write error"
   and call `cleanup', which exits.  */
void
xwrite (desc, buffer, size)
     int desc;
     char *buffer;
     int size;
{
  while (size > 0)
    {
      int written = write (desc, buffer, size);

      /* A zero return would make this loop spin forever, so it is
         treated like an error.  */
      if (written <= 0)
        {
          error (0, errno, "write error");
          cleanup ();
        }
      buffer += written;
      size -= written;
    }
}
/* Return a pointer to N bytes of newly allocated memory.
   If the allocation fails, print a diagnostic and call `cleanup'.  */
char *
xmalloc (n)
     unsigned n;
{
  char *block = malloc (n);

  if (block != 0)
    return block;

  error (0, 0, "virtual memory exhausted");
  cleanup ();
  return block;
}
/* Resize the memory area P to N bytes and return its (possibly new)
   address.  If the reallocation fails, print a diagnostic and call
   `cleanup'.  */
char *
xrealloc (p, n)
     char *p;
     unsigned n;
{
  char *resized = realloc (p, n);

  if (resized == 0)
    {
      error (0, 0, "virtual memory exhausted");
      cleanup ();
    }
  return resized;
}

858
src/tail.c Normal file
View File

@@ -0,0 +1,858 @@
/* tail -- output last part of file(s)
Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Can display any amount of data, unlike the Unix version, which uses
a fixed size buffer and therefore can only deliver a limited number
of lines.
Options:
-b Tail by N 512-byte blocks.
-c, --bytes=N[bkm] Tail by N bytes
[or 512-byte blocks, kilobytes, or megabytes].
-f, --follow Loop forever trying to read more characters at the
end of the file, on the assumption that the file
is growing. Ignored if reading from a pipe.
Cannot be used if more than one file is given.
-k Tail by N kilobytes.
-N, -l, -n, --lines=N Tail by N lines.
-m Tail by N megabytes.
-q, --quiet, --silent Never print filename headers.
-v, --verbose Always print filename headers.
If a number (N) starts with a `+', begin printing with the Nth item
from the start of each file, instead of from the end.
Reads from standard input if no files are given or when a filename of
``-'' is encountered.
By default, filename headers are printed only if more than one file
is given.
By default, prints the last 10 lines (tail -n 10).
Original version by Paul Rubin <phr@ocf.berkeley.edu>.
Extensions by David MacKenzie <djm@ai.mit.edu>. */
#include <stdio.h>
#include <getopt.h>
#include <ctype.h>
#include <sys/types.h>
#include "system.h"
#ifdef isascii
#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
#else
#define ISDIGIT(c) (isdigit ((c)))
#endif
/* Number of items to tail. */
#define DEFAULT_NUMBER 10
/* Size of atomic reads. */
#define BUFSIZE (512 * 8)
/* Number of bytes per item we are printing.
If 0, tail in lines. */
int unit_size;
/* If nonzero, read from end of file until killed. */
int forever;
/* If nonzero, count from start of file instead of end. */
int from_start;
/* If nonzero, print filename headers. */
int print_headers;
/* When to print the filename banners. */
enum header_mode
{
multiple_files, always, never
};
char *xmalloc ();
int file_lines ();
int pipe_bytes ();
int pipe_lines ();
int start_bytes ();
int start_lines ();
int tail ();
int tail_bytes ();
int tail_file ();
int tail_lines ();
long atou();
void dump_remainder ();
void error ();
void parse_unit ();
void usage ();
void write_header ();
void xwrite ();
/* The name this program was run with. */
char *program_name;
/* Nonzero if we have ever read standard input. */
int have_read_stdin;
/* Long option names handed to `getopt_long' in `main'.
In each entry the second field is 1 when the option takes an argument
and the fourth field is the short option character that `main' switches
on; `--quiet' and `--silent' both map to `q'. The all-zero entry
terminates the table. */
struct option long_options[] =
{
{"bytes", 1, NULL, 'c'},
{"follow", 0, NULL, 'f'},
{"lines", 1, NULL, 'n'},
{"quiet", 0, NULL, 'q'},
{"silent", 0, NULL, 'q'},
{"verbose", 0, NULL, 'v'},
{NULL, 0, NULL, 0}
};
/* Entry point of `tail'.
First recognize, by hand, the old option syntax in the first argument
(`-N' or `+N' followed by option letters), then parse the remaining
options with `getopt_long'. Every file operand, or the standard input
when there is none, is handed to `tail_file'; the results are ORed
together to form the exit status. */
void
main (argc, argv)
int argc;
char **argv;
{
enum header_mode header_mode = multiple_files;
int exit_status = 0;
/* If from_start, the number of items to skip before printing; otherwise,
the number of items at the end of the file to print. Initially, -1
means the value has not been set. */
long number = -1;
int c; /* Option character. */
program_name = argv[0];
have_read_stdin = 0;
unit_size = 0;
forever = from_start = print_headers = 0;
if (argc > 1
&& ((argv[1][0] == '-' && ISDIGIT (argv[1][1]))
|| (argv[1][0] == '+' && (ISDIGIT (argv[1][1]) || argv[1][1] == 0))))
{
/* Old option syntax: a dash or plus, one or more digits (zero digits
are acceptable with a plus), and one or more option letters. */
if (argv[1][0] == '+')
from_start = 1;
if (argv[1][1] != 0)
{
/* Accumulate the decimal count; `argv[1]' itself is advanced past
each character as it is consumed. */
for (number = 0, ++argv[1]; ISDIGIT (*argv[1]); ++argv[1])
number = number * 10 + *argv[1] - '0';
/* Parse any appended option letters. */
while (*argv[1])
{
switch (*argv[1])
{
case 'b':
unit_size = 512;
break;
case 'c':
unit_size = 1;
break;
case 'f':
forever = 1;
break;
case 'k':
unit_size = 1024;
break;
case 'l':
unit_size = 0;
break;
case 'm':
unit_size = 1048576;
break;
case 'q':
header_mode = never;
break;
case 'v':
header_mode = always;
break;
default:
error (0, 0, "unrecognized option `-%c'", *argv[1]);
usage ();
}
++argv[1];
}
}
/* Make the options we just parsed invisible to getopt. */
argv[1] = argv[0];
argv++;
argc--;
}
while ((c = getopt_long (argc, argv, "c:n:fqv", long_options, (int *) 0))
!= EOF)
{
switch (c)
{
case 'c':
unit_size = 1;
/* `parse_unit' may replace `unit_size' and strips a trailing
`b', `k' or `m' from the argument before it is converted. */
parse_unit (optarg);
goto getnum;
case 'n':
unit_size = 0;
getnum:
/* A leading `+' selects counting from the start of the file;
a leading `-' is accepted and skipped. */
if (*optarg == '+')
{
from_start = 1;
++optarg;
}
else if (*optarg == '-')
++optarg;
number = atou (optarg);
if (number == -1)
error (1, 0, "invalid number `%s'", optarg);
break;
case 'f':
forever = 1;
break;
case 'q':
header_mode = never;
break;
case 'v':
header_mode = always;
break;
default:
usage ();
}
}
if (number == -1)
number = DEFAULT_NUMBER;
/* To start printing with item `number' from the start of the file, skip
`number' - 1 items. `tail +0' is actually meaningless, but for Unix
compatibility it's treated the same as `tail +1'. */
if (from_start)
{
if (number)
--number;
}
/* Turn a count of blocks, kilobytes or megabytes into a count of bytes. */
if (unit_size > 1)
number *= unit_size;
/* `optind < argc - 1' means that at least two file operands remain. */
if (optind < argc - 1 && forever)
error (1, 0, "cannot follow the ends of multiple files");
if (header_mode == always
|| (header_mode == multiple_files && optind < argc - 1))
print_headers = 1;
/* With no file operands, tail the standard input. */
if (optind == argc)
exit_status |= tail_file ("-", number);
for (; optind < argc; ++optind)
exit_status |= tail_file (argv[optind], number);
if (have_read_stdin && close (0) < 0)
error (1, errno, "-");
if (close (1) < 0)
error (1, errno, "write error");
exit (exit_status);
}
/* Show the last NUMBER units of the file called FILENAME, printing a
   header first when `print_headers' is set.
   A FILENAME of "-" stands for the standard input, which is reported
   under the name "standard input".
   Return 0 on success, 1 if the file could not be opened, tailed or
   closed.  */
int
tail_file (filename, number)
     char *filename;
     long number;
{
  int desc;
  int status;

  if (strcmp (filename, "-") == 0)
    {
      have_read_stdin = 1;
      filename = "standard input";
      if (print_headers)
        write_header (filename);
      return tail (filename, 0, number);
    }

  desc = open (filename, O_RDONLY);
  if (desc < 0)
    {
      error (0, errno, "%s", filename);
      return 1;
    }

  if (print_headers)
    write_header (filename);
  status = tail (filename, desc, number);

  /* A failing close overrides whatever `tail' reported.  */
  if (close (desc) != 0)
    {
      error (0, errno, "%s", filename);
      return 1;
    }
  return status;
}
/* Write the banner `==> FILENAME <==' followed by a newline to file
   descriptor 1.  Every banner except the first one written by the
   program is preceded by an empty line.  */
void
write_header (filename)
     char *filename;
{
  static int first_file = 1;
  char *lead = first_file ? "==> " : "\n==> ";

  first_file = 0;
  xwrite (1, lead, strlen (lead));
  xwrite (1, filename, strlen (filename));
  xwrite (1, " <==\n", 5);
}
/* Show the last NUMBER units of file FILENAME, which is open for
   reading on descriptor FD.  The work is done by `tail_lines' when
   `unit_size' is zero and by `tail_bytes' otherwise.
   Return 0 if successful, 1 if an error occurred.  */
int
tail (filename, fd, number)
     char *filename;
     int fd;
     long number;
{
  if (unit_size == 0)
    return tail_lines (filename, fd, number);

  return tail_bytes (filename, fd, number);
}
/* Show the tail of file FILENAME, open for reading on FD, measured as
   NUMBER characters: the last NUMBER characters, or, when `from_start'
   is set, everything after the first NUMBER characters.
   Regular files are positioned with `lseek'; anything else is read
   sequentially.
   Return 0 if successful, 1 if an error occurred.  */
int
tail_bytes (filename, fd, number)
     char *filename;
     int fd;
     long number;
{
  struct stat stats;
  int regular;

  /* Use fstat instead of checking for errno == ESPIPE because
     lseek doesn't work on some special files but doesn't return an
     error, either.  */
  if (fstat (fd, &stats) != 0)
    {
      error (0, errno, "%s", filename);
      return 1;
    }
  regular = S_ISREG (stats.st_mode);

  /* Counting from the end of something that cannot seek means
     buffering the input.  */
  if (!from_start && !regular)
    return pipe_bytes (filename, fd, number);

  if (from_start)
    {
      if (regular)
        lseek (fd, number, SEEK_SET);
      else if (start_bytes (filename, fd, number))
        return 1;
    }
  else if (lseek (fd, 0L, SEEK_END) <= number)
    /* The file is no longer than what was asked for: print all of it.  */
    lseek (fd, 0L, SEEK_SET);
  else
    /* The file is longer than what was asked for: back up from the end.  */
    lseek (fd, -number, SEEK_END);

  dump_remainder (filename, fd);
  return 0;
}
/* Show the tail of file FILENAME, open for reading on FD, measured as
   NUMBER lines: the last NUMBER lines, or, when `from_start' is set,
   everything after the first NUMBER lines.
   Return 0 if successful, 1 if an error occurred.  */
int
tail_lines (filename, fd, number)
     char *filename;
     int fd;
     long number;
{
  struct stat stats;

  if (fstat (fd, &stats) != 0)
    {
      error (0, errno, "%s", filename);
      return 1;
    }

  if (from_start)
    {
      if (start_lines (filename, fd, number) != 0)
        return 1;
    }
  else if (!S_ISREG (stats.st_mode))
    return pipe_lines (filename, fd, number);
  else
    {
      long size = lseek (fd, 0L, SEEK_END);

      /* An empty regular file has no lines to search for.  */
      if (size != 0 && file_lines (filename, fd, number, size) != 0)
        return 1;
    }

  dump_remainder (filename, fd);
  return 0;
}
/* Print the last NUMBER lines from the end of file FD.
Go backward through the file, reading `BUFSIZE' bytes at a time (except
probably the first), until we hit the start of the file or have
read NUMBER newlines.
POS starts out as the length of the file (the offset of the last
byte of the file + 1).
FILENAME is used only in the diagnostic printed when a read fails.
Only the part of the wanted text that lies inside the buffer where the
search stopped is written here; the caller in this file (`tail_lines')
copies the rest of the file afterwards with `dump_remainder'.
Return 0 if successful, 1 if an error occurred. */
int
file_lines (filename, fd, number, pos)
char *filename;
int fd;
long number;
long pos;
{
char buffer[BUFSIZE];
int bytes_read;
int i; /* Index into `buffer' for scanning. */
if (number == 0)
return 0;
/* Set `bytes_read' to the size of the last, probably partial, buffer;
0 < `bytes_read' <= `BUFSIZE'. */
bytes_read = pos % BUFSIZE;
if (bytes_read == 0)
bytes_read = BUFSIZE;
/* Make `pos' a multiple of `BUFSIZE' (0 if the file is short), so that all
reads will be on block boundaries, which might increase efficiency. */
pos -= bytes_read;
lseek (fd, pos, SEEK_SET);
bytes_read = read (fd, buffer, bytes_read);
if (bytes_read == -1)
{
error (0, errno, "%s", filename);
return 1;
}
/* Count the incomplete line on files that don't end with a newline. */
if (bytes_read && buffer[bytes_read - 1] != '\n')
--number;
do
{
/* Scan backward, counting the newlines in this bufferfull. */
for (i = bytes_read - 1; i >= 0; i--)
{
/* Have we counted the requested number of newlines yet? */
if (buffer[i] == '\n' && number-- == 0)
{
/* If this newline wasn't the last character in the buffer,
print the text after it. */
if (i != bytes_read - 1)
xwrite (1, &buffer[i + 1], bytes_read - (i + 1));
return 0;
}
}
/* Not enough newlines in that bufferfull. */
if (pos == 0)
{
/* Not enough lines in the file; print the entire file. */
/* Nothing is written here: the offset is merely reset to the start. */
lseek (fd, 0L, SEEK_SET);
return 0;
}
/* Step back one whole buffer; `pos' is a multiple of `BUFSIZE' here. */
pos -= BUFSIZE;
lseek (fd, pos, SEEK_SET);
}
while ((bytes_read = read (fd, buffer, BUFSIZE)) > 0);
if (bytes_read == -1)
{
error (0, errno, "%s", filename);
return 1;
}
return 0;
}
/* Print the last NUMBER lines from the end of the standard input,
   open for reading as pipe FD.
   Buffer the text as a linked list of LBUFFERs, adding them as needed.
   FILENAME is used only in the diagnostic printed when a read fails.
   Return 0 if successful, 1 if an error occurred.  */
int
pipe_lines (filename, fd, number)
     char *filename;
     int fd;
     long number;
{
  struct linebuffer
  {
    int nbytes, nlines;
    char buffer[BUFSIZE];
    struct linebuffer *next;
  };
  typedef struct linebuffer LBUFFER;
  LBUFFER *first, *last, *tmp;
  int i;                        /* Index into buffers.  */
  int total_lines = 0;          /* Total number of newlines in all buffers.  */
  int errors = 0;

  first = last = (LBUFFER *) xmalloc (sizeof (LBUFFER));
  first->nbytes = first->nlines = 0;
  first->next = NULL;
  tmp = (LBUFFER *) xmalloc (sizeof (LBUFFER));

  /* Input is always read into a fresh buffer.  */
  while ((tmp->nbytes = read (fd, tmp->buffer, BUFSIZE)) > 0)
    {
      tmp->nlines = 0;
      tmp->next = NULL;

      /* Count the number of newlines just read.  */
      for (i = 0; i < tmp->nbytes; i++)
        if (tmp->buffer[i] == '\n')
          ++tmp->nlines;
      total_lines += tmp->nlines;

      /* If there is enough room in the last buffer read, just append the new
         one to it.  This is because when reading from a pipe, `nbytes' can
         often be very small.  */
      if (tmp->nbytes + last->nbytes < BUFSIZE)
        {
          bcopy (tmp->buffer, &last->buffer[last->nbytes], tmp->nbytes);
          last->nbytes += tmp->nbytes;
          last->nlines += tmp->nlines;
        }
      else
        {
          /* If there's not enough room, link the new buffer onto the end of
             the list, then either free up the oldest buffer for the next
             read if that would leave enough lines, or else malloc a new one.
             Some compaction mechanism is possible but probably not
             worthwhile.  */
          last = last->next = tmp;
          if (total_lines - first->nlines > number)
            {
              tmp = first;
              total_lines -= first->nlines;
              first = first->next;
            }
          else
            tmp = (LBUFFER *) xmalloc (sizeof (LBUFFER));
        }
    }
  if (tmp->nbytes == -1)
    {
      error (0, errno, "%s", filename);
      errors = 1;
      free ((char *) tmp);
      goto free_lbuffers;
    }

  free ((char *) tmp);

  /* This prevents a core dump when the pipe contains no newlines.  */
  if (number == 0)
    goto free_lbuffers;

  /* If nothing at all was read there is nothing to print, and the test
     below would examine `last->buffer[-1]'.  `last' always holds at
     least one byte once any read has returned data.  */
  if (last->nbytes == 0)
    goto free_lbuffers;

  /* Count the incomplete line on files that don't end with a newline.  */
  if (last->buffer[last->nbytes - 1] != '\n')
    {
      ++last->nlines;
      ++total_lines;
    }

  /* Run through the list, printing lines.  First, skip over unneeded
     buffers.  */
  for (tmp = first; total_lines - tmp->nlines > number; tmp = tmp->next)
    total_lines -= tmp->nlines;

  /* Find the correct beginning, then print the rest of the file.  */
  if (total_lines > number)
    {
      char *cp;

      /* Skip `total_lines' - `number' newlines.  We made sure that
         `total_lines' - `number' <= `tmp->nlines'.  */
      cp = tmp->buffer;
      for (i = total_lines - number; i; --i)
        while (*cp++ != '\n')
          /* Do nothing.  */ ;
      i = cp - tmp->buffer;
    }
  else
    i = 0;
  xwrite (1, &tmp->buffer[i], tmp->nbytes - i);

  for (tmp = tmp->next; tmp; tmp = tmp->next)
    xwrite (1, tmp->buffer, tmp->nbytes);

free_lbuffers:
  while (first)
    {
      tmp = first->next;
      free ((char *) first);
      first = tmp;
    }
  return errors;
}
/* Print the last NUMBER characters from the end of pipe FD.
This is a stripped down version of pipe_lines.
The input is kept in a linked list of CBUFFERs of `BUFSIZE' bytes each;
FILENAME is used only in the diagnostic printed when a read fails.
Return 0 if successful, 1 if an error occurred. */
int
pipe_bytes (filename, fd, number)
char *filename;
int fd;
long number;
{
struct charbuffer
{
int nbytes;
char buffer[BUFSIZE];
struct charbuffer *next;
};
typedef struct charbuffer CBUFFER;
CBUFFER *first, *last, *tmp;
int i; /* Index into buffers. */
int total_bytes = 0; /* Total characters in all buffers. */
int errors = 0;
/* Start with one empty buffer on the list and one spare to read into. */
first = last = (CBUFFER *) xmalloc (sizeof (CBUFFER));
first->nbytes = 0;
first->next = NULL;
tmp = (CBUFFER *) xmalloc (sizeof (CBUFFER));
/* Input is always read into a fresh buffer. */
while ((tmp->nbytes = read (fd, tmp->buffer, BUFSIZE)) > 0)
{
tmp->next = NULL;
total_bytes += tmp->nbytes;
/* If there is enough room in the last buffer read, just append the new
one to it. This is because when reading from a pipe, `nbytes' can
often be very small. */
if (tmp->nbytes + last->nbytes < BUFSIZE)
{
bcopy (tmp->buffer, &last->buffer[last->nbytes], tmp->nbytes);
last->nbytes += tmp->nbytes;
}
else
{
/* If there's not enough room, link the new buffer onto the end of
the list, then either free up the oldest buffer for the next
read if that would leave enough characters, or else malloc a new
one. Some compaction mechanism is possible but probably not
worthwhile. */
last = last->next = tmp;
if (total_bytes - first->nbytes > number)
{
tmp = first;
total_bytes -= first->nbytes;
first = first->next;
}
else
{
tmp = (CBUFFER *) xmalloc (sizeof (CBUFFER));
}
}
}
/* The loop ends with `tmp->nbytes' equal to 0 at end of input or -1 on a
read error; either way `tmp' is the spare buffer, not on the list. */
if (tmp->nbytes == -1)
{
error (0, errno, "%s", filename);
errors = 1;
free ((char *) tmp);
goto free_cbuffers;
}
free ((char *) tmp);
/* Run through the list, printing characters. First, skip over unneeded
buffers. */
for (tmp = first; total_bytes - tmp->nbytes > number; tmp = tmp->next)
total_bytes -= tmp->nbytes;
/* Find the correct beginning, then print the rest of the file.
We made sure that `total_bytes' - `number' <= `tmp->nbytes'. */
if (total_bytes > number)
i = total_bytes - number;
else
i = 0;
xwrite (1, &tmp->buffer[i], tmp->nbytes - i);
for (tmp = tmp->next; tmp; tmp = tmp->next)
xwrite (1, tmp->buffer, tmp->nbytes);
/* Release every buffer still on the list. */
free_cbuffers:
while (first)
{
tmp = first->next;
free ((char *) first);
first = tmp;
}
return errors;
}
/* Read and discard the first NUMBER characters of pipe FD.  Whatever
   was read past that point in the final read is written to file
   descriptor 1.  FILENAME labels the diagnostic if a read fails.
   Return 1 on error, 0 if ok.  */
int
start_bytes (filename, fd, number)
     char *filename;
     int fd;
     long number;
{
  char buffer[BUFSIZE];
  int got = 0;

  while (number > 0)
    {
      got = read (fd, buffer, BUFSIZE);
      if (got <= 0)
        break;
      number -= got;
    }

  if (got == -1)
    {
      error (0, errno, "%s", filename);
      return 1;
    }

  /* A negative NUMBER is the count of surplus bytes at the end of the
     last buffer.  */
  if (number < 0)
    xwrite (1, &buffer[got + number], -number);
  return 0;
}
/* Read and discard the first NUMBER lines of file or pipe FD.  Whatever
   follows the last skipped newline in the final buffer read is written
   to file descriptor 1.  FILENAME labels the diagnostic if a read fails.
   Return 1 on error, 0 if ok.  */
int
start_lines (filename, fd, number)
     char *filename;
     int fd;
     long number;
{
  char buffer[BUFSIZE];
  int got = 0;
  int skip = 0;

  while (number != 0)
    {
      got = read (fd, buffer, BUFSIZE);
      if (got <= 0)
        break;

      /* Advance SKIP past each byte until the last wanted newline has
         been consumed or the buffer is used up.  */
      for (skip = 0; skip < got;)
        {
          char ch = buffer[skip++];

          if (ch == '\n' && --number == 0)
            break;
        }
    }

  if (got == -1)
    {
      error (0, errno, "%s", filename);
      return 1;
    }

  if (skip < got)
    xwrite (1, &buffer[skip], got - skip);
  return 0;
}
/* Copy file FILENAME from the current position of FD up to end of file
   onto file descriptor 1.  When `forever' is nonzero, sleep for a second
   after reaching end of file and try again, until killed.
   A read error is fatal.  */
void
dump_remainder (filename, fd)
     char *filename;
     int fd;
{
  char buffer[BUFSIZE];
  int got;

  for (;;)
    {
      while ((got = read (fd, buffer, BUFSIZE)) > 0)
        xwrite (1, buffer, got);

      if (got == -1)
        error (1, errno, "%s", filename);

      if (!forever)
        return;
      sleep (1);
    }
}
/* Examine the last character of STR.  If it is one of the multiplier
   letters `b' (512), `k' (1024) or `m' (1048576), store the matching
   size in `unit_size' and remove the letter from STR in place.
   An empty STR, or one ending in anything else, is left alone.  */
void
parse_unit (str)
     char *str;
{
  char *suffix;
  int multiplier;

  if (*str == '\0')
    return;

  suffix = str + strlen (str) - 1;
  if (*suffix == 'b')
    multiplier = 512;
  else if (*suffix == 'k')
    multiplier = 1024;
  else if (*suffix == 'm')
    multiplier = 1048576;
  else
    return;

  unit_size = multiplier;
  *suffix = '\0';
}
/* Convert STR, a string of ASCII digits, into an unsigned integer.
   Return -1 if STR contains a character that is not a digit, or if the
   value is too large to be returned as a nonnegative `long' (it used to
   wrap around silently).  An empty STR yields 0, as before.  */
long
atou (str)
     char *str;
{
  /* Largest value the `long' return type can hold.  */
  unsigned long limit = ~(unsigned long) 0 >> 1;
  unsigned long value = 0;

  for (; *str >= '0' && *str <= '9'; ++str)
    {
      unsigned long digit = *str - '0';

      /* Stop before `value * 10 + digit' would exceed `limit'.  */
      if (value > (limit - digit) / 10)
        return -1;
      value = value * 10 + digit;
    }
  return *str ? -1 : (long) value;
}
/* Print a summary of both the current and the old option syntax on
stderr, with `program_name' substituted for each `%s', and exit with
status 1. */
void
usage ()
{
fprintf (stderr, "\
Usage: %s [-c [+]N[bkm]] [-n [+]N] [-fqv] [--bytes=[+]N[bkm]] [--lines=[+]N]\n\
[--follow] [--quiet] [--silent] [--verbose] [file...]\n\
%s [{-,+}Nbcfklmqv] [file...]\n", program_name, program_name);
exit (1);
}

1813
src/tr.c Normal file

File diff suppressed because it is too large Load Diff

432
src/unexpand.c Normal file
View File

@@ -0,0 +1,432 @@
/* unexpand - convert spaces to tabs
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* By default, convert only maximal strings of initial blanks and tabs
into tabs.
Preserves backspace characters in the output; they decrement the
column count for tab calculations.
The default action is equivalent to -8.
Options:
--tabs=tab1[,tab2[,...]]
-t tab1[,tab2[,...]]
-tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
spaces apart instead of the default 8. Otherwise,
set the tabs at columns tab1, tab2, etc. (numbered from
0); replace any tabs beyond the tabstops given with
single spaces.
--all
-a Use tabs wherever they would replace 2 or more spaces,
not just at the beginnings of lines.
David MacKenzie <djm@ai.mit.edu> */
#define _GNU_SOURCE
#include <ctype.h>
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
#ifdef isascii
#define ISDIGIT(c) (isascii((c)) && isdigit((c)))
#else
#define ISDIGIT(c) (isdigit((c)))
#endif
/* The number of bytes added at a time to the amount of memory
allocated for the output line. */
#define OUTPUT_BLOCK 256
/* The number of bytes added at a time to the amount of memory
allocated for the list of tabstops. */
#define TABLIST_BLOCK 256
char *xmalloc ();
char *xrealloc ();
void error ();
FILE *next_file ();
void add_tabstop ();
void parse_tabstops ();
void unexpand ();
void usage ();
void validate_tabstops ();
/* If nonzero, convert blanks even after nonblank characters have been
read on the line. */
int convert_entire_line;
/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
int tab_size;
/* Array of the explicit column numbers of the tab stops;
after `tab_list' is exhausted, the rest of the line is printed
unchanged. The first column is column 0. */
int *tab_list;
/* The index of the first invalid element of `tab_list',
where the next element can be added. */
int first_free_tab;
/* Null-terminated array of input filenames. */
char **file_list;
/* Default for `file_list' if no files are given on the command line. */
char *stdin_argv[] =
{
"-", NULL
};
/* Nonzero if we have ever read standard input. */
int have_read_stdin;
/* Status to return to the system. */
int exit_status;
/* The name this program was run with. */
char *program_name;
/* Long option names handed to `getopt_long' in `main'.
`--tabs' takes an argument (second field 1) and is handled like `-t';
`--all' takes none and is handled like `-a'. The all-zero entry
terminates the table. */
struct option longopts[] =
{
{"tabs", 1, NULL, 't'},
{"all", 0, NULL, 'a'},
{NULL, 0, NULL, 0}
};
/* Entry point of `unexpand'.
   Collect the options, including tab stops spelled directly as option
   digits and commas (`-4,8'), settle on either a uniform `tab_size' or
   an explicit `tab_list', convert all the input files and exit with
   `exit_status'.  */
void
main (argc, argv)
     int argc;
     char **argv;
{
  int tabval = -1;              /* Tab stop being assembled from digits, or -1.  */
  int c;                        /* Option character.  */

  program_name = argv[0];
  have_read_stdin = 0;
  exit_status = 0;
  convert_entire_line = 0;
  tab_list = NULL;
  first_free_tab = 0;

  for (;;)
    {
      c = getopt_long (argc, argv, "at:,0123456789", longopts, (int *) 0);
      if (c == EOF)
        break;

      switch (c)
        {
        case '?':
          usage ();             /* Exits.  */
        case 'a':
          convert_entire_line = 1;
          break;

        case 't':
          convert_entire_line = 1;
          parse_tabstops (optarg);
          break;

        case ',':
          /* A comma ends the tab stop spelled by the preceding digits.  */
          add_tabstop (tabval);
          tabval = -1;
          break;

        default:
          /* A digit: append it to the tab stop being assembled.  */
          tabval = (tabval == -1 ? 0 : tabval) * 10 + c - '0';
          break;
        }
    }

  /* Store the tab stop that was still being assembled, if any.  */
  add_tabstop (tabval);
  validate_tabstops (tab_list, first_free_tab);

  switch (first_free_tab)
    {
    case 0:
      tab_size = 8;
      break;
    case 1:
      tab_size = tab_list[0];
      break;
    default:
      tab_size = 0;
      break;
    }

  file_list = (optind == argc) ? stdin_argv : &argv[optind];

  unexpand ();

  if (have_read_stdin && fclose (stdin) == EOF)
    error (1, errno, "-");
  if (fclose (stdout) == EOF)
    error (1, errno, "write error");
  exit (exit_status);
}
/* Split STOPS, a list of decimal tab stops separated by commas or
   blanks, and append each value to the list of tab stops.
   Any other character is a fatal error.  */
void
parse_tabstops (stops)
     char *stops;
{
  int tabval = -1;              /* Value being assembled, or -1 for none.  */
  char ch;

  while ((ch = *stops++) != '\0')
    {
      if (ch == ',' || isblank (ch))
        {
          /* A separator ends the current value, if there is one.  */
          add_tabstop (tabval);
          tabval = -1;
          continue;
        }

      if (!ISDIGIT (ch))
        {
          error (1, 0, "tab size contains an invalid character");
          continue;
        }

      if (tabval == -1)
        tabval = 0;
      tabval = tabval * 10 + ch - '0';
    }

  add_tabstop (tabval);
}
/* Add tab stop TABVAL to the end of `tab_list', except
   if TABVAL is -1, do nothing.
   `tab_list' is enlarged by TABLIST_BLOCK entries each time it is full.  */
void
add_tabstop (tabval)
     int tabval;
{
  if (tabval == -1)
    return;

  /* The size given to `xrealloc' is in bytes, so the entry count has to
     be scaled by the size of an entry; otherwise storing past the first
     few stops overruns the allocation.  The casts keep the argument
     types the same as before, since `xrealloc' is declared here without
     a prototype.  */
  if (first_free_tab % TABLIST_BLOCK == 0)
    tab_list = (int *) xrealloc ((char *) tab_list,
                                 (unsigned) ((first_free_tab + TABLIST_BLOCK)
                                             * sizeof (tab_list[0])));
  tab_list[first_free_tab++] = tabval;
}
/* Verify that the ENTRIES tab stops in TABS are all nonzero and
   strictly increasing.  The first violation found is a fatal error.  */
void
validate_tabstops (tabs, entries)
     int *tabs;
     int entries;
{
  int previous = 0;

  while (entries-- > 0)
    {
      int stop = *tabs++;

      if (stop == 0)
        error (1, 0, "tab size cannot be 0");
      if (stop <= previous)
        error (1, 0, "tab sizes must be ascending");
      previous = stop;
    }
}
/* Change spaces to tabs, writing to stdout.
   Read each file in `file_list', in order.
   Runs of blanks are not printed as they are read; their width is
   accumulated in `pending' and emitted, as tabs where possible, when the
   next nonblank character arrives.  */
void
unexpand ()
{
  FILE *fp;                     /* Input stream.  */
  int c;                        /* Each input character.  */
  /* Index in `tab_list' of next tabstop: */
  int tab_index = 0;            /* For calculating width of pending tabs.  */
  int print_tab_index = 0;      /* For printing as many tabs as possible.  */
  int column = 0;               /* Column on screen of next char.  */
  int next_tab_column;          /* Column the next tab stop is on.  */
  int convert = 1;              /* If nonzero, perform translations.  */
  int pending = 0;              /* Pending columns of blanks.  */

  fp = next_file ((FILE *) NULL);
  /* None of the input files could be opened; `next_file' has already
     reported that, and there is no stream to read from.  */
  if (fp == NULL)
    return;

  for (;;)
    {
      c = getc (fp);
      if (c == EOF)
        {
          fp = next_file (fp);
          if (fp == NULL)
            break;              /* No more files.  */
          else
            continue;
        }

      if (c == ' ' && convert)
        {
          ++pending;
          ++column;
        }
      else if (c == '\t' && convert)
        {
          if (tab_size == 0)
            {
              /* Do not let tab_index == first_free_tab;
                 stop when it is 1 less.  */
              while (tab_index < first_free_tab - 1
                     && column >= tab_list[tab_index])
                tab_index++;
              next_tab_column = tab_list[tab_index];
              if (tab_index < first_free_tab - 1)
                tab_index++;
              if (column >= next_tab_column)
                {
                  convert = 0;  /* Ran out of tab stops.  */
                  goto flush_pend;
                }
            }
          else
            {
              next_tab_column = column + tab_size - column % tab_size;
            }
          pending += next_tab_column - column;
          column = next_tab_column;
        }
      else
        {
        flush_pend:
          /* Flush pending spaces.  Print as many tabs as possible,
             then print the rest as spaces.  */
          if (pending == 1)
            {
              putchar (' ');
              pending = 0;
            }
          /* Go back to the column where the pending blanks started.  */
          column -= pending;
          while (pending != 0)
            {
              if (tab_size == 0)
                {
                  /* Do not let print_tab_index == first_free_tab;
                     stop when it is 1 less.  This loop must test the
                     index that it advances, or it cannot terminate.  */
                  while (print_tab_index < first_free_tab - 1
                         && column >= tab_list[print_tab_index])
                    print_tab_index++;
                  next_tab_column = tab_list[print_tab_index];
                  if (print_tab_index < first_free_tab - 1)
                    print_tab_index++;
                }
              else
                {
                  next_tab_column = column + tab_size - column % tab_size;
                }
              if (next_tab_column - column <= pending)
                {
                  putchar ('\t');
                  pending -= next_tab_column - column;
                  column = next_tab_column;
                }
              else
                {
                  /* The next tab stop is too far away; finish with
                     spaces.  */
                  --print_tab_index;
                  column += pending;
                  while (pending != 0)
                    {
                      putchar (' ');
                      pending--;
                    }
                }
            }

          if (convert)
            {
              if (c == '\b')
                {
                  if (column > 0)
                    --column;
                }
              else
                {
                  ++column;
                  /* Unless the whole line is converted, stop converting
                     at the first nonblank character.  */
                  if (convert_entire_line == 0)
                    convert = 0;
                }
            }

          putchar (c);

          if (c == '\n')
            {
              tab_index = print_tab_index = 0;
              column = pending = 0;
              convert = 1;
            }
        }
    }
}
/* Finish with the stream FP, unless it is NULL, and return a stream
   open for reading on the next name in `file_list'.
   The name `-' means the standard input, which is never closed here;
   only its error and end-of-file indicators are cleared.
   Names that cannot be opened are reported and skipped.
   Return NULL when `file_list' is exhausted.  */
FILE *
next_file (fp)
     FILE *fp;
{
  static char *prev_file;       /* Name that FP was opened under.  */
  char *name;
  FILE *stream;

  if (fp != NULL)
    {
      if (ferror (fp))
        {
          error (0, errno, "%s", prev_file);
          exit_status = 1;
        }
      if (fp == stdin)
        clearerr (fp);          /* Also clear EOF.  */
      else if (fclose (fp) == EOF)
        {
          error (0, errno, "%s", prev_file);
          exit_status = 1;
        }
    }

  for (name = *file_list++; name != NULL; name = *file_list++)
    {
      if (name[0] == '-' && name[1] == '\0')
        {
          have_read_stdin = 1;
          stream = stdin;
        }
      else
        stream = fopen (name, "r");

      if (stream != NULL)
        {
          prev_file = name;
          return stream;
        }

      error (0, errno, "%s", name);
      exit_status = 1;
    }
  return NULL;
}
/* Print a summary of the accepted options on stderr, with
`program_name' substituted for `%s', and exit with status 1. */
void
usage ()
{
fprintf (stderr, "\
Usage: %s [-tab1[,tab2[,...]]] [-t tab1[,tab2[,...]]] [-a]\n\
[--tabs=tab1[,tab2[,...]]] [--all] [file...]\n",
program_name);
exit (1);
}

321
src/uniq.c Normal file
View File

@@ -0,0 +1,321 @@
/* uniq -- remove duplicate lines from a sorted file
Copyright (C) 1986, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Richard Stallman and David MacKenzie. */
#define _GNU_SOURCE
#include <ctype.h>
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
#include "linebuffer.h"
/* Smaller of X and Y.  Each argument appears twice in the expansion,
   so arguments with side effects would be evaluated more than once. */
#define min(x, y) ((x) < (y) ? (x) : (y))
/* Old-style forward declarations (no parameter lists) for the functions
   used before their definitions below.  error () has no definition in
   this file; presumably it is supplied by a separate source file. */
char *find_field ();
int different ();
void check_file ();
void error ();
void usage ();
void writeline ();
/* Number of fields to skip on each line when doing comparisons.
   Set by -f, --skip-fields, or the digit options (-N). */
int skip_fields;
/* Number of chars to skip after skipping any fields.
   Set by -s, --skip-chars, or a leading +N argument. */
int skip_chars;
/* Number of chars to compare; if 0, compare everything that remains
   after the skipped part of each line.  Set by -w or --check-chars. */
int check_chars;
/* Whether a repeat count is printed in front of each output line. */
enum countmode
{
count_occurrences, /* -c Print count before output lines. */
count_none /* Default. Do not print counts. */
};
/* Whether and how to precede the output lines with a count of the number of
times they occurred in the input. */
enum countmode countmode;
/* Selects which input lines writeline () lets through. */
enum output_mode
{
output_repeated, /* -d Only lines that are repeated. */
output_unique, /* -u Only lines that are not repeated. */
output_all /* Default. Print first copy of each line. */
};
/* Which lines to output. */
enum output_mode mode;
/* The name this program was run with (argv[0]); used in the usage
   message. */
char *program_name;
/* Long option table handed to getopt_long.  Each entry names a long
   option, says whether it takes an argument (second field: 0 = no,
   1 = yes), and gives the short option letter that getopt_long returns
   for it, so long and short forms share one case in main's switch.
   The all-zero entry terminates the table. */
struct option longopts[] =
{
{"count", 0, NULL, 'c'},
{"repeated", 0, NULL, 'd'},
{"unique", 0, NULL, 'u'},
{"skip-fields", 1, NULL, 'f'},
{"skip-chars", 1, NULL, 's'},
{"check-chars", 1, NULL, 'w'},
{NULL, 0, NULL, 0}
};
/* Entry point of uniq: parse the options into the file-level settings,
   pick the input and output names (both default to "-"), and hand them
   to check_file.  Exits with status 0 when check_file returns. */
void
main (argc, argv)
     int argc;
     char *argv[];
{
  char *infile = "-";
  char *outfile = "-";
  int opt;

  program_name = argv[0];

  /* Defaults, in effect when no option overrides them. */
  countmode = count_none;
  mode = output_all;
  check_chars = 0;
  skip_fields = 0;
  skip_chars = 0;

  for (;;)
    {
      opt = getopt_long (argc, argv, "0123456789cdf:s:uw:", longopts,
                         (int *) 0);
      if (opt == EOF)
        break;

      switch (opt)
        {
        case 'c':
          countmode = count_occurrences;
          break;

        case 'd':
          mode = output_repeated;
          break;

        case 'u':
          mode = output_unique;
          break;

        case 'f':               /* Like '-#'. */
          skip_fields = atoi (optarg);
          break;

        case 's':               /* Like '+#'. */
          skip_chars = atoi (optarg);
          break;

        case 'w':
          check_chars = atoi (optarg);
          break;

        /* Each digit option appends one decimal digit to the field
           count, so `-12' arrives as '1' followed by '2'. */
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          skip_fields = skip_fields * 10 + opt - '0';
          break;

        default:
          usage ();
        }
    }

  /* Old-style `+N' arguments: every leading one sets skip_chars, so the
     last of them wins. */
  while (optind < argc && *argv[optind] == '+')
    {
      skip_chars = atoi (argv[optind]);
      optind++;
    }

  /* At most two operands may remain: input name, then output name. */
  if (optind < argc)
    {
      infile = argv[optind++];
      if (optind < argc)
        {
          outfile = argv[optind++];
          if (optind < argc)
            usage ();           /* Extra arguments. */
        }
    }

  check_file (infile, outfile);
  exit (0);
}
/* Read lines from INFILE, collapse each run of adjacent matching lines,
   and pass one line per run to writeline for output on OUTFILE.
   A name of "-" selects standard input or standard output.
   Any failure to open, read, write or close is fatal (exit status 1). */
void
check_file (infile, outfile)
     char *infile, *outfile;
{
  FILE *in;
  FILE *out;
  struct linebuffer lb1, lb2;
  struct linebuffer *cur = &lb1;    /* Line being examined. */
  struct linebuffer *prev = &lb2;   /* First line of the current run. */
  struct linebuffer *tmp;
  char *prevfield, *thisfield;
  int prevlen, thislen;
  int match_count = 0;              /* Repeats seen after the first line. */

  in = strcmp (infile, "-") == 0 ? stdin : fopen (infile, "r");
  if (in == NULL)
    error (1, errno, "%s", infile);

  out = strcmp (outfile, "-") == 0 ? stdout : fopen (outfile, "w");
  if (out == NULL)
    error (1, errno, "%s", outfile);

  initbuffer (cur);
  initbuffer (prev);

  /* A zero return from readline means no line was read; if that happens
     on the very first attempt, nothing at all is written. */
  if (readline (prev, in) != 0)
    {
      prevfield = find_field (prev);
      prevlen = prev->length - (prevfield - prev->buffer);

      while (!feof (in) && readline (cur, in) != 0)
        {
          thisfield = find_field (cur);
          thislen = cur->length - (thisfield - cur->buffer);

          if (different (thisfield, prevfield, thislen, prevlen))
            {
              /* The run ended: emit its first line, then let the new
                 line start the next run.  The buffers trade roles, so
                 the old one is reused for the next read. */
              writeline (prev, out, match_count);
              match_count = 0;

              tmp = prev;
              prev = cur;
              cur = tmp;

              prevfield = thisfield;
              prevlen = thislen;
            }
          else
            match_count++;
        }

      /* Flush the run that was still open when the input ended. */
      writeline (prev, out, match_count);
    }

  if (ferror (in) || fclose (in) == EOF)
    error (1, errno, "error reading %s", infile);
  if (ferror (out) || fclose (out) == EOF)
    error (1, errno, "error writing %s", outfile);

  free (lb1.buffer);
  free (lb2.buffer);
}
/* Given a linebuffer LINE, return a pointer to the beginning of the
   part of the line that takes part in comparisons.

   First skip_fields fields are passed over, where a field is a run of
   blanks (possibly empty) followed by a run of non-blanks; then
   skip_chars further characters are passed over.  Skipping never goes
   beyond the end of the line, so the result may point just past the
   last character (LINE->buffer + LINE->length). */
char *
find_field (line)
     struct linebuffer *line;
{
  register int count;
  register char *lp = line->buffer;
  register int size = line->length;
  register int i = 0;

  for (count = 0; count < skip_fields && i < size; count++)
    {
      /* The casts matter: where plain char is signed, a byte with the
         high bit set is negative, and handing a negative value other
         than EOF to a <ctype.h> classifier is undefined. */
      while (i < size && isblank ((unsigned char) lp[i]))
        i++;
      while (i < size && !isblank ((unsigned char) lp[i]))
        i++;
    }

  for (count = 0; count < skip_chars && i < size; count++)
    i++;

  return lp + i;
}
/* Compare the fields OLD and NEW, which are OLDLEN and NEWLEN bytes
   long and point at the start of the text to compare rather than at
   the start of their lines.  When check_chars is nonzero, only that
   many leading bytes of each field are considered.
   Return zero if the fields match, nonzero if they do not. */
int
different (old, new, oldlen, newlen)
     char *old;
     char *new;
     int oldlen;
     int newlen;
{
  register int order;
  int common;

  if (check_chars != 0)
    {
      oldlen = min (oldlen, check_chars);
      newlen = min (newlen, check_chars);
    }

  /* Compare the part both fields have; if that part is equal, the
     fields match only when their lengths are equal too. */
  common = min (oldlen, newlen);
  order = memcmp (old, new, common);
  return order != 0 ? order : oldlen - newlen;
}
/* Write the line held in LINE to STREAM, followed by a newline, unless
   the output mode filters it out.  LINECOUNT is the number of repeats
   seen after the first occurrence, so the line occurred LINECOUNT + 1
   times; that total is printed in front of the line when counts were
   requested. */
void
writeline (line, stream, linecount)
     struct linebuffer *line;
     FILE *stream;
     int linecount;
{
  int repeated = (linecount != 0);

  switch (mode)
    {
    case output_unique:
      if (repeated)
        return;
      break;

    case output_repeated:
      if (!repeated)
        return;
      break;

    default:
      break;
    }

  if (countmode == count_occurrences)
    fprintf (stream, "%7d\t", linecount + 1);

  /* The text is written by length; it is not treated as a
     NUL-terminated string. */
  fwrite (line->buffer, sizeof (char), line->length, stream);
  putc ('\n', stream);
}
/* Print a summary of the command-line syntax on stderr, with
   program_name substituted for the %s, then exit with status 1.
   This function does not return. */
void
usage ()
{
/* The continuation lines of the literal are part of the message, so
   whatever precedes their text on the line would be printed too. */
fprintf (stderr, "\
Usage: %s [-cdu] [-f skip-fields] [-s skip-chars] [-w check-chars]\n\
[-#skip-fields] [+#skip-chars] [--count] [--repeated] [--unique]\n\
[--skip-fields=skip-fields] [--skip-chars=skip-chars]\n\
[--check-chars=check-chars] [infile] [outfile]\n",
program_name);
exit (1);
}

231
src/wc.c Normal file
View File

@@ -0,0 +1,231 @@
/* wc - print the number of bytes, words, and lines in files
Copyright (C) 1985, 1991 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written by Paul Rubin, phr@ocf.berkeley.edu
and David MacKenzie, djm@gnu.ai.mit.edu. */
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
/* Size of atomic reads.  wc () declares an automatic array of this many
   bytes and asks read () for at most this many bytes per call. */
#define BUFFER_SIZE (16 * 1024)
/* Old-style forward declarations (no parameter lists).  error () has no
   definition in this file; presumably it is supplied by a separate
   source file. */
void error ();
void wc ();
void wc_file ();
void write_counts ();
/* Cumulative number of lines, words, and chars in all files so far.
   wc () adds each input's counts to these; main prints them as the
   "total" line when more than one file operand was given. */
unsigned long total_lines, total_words, total_chars;
/* Which counts to print.  Each is 0 or 1; if no option selects any of
   them, main turns all three on. */
int print_lines, print_words, print_chars;
/* Nonzero if we have ever read the standard input.  main uses it to
   decide whether to close file descriptor 0 before exiting. */
int have_read_stdin;
/* The name this program was run with. */
char *program_name;
/* The error code to return to the system. */
int exit_status;
/* Long option table handed to getopt_long.  None of the options takes
   an argument (second field 0); the last field is the short option
   letter returned for the long form.  --bytes and --chars both map to
   'c'.  The all-zero entry terminates the table. */
struct option longopts[] =
{
{"bytes", 0, NULL, 'c'},
{"chars", 0, NULL, 'c'},
{"lines", 0, NULL, 'l'},
{"words", 0, NULL, 'w'},
{NULL, 0, NULL, 0}
};
/* Entry point of wc: parse the options, count every file operand (or
   standard input when there is none), print a "total" line when more
   than one operand was given, and exit with exit_status. */
void
main (argc, argv)
     int argc;
     char **argv;
{
  int opt;
  int nfiles;

  program_name = argv[0];
  exit_status = 0;
  print_lines = print_words = print_chars = 0;
  total_lines = total_words = total_chars = 0;

  for (;;)
    {
      opt = getopt_long (argc, argv, "clw", longopts, (int *) 0);
      if (opt == EOF)
        break;

      if (opt == 'c')
        print_chars = 1;
      else if (opt == 'l')
        print_lines = 1;
      else if (opt == 'w')
        print_words = 1;
      else
        {
          fprintf (stderr, "Usage: %s [-clw] [--bytes] [--chars] [--lines] [--words] [file...]\n", argv[0]);
          exit (1);
        }
    }

  /* With no selection at all, report all three counts. */
  if (!print_lines && !print_words && !print_chars)
    print_lines = print_words = print_chars = 1;

  nfiles = argc - optind;
  if (nfiles != 0)
    {
      while (optind < argc)
        {
          wc_file (argv[optind]);
          optind++;
        }
      if (nfiles > 1)
        write_counts (total_lines, total_words, total_chars, "total");
    }
  else
    {
      /* No operands: count standard input; the empty name keeps
         write_counts from printing a file name. */
      have_read_stdin = 1;
      wc (0, "");
    }

  if (have_read_stdin && close (0))
    error (1, errno, "-");

  exit (exit_status);
}
/* Count the contents of FILE and print the result via wc ().
   A name of "-" means standard input (file descriptor 0), which is not
   closed here.  Failure to open or close FILE is reported and recorded
   in exit_status, but is not fatal. */
void
wc_file (file)
     char *file;
{
  int fd;

  if (strcmp (file, "-") == 0)
    {
      have_read_stdin = 1;
      wc (0, file);
      return;
    }

  fd = open (file, O_RDONLY);
  if (fd == -1)
    {
      error (0, errno, "%s", file);
      exit_status = 1;
      return;
    }

  wc (fd, file);

  if (close (fd) != 0)
    {
      error (0, errno, "%s", file);
      exit_status = 1;
    }
}
/* Count the lines, words and chars available on file descriptor FD,
   print them with write_counts under the name FILE, and add them to
   the running totals.

   A line is counted for every newline.  A word is a maximal run of
   bytes other than newline, carriage return, form feed, tab, vertical
   tab and space.  A read error is reported and recorded in exit_status;
   the counts gathered up to that point are still printed and added.  */
void
wc (fd, file)
     int fd;
     char *file;
{
  char buf[BUFFER_SIZE];
  register int bytes_read;
  register int in_word = 0;
  register unsigned long lines, words, chars;
  struct stat stats;
  off_t current_pos;
  /* Declared here, in the same old style as the file's other forward
     declarations, so that the off_t result is not mistaken for an int
     if no header has declared lseek.  */
  off_t lseek ();

  lines = words = chars = 0;

  /* When only the char count is wanted and FD is a regular file, the
     answer can be computed without reading.  FD need not be positioned
     at the start of the file (for instance standard input redirected
     from a file that an earlier command has partly consumed), so count
     only what lies between the current offset and the end.  The offset
     itself is left where it was.  If the offset cannot be determined,
     fall back to reading.  */
  if (print_chars && !print_words && !print_lines
      && fstat (fd, &stats) == 0 && S_ISREG (stats.st_mode)
      && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1)
    {
      /* The offset may lie beyond the end of the file.  */
      if (stats.st_size > current_pos)
        chars = stats.st_size - current_pos;
    }
  else
    {
      while ((bytes_read = read (fd, buf, BUFFER_SIZE)) > 0)
        {
          register char *p = buf;

          chars += bytes_read;
          do
            {
              switch (*p++)
                {
                case '\n':
                  lines++;
                  /* Fall through. */
                case '\r':
                case '\f':
                case '\t':
                case '\v':
                case ' ':
                  /* A separator ends the word in progress, if any.  */
                  if (in_word)
                    {
                      in_word = 0;
                      words++;
                    }
                  break;
                default:
                  in_word = 1;
                  break;
                }
            }
          while (--bytes_read);
        }
      if (bytes_read < 0)
        {
          error (0, errno, "%s", file);
          exit_status = 1;
        }
      /* Input that ends in the middle of a word still contains it.  */
      if (in_word)
        words++;
    }

  write_counts (lines, words, chars, file);
  total_lines += lines;
  total_words += words;
  total_chars += chars;
}
/* Print one line of results on standard output: whichever of the line
   count LC, word count WC and char count CC are selected by the
   print_* flags, each right-justified in 7 columns and separated by
   single spaces, followed by FILE unless FILE is the empty string. */
void
write_counts (lc, wc, cc, file)
     unsigned long lc, wc, cc;
     char *file;
{
  int printed = 0;              /* Nonzero once a count has been written. */

  if (print_lines)
    {
      printf ("%7lu", lc);
      printed = 1;
    }

  if (print_words)
    {
      if (printed)
        putchar (' ');
      printf ("%7lu", wc);
      printed = 1;
    }

  if (print_chars)
    {
      if (printed)
        putchar (' ');
      printf ("%7lu", cc);
    }

  if (*file != '\0')
    printf (" %s", file);
  putchar ('\n');
}