/* recut -- reorder, duplicate and remove sections from each line of files
 *
 * Like cut(1), but fields may be output multiple times and reordered.
 *
 * TODO: Implement -c
 *
 * TODO: Add --complement as in cut(1).
 *
 * TODO: Localize messages
 *
 * TODO: Option to omit delimiter before non-existing fields: Input
 * "one,two" with "-f 1-3" would not output the non-existing field 3
 *
 * Author: Bert Bos
 * Created: 30 Dec 2013
 */

/* getline() and ssize_t are POSIX rather than ISO C. Ask for them
 * explicitly, so that their declarations are also visible when compiling
 * in a strict mode such as -std=c11. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <assert.h>
#include <err.h>
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>

#ifndef VERSION
#define VERSION "0.1"
#endif

#ifndef PACKAGE
#define PACKAGE "recut"
#endif

#define INCR 50                 /* Range lists grow INCR entries at a time */

typedef enum {NONE, BYTE, CHARACTER, FIELD} FieldType;

/* A list of len ranges. Range i runs from from[i] up to and including
 * to[i]. In a parsed field list, to[i] == -1 means "until the end". */
typedef struct {int len; int *from; int *to;} RangeList;


/* usage -- print usage message and exit */
static void usage(void)
{
  fprintf(stderr,
          "Usage: recut -b fields [-o fields] [file...]\n"
          "       recut -c fields [-o fields] [file...]\n"
          "       recut -f fields [-s] [-d delim] [-o fields] [file...]\n"
          "Try 'man recut' for long options\n");
  exit(EX_USAGE);
}


/* add_to_list -- add a range to a list of ranges
 *
 * The arrays are grown in steps of INCR entries. Exits with EX_OSERR if
 * memory cannot be allocated.
 */
static void add_to_list(RangeList *list, const int from, const int to)
{
  assert(list);
  assert(list->len >= 0);

  /* The arrays always have room for len rounded up to a multiple of
   * INCR, so they only need to grow when len is exactly on a multiple
   * (which includes the empty list, whose arrays may still be NULL). */
  if (list->len % INCR == 0) {
    size_t capacity = (size_t)list->len + INCR;
    int *grown;

    grown = realloc(list->from, capacity * sizeof(*grown));
    if (!grown) err(EX_OSERR, NULL);
    list->from = grown;

    grown = realloc(list->to, capacity * sizeof(*grown));
    if (!grown) err(EX_OSERR, NULL);
    list->to = grown;
  }

  list->from[list->len] = from;
  list->to[list->len] = to;
  list->len++;
}


/* delete_list -- free the memory used by a list */
static void delete_list(RangeList x)
{
  free(x.from);
  free(x.to);
  /* x isn't reset to {0,NULL,NULL}, it isn't meant to be reused */
}


/* parse_number -- parse a decimal field number >= 1 and advance *s past it
 *
 * The caller guarantees that **s is a digit. Exits with EX_USAGE if the
 * number is 0 or does not fit. INT_MAX itself is rejected as well, so
 * that a loop counter can step one past the largest field number
 * without overflowing.
 */
static int parse_number(char **s)
{
  long n;

  errno = 0;
  n = strtol(*s, s, 10);
  if (n <= 0) errx(EX_USAGE, "Field numbers must be >= 1");
  if (errno == ERANGE || n >= INT_MAX)
    errx(EX_USAGE, "Field number too large");
  return (int)n;
}


/* parse_range -- parse a range specification
 *
 * Accepts "N", "N-M", "N-" and "-M" (also a bare "-") at *s, appends the
 * range to result and leaves *s just after the range. An omitted start
 * means 1; an omitted end is stored as -1 ("until the end").
 */
static void parse_range(RangeList *result, char **s)
{
  int lo, hi;

  assert(**s == '-' || ('0' <= **s && **s <= '9'));

  lo = (**s == '-') ? 1 : parse_number(s);

  if (**s != '-') {
    hi = lo;                    /* A single number N is the range N-N */
  } else {
    (*s)++;
    if ('0' <= **s && **s <= '9') hi = parse_number(s);
    else hi = -1;               /* Open-ended range */
  }
  add_to_list(result, lo, hi);
}


/* parse_fields -- parse a field list specification
 *
 * The list consists of ranges, each optionally followed by one ',' or
 * ' '. Calls usage() if result already holds a field list and exits
 * with EX_USAGE if s contains anything else.
 */
static void parse_fields(RangeList *result, char *s)
{
  assert(result);
  assert(s);

  if (result->len) usage();     /* Only 1 field list allowed */

  while (*s == '-' || ('0' <= *s && *s <= '9')) {
    parse_range(result, &s);
    if (*s == ',' || *s == ' ') s++;
  }
  if (*s) errx(EX_USAGE, "Fields can contain only 0-9, ',', '-' and space");
}


/* print_bytes -- print the bytes from line in the given order
 *
 * delim is written before every position except the first, including
 * positions that lie beyond the end of a closed range's line (for which
 * no byte is written). The output ends with a newline.
 */
static void print_bytes(const char *line, size_t len, RangeList fields,
                        const char *delim)
{
  const char *d = "";           /* No delim before first byte */
  int i, j;

  /* Fields are 1-based, but the first byte in line is line[0] */
  for (i = 0; i < fields.len; i++) {
    const int first = fields.from[i] - 1;
    const int last = fields.to[i] - 1;

    if (fields.to[i] == -1) {   /* -1 means: until the end of the line */
      for (j = first; (size_t)j < len; j++, d = delim) {
        fputs(d, stdout);
        putchar(line[j]);
      }
    } else if (first <= last) {
      for (j = first; j <= last; j++, d = delim) {
        fputs(d, stdout);
        if ((size_t)j < len) putchar(line[j]);
      }
    } else {                    /* Reversed range: count down */
      for (j = first; j >= last; j--, d = delim) {
        fputs(d, stdout);
        if ((size_t)j < len) putchar(line[j]);
      }
    }
  }
  putchar('\n');
}


/* print_field -- output one range of a line
 *
 * Writes pre and then the bytes of the 1-based field number field. If
 * the line has no such field, only pre is written.
 */
static void print_field(const char *line, RangeList ranges, int field,
                        const char *pre)
{
  int i;

  fputs(pre, stdout);
  field--;                      /* 1st field is array index 0 */
  if (field < ranges.len)
    for (i = ranges.from[field]; i <= ranges.to[field]; i++)
      putchar(line[i]);
}


/* print_fields -- output ranges of the line according to the fields spec */
static void print_fields(const char *line, RangeList ranges, RangeList fields,
                         const char *delim)
{
  const char *d = "";           /* No delim before first field */
  int i, j;

  for (i = 0; i < fields.len; i++) {
    if (fields.to[i] == -1) {   /* -1 means: until the end of the line */
      for (j = fields.from[i]; j <= ranges.len; j++, d = delim)
        print_field(line, ranges, j, d);
    } else if (fields.from[i] <= fields.to[i]) {
      for (j = fields.from[i]; j <= fields.to[i]; j++, d = delim)
        print_field(line, ranges, j, d);
    } else {                    /* Reversed range: count down */
      for (j = fields.from[i]; j >= fields.to[i]; j--, d = delim)
        print_field(line, ranges, j, d);
    }
  }
  putchar('\n');
}


/* parse_line -- find the delimited fields in the line
 *
 * Replaces the contents of ranges by the byte offsets (first and last,
 * inclusive) of every field in line, and sets *has_delim to whether
 * delim occurred at all. delim must not be empty.
 */
static void parse_line(RangeList *ranges, bool *has_delim, char *line,
                       ssize_t len, char *delim)
{
  const size_t delimlen = strlen(delim);
  char *p, *q;

  assert(ranges);
  assert(delimlen > 0);         /* An empty delim would match forever */

  ranges->len = 0;              /* Reuse the arrays of the previous line */
  *has_delim = false;
  p = line;
  while ((q = strstr(p, delim))) {
    add_to_list(ranges, (int)(p - line), (int)(q - line) - 1);
    p = q + delimlen;
    *has_delim = true;
  }
  /* Whatever follows the last delimiter is the final field */
  add_to_list(ranges, (int)(p - line), (int)len - 1);
}


/* process_file -- process one file
 *
 * Reads the file called name ("-" means standard input) line by line and
 * prints the selected parts of each line. Exits with EX_NOINPUT if the
 * file cannot be opened and with EX_IOERR on a read error.
 */
static void process_file(const char *name, FieldType type, RangeList fields,
                         char *delim, bool only_delimited, char *output_delim)
{
  RangeList ranges = {0, NULL, NULL};
  bool has_delim;
  char *line = NULL;
  size_t linesize = 0;
  ssize_t len;
  FILE *f;

  if (strcmp(name, "-") == 0) f = stdin;
  else if (!(f = fopen(name, "r"))) err(EX_NOINPUT, "%s", name);

  /* Loop over lines */
  while ((len = getline(&line, &linesize, f)) != -1) {

    if (len > 0 && line[len-1] == '\n') line[--len] = '\0'; /* Remove newline */

    if (type == FIELD) {
      parse_line(&ranges, &has_delim, line, len, delim);
      if (!only_delimited || has_delim)
        print_fields(line, ranges, fields, output_delim);
    } else if (type == BYTE) {
      print_bytes(line, (size_t)len, fields, output_delim);
    } else {
      assert(type == CHARACTER);
      errx(EX_SOFTWARE, "-c is not yet implemented");
    }
  }
  if (ferror(f)) err(EX_IOERR, "%s", name);

  free(line);                   /* getline() leaves the buffer to us */
  /* Leave stdin open: "-" may occur more than once among the operands */
  if (f != stdin) (void)fclose(f);
  delete_list(ranges);
}


/* main -- main body */
int main(int argc, char *argv[])
{
  static const struct option long_opts[] = {
    {"bytes", required_argument, 0, 'b'},
    {"characters", required_argument, 0, 'c'},
    {"delimiter", required_argument, 0, 'd'},
    {"fields", required_argument, 0, 'f'},
    {"only-delimited", no_argument, 0, 's'},
    {"output-delimiter", required_argument, 0, 'o'},
    {"version", no_argument, 0, 'V'},
    {0, 0, 0, 0}};
  char *delim = "\t", *output_delim = NULL;
  bool only_delimited = false;
  RangeList fields = {0, NULL, NULL};
  FieldType type = NONE;
  int c, i;

  while ((c = getopt_long(argc, argv, "b:c:d:f:so:", long_opts, NULL)) != -1)
    switch (c) {
    case 'b': type = BYTE; parse_fields(&fields, optarg); break;
    case 'c': type = CHARACTER; parse_fields(&fields, optarg); break;
    case 'd': delim = optarg; break;
    case 'f': type = FIELD; parse_fields(&fields, optarg); break;
    case 's': only_delimited = true; break;
    case 'o': output_delim = optarg; break;
    case 'V': printf("%s %s\n", PACKAGE, VERSION); exit(0);
    default: usage();
    }

  if (type == NONE) errx(EX_USAGE, "One of '-c', '-b' or '-f' is required");
  if (!*delim) errx(EX_USAGE, "Delimiter cannot be empty");

  /* Set output_delim to default if not set explicitly */
  if (!output_delim) output_delim = type == FIELD ? delim : "";

  if (optind == argc)
    process_file("-", type, fields, delim, only_delimited, output_delim);
  else
    for (i = optind; i < argc; i++)
      process_file(argv[i], type, fields, delim, only_delimited, output_delim);

  delete_list(fields);

  /* Don't report success if the output could not be written */
  if (ferror(stdout) || fflush(stdout) == EOF)
    errx(EX_IOERR, "Write error on standard output");

  return 0;
}