/* gather - collect files for mailing Author: Andy Tanenbaum */ /* It sometimes happens that one needs to mail a large directory full of * files to someone. This program can be used to collect these files into * shar archives, compress and uuencode them. The interesting property that * it has is that it makes sure that none of the archives are too big, and * that no files are split over two archives. * * Syntax: gather [-s source_dir] [-d dest_dir] [-b max_arch_size] [-f file] * * -s source directory (where are the files to be sent) * -d destination dir (where should the archives be put) * -b bytes (maximum size of the archives; default 60K) * -f file (use file_00.uue etc as archive names) * * Examples: * gather # make 60K archives in this dir * gather -d mailings -b 50000 # make 50K archives in mailings * * Note: * The maximum size given by -b (default 60000 bytes) is only an * approximation, since it is hard to tell how big the final file * will be after shar'ing, compressing, and uue'ing. A heuristic * is used. */ #include <sys/types.h> #include <sys/stat.h> #include <dirent.h> #include <fcntl.h> #include <stdio.h> #define DEFAULT 60000 /* default archive size */ #define MAX_DIR_ENT 512 /* how many directory entries allowed */ #define HEAP_SIZE 20000 /* storage size for all file names */ #define NAME_SIZE 4096 /* storage size for current command */ #define BASE_SIZE 7 /* max number of chars in basename */ #define PATH_MAX 512 /* largest path name */ #define NUMERATOR 138L /* heuristic parameter */ #define DENOMINATOR 100L /* heuristic parameter */ char heap[HEAP_SIZE + 2]; /* dir entries stored here */ char names[NAME_SIZE]; /* file name lists constructed here */ char work[NAME_SIZE]; /* scratch buffer */ char base_name[BASE_SIZE + 1]; /* base name to use for the archives */ char target[PATH_MAX]; /* storage for target file names */ struct dir_ent { char *file_name; long file_size; } dir_ent[MAX_DIR_ENT]; long atol(), heuristic(); char *getcwd(); main(argc, argv) int argc; char *argv[]; { /* Parse the command and get ready. */ int i, counter, l, s, nonlocal; char *p, num[3]; struct dirent *d; DIR *dirp; struct stat stbuf; int first; /* first entry not used yet */ int limit; /* number of files in src_dir */ char *src_dir = "."; /* pointer to source directory */ char *dst_dir = "."; /* pointer to destination directory */ char *file = ""; /* name to use */ long max_bytes = DEFAULT; /* max archive size (approx.) */ long cutoff; /* max cumulative input size */ if (argc > 9) usage(); i = 1; while (i < argc) { /* Examine the i-th argument. */ p = argv[i]; if (*p != '-') usage(); switch (*(p + 1)) { case 's': src_dir = argv[i + 1]; break; case 'd': dst_dir = argv[i + 1]; break; case 'f': file = argv[i + 1]; break; case 'b': max_bytes = atol(argv[i + 1]); if (max_bytes <= 0) { fprintf(stderr, "gather: bad -b value\n"); exit(1); } break; default: fprintf(stderr, "gather: unknown flag %s\n", p); exit(1); } i += 2; } /* Determine the basename. */ get_basename(src_dir, file); /* Open the source directory. */ i = 0; p = heap; if ((dirp = opendir(src_dir)) == (DIR *) NULL) { fprintf(stderr, "gather: cannot open %s\n", src_dir); exit(2); } /* Read in all the file names. */ while (1) { d = readdir(dirp); if (d == (struct dirent *) NULL) break; l = strlen(d->d_name); if (p + l >= &heap[HEAP_SIZE] || i >= MAX_DIR_ENT) { fprintf(stderr, "gather: %s is too large\n", src_dir); exit(2); } strcpy(work, src_dir); strcat(work, "/"); strcat(work, d->d_name); stat(work, &stbuf); if ((stbuf.st_mode & S_IFMT) == S_IFDIR) continue; dir_ent[i].file_name = p; strcpy(p, d->d_name); dir_ent[i].file_size = stbuf.st_size; p += l + 1; i++; } limit = i; closedir(dirp); /* Sort the names. */ sort_dir(limit); /* Figure out when to stop reading files. */ cutoff = heuristic(max_bytes); /* Collect files into archives. */ first = 0; counter = 0; while (first < limit) { first = collect(first, limit, cutoff); num[0] = '0' + (counter / 10); num[1] = '0' + (counter % 10); num[2] = 0; /* Construct full path of compressed target. */ target[0] = 0; if (strcmp(dst_dir, ".") != 0) { strcpy(target, dst_dir); strcat(target, "/"); } strcat(target, base_name); strcat(target, "_"); strcat(target, num); strcat(target, ".Z"); /* (cd src; shar file ... | compress -fc) >dir/base.00.Z */ nonlocal = strcmp(src_dir, "."); work[0] = 0; if (nonlocal) { strcat(work, "(cd "); strcat(work, src_dir); strcat(work, "; "); } strcat(work, "shar "); strcat(work, names); strcat(work, " | compress -fc "); if (nonlocal) strcat(work, ")"); strcat(work, " >"); strcat(work, target); s = system(work); if (s < 0) { fprintf(stderr, "gather: shar command failed\n"); exit(2); } /* Uue dir/base.00.Z */ strcpy(work, "uue "); strcat(work, target); strcat(work, "\n"); s = system(work); if (s < 0) { fprintf(stderr, "gather: uue command failed\n"); exit(2); } /* Unlink dir/base.00.Z */ unlink(target); counter++; } } int collect(first, limit, cutoff) int first; int limit; long cutoff; { /* See how many files will fit in an archive. */ int nr_files; long cum_size, size; struct dir_ent *p, *endp; names[0] = 0; p = &dir_ent[first]; endp = &dir_ent[limit]; nr_files = 0; cum_size = 0; while (p < endp) { size = p->file_size; if (size > cutoff) { fprintf(stderr, "gather: %s is too big\n", p->file_name); exit(2); } /* First peek to see if next file fits. If not, maybe some * other file can be used instead. Swap them. */ if (cum_size + size > cutoff) fudge(p, endp, cutoff - cum_size); /* If it fails now, there is no file that will fit. */ size = p->file_size; if (cum_size + size > cutoff) return(p - dir_ent); strcat(names, p->file_name); strcat(names, " "); cum_size += size; p++; } return(p - dir_ent); } long heuristic(m) long m; { /* The basic algorithm is to collect files up to some limit, and put * them in an archive. It is tricky to determine how many files to * collect, because they will be shar'ed, compressed and uue'ed. * Thus we need a heuristic for guessing how to relate the total size * of the input files to the size of the final uue archive. This * heuristic is contained in this procedure. It takes the desired * final size as input and produces the file cutoff as output. */ return((NUMERATOR * m) / DENOMINATOR); } sort_dir(limit) int limit; /* how many entries in dir_ent */ { /* Sort the directory using bubble sort. */ struct dir_ent *p, *q; for (p = &dir_ent[0]; p < &dir_ent[limit - 1]; p++) { for (q = p + 1; q < &dir_ent[limit]; q++) { if (strcmp(p->file_name, q->file_name) > 0) swap(p, q); } } } swap(p, q) struct dir_ent *p, *q; { /* Exchange two entries. */ char *cp; long l; cp = p->file_name; l = p->file_size; p->file_name = q->file_name; p->file_size = q->file_size; q->file_name = cp; q->file_size = l; } fudge(p, endp, size) struct dir_ent *p, *endp; long size; { /* Look for a file that will fit (i.e., <= size). This fudging gives a more * uniform distribution, and reduces the number of files needed. */ register struct dir_ent *q; for (q = p + 1; q < endp; q++) { if (q->file_size <= size) { swap(p, q); return; } } } get_basename(s, file) char *s; char *file; { /* Determine the basename and copy it to base_name. */ int fd, n; char *p, *q; if (*file != 0) p = file; else if (strcmp(s, ".") == 0) { if (getcwd(work, NAME_SIZE) == (char *) NULL) { fprintf(stderr, "gather: could not get name of working dir\n"); exit(2); } p = work; } else { p = s; } q = p + strlen(p) - 1; if (*q == '\n') { *q = 0; q--; } while (1) { if (q < p || *q == '/') break; q--; } strncpy(base_name, q + 1, BASE_SIZE); } usage() { fprintf(stderr, "Usage: gather [-b bytes] [-s src_dir] [-d dst_dir] [-f file]\n"); exit(1); }