home *** CD-ROM | disk | FTP | other *** search
- Path: xanth!mcnc!gatech!bloom-beacon!husc6!spdcc!ima!necntc!ncoast!allbery
- From: paulsc@radio_flyer.gwd.tek.com (Paul Scherf)
- Newsgroups: comp.sources.misc
- Subject: v03i084: .newsrc shrinking/sorting programs
- Keywords: .newsrc, sort, shrink
- Message-ID: <10166@tekecs.TEK.COM>
- Date: 14 Jul 88 23:36:36 GMT
- Sender: allbery@ncoast.UUCP
- Reply-To: paulsc@radio_flyer.gwd.tek.com (Paul Scherf)
- Organization: Tektronix, Inc., Wilsonville, OR
- Lines: 426
- Approved: allbery@ncoast.UUCP
-
- Posting-number: Volume 3, Issue 84
- Submitted-by: "Paul Scherf" <paulsc@radio_flyer.gwd.tek.com>
- Archive-name: sort-.newsrc
-
- Did I post this correctly?
- [Yup. ++bsa]
-
- #!/bin/sh
- # run this as a Bourne shell script
- # to extract the files archived here
- echo x - README
- sed -e 's/^!!/!/' > README <<'!Funky!Stuff!'
- Once upon a time, I wrote a sed command to mark all the skipped
- articles (in unsubscribed groups) as if I had read them.
-
- Another day (about the time all the news group names changed), I
- started telling rn just to add new groups at the end of my .newsrc.
- The idea was that I would edit my .newsrc later.
-
- Yet another day, I realized that I didn't want to edit my 463 line
- .newsrc file, to make sure a couple new groups were in the "right"
- place. So I whipped together a program that would "sort" my .newsrc
- the way I like it.
-
- Today, I wondered if anyone else might be interested in having these
- programs for their own use, hacking, lint testing, bird cage lining (-:
- or other amusement.
-
- You probably have a different set of regional "distributions"
- (categories), or you just like them in a different order, so you will
- probably want to edit the categories[] table to suit your tastes and
- locale. You probably have different preferences on how to order the
- groups, so you probably want to edit the rest of the program. I'm
- posting this program mostly to publish the idea of sorting (or
- shrinking) your .newsrc file as a convenience (or a way to waste more
- disk space by storing this program, than is saved by using this program
- (-: ).
-
- Anyone who wants to can copy this program and change it any way they
- wish. (How am I going to stop you?) I don't think anyone would buy a
- program as trivial as this, so I don't care if you try to sell it. If
- you manage to get someone to buy this from you, I would find it
- amusing.
-
- If you come up with an interesting variation, I am interested in
- hearing about it.
-
- I have run this only on 4.2BSD-based machines, so there is probably
- some machine somewhere out there where it won't run unchanged. There
- may well be machines where it doesn't work right. I haven't run it
- through lint for a long time, so you might get a bunch of complaints
- from lint. This program works fine for me, but your mileage may vary.
- Use this program at your own peril.
-
- Paul Scherf, Tektronix, Box 1000, MS 61-033, Wilsonville, OR, USA
- paulsc@orca.GWD.Tek.COM tektronix!orca!paulsc
- !Funky!Stuff!
- echo x - shrink.newsrc
- sed -e 's/^!!/!/' > shrink.newsrc <<'!Funky!Stuff!'
#!/bin/sh
# Usage: shrink.newsrc
#
# Rewrite $HOME/.newsrc in two steps:
#   1. On every line that contains a '!' (an unsubscribed group), replace
#      everything between the first article number and the last one with
#      a single '-', i.e. mark the skipped articles as read.
#   2. Sort the groups into the desired order with sort.newsrc.
#
# The steps are chained with && instead of a pipe because sh only yields
# the exit status of the last program in a pipeline, and .newsrc must
# only be replaced when every stage has succeeded.
#
# The scratch file lives next to .newsrc so that each mv is a rename
# within one file system, and so that no predictable name is created
# in /tmp.

newsrc="$HOME/.newsrc"
tmp="$newsrc.$$"

# never leave the scratch file behind, whatever the outcome
trap 'rm -f "$tmp"' 0
trap 'exit 1' 1 2 15

sed 's/^\([^!]*![^,-]*\).*[,-]\(.*\)/\1-\2/' < "$newsrc" > "$tmp" &&
mv "$tmp" "$newsrc" &&
sort.newsrc < "$newsrc" > "$tmp" &&
mv "$tmp" "$newsrc"
- !Funky!Stuff!
- echo x - sort.newsrc.c
- sed -e 's/^!!/!/' > sort.newsrc.c <<'!Funky!Stuff!'
- /* re-order the .newsrc according to my likes */
-
- #include <stdio.h>
- #include <strings.h>
-
- /*
- * misc utility routines
- */
-
/*
 * Like malloc(), except that emalloc() never returns NULL: when the
 * allocation fails it prints "Out of memory" on stderr and ends the
 * program with exit status 1.
 *
 * len: number of bytes to allocate
 * Returns a pointer to len bytes of uninitialized storage.
 */
static char *emalloc(len)
int len; /* number of bytes to allocate */
{
    char *malloc();
    void exit(); /* no header included by this file declares exit() */
    /*
     * malloc() is declared without a prototype, so the argument is not
     * converted automatically; hand it the size_t it really takes.
     */
    char *new = malloc((size_t)len);

    if (!new) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }

    return new;
}
-
/*
 * Return a freshly allocated copy of the NUL-terminated string str.
 * The storage comes from emalloc(), which exits the program rather
 * than return NULL, so the result is always a valid pointer.
 */
static char *stralloc(str)
char *str; /* string to copy */
{
    char *copy = emalloc(strlen(str) + 1);

    /*
     * strcpy(), not strcat(): the block handed back by emalloc() is
     * uninitialized, so it does not contain a string to append to.
     */
    strcpy(copy, str);
    return copy;
}
-
/*
 * Compare two counted strings, strncmp() fashion.
 *
 * Only the first str1len characters of str1 and the first str2len
 * characters of str2 take part.  The common-length prefixes are
 * compared with strncmp(); if they differ, that result is returned.
 * If they are equal, the shorter string sorts first (-1 or 1), and two
 * strings of the same length compare equal (0).
 */
static int mystrncmp(str1, str1len, str2, str2len)
char *str1; /* first string */
int str1len; /* number of significant characters in str1 */
char *str2; /* second string */
int str2len; /* number of significant characters in str2 */
{
    int common = str1len < str2len ? str1len : str2len;
    int order = strncmp(str1, str2, common);

    if (order != 0) /* decided within the common prefix */
        return order;

    /* common prefix is equal: the lengths break the tie */
    if (str1len < str2len)
        return -1;
    if (str1len > str2len)
        return 1;
    return 0;
}
-
- /*
- * low level group list routines/data
- */
-
/*
 * Look up the category name made of the first catlen characters of grp.
 *
 * Returns the position of that name in the categories[] table below,
 * or -1 when the table has no entry of exactly that length and
 * spelling.
 */
static int catindex(grp, catlen)
char *grp; /* group name string */
int catlen; /* category name length */
{
    static char *categories[] = { /* the category names */
        /* edit to taste and locale */

        /* most local to all but most global */
        "gwd",
        "tekwv",
        "tek",
        "pdx",
        "uwcsa",
        "or",
        "pnw",

        /* among the most global, */
        /* interesting/important to boring/worthless */
        "news",
        "comp",
        "sci",
        "misc",
        "alt",
        "gnu",
        "rec",
        "soc",
        "talk",
        "control",
        "junk",
        "test",
    };
/* number of entries in categories[] */
#define NCATEGORIES (sizeof categories / sizeof categories[0])
    int slot; /* table position being tried */

    for (slot = 0; slot < (int)NCATEGORIES; ++slot) {
        char *name = categories[slot];

        /* the length test keeps "tek" from matching "tekwv" and vice versa */
        if (strlen(name) == catlen && strncmp(grp, name, catlen) == 0)
            return slot;
    }

    return -1;
}
-
/*
 * One element of the group list.  The list is doubly linked so that a
 * new element can be inserted in front of any existing one.
 */
struct group {
    struct group *next;     /* following element, NULL at the tail */
    struct group *prev;     /* preceding element, NULL at the head */
    char *group;            /* the original input line */
    int catindex;           /* corresponding index into categories */
    int categorylen;        /* length of category prefix */
    int grouplen;           /* length of group prefix */
};

/* head and tail of the group list; both NULL while the list is empty */
struct group *groups = NULL;
struct group *end_groups = NULL;

/* walk the group list from head to tail, with cur as the loop variable */
#define FOREACH_GROUP(cur) for ((cur) = groups; (cur); (cur) = (cur)->next)
-
- #ifdef DEBUG /* for debugging */
- /* print the data in a struct group */
- static void groupprint(grp)
- struct group *grp; /* pointer to the group to print */
- {
- printf("%x next %x prev %x cat %d catlen %d grplen %d %s", grp,
- grp->next, grp->prev, grp->catindex, grp->categorylen,
- grp->grouplen, grp->group);
- }
-
- /* print all groups */
- static void groupallprint()
- {
- struct group *grp; /* loop index */
-
- FOREACH_GROUP(grp) groupprint(grp);
- }
- #endif /* DEBUG */
-
- /* return a struct group, initialized from line, abort if out of memory */
- static struct group *groupalloc(line)
- char *line; /* the .newsrc line to "parse" */
- {
- register struct group *new; /* pointer to the "parsed" .newsrc line */
-
- /* allocate/initialize a new group entry */
-
- new = (struct group *)emalloc(sizeof(struct group));
- new->group = stralloc(line);
-
- /* find group length (first [:!\n]) */
- new->grouplen = strcspn(line, ":!\n");
-
- /* find category length (first [.:!\n]) */
- new->categorylen = strcspn(line, ".:!\n");
-
- /* error checking */
- if ((new->catindex = catindex(line, new->categorylen)) < 0) {
- /* If this message comes out, edit the categories[] array */
- fprintf(stderr, "Unknown category: %.*s\n",
- new->categorylen, line);
- }
-
- new->next = NULL;
- new->prev = NULL;
-
- return new;
- }
-
- /* insert grp just before cur (at tail, if cur == NULL) */
- static void groupinsert(grp, cur)
- register struct group *grp; /* group to insert */
- register struct group *cur; /* group to be just after grp, ow/ NULL */
- {
- grp->next = cur;
- if (cur) { /* insert just before cur */
- grp->prev = cur->prev;
- if (cur->prev) /* grp is after cur->prev */
- cur->prev->next = grp;
- else /* grp is now the first group */
- groups = grp;
- cur->prev = grp;
-
- } else { /* insert at tail */
- grp->prev = end_groups;
- if (end_groups)
- end_groups->next = grp;
- else /* group list was empty */
- groups = grp; /* grp is now also the first group */
- end_groups = grp;
- }
- }
-
- /* return logical: "Is grp a *.tail group (or the group 'tail')?" */
- static int grptailcmp(grp, tail)
- struct group *grp; /* the group to test */
- char *tail; /* the tail to test for (e.g. "general", "misc") */
- {
- char *tmp = rindex(grp->group, '.'); /* pointer to tail of grp */
- int taillen = strlen(tail); /* length of tail */
-
- if (tmp) /* Is grp a *.tail group? */
- return grp->group + grp->grouplen == ++tmp + taillen
- && strncmp(tmp, tail, taillen) == 0;
- else /* Is grp the group "tail"? */
- return grp->grouplen == taillen
- && strncmp(grp->group, tail, taillen) == 0;
- }
-
- /* like strcmp(), except for checking ordering of group heads (X.*) */
- /* Does not know about categories, or *.general, *.misc, ... */
- static int grpheadcmp(grp1, grp2)
- struct group *grp1; /* first group to compare */
- struct group *grp2; /* second group to compare */
- {
- char *head1tail; /* pointer to "tail" of grp1 */
- char *head2tail; /* pointer to "tail" of grp2 */
-
- #ifdef KNOW_CATEGORIES
- if (grp1->catindex > grp2->catindex) {
- return 1;
- }
- if (grp1->catindex < grp2->catindex) {
- return -1;
- }
- /* hereafter: categories are equal */
- #else /* KNOW_CATEGORIES */
- /* assume categories are equal or don't matter */
- #endif /* KNOW_CATEGORIES */
-
- head1tail = rindex(grp1->group, '.');
- head2tail = rindex(grp2->group, '.');
- if (head1tail == head2tail) /* grp1 is grp2 */
- return 0;
- if (!head1tail) /* only grp1 has no head */
- return -1;
- if (!head2tail) /* only grp2 has no head */
- return 1;
-
- /* finally, the implied comparison */
- return mystrncmp(grp1->group, head1tail - grp1->group,
- grp2->group, head2tail - grp2->group);
- }
-
- /*
- * high level group routines
- */
-
- /* like strcmp(), except it compares "struct group"'s instead of strings */
- static int grpcmp(grp1, grp2)
- struct group *grp1; /* first group to compare */
- struct group *grp2; /* second group to compare */
- {
- /* optimization */
- /* . the categories are in a specified order */
- if (grp2->catindex > grp1->catindex)
- return -1; /* grp1 is before grp2 */
- if (grp2->catindex < grp1->catindex)
- return 1; /* grp1 is after grp2 */
-
- /* hereafter: grp2->catindex == grp1->catindex */
-
- #ifdef PREFIX_GOES_AFTER /* e.g. comp.sources after comp.sources.bugs */
- /* . X is after all other X.* */
- if (grp1->grouplen < grp2->grouplen) {
- /* if grp1 is a prefix of grp2 ... */
- if (grp2->group[grp1->grouplen] == '.' &&
- strncmp(grp1->group, grp2->group, grp1->grouplen) == 0)
- return 1;
-
- /* . X is after all other X.* */
- } else if (grp1->grouplen > grp2->grouplen) {
- /* if grp2 is a prefix of grp1 ... */
- if (grp1->group[grp2->grouplen] == '.' &&
- strncmp(grp1->group, grp2->group, grp2->grouplen) == 0)
- return -1;
- }
- #endif /* PREFIX_GOES_AFTER */
-
- /* . X.general is before all other X* */
- if (grptailcmp(grp1, "general")) {
- if (grpheadcmp(grp1, grp2) == 0)
- return -1; /* grp1 <= grp2 */
-
- /* . X.misc is after all other X* */
- } else if (grptailcmp(grp1, "misc")) {
- if (grpheadcmp(grp1, grp2) == 0)
- return 1; /* grp1 >= grp2 */
- }
-
- /* . X.general is before all other X* */
- if (grptailcmp(grp2, "general")) {
- if (grpheadcmp(grp1, grp2) == 0)
- return 1; /* grp1 >= grp2 */
-
- /* . X.misc is after all other X* */
- } else if (grptailcmp(grp2, "misc")) {
- if (grpheadcmp(grp1, grp2) == 0)
- return -1; /* grp1 <= grp2 */
- }
-
- /* . other groups within one category are sorted as if by "sort" */
- return mystrncmp(grp1->group, grp1->grouplen,
- grp2->group, grp2->grouplen);
- }
-
- /* add line from .newsrc to group list at the proper place */
- static void addgroup(line)
- char *line; /* .newsrc line to insert */
- {
- struct group *new = groupalloc(line); /* "parsed" version of line */
- struct group *cur; /* search loop index */
-
- /* optimization, the list is usually "almost" already sorted */
- if (!end_groups || grpcmp(new, end_groups) > 0) {
- groupinsert(new, (struct group *)NULL);
- return;
- }
-
- /* find insertion point */
- FOREACH_GROUP(cur)
- if (grpcmp(new, cur) < 0) break;
-
- /* insert new just before cur (at tail, if cur == NULL) */
- groupinsert(new, cur);
- }
-
- /* Read a .newsrc from stdin. Write a sorted .newsrc to stdout. */
- main()
- {
- struct group *cur; /* loop index */
- static char line[512]; /* hopefully more than long enough */
-
- /* read in .newsrc, (bubble) sorting on the fly */
- while (fgets(line, sizeof line, stdin))
- addgroup(line);
-
- /* write .newsrc */
- FOREACH_GROUP(cur) printf("%s", cur->group);
-
- exit(0);
- }
- !Funky!Stuff!
- exit 0
-
- Paul Scherf, Tektronix, Box 1000, MS 61-033, Wilsonville, OR, USA
- paulsc@orca.GWD.Tek.COM tektronix!orca!paulsc
-