00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 const char *title
00022 = "readSeq (1Feb93), multi-format molbio sequence reader.\n";
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171 #include <stdio.h>
00172 #include <string.h>
00173 #include <ctype.h>
00174
00175 #include "ureadseq.h"
00176
00177 #pragma segment readseq
00178
00179
00180
00181 static char inputfilestore[256], *inputfile = inputfilestore;
00182
00183 const char *formats[kMaxFormat+1] = {
00184 " 1. IG/Stanford",
00185 " 2. GenBank/GB",
00186 " 3. NBRF",
00187 " 4. EMBL",
00188 " 5. GCG",
00189 " 6. DNAStrider",
00190 " 7. Fitch",
00191 " 8. Pearson/Fasta",
00192 " 9. Zuker (in-only)",
00193 "10. Olsen (in-only)",
00194 "11. Phylip3.2",
00195 "12. Phylip",
00196 "13. Plain/Raw",
00197 "14. PIR/CODATA",
00198 "15. MSF",
00199 "16. ASN.1",
00200 "17. PAUP/NEXUS",
00201 "18. Pretty (out-only)",
00202 "" };
00203
00204 #define kFormCount 30
00205 #define kMaxFormName 15
00206
00207 const struct formatTable {
00208 char *name;
00209 short num;
00210 } formname[] = {
00211 {"ig", kIG},
00212 {"stanford", kIG},
00213 {"genbank", kGenBank},
00214 {"gb", kGenBank},
00215 {"nbrf", kNBRF},
00216 {"embl", kEMBL},
00217 {"gcg", kGCG},
00218 {"uwgcg", kGCG},
00219 {"dnastrider", kStrider},
00220 {"strider", kStrider},
00221 {"fitch", kFitch},
00222 {"pearson", kPearson},
00223 {"fasta", kPearson},
00224 {"zuker", kZuker},
00225 {"olsen", kOlsen},
00226 {"phylip", kPhylip},
00227 {"phylip3.2", kPhylip2},
00228 {"phylip3.3", kPhylip3},
00229 {"phylip3.4", kPhylip4},
00230 {"phylip-interleaved", kPhylip4},
00231 {"phylip-sequential", kPhylip2},
00232 {"plain", kPlain},
00233 {"raw", kPlain},
00234 {"pir", kPIR},
00235 {"codata", kPIR},
00236 {"asn.1", kASN1},
00237 {"msf", kMSF},
00238 {"paup", kPAUP},
00239 {"nexus", kPAUP},
00240 {"pretty", kPretty},
00241 };
00242
00243 const char *kASN1headline = "Bioseq-set ::= {\nseq-set {\n";
00244
00245
00246
00247 const char compl[] = " !\"#$%&'()*+,-./0123456789:;<=>?@TVGHNNCDNNMNKNNYRYSAABWNRN[\\]^_`tvghnncdnnmnknnyrysaabwnrn{|}~";
00248
00249
00250
00251 char *formatstr( short format)
00252 {
00253 if (format < 1 || format > kMaxFormat) {
00254 switch (format) {
00255 case kASNseqentry :
00256 case kASNseqset : return formats[kASN1-1];
00257 case kPhylipInterleave:
00258 case kPhylipSequential: return formats[kPhylip-1];
00259 default: return "(unknown)";
00260 }
00261 }
00262 else return formats[format-1];
00263 }
00264
00265 int parseformat( char *name)
00266 {
00267 #define kDupmatch -2
00268 int namelen, maxlen, i, match, matchat;
00269 char lname[kMaxFormName+1];
00270
00271 skipwhitespace(name);
00272 namelen = strlen(name);
00273 if (namelen == 0)
00274 return kNoformat;
00275 else if (isdigit(*name)) {
00276 i = atol( name);
00277 if (i < kMinFormat | i > kMaxFormat) return kNoformat;
00278 else return i;
00279 }
00280
00281
00282 maxlen = min( kMaxFormName, namelen);
00283 for (i=0; i<maxlen; i++) lname[i] = to_lower(name[i]);
00284 lname[maxlen]=0;
00285 matchat = kNoformat;
00286
00287 for (i=0; i<kFormCount; i++) {
00288 match = strncmp( lname, formname[i].name, maxlen);
00289 if (match == 0) {
00290 if (strlen(formname[i].name) == namelen) return (formname[i].num);
00291 else if (matchat == kNoformat) matchat = i;
00292 else matchat = kDupmatch;
00293 }
00294 }
00295 if (matchat == kNoformat || matchat == kDupmatch)
00296 return kNoformat;
00297 else
00298 return formname[matchat].num;
00299 }
00300
00301
00302
00303 static void dumpSeqList(char *list, short format)
00304 {
00305 long i, l, listlen;
00306 char s[256];
00307
00308 listlen = strlen(list);
00309 printf("Sequences in %s (format is %s)\n", inputfile, formatstr(format));
00310 for (i=0, l=0; i < listlen; i++) {
00311 if (list[i] == (char)NEWLINE) {
00312 s[l] = '\0'; l = 0;
00313 puts(s);
00314 }
00315 else if (l < 255)
00316 s[l++] = list[i];
00317 }
00318 putchar('\n');
00319 }
00320
00321
00322
00323 void usage()
00324 {
00325 short i, midi;
00326
00327 fprintf(stderr,title);
00328 fprintf(stderr,
00329 "usage: readseq [-options] in.seq > out.seq\n");
00330 fprintf(stderr," options\n");
00331
00332 fprintf(stderr, " -a[ll] select All sequences\n");
00333 fprintf(stderr, " -c[aselower] change to lower case\n");
00334 fprintf(stderr, " -C[ASEUPPER] change to UPPER CASE\n");
00335 fprintf(stderr, " -degap[=-] remove gap symbols\n");
00336 fprintf(stderr, " -i[tem=2,3,4] select Item number(s) from several\n");
00337 fprintf(stderr, " -l[ist] List sequences only\n");
00338 fprintf(stderr, " -o[utput=]out.seq redirect Output\n");
00339 fprintf(stderr, " -p[ipe] Pipe (command line, <stdin, >stdout)\n");
00340 fprintf(stderr, " -r[everse] change to Reverse-complement\n");
00341 fprintf(stderr, " -v[erbose] Verbose progress\n");
00342 fprintf(stderr, " -f[ormat=]# Format number for output, or\n");
00343 fprintf(stderr, " -f[ormat=]Name Format name for output:\n");
00344 midi = (kMaxFormat+1) / 2;
00345 for (i = kMinFormat-1; i < midi; i++)
00346 fprintf( stderr, " %-20s %-20s\n",
00347 formats[i], formats[midi+i]);
00348
00349
00350 fprintf(stderr, " \n");
00351 fprintf(stderr, " Pretty format options: \n");
00352 fprintf(stderr, " -wid[th]=# sequence line width\n");
00353 fprintf(stderr, " -tab=# left indent\n");
00354 fprintf(stderr, " -col[space]=# column space within sequence line on output\n");
00355 fprintf(stderr, " -gap[count] count gap chars in sequence numbers\n");
00356 fprintf(stderr, " -nameleft, -nameright[=#] name on left/right side [=max width]\n");
00357 fprintf(stderr, " -nametop name at top/bottom\n");
00358 fprintf(stderr, " -numleft, -numright seq index on left/right side\n");
00359 fprintf(stderr, " -numtop, -numbot index on top/bottom\n");
00360 fprintf(stderr, " -match[=.] use match base for 2..n species\n");
00361 fprintf(stderr, " -inter[line=#] blank line(s) between sequence blocks\n");
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372 }
00373
00374 void erralert(short err)
00375 {
00376 switch (err) {
00377 case 0 :
00378 break;
00379 case eFileNotFound: fprintf(stderr, "File not found: %s\n", inputfile);
00380 break;
00381 case eFileCreate: fprintf(stderr, "Can't open output file.\n");
00382 break;
00383 case eASNerr: fprintf(stderr, "Error in ASN.1 sequence routines.\n");
00384 break;
00385 case eNoData: fprintf(stderr, "No data in file.\n");
00386 break;
00387 case eItemNotFound: fprintf(stderr, "Specified item not in file.\n");
00388 break;
00389 case eUnequalSize: fprintf(stderr,
00390 "This format requires equal length sequences.\nSequence truncated or padded to fit.\n");
00391 break;
00392 case eUnknownFormat: fprintf(stderr, "Error: this format is unknown to me.\n");
00393 break;
00394 case eOneFormat: fprintf(stderr,
00395 "Warning: This format permits only 1 sequence per file.\n");
00396 break;
00397 case eMemFull: fprintf(stderr, "Out of storage memory. Sequence truncated.\n");
00398 break;
00399 default: fprintf(stderr, "readSeq error = %d\n", err);
00400 break;
00401 }
00402 }
00403
00404
00405 int chooseFormat( boolean quietly)
00406 {
00407 char sform[128];
00408 int midi, i, outform;
00409
00410 if (quietly)
00411 return kPearson;
00412 else {
00413 midi = (kMaxFormat+1) / 2;
00414 for (i = kMinFormat-1; i < midi; i++)
00415 fprintf( stderr, " %-20s %-20s\n",
00416 formats[i], formats[midi+i]);
00417 fprintf(stderr,"\nChoose an output format (name or #): \n");
00418 gets(sform);
00419 outform = parseformat(sform);
00420 if (outform == kNoformat) outform = kPearson;
00421 return outform;
00422 }
00423 }
00424
00425
00426
00427
00428
00429 boolean checkopt( boolean casesense, char *sopt, const char *smatch, short minword)
00430 {
00431 long lenopt, lenmatch;
00432 boolean result;
00433 short minmaxw;
00434
00435 lenopt = strlen(sopt);
00436 lenmatch= strlen(smatch);
00437 minmaxw= max(minword, min(lenopt, lenmatch));
00438
00439 if (casesense)
00440 result= (!strncmp( sopt, smatch, minmaxw));
00441 else
00442 result= (!Strncasecmp( sopt, smatch, minmaxw ));
00443
00444
00445
00446 return result;
00447 }
00448
00449
00450 #define kMaxwhichlist 50
00451
00452
00453 boolean chooseall = false, quietly = false, gotinputfile = false,
00454 listonly = false, closeout = false, verbose = false,
00455 manyout = false, dolower = false, doupper = false, doreverse= false,
00456 askout = true, dopipe= false, interleaved = false;
00457 short nfile = 0, iwhichlist=0, nwhichlist = 0;
00458 short whichlist[kMaxwhichlist+1];
00459 long whichSeq = 0, outform = kNoformat;
00460 char onamestore[128], *oname = onamestore;
00461 FILE *foo = NULL;
00462
00463 void resetGlobals()
00464
00465
00466 {
00467 chooseall = false; quietly = false; gotinputfile = false;
00468 listonly = false; closeout = false; verbose = false;
00469 manyout = false; dolower = false; doupper = false; doreverse= false;
00470 askout = true; dopipe= false; interleaved = false;
00471 nfile = 0; iwhichlist=0; nwhichlist = 0;
00472 whichSeq = 0; outform = kNoformat;
00473 oname = onamestore;
00474 foo = NULL;
00475
00476 gPrettyInit(gPretty);
00477 }
00478
00479
00480 #define kOptOkay 1
00481 #define kOptNone 0
00482
00483 int readopt( char *sopt)
00484 {
00485 char sparamstore[256], *sparam= sparamstore;
00486 short n, slen= strlen(sopt);
00487
00488
00489
00490 if (*sopt == '?') {
00491 usage();
00492 return kOptNone;
00493 }
00494
00495 else if (*sopt == '-') {
00496
00497 char *cp= strchr(sopt,'=');
00498 *sparam= '\0';
00499 if (cp) {
00500 strcpy(sparam, cp+1);
00501 *cp= 0;
00502 }
00503
00504 if (checkopt( false, sopt, "-help", 2)) {
00505 usage();
00506 return kOptNone;
00507 }
00508
00509 if (checkopt( false, sopt, "-all", 2)) {
00510 whichSeq= 1; chooseall= true;
00511 return kOptOkay;
00512 }
00513
00514 if (checkopt( false, sopt, "-colspace", 4)) {
00515 n= atoi( sparam);
00516 gPretty.spacer = n;
00517 return kOptOkay;
00518 }
00519
00520 if (checkopt( true, sopt, "-caselower", 2)) {
00521 dolower= true;
00522 return kOptOkay;
00523 }
00524 if (checkopt( true, sopt, "-CASEUPPER", 2)) {
00525 doupper= true;
00526 return kOptOkay;
00527 }
00528
00529 if (checkopt( false, sopt, "-pipe", 2)) {
00530 dopipe= true; askout= false;
00531 return kOptOkay;
00532 }
00533
00534 if (checkopt( false, sopt, "-list", 2)) {
00535 listonly = true; askout = false;
00536 return kOptOkay;
00537 }
00538
00539 if (checkopt( false, sopt, "-reverse", 2)) {
00540 doreverse = true;
00541 return kOptOkay;
00542 }
00543
00544 if (checkopt( false, sopt, "-verbose", 2)) {
00545 verbose = true;
00546 return kOptOkay;
00547 }
00548
00549 if (checkopt( false, sopt, "-match", 5)) {
00550 gPretty.domatch= true;
00551 if (*sparam >= ' ') gPretty.matchchar= *sparam;
00552 return kOptOkay;
00553 }
00554 if (checkopt( false, sopt, "-degap", 4)) {
00555 gPretty.degap= true;
00556 if (*sparam >= ' ') gPretty.gapchar= *sparam;
00557 return kOptOkay;
00558 }
00559
00560 if (checkopt( false, sopt, "-interline", 4)) {
00561 gPretty.interline= atoi( sparam);
00562 return kOptOkay;
00563 }
00564
00565 if (checkopt( false, sopt, "-item", 2)) {
00566 char *cp = sparam;
00567 nwhichlist= 0;
00568 whichlist[0]= 0;
00569 if (*cp == 0) cp= sopt+2;
00570 do {
00571 while (*cp!=0 && !isdigit(*cp)) cp++;
00572 if (*cp!=0) {
00573 n = atoi( cp);
00574 whichlist[nwhichlist++]= n;
00575 while (*cp!=0 && isdigit(*cp)) cp++;
00576 }
00577 } while (*cp!=0 && n>0 && nwhichlist<kMaxwhichlist);
00578 whichlist[nwhichlist++]= 0;
00579 whichSeq= max(1,whichlist[0]); iwhichlist= 1;
00580 return kOptOkay;
00581 }
00582
00583 if (checkopt( false, sopt, "-format", 5)) {
00584 if (*sparam==0) { for (sparam= sopt+2; isalpha(*sparam); sparam++) ; }
00585 outform = parseformat( sparam);
00586 return kOptOkay;
00587 }
00588 if (checkopt( false, sopt, "-f", 2)) {
00589 if (*sparam==0) sparam= sopt+2;
00590 outform = parseformat( sparam);
00591 return kOptOkay;
00592 }
00593
00594 if (checkopt( false, sopt, "-output", 3)) {
00595 if (*sparam==0) { for (sparam= sopt+3; isalpha(*sparam); sparam++) ; }
00596 strcpy( oname, sparam);
00597 foo = fopen( oname, "w");
00598 if (!foo) { erralert(eFileCreate); return eFileCreate; }
00599 closeout = true;
00600 askout = false;
00601 return kOptOkay;
00602 }
00603 if (checkopt( false, sopt, "-o", 2)) {
00604 if (*sparam==0) sparam= sopt+2;
00605 strcpy( oname, sparam);
00606 foo = fopen( oname, "w");
00607 if (!foo) { erralert(eFileCreate); return eFileCreate; }
00608 closeout = true;
00609 askout = false;
00610 return kOptOkay;
00611 }
00612
00613 if (checkopt( false, sopt, "-width", 2)) {
00614 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
00615 n= atoi( sparam);
00616 if (n>0) gPretty.seqwidth = n;
00617 return kOptOkay;
00618 }
00619
00620 if (checkopt( false, sopt, "-tab", 4)) {
00621 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
00622 n= atoi( sparam);
00623 gPretty.tab = n;
00624 return kOptOkay;
00625 }
00626
00627 if (checkopt( false, sopt, "-gapcount", 4)) {
00628 gPretty.baseonlynum = false;
00629
00630 return kOptOkay;
00631 }
00632 if (checkopt( false, sopt, "-nointerleave", 8)) {
00633 gPretty.noleaves = true;
00634 return kOptOkay;
00635 }
00636
00637 if (checkopt( false, sopt, "-nameleft", 7)) {
00638 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
00639 n= atoi( sparam);
00640 if (n>0 && n<50) gPretty.namewidth = n;
00641 gPretty.nameleft= true;
00642 return kOptOkay;
00643 }
00644 if (checkopt( false, sopt, "-nameright", 7)) {
00645 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
00646 n= atoi( sparam);
00647 if (n>0 && n<50) gPretty.namewidth = n;
00648 gPretty.nameright= true;
00649 return kOptOkay;
00650 }
00651 if (checkopt( false, sopt, "-nametop", 6)) {
00652 gPretty.nametop= true;
00653 return kOptOkay;
00654 }
00655
00656 if (checkopt( false, sopt, "-numleft", 6)) {
00657 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
00658 n= atoi( sparam);
00659 if (n>0 && n<50) gPretty.numwidth = n;
00660 gPretty.numleft= true;
00661 return kOptOkay;
00662 }
00663 if (checkopt( false, sopt, "-numright", 6)) {
00664 if (*sparam==0) { for (sparam= sopt+2; !isdigit(*sparam) && *sparam!=0; sparam++) ; }
00665 n= atoi( sparam);
00666 if (n>0 && n<50) gPretty.numwidth = n;
00667 gPretty.numright= true;
00668 return kOptOkay;
00669 }
00670
00671 if (checkopt( false, sopt, "-numtop", 6)) {
00672 gPretty.numtop= true;
00673 return kOptOkay;
00674 }
00675 if (checkopt( false, sopt, "-numbottom", 6)) {
00676 gPretty.numbot= true;
00677 return kOptOkay;
00678 }
00679
00680 else {
00681 usage();
00682 return eOptionBad;
00683 }
00684 }
00685
00686 else {
00687 strcpy( inputfile, sopt);
00688 gotinputfile = (*inputfile != 0);
00689 nfile++;
00690 return kOptOkay;
00691 }
00692
00693
00694 }
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704 #ifdef SIOW
00705 #define Exit(a) return(a)
00706 siow_main( int argc, char *argv[])
00707
00708 #else
00709 #define Exit(a) exit(a)
00710
00711 main( int argc, char *argv[])
00712 #endif
00713 {
00714 boolean closein = false;
00715 short ifile, nseq, atseq, format, err = 0, seqtype = kDNA,
00716 nlines, seqout = 0, phylvers = 2;
00717 long i, skiplines, seqlen, seqlen0;
00718 unsigned long checksum= 0, checkall= 0;
00719 char *seq, *cp, *firstseq = NULL, *seqlist, *progname, tempname[256];
00720 char seqid[256], *seqidptr = seqid;
00721 char stempstore[256], *stemp = stempstore;
00722 FILE *ftmp, *fin, *fout;
00723 long outindexmax= 0, noutindex= 0, *outindex = NULL;
00724
00725 #define exit_main(err) { \
00726 if (closeout) fclose(fout); \
00727 if (closein) fclose(fin); \
00728 if (*tempname!=0) remove(tempname);\
00729 Exit(err); }
00730
00731 #define indexout() if (interleaved) {\
00732 if (noutindex>=outindexmax) {\
00733 outindexmax= noutindex + 20;\
00734 outindex= (long*) realloc(outindex, sizeof(long)*outindexmax);\
00735 if (outindex==NULL) { err= eMemFull; erralert(err); exit_main(err); }\
00736 }\
00737 outindex[noutindex++]= ftell(fout);\
00738 }
00739
00740
00741 resetGlobals();
00742 foo = stdout;
00743 progname = argv[0];
00744 *oname = 0;
00745 *tempname = 0;
00746
00747
00748 for (i=1; i < argc; i++) {
00749 err= readopt( argv[i]);
00750 if (err <= 0) exit_main(err);
00751 }
00752
00753
00754 if (dopipe && !gotinputfile) {
00755 int c;
00756 tmpnam(tempname);
00757 inputfile = tempname;
00758 ftmp = fopen( inputfile, "w");
00759 if (!ftmp) { erralert(eFileCreate); exit_main(eFileCreate); }
00760 while ((c = getc(stdin)) != EOF) fputc(c, ftmp);
00761 fclose(ftmp);
00762 gotinputfile= true;
00763 }
00764
00765 quietly = (dopipe || (gotinputfile && (listonly || whichSeq != 0)));
00766
00767 if (verbose || (!quietly && !gotinputfile)) fprintf( stderr, title);
00768 ifile = 1;
00769
00770
00771 if (askout && !closeout && !quietly) {
00772 askout = false;
00773 fprintf(stderr,"\nName of output file (?=help, defaults to display): \n");
00774 gets(oname= onamestore);
00775 skipwhitespace(oname);
00776 if (*oname == '?') { usage(); exit_main(0); }
00777 else if (*oname != 0) {
00778 closeout = true;
00779 foo = fopen( oname, "w");
00780 if (!foo) { erralert(eFileCreate); exit_main(eFileCreate); }
00781 }
00782 }
00783
00784 fout = foo;
00785 if (outform == kNoformat) outform = chooseFormat(quietly);
00786
00787
00788 switch (outform) {
00789 case kPhylip2:
00790 interleaved= false;
00791 phylvers = 2;
00792 outform = kPhylip;
00793 break;
00794
00795 case kPhylip4:
00796 interleaved= true;
00797 phylvers = 4;
00798 outform = kPhylip;
00799 break;
00800
00801 case kMSF:
00802 case kPAUP:
00803 interleaved= true;
00804 break;
00805
00806 case kPretty:
00807 gPretty.isactive= true;
00808 interleaved= true;
00809 break;
00810
00811 }
00812
00813 if (gPretty.isactive && gPretty.noleaves) interleaved= false;
00814 if (interleaved) {
00815 fout = ftmp = tmpfile();
00816 outindexmax= 30; noutindex= 0;
00817 outindex = (long*) malloc(outindexmax*sizeof(long));
00818 if (outindex==NULL) { err= eMemFull; erralert(err); exit_main(err); }
00819 }
00820
00821
00822 do {
00823
00824 gotinputfile = (*tempname != 0);
00825 while ((ifile < argc) && (!gotinputfile)) {
00826 if (*argv[ifile] != '-') {
00827 strcpy( inputfile, argv[ifile]);
00828 gotinputfile = (*inputfile != 0);
00829 --nfile;
00830 }
00831 ifile++;
00832 }
00833
00834 while (!gotinputfile) {
00835 fprintf(stderr,"\nName an input sequence or -option: \n");
00836 inputfile= inputfilestore;
00837
00838 gets(stemp= stempstore);
00839 if (*stemp==0) goto fini;
00840 stemp= strtok(stempstore, " \n\r\t");
00841 while (stemp) {
00842 err= readopt( stemp);
00843 if (err<0) exit_main(err);
00844 stemp= strtok( NULL, " \n\r\t");
00845 }
00846 }
00847
00848
00849 if (*inputfile == 0) break;
00850
00851 format = seqFileFormat( inputfile, &skiplines, &err);
00852
00853 if (err == 0) {
00854 #ifdef NCBI
00855 if (format == kASNseqentry || format == kASNseqset)
00856 seqlist = listASNSeqs( inputfile, skiplines, format, &nseq, &err);
00857 else
00858 #endif
00859 seqlist = listSeqs( inputfile, skiplines, format, &nseq, &err);
00860 }
00861
00862 if (err != 0)
00863 erralert(err);
00864
00865 else if (listonly) {
00866 dumpSeqList(seqlist,format);
00867 free( seqlist);
00868 }
00869
00870 else {
00871
00872 if (nseq == 1 || chooseall || (quietly && whichSeq == 0)) {
00873 chooseall= true;
00874 whichSeq = 1;
00875 quietly = true;
00876 }
00877 else if (whichSeq > nseq && quietly) {
00878 erralert(eItemNotFound);
00879 err= eItemNotFound;
00880 }
00881 else if (whichSeq > nseq || !quietly) {
00882 dumpSeqList(seqlist, format);
00883 fprintf(stderr,"\nChoose a sequence (# or All): \n");
00884 gets(stemp= stempstore);
00885 skipwhitespace(stemp);
00886 if (to_lower(*stemp) == 'a') {
00887 chooseall= true;
00888 whichSeq = 1;
00889 quietly = true;
00890
00891 }
00892 else if (isdigit(*stemp)) whichSeq= atol(stemp);
00893 else whichSeq= 1;
00894 }
00895 free( seqlist);
00896
00897 if (false ) {
00898 fin = fopen(inputfile, "r");
00899 closein= true;
00900 }
00901
00902 while (whichSeq > 0 && whichSeq <= nseq) {
00903
00904 manyout = ((chooseall || nwhichlist>1) && nseq > 1
00905 && (outform == kPlain || outform == kGCG));
00906 if (manyout) {
00907 if ( whichSeq == 1 ) erralert(eOneFormat);
00908 else if (closeout) {
00909 sprintf( stemp,"%s_%d", oname, whichSeq);
00910 freopen( stemp, "w", fout);
00911 fprintf( stderr,"Writing sequence %d to file %s\n", whichSeq, stemp);
00912 }
00913 }
00914
00915 if (closein) {
00916
00917
00918
00919 atseq= whichSeq-1;
00920 seqidptr= seqid;
00921 seq = readSeqFp( whichSeq, fin, skiplines, format,
00922 &seqlen, &atseq, &err, seqidptr);
00923 skiplines= 0;
00924 }
00925 else {
00926 atseq= 0;
00927 seqidptr= seqid;
00928 #ifdef NCBI
00929 if (format == kASNseqentry || format == kASNseqset) {
00930 seqidptr= NULL;
00931 seq = readASNSeq( whichSeq, inputfile, skiplines, format,
00932 &seqlen, &atseq, &err, &seqidptr);
00933 }
00934 else
00935 #endif
00936 seq = readSeq( whichSeq, inputfile, skiplines, format,
00937 &seqlen, &atseq, &err, seqidptr);
00938 }
00939
00940
00941 if (gPretty.degap) {
00942 char *newseq;
00943 long newlen;
00944 newseq= compressSeq( gPretty.gapchar, seq, seqlen, &newlen);
00945 if (newseq) {
00946 free(seq); seq= newseq; seqlen= newlen;
00947 }
00948 }
00949
00950 if (outform == kMSF) checksum= GCGchecksum(seq, seqlen, &checkall);
00951 else if (verbose) checksum= seqchecksum(seq, seqlen, &checkall);
00952 if (verbose)
00953 fprintf( stderr, "Sequence %d, length= %d, checksum= %X, format= %s, id= %s\n",
00954 whichSeq, seqlen, checksum, formatstr(format), seqidptr);
00955
00956 if (err != 0) erralert(err);
00957 else {
00958
00959 switch (outform) {
00960 case kPIR:
00961 if (seqout == 0) fprintf( foo,"\\\\\\\n");
00962 break;
00963 case kASN1:
00964 if (seqout == 0) fprintf( foo, kASN1headline);
00965 break;
00966
00967 case kPhylip:
00968 if (seqout == 0) {
00969 if (!interleaved) {
00970 if (chooseall) i= nseq; else i=1;
00971 if (phylvers >= 4) fprintf(foo," %d %d\n", i, seqlen);
00972 else fprintf(foo," %d %d YF\n", i, seqlen);
00973 }
00974 seqlen0 = seqlen;
00975 }
00976 else if (seqlen != seqlen0) {
00977 erralert(eUnequalSize);
00978 if (seqlen < seqlen0) seq = (char *)realloc(seq, seqlen0);
00979 for (i=seqlen; i<seqlen0; i++) seq[i]= gPretty.gapchar;
00980 seqlen = seqlen0;
00981 seq[seqlen] = 0;
00982 }
00983 break;
00984
00985 case kPAUP:
00986 if (seqout == 0) {
00987 seqtype= getseqtype(seq, seqlen);
00988 seqlen0 = seqlen;
00989 }
00990 else if (seqlen != seqlen0) {
00991 erralert(eUnequalSize);
00992 if (seqlen < seqlen0) seq = (char *)realloc(seq, seqlen0);
00993 for (i=seqlen; i<seqlen0; i++) seq[i]= gPretty.gapchar;
00994 seqlen = seqlen0;
00995 seq[seqlen] = 0;
00996 }
00997 break;
00998
00999 }
01000
01001 if (doupper)
01002 for (i = 0; i<seqlen; i++) seq[i] = to_upper(seq[i]);
01003 else if (dolower)
01004 for (i = 0; i<seqlen; i++) seq[i] = to_lower(seq[i]);
01005
01006 if (doreverse) {
01007 long j, k;
01008 char ctemp;
01009 for (j=0, k=seqlen-1; j <= k; j++, k--) {
01010 ctemp = compl[seq[j] - ' '];
01011 seq[j] = compl[seq[k] - ' '];
01012 seq[k] = ctemp;
01013 }
01014 }
01015
01016 if ((gPretty.isactive || outform==kPAUP) && gPretty.domatch && firstseq != NULL) {
01017 for (i=0; i<seqlen; i++)
01018 if (seq[i]==firstseq[i]) seq[i]= gPretty.matchchar;
01019 }
01020
01021
01022 if (gPretty.isactive && gPretty.numtop && seqout == 0) {
01023 gPretty.numline = 1;
01024 indexout();
01025 (void) writeSeq( fout, seq, seqlen, outform, seqidptr);
01026 gPretty.numline = 2;
01027 indexout();
01028 (void) writeSeq( fout, seq, seqlen, outform, seqidptr);
01029 gPretty.numline = 0;
01030 }
01031
01032 indexout();
01033 nlines = writeSeq( fout, seq, seqlen, outform, seqidptr);
01034 seqout++;
01035 }
01036
01037 if ((gPretty.isactive || outform==kPAUP) && gPretty.domatch && firstseq == NULL) {
01038 firstseq= seq;
01039 seq = NULL;
01040 }
01041 else if (seq!=NULL) { free(seq); seq = NULL; }
01042
01043 #ifdef NCBI
01044 if ( (format == kASNseqentry || format == kASNseqset)
01045 && seqidptr && seqidptr!= seqid)
01046 free(seqidptr);
01047 #endif
01048 if (chooseall) whichSeq++;
01049 else if (iwhichlist<nwhichlist) whichSeq= whichlist[iwhichlist++];
01050 else whichSeq= 0;
01051 }
01052 if (closein) { fclose(fin); closein= false; }
01053 }
01054 whichSeq = 0;
01055 } while (nfile > 0 || !quietly);
01056
01057
01058 fini:
01059 if (firstseq) { free(firstseq); firstseq= NULL; }
01060 if (err || listonly) exit_main(err);
01061
01062 if (gPretty.isactive && gPretty.numbot) {
01063 gPretty.numline = 2;
01064 indexout();
01065 (void) writeSeq( fout, seq, seqlen, outform, seqidptr);
01066 gPretty.numline = 1;
01067 indexout();
01068 (void) writeSeq( fout, seq, seqlen, outform, seqidptr);
01069 gPretty.numline = 0;
01070 }
01071
01072 if (outform == kMSF) {
01073 if (*oname) cp= oname; else cp= inputfile;
01074 fprintf(foo,"\n %s MSF: %d Type: N January 01, 1776 12:00 Check: %d ..\n\n",
01075 cp, seqlen, checkall);
01076 }
01077
01078 if (outform == kPAUP) {
01079 fprintf(foo,"#NEXUS\n");
01080 if (*oname) cp= oname; else cp= inputfile;
01081 fprintf(foo,"[%s -- data title]\n\n", cp);
01082
01083 }
01084
01085 if (outform==kPhylip && interleaved) {
01086 if (phylvers >= 4) fprintf(foo," %d %d\n", seqout, seqlen);
01087 else fprintf(foo," %d %d YF\n", seqout, seqlen);
01088 }
01089
01090 if (interleaved) {
01091
01092
01093 short iline, j, leaf, iseq;
01094 char *s = stempstore;
01095
01096 indexout(); noutindex--;
01097
01098 for (leaf=0; leaf<nlines; leaf++) {
01099 if (outform == kMSF && leaf == 1) {
01100 fputs("//\n\n", foo);
01101 }
01102 if (outform == kPAUP && leaf==1) {
01103 switch (seqtype) {
01104 case kDNA : cp= "dna"; break;
01105 case kRNA : cp= "rna"; break;
01106 case kNucleic : cp= "dna"; break;
01107 case kAmino : cp= "protein"; break;
01108 case kOtherSeq: cp= "dna"; break;
01109 }
01110 fprintf(foo,"\nbegin data;\n");
01111 fprintf(foo," dimensions ntax=%d nchar=%d;\n", seqout, seqlen);
01112 fprintf(foo," format datatype=%s interleave missing=%c", cp, gPretty.gapchar);
01113 if (gPretty.domatch) fprintf(foo," matchchar=%c", gPretty.matchchar);
01114 fprintf(foo,";\n matrix\n");
01115 }
01116
01117 for (iseq=0; iseq<noutindex; iseq++) {
01118 fseek(ftmp, outindex[iseq], 0);
01119 for (iline=0; iline<=leaf; iline++)
01120 if (!fgets(s, 256, ftmp)) *s= 0;
01121 if (ftell(ftmp) <= outindex[iseq+1])
01122 fputs( s, foo);
01123 }
01124
01125 for (j=0; j<gPretty.interline; j++)
01126 fputs( "\n", foo);
01127 }
01128 fclose(ftmp);
01129 fout= foo;
01130 }
01131
01132 if (outform == kASN1) fprintf( foo, "} }\n");
01133 if (outform == kPAUP) fprintf( foo,";\n end;\n");
01134
01135 if (outindex != NULL) free(outindex);
01136 exit_main(0);
01137 }
01138
01139