00001 #line 16 "create_gml.lw"
00002 #include<LEDA/graph.h>
00003 #include<LEDA/graph_alg.h>
00004 #include<LEDA/sortseq.h>
00005 #include<LEDA/stack.h>
00006 #include<LEDA/set.h>
00007 #include<LEDA/p_queue.h>
00008 #include<LEDA/d2_dictionary.h>
00009 #include<LEDA/map.h>
00010 #include<LEDA/array2.h>
00011 #include<LEDA/node_map2.h>
00012 #include<LEDA/string.h>
00013
00014 #include<ctype.h>
00015
00016 using namespace LEDA;
00017 using namespace std;
00018
00019 int add = 15;
00020 int lp = 3;
00021 int gp = 8;
00022 int sp = 0;
00023 int egp = 0;
00024
00025 class ABA_CPUTIMER {
00026 float mytime;
00027 float laststart;
00028
00029 public:
00030 ABA_CPUTIMER(void*) {
00031 mytime=0;
00032 }
00033
00034 void start() {
00035 used_time(laststart);
00036 };
00037
00038 void stop() {
00039 laststart=used_time(laststart);
00040 mytime+=laststart;
00041 };
00042
00043 friend ostream& operator<<(ostream& o,const ABA_CPUTIMER& c) {
00044 o<<c.mytime;
00045 return o;
00046 };
00047 };
00048
00049 double maxupperlower=10000;
00050 bool fewvariables=true;
00051 bool allnonpruned=false;
00052 double mylowerbound=0;
00053 bool writeoptvalue=true;
00054 bool readoptvalue=false;
00055 int edgebounce=4;
00056 int gapbounce=3;
00057 double inittolerance=0.98;
00058 double pricingtolerance=0.98;
00059 ABA_CPUTIMER heur(nil);
00060 ABA_CPUTIMER extmixed(nil);
00061 ABA_CPUTIMER extpair(nil);
00062 ABA_CPUTIMER trans(nil);
00063 ABA_CPUTIMER pgbuild(nil);
00064 ABA_CPUTIMER prepro(nil);
00065 ABA_CPUTIMER roottimer(nil);
00066 double rootbound;
00067 int allgvars;
00068 int allavars;
00069 int redavars;
00070
00071 LEDA::string filename;
00072
00073 class gap_edge {
00074 private:
00075 int s1, s2, l1, l2;
00076 double v;
00077
00078 public:
00079 gap_edge(int s1t, int s2t, int l1t, int l2t) :
00080 s1(s1t), s2(s2t), l1(l1t), l2(l2t) {
00081 };
00082
00083 gap_edge() {};
00084
00085 int start_seq() const { return s1; };
00086 int end_seq() const { return s2; };
00087 int start_pos() const { return l1; };
00088 int end_pos() const { return l2; };
00089
00090 double& value() { return v; };
00091
00092 friend ostream& operator<<(ostream& o, const gap_edge&) {
00093 return o; };
00094 friend istream& operator>>(istream& i,const gap_edge&) {
00095 return i; };
00096
00097 bool operator==(const gap_edge& ge) const {
00098 if(ge.start_seq()!=start_seq()) return false;
00099 if(ge.end_seq()!=end_seq()) return false;
00100 if(ge.start_pos()!=start_pos()) return false;
00101 if(ge.end_pos()!=end_pos()) return false;
00102 return true;
00103 };
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114 };
00115
00116 namespace LEDA {
00117 int compare(const gap_edge& g1, const gap_edge& g2) {
00118 if (compare(g1.start_seq(), g2.start_seq())!=0) return compare(g1.start_seq(), g2.start_seq());
00119 if (compare(g1.end_seq(), g2.end_seq())!=0) return compare(g1.end_seq(), g2.end_seq());
00120 if (compare(g1.start_pos(), g2.start_pos())!=0) return compare(g1.start_pos(), g2.start_pos());
00121 return compare(g1.end_pos(), g2.end_pos());
00122 };
00123 };
00124
00125 int Hash(const gap_edge& ge) {
00126 return 50000000*ge.start_seq()+1000000*ge.end_seq()+
00127 1000*ge.start_pos()+ge.end_pos();
00128 };
00129
00130 int ID_Number(const gap_edge& ge) {
00131 return Hash(ge);
00132 }
00133
00134 LEDA::string Name[100];
00135
00136 class node_information {
00137 private:
00138 int p, s;
00139 char c;
00140
00141 public:
00142 node_information(int pt, int st, char ct) :
00143 p(pt), s(st), c(ct) {
00144 };
00145
00146 node_information() {};
00147
00148 int pos() { return p; };
00149 int seq() { return s; };
00150 char letter() { return c; };
00151
00152 friend ostream& operator<<(ostream& o, const node_information& i) {
00153 if(i.p==0) o<<Name[i.s]<<" ";
00154 o<<i.c;
00155 return o; };
00156 friend istream& operator>>(istream& i,const node_information&) {
00157 return i; };
00158
00159 };
00160
00161 edge_array<bool> LPEdge;
00162 set<gap_edge> LPGap;
00163 sortseq<gap_edge, double> GAP_COST;
00164 sortseq<gap_edge, double> UPG;
00165 GRAPH<node_information, two_tuple<int,double> > G;
00166 edge_map<double> UPE(G);
00167 node* Nodes[100];
00168 node_map2<edge> AM(G);
00169 int length[100];
00170 LEDA::string strings[100];
00171
00172 edge AM_(node u, node v) {
00173 if(G[u].seq()>G[v].seq())
00174 return AM(u,v);
00175 else
00176 return AM(v,u);
00177 }
00178
00179 double gap_cost(int k, int l, int i) {
00180 if(k>l) leda_swap(k,l);
00181 if(egp==0) {
00182 if((k==0) || (l==length[i]-1))
00183 return -sp*sqrt((float) l-k+1)-lp*(l-k+1) +add*(l-k+1);
00184 }
00185 return -gp-sp*sqrt((float) l-k+1)-lp*(l-k+1) +add*(l-k+1);
00186 }
00187
00188 double** CM;
00189 map<char,int> CMap;
00190
00191 double align_cost(int p1, int s1, int p2, int s2, const edge_array<two_tuple<int, double> >& EA) {
00192 if(s1<s2) return align_cost(p2,s2,p1,s1, EA);
00193 return EA[AM(Nodes[s1][p1],Nodes[s2][p2])].second();
00194 };
00195
00196 double align_cost(char c1, char c2) {
00197 return CM[CMap[c1]][CMap[c2]]+2*add;
00198 };
00199
00200 double gap_cost(gap_edge ge) {
00201 seq_item s=GAP_COST.lookup(ge);
00202 if(s==nil) {
00203
00204
00205
00206
00207 return gap_cost(ge.end_pos(),ge.start_pos(), ge.end_seq());
00208 }
00209 return GAP_COST[s];
00210 }
00211
00212 bool zero(const double& d) {
00213 return ((d>-0.001) && (d<0.001));
00214 }
00215
00216 bool lzero(const double& d) {
00217 return d<-0.001;
00218 }
00219
00220 bool lezero(const double& d) {
00221 return d<0.001;
00222 }
00223
00224 #line 865 "create_gml.lw"
00225 void make_pairgraph(GRAPH<node_information, double>& G, int i, int j,
00226 GRAPH<edge,double>& H, int lgi, int lgj, edge_array<double>& ew,
00227 node* lowest, node* highest, list<edge>& L, bool border_nodes,
00228 node& u, int wi=-1, int wj=-1)
00229 {
00230
00231
00232 pgbuild.start();
00233
00234 H.clear();
00235
00236 node_array<int> IJ(H, L.size()+2,0);
00237 node_array<int> IJ1(H, L.size()+2,0);
00238 node_array<double> V(H, L.size()+2, 0.0);
00239
00240 edge e;
00241 node w; u=nil;
00242 bool s=false, t=false;
00243 forall(e, L) {
00244 w=H.new_node(e);
00245 if(G[source(e)].seq()==i)
00246 IJ[w]=lgi*G[target(e)].pos()+(lgi-G[source(e)].pos()-1);
00247 else
00248 IJ[w]=lgi*G[source(e)].pos()+(lgi-G[target(e)].pos()-1);
00249 if(IJ[w]==0) s=true;
00250 if(IJ[w]==lgj*lgi-1) t=true;
00251 V[w]=ew[e];
00252 }
00253
00254 if(s==false) {
00255 w=H.new_node(nil);
00256 IJ[w]=0;
00257 V[w]=0;
00258 }
00259
00260 if(t==false) {
00261 w=H.new_node(nil);
00262 IJ[w]=lgj*lgi-1;
00263 V[w]=0;
00264 }
00265
00266 H.sort_nodes(IJ);
00267
00268 for(int k=0; k<lgi; k++) {
00269 lowest[k]=nil;
00270 highest[k]=nil;
00271 }
00272
00273 int i1,j1;
00274 forall_nodes(w, H) {
00275 i1=IJ[w]%lgi; j1=-1;
00276 while(i1>=0) {
00277 if((lowest[lgi-i1-1]!=nil) && (IJ[lowest[lgi-i1-1]]/lgi>j1)) {
00278 H.new_edge(w, lowest[lgi-i1-1], -V[w]);
00279 j1=IJ[lowest[lgi-i1-1]]/lgi;
00280 }
00281 i1--;
00282 }
00283 lowest[lgi-IJ[w]%lgi-1]=w;
00284 if((highest[lgi-IJ[w]%lgi-1]==nil) && (IJ[w]>=0))
00285 highest[lgi-IJ[w]%lgi-1]=w;
00286 if((IJ[w]%lgi <= lgi-wi-1) && (IJ[w]/lgi <= wj)) u=w;
00287 }
00288 if(wi!=-1)
00289 if(IJ[u]!=lgi-wi-1+wj*lgi) {
00290 cout<<"wrong u\n";
00291 cout<<IJ[u]%lgi<<" "<<IJ[u]/lgi<<" "<<wi<<" "<<wj<<endl;
00292 }
00293
00294 pgbuild.stop();
00295 }
00296
00297
00298
00299 #line 638 "create_gml.lw"
00300 double pairwise_align(LEDA::string s1, LEDA::string s2, LEDA::string& s3, LEDA::string& s4, int n1, int n2,
00301 const edge_array<two_tuple<int, double> >& EA, array2<double>& V) {
00302 int i,j,k, l1=s1.length(), l2=s2.length();
00303 array2<double> G(-1,l1,-1,l2), E(-1,l1,-1,l2), F(-1,l1,-1,l2);
00304 array2<int> TE(-1,l1,-1,l2), TF(-1,l1,-1,l2), TVE(-1,l1,-1,l2), TVF(-1,l1,-1,l2);
00305
00306 V(-1,-1)=0;
00307 for(i=0; i<l1; i++) {
00308 F(i,-1)=gap_cost(gap_edge(n2,n1,0,i));
00309 V(i,-1)=F(i,-1);
00310 TF(i,-1)=i+1;
00311 TE(i,-1)=0;
00312 TVE(i,-1)=0;
00313 TVF(i,-1)=i+1;
00314 G(i,-1)=-MAXDOUBLE/2;
00315 E(i,-1)=-MAXDOUBLE/2;
00316 }
00317 for(j=0; j<l2; j++) {
00318 E(-1,j)=gap_cost(gap_edge(n1,n2,0,j));
00319 V(-1,j)=E(-1,j);
00320 TE(-1,j)=j+1;
00321 TF(-1,j)=0;
00322 TVE(-1,j)=j+1;
00323 TVF(-1,j)=0;
00324 G(-1,j)=-MAXDOUBLE/2;
00325 F(-1,j)=-MAXDOUBLE/2;
00326 }
00327
00328 for(i=0; i<l1; i++) {
00329 for(j=0; j<l2; j++) {
00330 G(i,j)=V(i-1,j-1)+align_cost(i,n1,j,n2, EA);
00331 E(i,j)=G(i,j-1)+gap_cost(gap_edge(n1,n2,j,j)); TE(i,j)=1;
00332 for(k=-1; k<j-1; k++) {
00333 if(E(i,j)<G(i,k)+gap_cost(gap_edge(n1,n2,k+1,j))) {
00334 E(i,j)=G(i,k)+gap_cost(gap_edge(n1,n2,k+1,j)); TE(i,j)=j-k;
00335 }
00336 }
00337 F(i,j)=G(i-1,j)+gap_cost(gap_edge(n2,n1,i,i)); TF(i,j)=1;
00338 for(k=-1; k<i-1; k++) {
00339 if(F(i,j)<G(k,j)+gap_cost(gap_edge(n2,n1,k+1,i))) {
00340 F(i,j)=G(k,j)+gap_cost(gap_edge(n2,n1,k+1,i)); TF(i,j)=i-k;
00341 }
00342 }
00343 V(i,j)=G(i,j); TVE(i,j)=0; TVF(i,j)=0;
00344 for(k=-1; k<j; k++) {
00345 if(V(i,j)<F(i,k)+gap_cost(gap_edge(n1,n2,k+1,j))) {
00346 V(i,j)=F(i,k)+gap_cost(gap_edge(n1,n2,k+1,j));
00347 TVF(i,j)=TF(i,k);
00348 TVE(i,j)=j-k;
00349 }
00350 }
00351 if(E(i,j)>V(i,j)) { V(i,j)=E(i,j); TVE(i,j)=TE(i,j); TVF(i,j)=0; }
00352 if(F(i,j)>V(i,j)) { V(i,j)=F(i,j); TVF(i,j)=TF(i,j); TVE(i,j)=0; }
00353
00354 }
00355 }
00356
00357 cout<<V(l1-1,l2-1)<<" pairwise\n";
00358
00359 s3=""; s4="";
00360 i=l1-1; j=l2-1;
00361 int de,df;
00362
00363 double d=0;
00364 while((i>=0) || (j>=0)) {
00365 if(TVE(i,j)>0) {
00366 for(k=0; k<TVE(i,j); k++) {
00367 s3="-"+s3; s4=LEDA::string(s2[j-k])+s4;
00368 }
00369 if(gap_cost(gap_edge(n1,n2,j-k+1,j))>0) cout<<j<<" "<<k<<" pos gap\n";
00370 d+=gap_cost(gap_edge(n1,n2,j-k+1,j));
00371 de=k;
00372 } else de=0;
00373 if(TVF(i,j)>0) {
00374 for(k=0; k<TVF(i,j); k++) {
00375 s3=LEDA::string(s1[i-k])+s3; s4="-"+s4;
00376 }
00377 if(gap_cost(gap_edge(n2,n1,i-k+1, i))>0) cout<<"pos gap2\n";
00378 d+=gap_cost(gap_edge(n2,n1,i-k+1, i));
00379 df=k;
00380 } else df=0;
00381 i-=df; j-=de;
00382 if((i>=0) && (j>=0)) {
00383 s3=LEDA::string(s1[i])+s3; s4=LEDA::string(s2[j])+s4;
00384 d+=align_cost(i,n1,j,n2,EA);
00385 i--; j--;
00386 }
00387 }
00388
00389 cout<<d<<" test value\n";
00390 cout<<s3<<"\n"<<s4<<"\n";
00391
00392 return V(l1-1,l2-1);
00393 };
00394
00395
00396
00397 double pairwise_align(LEDA::string s1, LEDA::string s2, array2<double>& V, int i1, int i2) {
00398 int i,j,k, l1=s1.length(), l2=s2.length();
00399 array2<double> G(-1,l1,-1,l2), E(-1,l1,-1,l2), F(-1,l1,-1,l2);
00400 array2<int> TE(-1,l1,-1,l2), TF(-1,l1,-1,l2), TVE(-1,l1,-1,l2), TVF(-1,l1,-1,l2);
00401
00402 V(-1,-1)=0;
00403 for(i=0; i<l1; i++) {
00404 F(i,-1)=gap_cost(0,i,i1);
00405 V(i,-1)=F(i,-1);
00406 TF(i,-1)=i+1;
00407 TE(i,-1)=0;
00408 TVE(i,-1)=0;
00409 TVF(i,-1)=i+1;
00410 G(i,-1)=-MAXDOUBLE/2;
00411 E(i,-1)=-MAXDOUBLE/2;
00412 }
00413 for(j=0; j<l2; j++) {
00414 E(-1,j)=gap_cost(0,j,i2);
00415 V(-1,j)=E(-1,j);
00416 TE(-1,j)=j+1;
00417 TF(-1,j)=0;
00418 TVE(-1,j)=j+1;
00419 TVF(-1,j)=0;
00420 G(-1,j)=-MAXDOUBLE/2;
00421 F(-1,j)=-MAXDOUBLE/2;
00422 }
00423
00424 for(i=0; i<l1; i++) {
00425 for(j=0; j<l2; j++) {
00426 G(i,j)=V(i-1,j-1)+align_cost(s1[i],s2[j]);
00427 E(i,j)=G(i,j-1)+gap_cost(j,j,i2); TE(i,j)=1;
00428 for(k=-1; k<j-1; k++) {
00429 if(E(i,j)<G(i,k)+gap_cost(j,k+1,i2)) {
00430 E(i,j)=G(i,k)+gap_cost(j,k+1,i2); TE(i,j)=j-k;
00431 }
00432 }
00433 F(i,j)=G(i-1,j)+gap_cost(i,i,i1); TF(i,j)=1;
00434 for(k=-1; k<i-1; k++) {
00435 if(F(i,j)<G(k,j)+gap_cost(i,k+1,i1)) {
00436 F(i,j)=G(k,j)+gap_cost(i,k+1,i1); TF(i,j)=i-k;
00437 }
00438 }
00439 V(i,j)=G(i,j); TVE(i,j)=0; TVF(i,j)=0;
00440 for(k=-1; k<j; k++) {
00441 if(V(i,j)<F(i,k)+gap_cost(j,k+1,i2)) {
00442 V(i,j)=F(i,k)+gap_cost(j,k+1,i2);
00443 TVF(i,j)=TF(i,k);
00444 TVE(i,j)=j-k;
00445 }
00446 }
00447 if(E(i,j)>V(i,j)) { V(i,j)=E(i,j); TVE(i,j)=TE(i,j); TVF(i,j)=0; }
00448 if(F(i,j)>V(i,j)) { V(i,j)=F(i,j); TVF(i,j)=TF(i,j); TVE(i,j)=0; }
00449
00450 }
00451 }
00452
00453 cout<<V(l1-1,l2-1)<<" pairwise\n";
00454
00455 return V(l1-1,l2-1);
00456 };
00457
00458
00459
00460 double pairwise_align_rev(LEDA::string s1, LEDA::string s2, LEDA::string& s3, LEDA::string& s4,
00461 int n1, int n2,
00462 const edge_array<two_tuple<int, double> >& EA, array2<double>& V) {
00463 int i,j,k, l1=s1.length(), l2=s2.length();
00464 array2<double> G(-1,l1,-1,l2), E(-1,l1,-1,l2), F(-1,l1,-1,l2);
00465 array2<int> TE(-1,l1,-1,l2), TF(-1,l1,-1,l2), TVE(-1,l1,-1,l2),
00466 TVF(-1,l1,-1,l2);
00467
00468 V(l1,l2)=0;
00469 for(i=0; i<l1; i++) {
00470 F(i,l2)=gap_cost(gap_edge(n2,n1,i,l1-1));
00471 V(i,l2)=F(i,l2);
00472 TF(i,l2)=l1-i;
00473 TE(i,l2)=0;
00474 TVE(i,l2)=0;
00475 TVF(i,l2)=l1-i;
00476 G(i,l2)=-MAXDOUBLE/2;
00477 E(i,l2)=-MAXDOUBLE/2;
00478 }
00479 for(j=0; j<l2; j++) {
00480 E(l1,j)=gap_cost(gap_edge(n1,n2,j,l2-1));
00481 V(l1,j)=E(l1,j);
00482 TE(l1,j)=l2-j;
00483 TF(l1,j)=0;
00484 TVE(l1,j)=l2-j;
00485 TVF(l1,j)=0;
00486 G(l1,j)=-MAXDOUBLE/2;
00487 F(l1,j)=-MAXDOUBLE/2;
00488 }
00489
00490 for(i=l1-1; i>=0; i--) {
00491 for(j=l2-1; j>=0; j--) {
00492 G(i,j)=V(i+1,j+1)+align_cost(i,n1,j,n2, EA);
00493 E(i,j)=G(i,j+1)+gap_cost(gap_edge(n1,n2,j,j)); TE(i,j)=1;
00494 for(k=j+1; k<=l2; k++) {
00495 if(E(i,j)<G(i,k)+gap_cost(gap_edge(n1,n2,j,k-1))) {
00496 E(i,j)=G(i,k)+gap_cost(gap_edge(n1,n2,j,k-1)); TE(i,j)=k-j;
00497 }
00498 }
00499 F(i,j)=G(i+1,j)+gap_cost(gap_edge(n2,n1,i,i)); TF(i,j)=1;
00500 for(k=i+1; k<=l1; k++) {
00501 if(F(i,j)<G(k,j)+gap_cost(gap_edge(n2,n1,i, k-1))) {
00502 F(i,j)=G(k,j)+gap_cost(gap_edge(n2,n1,i, k-1)); TF(i,j)=k-i;
00503 }
00504 }
00505 V(i,j)=G(i,j); TVE(i,j)=0; TVF(i,j)=0;
00506 for(k=j+1; k<=l2; k++) {
00507 if(V(i,j)<F(i,k)+gap_cost(gap_edge(n1,n2,j, k-1))) {
00508 V(i,j)=F(i,k)+gap_cost(gap_edge(n1,n2,j, k-1));
00509 TVF(i,j)=TF(i,k);
00510 TVE(i,j)=k-j;
00511 }
00512 }
00513 if(E(i,j)>V(i,j)) { V(i,j)=E(i,j); TVE(i,j)=TE(i,j); TVF(i,j)=0; }
00514 if(F(i,j)>V(i,j)) { V(i,j)=F(i,j); TVF(i,j)=TF(i,j); TVE(i,j)=0; }
00515
00516 }
00517 }
00518
00519 cout<<V(0,0)<<" pairwise rev\n";
00520
00521 return V(0,0);
00522 };
00523
00524
00525 #line 1027 "create_gml.lw"
00526 void read_config_file() {
00527 ifstream is("config");
00528 LEDA::string s;
00529
00530 while(!is.eof()) {
00531 is>>s;
00532 if(s=="FewVariables") is>>fewvariables;
00533 if(s=="AllNonPruned") is>>allnonpruned;
00534 if(s=="ReadLowerBound") is>>readoptvalue;
00535 if(s=="WriteOptValue") is>>writeoptvalue;
00536 if(s=="EdgeBounce") is>>edgebounce;
00537 if(s=="GapBounce") is>>gapbounce;
00538 if(s=="PricingTolerance") is>>pricingtolerance;
00539 if(s=="InitTolerance") is>>inittolerance;
00540 if(s=="MaxUpperLower") is>>maxupperlower;
00541 if(s=="GlobalPart") is>>gp;
00542 if(s=="LinearPart") is>>lp;
00543 if(s=="SquareRootPart") is>>sp;
00544 if(s=="EndGapPenality") is>>egp;
00545 if(s=="Shift") is>>add;
00546
00547 }
00548
00549 cout<<"\nConfiguration\n===============\n";
00550 cout<<"\nFewVariables: "<<fewvariables;
00551 cout<<"\nAllNonPruned: "<<allnonpruned;
00552 cout<<"\nReadLowerBound: "<<readoptvalue;
00553 cout<<"\nWriteOptValue: "<<writeoptvalue;
00554 cout<<"\nEdgeBounce: "<<edgebounce;
00555 cout<<"\nGapBounce: "<<gapbounce;
00556 cout<<"\nPricingTolerance: "<<pricingtolerance;
00557 cout<<"\nInitTolerance: "<<inittolerance<<"\n\n";
00558 }
00559
00560 #line 244 "create_gml.lw"
00561 int main(int argc, char* argv[]) {
00562
00563 rootbound=-1;
00564 allgvars=0;
00565 allavars=0;
00566 redavars=0;
00567 prepro.start();
00568
00569 if(argc<2) {
00570 cout<<"Usage: Infile(fasta) Outfile a b c (in a+b*l+c*sqrt(l))\n\n";
00571 return 1;
00572 }
00573
00574 if(argc>2) {
00575 gp=atoi(argv[3]);
00576 lp=atoi(argv[4]);
00577 sp=atoi(argv[5]);
00578
00579 add=gp+lp+sp;
00580 cout<<gp<<" "<<lp<<" "<<sp<<" "<<egp<<" "<<add<<" score data\n";
00581 }
00582
00583 read_config_file();
00584
00585 if (readoptvalue) {
00586 LEDA::string s=argv[1];
00587 s+=".opt";
00588 ifstream ifstr(s);
00589 ifstr>>mylowerbound;
00590 mylowerbound=mylowerbound*0.9999999;
00591 cout<<mylowerbound<<" given lower bound\n\n";
00592 }
00593
00594 int sequences;
00595
00596
00597 #line 357 "create_gml.lw"
00598 ifstream is(argv[1]);
00599
00600 sequences=-1;
00601
00602 LEDA::string s;
00603 while (!is.eof()) {
00604 s=read_line(is);
00605 if(s[0]=='>') {
00606 sequences++;
00607 Name[sequences]=s.del(0);
00608 int k=0;
00609 while((k<Name[sequences].length()) && (Name[sequences][k]!=','))
00610 k++;
00611 Name[sequences]=Name[sequences].del(k,Name[sequences].length());
00612 cout<<Name[sequences]<<" new sequence\n";
00613 } else if(sequences!=-1)
00614 strings[sequences]+=s;
00615 }
00616 sequences++;
00617
00618 cout<<"here\n";
00619
00620 for(int i=0; i<sequences; i++) {
00621 int k=0;
00622 while (k<strings[i].length()) {
00623 if((strings[i][k]>='A') && (strings[i][k]<='Z')) k++;
00624 else {
00625 if ((strings[i][k]>='a') && (strings[i][k]<='z')) {
00626 strings[i][k]=strings[i][k]+'A'-'a';
00627 k++;
00628 } else strings[i]=strings[i].del(k);
00629 }
00630 };
00631 cout<<strings[i]<<endl;
00632 };
00633
00634 for(int j=0; j<sequences; j++) {
00635 Nodes[j]=new node[strings[j].length()];
00636 for(int i=0; i<strings[j].length(); i++) {
00637 Nodes[j][i]=G.new_node(node_information(i, j, strings[j][i]));
00638 }
00639 length[j]=strings[j].length();
00640 }
00641
00642 cout<<sequences<<"\n reading done\n";
00643
00644 cout<<"read matrix\n";
00645
00646 ifstream matrix("matrix");
00647
00648 s="#";
00649 while ((s[0]=='#') || (s.length()==0)) s.read_line(matrix);
00650
00651 int ns=0;
00652 cout<<s<<" : list of characters\n";
00653 for(int i=0; i<s.length(); i++) if (s[i]!=' ') {
00654 CMap[s[i]]=ns;
00655 ns++;
00656 }
00657
00658 CM=new double*[ns+1];
00659 for(int i=0; i<ns+1; i++) CM[i]=new double[ns+1];
00660 for(int i=0; i<ns; i++) {
00661 matrix>>s; cout<<endl;
00662 for(int j=0; j<ns; j++) {
00663 matrix>>CM[i][j];
00664
00665 cout<<CM[i][j]<<" ";
00666 }
00667 }
00668
00669 CMap['+']=ns;
00670 for(int i=0; i<ns; i++) { CM[ns][i]=-10000; CM[i][ns]=-10000; }
00671 CM[ns][ns]=1000;
00672
00673 cout<<"setup graph\n";
00674
00675
00676 node u, v;
00677 forall_nodes(u, G) forall_nodes(v, G) if(G[u].seq()>G[v].seq()) {
00678 AM(u,v)=G.new_edge(u, v, two_tuple<int, double>(-1, align_cost(G[u].letter(),G[v].letter())));
00679 }
00680
00681
00682 cout<<"problem created\n\n";
00683
00684
00685 edge_array<double> fix(G);
00686 set<int> GE[sequences][sequences];
00687 LEDA::string ts1, ts2, s1t,s2t;
00688 int l1, l2;
00689 edge e;
00690 double pa[sequences][sequences];
00691 double hs=0;
00692 for(int i=0; i<sequences; i++) for(int j=i+1; j<sequences; j++) {
00693 cout<<i<<" "<<j<<" sequences\n";
00694 l1=strings[i].length();
00695 l2=strings[j].length();
00696 array2<double> V1(-1,l1,-1,l2), V2(-1,l1,-1,l2);
00697 int p1=0, p2=0, g=0;
00698 s1t="";
00699 for(int k=l1-1; k>=0; k--) s1t+=strings[i][k];
00700 s2t="";
00701 for(int k=l2-1; k>=0; k--) s2t+=strings[j][k];
00702 pairwise_align(s1t,s2t,V2, i,j);
00703 pa[i][j]=pairwise_align(strings[i],strings[j],ts1,ts2,i,j, G.edge_data(), V1);
00704 for(int k=0; k<l1; k++) for(int l=0; l<l2; l++) {
00705 e=AM(Nodes[j][l],Nodes[i][k]);
00706 UPE[e]=V1(k-1,l-1)+V2(l1-k-2,l2-l-2)+G[e].second();
00707 fix[e]=V1(k,l)+V2(l1-k-2,l2-l-2);
00708 for(int N=leda_max(k-25,0); N<k; N++)
00709 for(int M=k+1; M<leda_min(k+25,strings[i].length()); M++) {
00710 fix[e]=leda_max(fix[e], V1(N-1,l)+V2(l1-M-2,l2-l-2)+
00711 gap_cost(M,N,strings[i].length()));
00712 }
00713 for(int N=leda_max(l-25,0); N<l; N++)
00714 for(int M=l+1; M<leda_min(l+25,strings[j].length()); M++) {
00715 fix[e]=leda_max(fix[e], V1(k,N-1)+V2(l1-k-2,l2-M-2)+
00716 gap_cost(M,N,strings[j].length()));
00717 }
00718 if(fix[e]>pa[i][j]+0.001) cout<<"fix better than pairwise upper\n";
00719
00720 }
00721
00722 for(int k=0; k<l2; k++) {
00723 for(int l=k; l<l2; l++) {
00724 double zg=0;
00725 if(k==0) zg=V2(l1-1,l2-l-2);
00726 if(l==l2-1) zg=V1(l1-1,k-1);
00727 if((k>0) && (l<l2-1)) {
00728 for(int z=-1; z<l1; z++) {
00729 zg=leda_max(zg, V1(z,k-1)+V2(l1-z-2,l2-l-2));
00730 }
00731 }
00732 gap_edge ge(i,j,k,l);
00733 UPG.insert(ge, zg+gap_cost(l,k,j));
00734 }
00735 }
00736 for(int k=0; k<l1; k++) {
00737 for(int l=k; l<l1; l++) {
00738 double zg=0;
00739 if(k==0) zg=V2(l1-l-2, l2-1);
00740 if(l==l1-1) zg=V1(k-1, l2-1);
00741 if((k>0) && (l<l1-1)) {
00742 for(int z=-1; z<l2; z++) {
00743 zg=leda_max(zg, V1(k-1,z)+V2(l1-l-2, l2-z-2));
00744 }
00745 }
00746 gap_edge ge(j,i,k,l);
00747 UPG.insert(ge, zg+gap_cost(l,k,i));
00748 }
00749 }
00750
00751 hs+=pa[i][j];
00752 cout<<pa[i][j]<<" pw "<<hs<<" sum\n";
00753 pa[j][i]=pa[i][j];
00754 for(int k=0; k<ts1.length(); k++) {
00755 if((ts1[k]!='-') && (ts2[k]!='-')) {
00756 if(g>0) {
00757 gap_edge ge(i,j,p2-g,p2-1);
00758 if(g<=gapbounce) LPGap.insert(ge);
00759 }
00760 if(g<0) {
00761 gap_edge ge(j,i,p1+g,p1-1);
00762 if(-g<=gapbounce) LPGap.insert(ge);
00763 }
00764 GE[i][j].insert(1000*p1+p2);
00765 p1++;p2++; g=0;
00766 } else {
00767 if(ts1[k]=='-') {
00768 g++; p2++;
00769 } else {
00770 g--; p1++;
00771 }
00772 }
00773 }
00774 if(g>0) {
00775 gap_edge ge(i,j,p2-g,p2-1);
00776 if(g<=gapbounce) LPGap.insert(ge);
00777 }
00778 if(g<0) {
00779 gap_edge ge(j,i,p1+g,p1-1);
00780 if(-g<=gapbounce) LPGap.insert(ge);
00781 }
00782 }
00783
00784 mylowerbound=leda_max(mylowerbound, hs-maxupperlower);
00785
00786 edge_array<double> UPE2(G);
00787 node S,t;
00788 forall_edges(e, G) {
00789 S=source(e); t=target(e);
00790 if((hs+UPE[e]-pa[G[S].seq()][G[t].seq()] >= mylowerbound) &&
00791 (G[S].pos()>0) && (G[t].pos()>0) &&
00792 (G[S].pos()<strings[G[S].seq()].length()-1) &&
00793 (G[t].pos()<strings[G[t].seq()].length()-1)) {
00794 double ub=0;
00795 for(int i=1; i<sequences; i++)
00796 if ((i!=G[S].seq()) && (i!=G[t].seq())) {
00797 double m=fix[AM_(Nodes[i][strings[i].length()-1],
00798 Nodes[G[S].seq()][G[S].pos()])]+
00799 fix[AM_(Nodes[i][strings[i].length()-1],
00800 Nodes[G[t].seq()][G[t].pos()])];
00801
00802
00803
00804 for(int k=0; k<strings[i].length()-1; k++) {
00805 edge e1=AM_(Nodes[i][k],Nodes[G[S].seq()][G[S].pos()]),
00806 e2=AM_(Nodes[i][k],Nodes[G[t].seq()][G[t].pos()]);
00807 m=leda_max(m,fix[e1]+fix[e2]);
00808 }
00809 ub+=leda_min(0.0, m-pa[i][G[S].seq()]-pa[i][G[t].seq()]);
00810
00811 }
00812 UPE2[e]=hs+UPE[e]-pa[G[S].seq()][G[t].seq()]+ub;
00813
00814 } else UPE2[e]=hs+UPE[e]-pa[G[S].seq()][G[t].seq()];
00815 }
00816
00817
00818 forall_edges(e, G) {
00819 UPE[e]=hs+UPE[e]-pa[G[source(e)].seq()][G[target(e)].seq()];
00820
00821 }
00822
00823 cout<<hs<<" Obere Schranke\n";
00824
00825 cout<<"init LPedge\n";
00826 LPEdge.init(G, !fewvariables);
00827
00828 l1=0; l2=0; int ltest=0;
00829 forall_edges(e, G) {
00830 if(UPE2[e] < mylowerbound ) {
00831 l1++;
00832 if(UPE[e] >= mylowerbound) {
00833 ltest++;
00834
00835
00836 }
00837 } else {
00838 l2++;
00839 if ((allnonpruned) || (UPE[e] >= hs*inittolerance)) LPEdge[e]=true;
00840 }
00841 }
00842 cout<<l1<<" pruned edges "<<l2<<" unpruned edges\n";
00843 cout<<ltest<<" newly prunded\n";
00844
00845 forall_edges(e,G) if (!LPEdge[e]) G.hide_edge(e);
00846
00847 l1=0; l2=0;
00848 if (allnonpruned) {
00849 seq_item si;
00850 forall_items(si, UPG) {
00851 gap_edge ge=UPG.key(si);
00852 UPG.change_inf(si, hs+UPG.inf(si)-pa[leda_min(ge.start_seq(),ge.end_seq())]
00853 [leda_max(ge.start_seq(),ge.end_seq())]);
00854 if((!LPGap.member(ge)) && (UPG.inf(si) >= mylowerbound )) {
00855 l2++;
00856 if(ge.end_pos()-ge.start_pos()<gapbounce) LPGap.insert(ge);
00857 } else l1++;
00858 }
00859 } else {
00860 seq_item si;
00861 forall_items(si, UPG) {
00862 gap_edge ge=UPG.key(si);
00863 UPG.change_inf(si, hs+UPG.inf(si)-pa[leda_min(ge.start_seq(),ge.end_seq())]
00864 [leda_max(ge.start_seq(),ge.end_seq())]);
00865 if((!LPGap.member(ge)) && (UPG.inf(si) >= hs*inittolerance)) {
00866 l2++;
00867 if(ge.end_pos()-ge.start_pos()<gapbounce) LPGap.insert(ge);
00868 } else l1++;
00869 }
00870 }
00871
00872 G.restore_all_edges();
00873 cout<<l1<<" pruned gaps "<<l2<<" unpruned gaps\n";
00874
00875 #line 279 "create_gml.lw"
00876 ;
00877
00878
00879 #line 289 "create_gml.lw"
00880
00881 if (!allnonpruned) {
00882 forall_edges(e, G) {
00883 if((GE[G[target(e)].seq()][G[source(e)].seq()].member(1000*G[target(e)].pos()
00884 +G[source(e)].pos()))
00885 || (abs(G[source(e)].pos()-G[target(e)].pos())<=edgebounce))
00886 LPEdge[e]=true;
00887 }
00888
00889 int i, j, k, l;
00890 for(i=0; i<sequences; i++) {
00891 for(j=0; j<sequences; j++) if (i!=j) {
00892 for(k=0; k<length[j]; k++) {
00893 for(l=k; l<length[j]; l++) {
00894 gap_edge ge(i,j,k,l);
00895 if (l-k<gapbounce)
00896 LPGap.insert(ge);
00897 }
00898 }
00899 }
00900 }
00901 }
00902
00903 forall_edges(e, G) if (!LPEdge[e]) G.del_edge(e);
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
00918
00919
00920
00921
00922 ofstream os1(argv[2]);
00923 os1 << sequences <<" " << G.number_of_edges()+LPGap.size() << endl;
00924
00925 int SI[sequences];
00926 SI[0]=0;
00927 for(int i=0; i<sequences; i++) {
00928 os1 << length[i] <<" "<<Name[i] <<" "<<strings[i] <<endl;
00929 if(i<sequences-1) SI[i+1]=SI[i]+length[i];
00930 }
00931
00932 forall_edges(e, G) {
00933 os1 << SI[G[source(e)].seq()]+G[source(e)].pos() <<" "
00934 << SI[G[target(e)].seq()]+G[target(e)].pos() <<" -1 "
00935 << G[e].second() << endl;
00936 }
00937
00938 gap_edge ge;
00939 forall(ge, LPGap) {
00940 os1 << SI[ge.end_seq()]+ge.start_pos() <<" "
00941 << SI[ge.end_seq()]+ge.end_pos() <<" "
00942 << ge.start_seq() <<" "
00943 << gap_cost(ge.start_pos(), ge.end_pos(), ge.start_seq()) <<endl;
00944 }
00945
00946 #line 283 "create_gml.lw"
00947 return 0;
00948 }
00949