Main Page   Class Hierarchy   Compound List   File List   Contact   Download   Symbolic Constraints   Examples  

create_gml.c

00001 #line 16 "create_gml.lw"
00002 #include<LEDA/graph.h>
00003 #include<LEDA/graph_alg.h>
00004 #include<LEDA/sortseq.h>
00005 #include<LEDA/stack.h>
00006 #include<LEDA/set.h>
00007 #include<LEDA/p_queue.h>
00008 #include<LEDA/d2_dictionary.h>
00009 #include<LEDA/map.h>
00010 #include<LEDA/array2.h>
00011 #include<LEDA/node_map2.h>
00012 #include<LEDA/string.h>
00013 
00014 #include<ctype.h>
00015 
00016 using namespace LEDA;
00017 using namespace std;
00018 
00019 int add = 15;
00020 int lp = 3;
00021 int gp = 8;
00022 int sp = 0;
00023 int egp = 0;
00024 
00025 class ABA_CPUTIMER {
00026   float mytime;
00027   float laststart;
00028 
00029 public:
00030   ABA_CPUTIMER(void*) {
00031     mytime=0;
00032   }
00033 
00034   void start() {
00035     used_time(laststart);
00036   };
00037 
00038   void stop() {
00039     laststart=used_time(laststart);
00040     mytime+=laststart;
00041   };
00042 
00043   friend ostream& operator<<(ostream& o,const ABA_CPUTIMER& c) { 
00044     o<<c.mytime;
00045     return o; 
00046   };
00047 };
00048 
00049 double maxupperlower=10000;
00050 bool fewvariables=true;
00051 bool allnonpruned=false;
00052 double mylowerbound=0;
00053 bool writeoptvalue=true;
00054 bool readoptvalue=false;
00055 int edgebounce=4;
00056 int gapbounce=3;
00057 double inittolerance=0.98;
00058 double pricingtolerance=0.98;
00059 ABA_CPUTIMER heur(nil);
00060 ABA_CPUTIMER extmixed(nil);
00061 ABA_CPUTIMER extpair(nil);
00062 ABA_CPUTIMER trans(nil);
00063 ABA_CPUTIMER pgbuild(nil);
00064 ABA_CPUTIMER prepro(nil);
00065 ABA_CPUTIMER roottimer(nil);
00066 double rootbound;
00067 int allgvars;
00068 int allavars;
00069 int redavars;
00070 
00071 LEDA::string filename;
00072 
00073 class gap_edge {
00074  private:
00075   int s1, s2, l1, l2;
00076   double v;
00077 
00078  public:
00079   gap_edge(int s1t, int s2t, int l1t, int l2t) :
00080     s1(s1t), s2(s2t), l1(l1t), l2(l2t) {
00081   };
00082 
00083   gap_edge() {};
00084 
00085   int start_seq() const { return s1; };
00086   int end_seq() const { return s2; };
00087   int start_pos() const { return l1; };
00088   int end_pos() const { return l2; };
00089 
00090   double& value() { return v; };
00091 
00092   friend ostream& operator<<(ostream& o, const gap_edge&) {
00093     return o; }; 
00094   friend istream& operator>>(istream& i,const gap_edge&) {
00095     return i; };
00096 
00097   bool operator==(const gap_edge& ge) const {
00098     if(ge.start_seq()!=start_seq()) return false;
00099     if(ge.end_seq()!=end_seq()) return false;
00100     if(ge.start_pos()!=start_pos()) return false;
00101     if(ge.end_pos()!=end_pos()) return false;
00102     return true;
00103   };
00104 
00105   /*
00106   friend
00107   int compare(const gap_edge& g1, const gap_edge& g2) {
00108     if (compare(g1.s1, g2.s1)!=0) return compare(g1.s1, g2.s1);
00109     if (compare(g1.s2, g2.s2)!=0) return compare(g1.s2, g2.s2);
00110     if (compare(g1.l1, g2.l1)!=0) return compare(g1.l1, g2.l1);
00111     return compare(g1.l2, g2.l2);
00112   };
00113   */
00114 };
00115 
00116 namespace LEDA {
00117 int compare(const gap_edge& g1, const gap_edge& g2) {
00118     if (compare(g1.start_seq(), g2.start_seq())!=0) return compare(g1.start_seq(), g2.start_seq());
00119     if (compare(g1.end_seq(), g2.end_seq())!=0) return compare(g1.end_seq(), g2.end_seq());
00120     if (compare(g1.start_pos(), g2.start_pos())!=0) return compare(g1.start_pos(), g2.start_pos());
00121     return compare(g1.end_pos(), g2.end_pos());
00122 };
00123 };
00124 
00125 int Hash(const gap_edge& ge) {
00126   return 50000000*ge.start_seq()+1000000*ge.end_seq()+
00127     1000*ge.start_pos()+ge.end_pos();
00128 };
00129 
00130 int ID_Number(const gap_edge& ge) { 
00131   return Hash(ge);
00132 }
00133 
00134 LEDA::string Name[100];
00135 
00136 class node_information {
00137  private:
00138   int p, s;
00139   char c;
00140 
00141  public:
00142   node_information(int pt, int st, char ct) :
00143     p(pt), s(st), c(ct) {
00144   };
00145 
00146   node_information() {};
00147 
00148   int pos() { return p; };
00149   int seq() { return s; };
00150   char letter() { return c; };
00151 
00152   friend ostream& operator<<(ostream& o, const node_information& i) {
00153     if(i.p==0) o<<Name[i.s]<<" ";
00154     o<<i.c;
00155     return o; }; 
00156   friend istream& operator>>(istream& i,const node_information&) {
00157     return i; };
00158 
00159 };
00160 
00161 edge_array<bool> LPEdge;
00162 set<gap_edge> LPGap;
00163 sortseq<gap_edge, double> GAP_COST;
00164 sortseq<gap_edge, double> UPG;
00165 GRAPH<node_information, two_tuple<int,double> > G;
00166 edge_map<double> UPE(G);
00167 node* Nodes[100];
00168 node_map2<edge> AM(G);
00169 int length[100];
00170 LEDA::string strings[100];
00171 
00172 edge AM_(node u, node v) {
00173   if(G[u].seq()>G[v].seq())
00174     return AM(u,v);
00175   else
00176     return AM(v,u);
00177 }
00178 
00179 double gap_cost(int k, int l, int i) {
00180   if(k>l) leda_swap(k,l);
00181   if(egp==0) {
00182     if((k==0) || (l==length[i]-1)) 
00183       return -sp*sqrt((float) l-k+1)-lp*(l-k+1) +add*(l-k+1);
00184   }
00185   return -gp-sp*sqrt((float) l-k+1)-lp*(l-k+1) +add*(l-k+1);
00186 }
00187 
00188 double** CM;
00189 map<char,int> CMap;
00190 
00191 double align_cost(int p1, int s1, int p2, int s2, const edge_array<two_tuple<int, double> >& EA) {
00192   if(s1<s2) return align_cost(p2,s2,p1,s1, EA);
00193   return EA[AM(Nodes[s1][p1],Nodes[s2][p2])].second();
00194 };
00195 
00196 double align_cost(char c1, char c2) {
00197   return CM[CMap[c1]][CMap[c2]]+2*add;
00198 };
00199 
00200 double gap_cost(gap_edge ge) {
00201   seq_item s=GAP_COST.lookup(ge);
00202   if(s==nil) {
00203     //cout<<"mil errror\n";
00204     //cout<<ge.end_pos()<<" "<<ge.start_pos()<<" "<<ge.end_seq()<<" "
00205     //<<length[ge.end_seq()]<<" "
00206     //<<gap_cost(ge.end_pos(),ge.start_pos(), ge.end_seq())<<" ge\n";
00207     return gap_cost(ge.end_pos(),ge.start_pos(), ge.end_seq());
00208   }
00209   return GAP_COST[s];
00210 }
00211 
00212 bool zero(const double& d) {
00213   return ((d>-0.001) && (d<0.001));
00214 }
00215 
00216 bool lzero(const double& d) {
00217   return d<-0.001;
00218 }
00219 
00220 bool lezero(const double& d) {
00221   return d<0.001;
00222 }
00223 
00224 #line 865 "create_gml.lw"
00225 void make_pairgraph(GRAPH<node_information, double>& G, int i, int j,
00226                     GRAPH<edge,double>& H, int lgi, int lgj, edge_array<double>& ew,
00227                     node* lowest, node* highest, list<edge>& L, bool border_nodes,
00228                     node& u, int wi=-1, int wj=-1) 
00229 {
00230   // Achtung, i Index wird Vertaucht i=lgi-i-1!
00231 
00232   pgbuild.start();
00233 
00234   H.clear();
00235 
00236   node_array<int> IJ(H, L.size()+2,0);
00237   node_array<int> IJ1(H, L.size()+2,0);
00238   node_array<double> V(H, L.size()+2, 0.0);
00239 
00240   edge e;
00241   node w; u=nil;
00242   bool s=false, t=false;
00243   forall(e, L) {
00244     w=H.new_node(e);
00245     if(G[source(e)].seq()==i)
00246       IJ[w]=lgi*G[target(e)].pos()+(lgi-G[source(e)].pos()-1);
00247     else
00248       IJ[w]=lgi*G[source(e)].pos()+(lgi-G[target(e)].pos()-1);
00249     if(IJ[w]==0) s=true;
00250     if(IJ[w]==lgj*lgi-1) t=true;
00251     V[w]=ew[e];
00252   }
00253 
00254   if(s==false) {
00255     w=H.new_node(nil);
00256     IJ[w]=0;
00257     V[w]=0;
00258   }
00259 
00260   if(t==false) {
00261     w=H.new_node(nil);
00262     IJ[w]=lgj*lgi-1;
00263     V[w]=0;
00264   }
00265 
00266   H.sort_nodes(IJ);
00267 
00268   for(int k=0; k<lgi; k++) {
00269     lowest[k]=nil;
00270     highest[k]=nil;
00271   }
00272 
00273   int i1,j1;
00274   forall_nodes(w, H) {   
00275     i1=IJ[w]%lgi; j1=-1;
00276     while(i1>=0) {
00277       if((lowest[lgi-i1-1]!=nil) && (IJ[lowest[lgi-i1-1]]/lgi>j1)) {
00278         H.new_edge(w, lowest[lgi-i1-1], -V[w]);
00279         j1=IJ[lowest[lgi-i1-1]]/lgi;
00280       }
00281       i1--;
00282     }
00283     lowest[lgi-IJ[w]%lgi-1]=w;
00284     if((highest[lgi-IJ[w]%lgi-1]==nil) && (IJ[w]>=0)) 
00285       highest[lgi-IJ[w]%lgi-1]=w;
00286     if((IJ[w]%lgi <= lgi-wi-1) && (IJ[w]/lgi <= wj)) u=w;
00287   }
00288   if(wi!=-1) 
00289     if(IJ[u]!=lgi-wi-1+wj*lgi) {
00290       cout<<"wrong u\n";
00291       cout<<IJ[u]%lgi<<" "<<IJ[u]/lgi<<" "<<wi<<" "<<wj<<endl;
00292     }
00293   
00294   pgbuild.stop();
00295 }
00296 
00297 
00298 
00299 #line 638 "create_gml.lw"
00300 double pairwise_align(LEDA::string s1, LEDA::string s2, LEDA::string& s3, LEDA::string& s4, int n1, int n2,
00301                       const edge_array<two_tuple<int, double> >& EA, array2<double>& V) {
00302   int i,j,k, l1=s1.length(), l2=s2.length(); 
00303   array2<double> G(-1,l1,-1,l2), E(-1,l1,-1,l2), F(-1,l1,-1,l2);
00304   array2<int> TE(-1,l1,-1,l2), TF(-1,l1,-1,l2), TVE(-1,l1,-1,l2), TVF(-1,l1,-1,l2); // Trace 0=V(i-1,j-1);k=V(i,k);-k=V(k,j)
00305 
00306   V(-1,-1)=0;
00307   for(i=0; i<l1; i++) {
00308    F(i,-1)=gap_cost(gap_edge(n2,n1,0,i));
00309    V(i,-1)=F(i,-1);
00310    TF(i,-1)=i+1;
00311    TE(i,-1)=0;
00312    TVE(i,-1)=0;
00313    TVF(i,-1)=i+1;
00314    G(i,-1)=-MAXDOUBLE/2;
00315    E(i,-1)=-MAXDOUBLE/2;
00316   }
00317   for(j=0; j<l2; j++) {
00318    E(-1,j)=gap_cost(gap_edge(n1,n2,0,j));
00319    V(-1,j)=E(-1,j);
00320    TE(-1,j)=j+1;
00321    TF(-1,j)=0;
00322    TVE(-1,j)=j+1;
00323    TVF(-1,j)=0;
00324    G(-1,j)=-MAXDOUBLE/2;
00325    F(-1,j)=-MAXDOUBLE/2;
00326   }
00327 
00328   for(i=0; i<l1; i++) { 
00329     for(j=0; j<l2; j++) {
00330       G(i,j)=V(i-1,j-1)+align_cost(i,n1,j,n2, EA);
00331       E(i,j)=G(i,j-1)+gap_cost(gap_edge(n1,n2,j,j)); TE(i,j)=1;
00332       for(k=-1; k<j-1; k++) {
00333         if(E(i,j)<G(i,k)+gap_cost(gap_edge(n1,n2,k+1,j))) {
00334           E(i,j)=G(i,k)+gap_cost(gap_edge(n1,n2,k+1,j)); TE(i,j)=j-k; 
00335         }
00336       }
00337       F(i,j)=G(i-1,j)+gap_cost(gap_edge(n2,n1,i,i)); TF(i,j)=1;
00338       for(k=-1; k<i-1; k++) {
00339         if(F(i,j)<G(k,j)+gap_cost(gap_edge(n2,n1,k+1,i))) {
00340           F(i,j)=G(k,j)+gap_cost(gap_edge(n2,n1,k+1,i)); TF(i,j)=i-k;
00341         }
00342       }
00343       V(i,j)=G(i,j); TVE(i,j)=0; TVF(i,j)=0;
00344       for(k=-1; k<j; k++) {
00345         if(V(i,j)<F(i,k)+gap_cost(gap_edge(n1,n2,k+1,j))) {
00346           V(i,j)=F(i,k)+gap_cost(gap_edge(n1,n2,k+1,j));
00347           TVF(i,j)=TF(i,k);
00348           TVE(i,j)=j-k;
00349         }
00350       }
00351       if(E(i,j)>V(i,j)) { V(i,j)=E(i,j); TVE(i,j)=TE(i,j); TVF(i,j)=0; }
00352       if(F(i,j)>V(i,j)) { V(i,j)=F(i,j); TVF(i,j)=TF(i,j); TVE(i,j)=0; }
00353 
00354     }
00355   }
00356 
00357   cout<<V(l1-1,l2-1)<<" pairwise\n";
00358 
00359   s3=""; s4="";
00360   i=l1-1; j=l2-1;
00361   int de,df;
00362   
00363   double d=0;
00364   while((i>=0) || (j>=0)) {
00365     if(TVE(i,j)>0) {
00366       for(k=0; k<TVE(i,j); k++) {
00367         s3="-"+s3; s4=LEDA::string(s2[j-k])+s4; 
00368       }
00369       if(gap_cost(gap_edge(n1,n2,j-k+1,j))>0) cout<<j<<" "<<k<<" pos gap\n";
00370       d+=gap_cost(gap_edge(n1,n2,j-k+1,j));
00371       de=k;
00372     } else de=0;
00373     if(TVF(i,j)>0) {
00374       for(k=0; k<TVF(i,j); k++) {
00375         s3=LEDA::string(s1[i-k])+s3; s4="-"+s4;
00376       }
00377       if(gap_cost(gap_edge(n2,n1,i-k+1, i))>0) cout<<"pos gap2\n";
00378       d+=gap_cost(gap_edge(n2,n1,i-k+1, i));
00379       df=k;
00380     } else df=0;
00381     i-=df; j-=de;
00382     if((i>=0) && (j>=0)) {
00383       s3=LEDA::string(s1[i])+s3; s4=LEDA::string(s2[j])+s4;
00384       d+=align_cost(i,n1,j,n2,EA);
00385       i--; j--;
00386     }
00387   }
00388  
00389   cout<<d<<" test value\n";
00390   cout<<s3<<"\n"<<s4<<"\n";
00391 
00392   return V(l1-1,l2-1);
00393 };
00394 
00395 //TODO this is almost the same as the previous, but for an other objective function
00396 // and does not compute the solution strings!
00397 double pairwise_align(LEDA::string s1, LEDA::string s2, array2<double>& V, int i1, int i2) {
00398   int i,j,k, l1=s1.length(), l2=s2.length(); 
00399   array2<double> G(-1,l1,-1,l2), E(-1,l1,-1,l2), F(-1,l1,-1,l2);
00400   array2<int> TE(-1,l1,-1,l2), TF(-1,l1,-1,l2), TVE(-1,l1,-1,l2), TVF(-1,l1,-1,l2); // Trace 0=V(i-1,j-1);k=V(i,k);-k=V(k,j)
00401 
00402   V(-1,-1)=0;
00403   for(i=0; i<l1; i++) {
00404    F(i,-1)=gap_cost(0,i,i1);
00405    V(i,-1)=F(i,-1);
00406    TF(i,-1)=i+1;
00407    TE(i,-1)=0;
00408    TVE(i,-1)=0;
00409    TVF(i,-1)=i+1;
00410    G(i,-1)=-MAXDOUBLE/2;
00411    E(i,-1)=-MAXDOUBLE/2;
00412   }
00413   for(j=0; j<l2; j++) {
00414    E(-1,j)=gap_cost(0,j,i2);
00415    V(-1,j)=E(-1,j);
00416    TE(-1,j)=j+1;
00417    TF(-1,j)=0;
00418    TVE(-1,j)=j+1;
00419    TVF(-1,j)=0;
00420    G(-1,j)=-MAXDOUBLE/2;
00421    F(-1,j)=-MAXDOUBLE/2;
00422   }
00423 
00424   for(i=0; i<l1; i++) { 
00425     for(j=0; j<l2; j++) {
00426       G(i,j)=V(i-1,j-1)+align_cost(s1[i],s2[j]);
00427       E(i,j)=G(i,j-1)+gap_cost(j,j,i2); TE(i,j)=1;
00428       for(k=-1; k<j-1; k++) {
00429         if(E(i,j)<G(i,k)+gap_cost(j,k+1,i2)) {
00430           E(i,j)=G(i,k)+gap_cost(j,k+1,i2); TE(i,j)=j-k; 
00431         }
00432       }
00433       F(i,j)=G(i-1,j)+gap_cost(i,i,i1); TF(i,j)=1;
00434       for(k=-1; k<i-1; k++) {
00435         if(F(i,j)<G(k,j)+gap_cost(i,k+1,i1)) {
00436           F(i,j)=G(k,j)+gap_cost(i,k+1,i1); TF(i,j)=i-k;
00437         }
00438       }
00439       V(i,j)=G(i,j); TVE(i,j)=0; TVF(i,j)=0;
00440       for(k=-1; k<j; k++) {
00441         if(V(i,j)<F(i,k)+gap_cost(j,k+1,i2)) {
00442           V(i,j)=F(i,k)+gap_cost(j,k+1,i2);
00443           TVF(i,j)=TF(i,k);
00444           TVE(i,j)=j-k;
00445         }
00446       }
00447       if(E(i,j)>V(i,j)) { V(i,j)=E(i,j); TVE(i,j)=TE(i,j); TVF(i,j)=0; }
00448       if(F(i,j)>V(i,j)) { V(i,j)=F(i,j); TVF(i,j)=TF(i,j); TVE(i,j)=0; }
00449 
00450     }
00451   }
00452 
00453   cout<<V(l1-1,l2-1)<<" pairwise\n";
00454 
00455   return V(l1-1,l2-1);
00456 };
00457 
00458 // Pairwise sequence alignemnent with the cost function as first, but in
00459 // reverse order! ohne rueckrechnung
00460 double pairwise_align_rev(LEDA::string s1, LEDA::string s2, LEDA::string& s3, LEDA::string& s4, 
00461                           int n1, int n2,
00462                           const edge_array<two_tuple<int, double> >& EA, array2<double>& V) {
00463   int i,j,k, l1=s1.length(), l2=s2.length(); 
00464   array2<double> G(-1,l1,-1,l2), E(-1,l1,-1,l2), F(-1,l1,-1,l2);
00465   array2<int> TE(-1,l1,-1,l2), TF(-1,l1,-1,l2), TVE(-1,l1,-1,l2), 
00466     TVF(-1,l1,-1,l2); // Trace 0=V(i-1,j-1);k=V(i,k);-k=V(k,j)
00467 
00468   V(l1,l2)=0;
00469   for(i=0; i<l1; i++) {
00470    F(i,l2)=gap_cost(gap_edge(n2,n1,i,l1-1));
00471    V(i,l2)=F(i,l2);
00472    TF(i,l2)=l1-i;
00473    TE(i,l2)=0;
00474    TVE(i,l2)=0;
00475    TVF(i,l2)=l1-i;
00476    G(i,l2)=-MAXDOUBLE/2;
00477    E(i,l2)=-MAXDOUBLE/2;
00478   }
00479   for(j=0; j<l2; j++) {
00480    E(l1,j)=gap_cost(gap_edge(n1,n2,j,l2-1));
00481    V(l1,j)=E(l1,j);
00482    TE(l1,j)=l2-j;
00483    TF(l1,j)=0;
00484    TVE(l1,j)=l2-j;
00485    TVF(l1,j)=0;
00486    G(l1,j)=-MAXDOUBLE/2;
00487    F(l1,j)=-MAXDOUBLE/2;
00488   }
00489 
00490   for(i=l1-1; i>=0; i--) { 
00491     for(j=l2-1; j>=0; j--) {
00492       G(i,j)=V(i+1,j+1)+align_cost(i,n1,j,n2, EA);
00493       E(i,j)=G(i,j+1)+gap_cost(gap_edge(n1,n2,j,j)); TE(i,j)=1;
00494       for(k=j+1; k<=l2; k++) {
00495         if(E(i,j)<G(i,k)+gap_cost(gap_edge(n1,n2,j,k-1))) {
00496           E(i,j)=G(i,k)+gap_cost(gap_edge(n1,n2,j,k-1)); TE(i,j)=k-j; 
00497         }
00498       }
00499       F(i,j)=G(i+1,j)+gap_cost(gap_edge(n2,n1,i,i)); TF(i,j)=1;
00500       for(k=i+1; k<=l1; k++) {
00501         if(F(i,j)<G(k,j)+gap_cost(gap_edge(n2,n1,i, k-1))) {
00502           F(i,j)=G(k,j)+gap_cost(gap_edge(n2,n1,i, k-1)); TF(i,j)=k-i;
00503         }
00504       }
00505       V(i,j)=G(i,j); TVE(i,j)=0; TVF(i,j)=0;
00506       for(k=j+1; k<=l2; k++) {
00507         if(V(i,j)<F(i,k)+gap_cost(gap_edge(n1,n2,j, k-1))) {
00508           V(i,j)=F(i,k)+gap_cost(gap_edge(n1,n2,j, k-1));
00509           TVF(i,j)=TF(i,k);
00510           TVE(i,j)=k-j;
00511         }
00512       }
00513       if(E(i,j)>V(i,j)) { V(i,j)=E(i,j); TVE(i,j)=TE(i,j); TVF(i,j)=0; }
00514       if(F(i,j)>V(i,j)) { V(i,j)=F(i,j); TVF(i,j)=TF(i,j); TVE(i,j)=0; }
00515 
00516     }
00517   }
00518 
00519   cout<<V(0,0)<<" pairwise rev\n";
00520 
00521   return V(0,0);
00522 };
00523 
00524 
00525 #line 1027 "create_gml.lw"
00526 void read_config_file() {
00527   ifstream is("config");
00528   LEDA::string s;
00529   
00530   while(!is.eof()) {
00531     is>>s;
00532     if(s=="FewVariables") is>>fewvariables;
00533     if(s=="AllNonPruned") is>>allnonpruned;
00534     if(s=="ReadLowerBound") is>>readoptvalue;
00535     if(s=="WriteOptValue") is>>writeoptvalue;
00536     if(s=="EdgeBounce") is>>edgebounce;
00537     if(s=="GapBounce") is>>gapbounce;
00538     if(s=="PricingTolerance") is>>pricingtolerance;
00539     if(s=="InitTolerance") is>>inittolerance;
00540     if(s=="MaxUpperLower") is>>maxupperlower;
00541     if(s=="GlobalPart") is>>gp;  
00542     if(s=="LinearPart") is>>lp;  
00543     if(s=="SquareRootPart") is>>sp;
00544     if(s=="EndGapPenality") is>>egp;  
00545     if(s=="Shift") is>>add; 
00546 
00547   }
00548 
00549   cout<<"\nConfiguration\n===============\n";
00550   cout<<"\nFewVariables: "<<fewvariables;
00551   cout<<"\nAllNonPruned: "<<allnonpruned;
00552   cout<<"\nReadLowerBound: "<<readoptvalue;
00553   cout<<"\nWriteOptValue: "<<writeoptvalue;
00554   cout<<"\nEdgeBounce: "<<edgebounce;
00555   cout<<"\nGapBounce: "<<gapbounce;
00556   cout<<"\nPricingTolerance: "<<pricingtolerance;
00557   cout<<"\nInitTolerance: "<<inittolerance<<"\n\n";
00558 }
00559 
00560 #line 244 "create_gml.lw"
00561 int main(int argc, char* argv[]) {
00562 
00563   rootbound=-1;
00564   allgvars=0;
00565   allavars=0;
00566   redavars=0;
00567   prepro.start();
00568 
00569   if(argc<2) {
00570     cout<<"Usage: Infile(fasta) Outfile a b c (in a+b*l+c*sqrt(l))\n\n";
00571     return 1;
00572   }
00573 
00574   if(argc>2) {
00575     gp=atoi(argv[3]);
00576     lp=atoi(argv[4]);
00577     sp=atoi(argv[5]);
00578     //gp=atoi(argv[6]);
00579     add=gp+lp+sp;
00580     cout<<gp<<" "<<lp<<" "<<sp<<" "<<egp<<" "<<add<<" score data\n";
00581   }
00582 
00583   read_config_file();
00584 
00585   if (readoptvalue) {
00586     LEDA::string s=argv[1];
00587     s+=".opt";
00588     ifstream ifstr(s);
00589     ifstr>>mylowerbound;
00590     mylowerbound=mylowerbound*0.9999999;
00591     cout<<mylowerbound<<" given lower bound\n\n";
00592   }
00593 
00594   int sequences;
00595 
00596   
00597 #line 357 "create_gml.lw"
00598 ifstream is(argv[1]);
00599 
00600 sequences=-1;
00601 
00602 LEDA::string s;
00603 while (!is.eof()) {
00604   s=read_line(is);
00605   if(s[0]=='>') {
00606    sequences++;
00607    Name[sequences]=s.del(0);
00608    int k=0;
00609    while((k<Name[sequences].length()) && (Name[sequences][k]!=','))
00610      k++;
00611    Name[sequences]=Name[sequences].del(k,Name[sequences].length());
00612    cout<<Name[sequences]<<" new sequence\n";
00613   } else if(sequences!=-1)
00614    strings[sequences]+=s;
00615 }
00616 sequences++;
00617 
00618 cout<<"here\n";
00619 
00620 for(int i=0; i<sequences; i++) {
00621   int k=0;
00622   while (k<strings[i].length()) {
00623     if((strings[i][k]>='A') && (strings[i][k]<='Z')) k++;
00624     else {
00625       if ((strings[i][k]>='a') && (strings[i][k]<='z')) {
00626         strings[i][k]=strings[i][k]+'A'-'a'; 
00627         k++;
00628       } else strings[i]=strings[i].del(k);
00629     }
00630   };
00631   cout<<strings[i]<<endl;
00632 };
00633 
00634 for(int j=0; j<sequences; j++) {
00635   Nodes[j]=new node[strings[j].length()];
00636   for(int i=0; i<strings[j].length(); i++) {
00637     Nodes[j][i]=G.new_node(node_information(i, j, strings[j][i]));
00638   }
00639   length[j]=strings[j].length();
00640 }
00641 
00642 cout<<sequences<<"\n reading done\n";
00643 
00644 cout<<"read matrix\n";
00645 
00646 ifstream matrix("matrix");
00647 //string 
00648 s="#";
00649 while ((s[0]=='#') || (s.length()==0)) s.read_line(matrix);
00650 
00651 int ns=0;
00652 cout<<s<<" : list of characters\n";
00653 for(int i=0; i<s.length(); i++) if (s[i]!=' ') {
00654   CMap[s[i]]=ns;
00655   ns++;
00656 }
00657 
00658 CM=new double*[ns+1];
00659 for(int i=0; i<ns+1; i++) CM[i]=new double[ns+1];
00660 for(int i=0; i<ns; i++) {
00661   matrix>>s; cout<<endl;
00662   for(int j=0; j<ns; j++) {
00663     matrix>>CM[i][j];
00664     //CM[i][j]=-CM[i][j];
00665     cout<<CM[i][j]<<" ";
00666   }
00667 }
00668 
00669 CMap['+']=ns;
00670 for(int i=0; i<ns; i++) { CM[ns][i]=-10000; CM[i][ns]=-10000; }
00671 CM[ns][ns]=1000;
00672 
00673 cout<<"setup graph\n";
00674 
00675 // ! Order of the edges is important for pairgraph_separator heuristic
00676 node u, v;
00677 forall_nodes(u, G) forall_nodes(v, G) if(G[u].seq()>G[v].seq()) {
00678   AM(u,v)=G.new_edge(u, v, two_tuple<int, double>(-1, align_cost(G[u].letter(),G[v].letter())));
00679 }
00680 
00681 
00682 cout<<"problem created\n\n";
00683 
00684 //edge_array<double> UPE(G);
00685 edge_array<double> fix(G);
00686 set<int> GE[sequences][sequences];
00687 LEDA::string ts1, ts2, s1t,s2t;
00688 int l1, l2;
00689 edge e;
00690 double pa[sequences][sequences];
00691 double hs=0;
00692 for(int i=0; i<sequences; i++) for(int j=i+1; j<sequences; j++) {
00693   cout<<i<<" "<<j<<" sequences\n";
00694   l1=strings[i].length();
00695   l2=strings[j].length();
00696   array2<double> V1(-1,l1,-1,l2), V2(-1,l1,-1,l2);
00697   int p1=0, p2=0, g=0;
00698   s1t="";
00699   for(int k=l1-1; k>=0; k--) s1t+=strings[i][k];
00700   s2t="";
00701   for(int k=l2-1; k>=0; k--) s2t+=strings[j][k];
00702   pairwise_align(s1t,s2t,V2, i,j);
00703   pa[i][j]=pairwise_align(strings[i],strings[j],ts1,ts2,i,j, G.edge_data(), V1);
00704   for(int k=0; k<l1; k++) for(int l=0; l<l2; l++) {
00705     e=AM(Nodes[j][l],Nodes[i][k]);
00706     UPE[e]=V1(k-1,l-1)+V2(l1-k-2,l2-l-2)+G[e].second();
00707     fix[e]=V1(k,l)+V2(l1-k-2,l2-l-2);
00708     for(int N=leda_max(k-25,0); N<k; N++) 
00709       for(int M=k+1; M<leda_min(k+25,strings[i].length()); M++) {
00710         fix[e]=leda_max(fix[e], V1(N-1,l)+V2(l1-M-2,l2-l-2)+
00711                         gap_cost(M,N,strings[i].length()));
00712       }
00713     for(int N=leda_max(l-25,0); N<l; N++) 
00714       for(int M=l+1; M<leda_min(l+25,strings[j].length()); M++) {
00715         fix[e]=leda_max(fix[e], V1(k,N-1)+V2(l1-k-2,l2-M-2)+
00716                         gap_cost(M,N,strings[j].length()));
00717       }
00718 if(fix[e]>pa[i][j]+0.001) cout<<"fix better than pairwise upper\n";
00719     // The best one can do if all letters before k are aligned before l and ...
00720   }
00721 
00722   for(int k=0; k<l2; k++) {
00723     for(int l=k; l<l2; l++) {
00724       double zg=0;
00725       if(k==0) zg=V2(l1-1,l2-l-2);
00726       if(l==l2-1) zg=V1(l1-1,k-1);
00727       if((k>0) && (l<l2-1)) {
00728         for(int z=-1; z<l1; z++) {
00729           zg=leda_max(zg, V1(z,k-1)+V2(l1-z-2,l2-l-2));
00730         }
00731       }
00732       gap_edge ge(i,j,k,l);
00733       UPG.insert(ge, zg+gap_cost(l,k,j));
00734     }
00735   }
00736   for(int k=0; k<l1; k++) {
00737     for(int l=k; l<l1; l++) {
00738       double zg=0;
00739       if(k==0) zg=V2(l1-l-2, l2-1);
00740       if(l==l1-1) zg=V1(k-1, l2-1);
00741       if((k>0) && (l<l1-1)) {
00742         for(int z=-1; z<l2; z++) {
00743           zg=leda_max(zg, V1(k-1,z)+V2(l1-l-2, l2-z-2));
00744         }
00745       }
00746       gap_edge ge(j,i,k,l);
00747       UPG.insert(ge, zg+gap_cost(l,k,i));
00748     }
00749   }
00750 
00751   hs+=pa[i][j];
00752   cout<<pa[i][j]<<" pw "<<hs<<" sum\n";
00753   pa[j][i]=pa[i][j];
00754   for(int k=0; k<ts1.length(); k++) {
00755     if((ts1[k]!='-') && (ts2[k]!='-')) {
00756       if(g>0) {
00757         gap_edge ge(i,j,p2-g,p2-1); // TODO exchanged i and j (4 times)
00758         if(g<=gapbounce) LPGap.insert(ge);
00759       }
00760       if(g<0) {
00761         gap_edge ge(j,i,p1+g,p1-1);
00762         if(-g<=gapbounce) LPGap.insert(ge);
00763       }
00764       GE[i][j].insert(1000*p1+p2);
00765       p1++;p2++; g=0;
00766     } else {
00767       if(ts1[k]=='-') {
00768         g++; p2++;
00769       } else {
00770         g--; p1++;
00771       }
00772     }
00773   }
00774   if(g>0) {
00775     gap_edge ge(i,j,p2-g,p2-1);
00776     if(g<=gapbounce) LPGap.insert(ge);
00777   }
00778   if(g<0) {
00779     gap_edge ge(j,i,p1+g,p1-1);
00780     if(-g<=gapbounce) LPGap.insert(ge);
00781   }
00782 }
00783 
00784 mylowerbound=leda_max(mylowerbound, hs-maxupperlower);
00785 
00786 edge_array<double> UPE2(G);
00787 node S,t;
00788 forall_edges(e, G) {
00789   S=source(e); t=target(e);
00790   if((hs+UPE[e]-pa[G[S].seq()][G[t].seq()] >= mylowerbound) &&
00791      (G[S].pos()>0) && (G[t].pos()>0) &&
00792      (G[S].pos()<strings[G[S].seq()].length()-1) &&
00793      (G[t].pos()<strings[G[t].seq()].length()-1)) {
00794     double ub=0;
00795     for(int i=1; i<sequences; i++) 
00796       if ((i!=G[S].seq()) && (i!=G[t].seq())) {
00797         double m=fix[AM_(Nodes[i][strings[i].length()-1],
00798                          Nodes[G[S].seq()][G[S].pos()])]+
00799           fix[AM_(Nodes[i][strings[i].length()-1],
00800                   Nodes[G[t].seq()][G[t].pos()])];
00801           //gap_cost(G[S].pos()+1,strings[G[S].seq()].length()-1,G[S].seq())+
00802           //gap_cost(G[t].pos()+1,strings[G[t].seq()].length()-1,G[t].seq());
00803 
00804         for(int k=0; k<strings[i].length()-1; k++) {
00805           edge e1=AM_(Nodes[i][k],Nodes[G[S].seq()][G[S].pos()]),
00806             e2=AM_(Nodes[i][k],Nodes[G[t].seq()][G[t].pos()]);
00807           m=leda_max(m,fix[e1]+fix[e2]);
00808         }
00809         ub+=leda_min(0.0, m-pa[i][G[S].seq()]-pa[i][G[t].seq()]); // TODO required?
00810         //cout<< m-pa[i][G[S].seq()]-pa[i][G[t].seq()]<<" save over pairwise\n";
00811       }
00812     UPE2[e]=hs+UPE[e]-pa[G[S].seq()][G[t].seq()]+ub;
00813     //cout<<hs+UPE[e]-pa[G[S].seq()][G[t].seq()]<<" "<<UPE2[e]<<" old and new bound\n";
00814   } else UPE2[e]=hs+UPE[e]-pa[G[S].seq()][G[t].seq()];
00815 }
00816 
00817 
00818 forall_edges(e, G) {
00819   UPE[e]=hs+UPE[e]-pa[G[source(e)].seq()][G[target(e)].seq()];
00820   //UPE2[e]=UPE[e]; // TODO remove
00821 }
00822 
00823 cout<<hs<<" Obere Schranke\n";
00824 
00825 cout<<"init LPedge\n";
00826 LPEdge.init(G, !fewvariables);
00827 
00828 l1=0; l2=0; int ltest=0;
00829 forall_edges(e, G) {
00830   if(UPE2[e] < mylowerbound ) { // TODO UPE2 !
00831     l1++; 
00832     if(UPE[e] >= mylowerbound) {
00833       ltest++;
00834       //cout<<G[source(e)].seq()<<" "<<G[source(e)].pos()<<" "
00835       //  <<G[target(e)].seq()<<" "<<G[target(e)].pos()<<" edge\n";
00836     }
00837   } else { 
00838     l2++;
00839     if ((allnonpruned) || (UPE[e] >= hs*inittolerance)) LPEdge[e]=true;
00840   }
00841 }
00842 cout<<l1<<" pruned edges "<<l2<<" unpruned edges\n";
00843 cout<<ltest<<" newly prunded\n";
00844 
00845 forall_edges(e,G) if (!LPEdge[e]) G.hide_edge(e);
00846 
00847 l1=0; l2=0;
00848 if (allnonpruned) { // Take all needed gaps
00849   seq_item si;
00850   forall_items(si, UPG) {
00851     gap_edge ge=UPG.key(si);
00852     UPG.change_inf(si, hs+UPG.inf(si)-pa[leda_min(ge.start_seq(),ge.end_seq())]
00853                    [leda_max(ge.start_seq(),ge.end_seq())]);
00854     if((!LPGap.member(ge)) && (UPG.inf(si) >= mylowerbound )) {
00855       l2++;
00856       if(ge.end_pos()-ge.start_pos()<gapbounce) LPGap.insert(ge);
00857     } else l1++;
00858   }
00859 } else {
00860   seq_item si;
00861   forall_items(si, UPG) { // Take all gaps close to pairwise optimal
00862     gap_edge ge=UPG.key(si);
00863     UPG.change_inf(si, hs+UPG.inf(si)-pa[leda_min(ge.start_seq(),ge.end_seq())]
00864                    [leda_max(ge.start_seq(),ge.end_seq())]);
00865     if((!LPGap.member(ge)) && (UPG.inf(si) >= hs*inittolerance)) {
00866       l2++;
00867       if(ge.end_pos()-ge.start_pos()<gapbounce) LPGap.insert(ge);
00868     } else l1++;
00869   }
00870 }
00871 
00872 G.restore_all_edges();
00873 cout<<l1<<" pruned gaps "<<l2<<" unpruned gaps\n";
00874 
00875 #line 279 "create_gml.lw"
00876                   ;
00877 
00878   
00879 #line 289 "create_gml.lw"
00880 //edge e;
00881 if (!allnonpruned) {
00882   forall_edges(e, G) {
00883     if((GE[G[target(e)].seq()][G[source(e)].seq()].member(1000*G[target(e)].pos()
00884                                                           +G[source(e)].pos())) 
00885        || (abs(G[source(e)].pos()-G[target(e)].pos())<=edgebounce)) 
00886     LPEdge[e]=true;
00887   }
00888 
00889   int i, j, k, l;
00890   for(i=0; i<sequences; i++) {
00891     for(j=0; j<sequences; j++) if (i!=j) {
00892       for(k=0; k<length[j]; k++) {
00893         for(l=k; l<length[j]; l++) {
00894           gap_edge ge(i,j,k,l);
00895           if (l-k<gapbounce)
00896             LPGap.insert(ge);
00897         }
00898       }
00899     }
00900   }
00901 }
00902 
00903 forall_edges(e, G) if (!LPEdge[e]) G.del_edge(e);
00904 
00905 /*
00906 gap_edge ge;
00907 forall(ge, LPGap) {
00908   G.new_edge(Nodes[ge.end_seq()][ge.start_pos()],
00909              Nodes[ge.end_seq()][ge.end_pos()], 
00910              two_tuple<int, double>(ge.start_seq(),
00911                gap_cost(ge.start_pos(), ge.end_pos(), ge.start_seq())));
00912 }
00913 
00914 for(int i=0; i<sequences; i++) {
00915   for(int j=1; j<length[i]; j++) {
00916     G.new_edge(Nodes[i][j-1], Nodes[i][j], 
00917                two_tuple<int, double>(-2, 0));
00918   }
00919 };
00920 */
00921 
00922 ofstream os1(argv[2]);
00923 os1 << sequences <<" " << G.number_of_edges()+LPGap.size() << endl;
00924 
00925 int SI[sequences];
00926 SI[0]=0;
00927 for(int i=0; i<sequences; i++) {
00928   os1 << length[i] <<" "<<Name[i] <<" "<<strings[i] <<endl;
00929   if(i<sequences-1) SI[i+1]=SI[i]+length[i];
00930 }
00931 
00932 forall_edges(e, G) {
00933   os1 << SI[G[source(e)].seq()]+G[source(e)].pos() <<" "
00934       << SI[G[target(e)].seq()]+G[target(e)].pos() <<" -1 "
00935       << G[e].second() << endl;
00936 } 
00937 
00938 gap_edge ge;
00939 forall(ge, LPGap) {
00940   os1 << SI[ge.end_seq()]+ge.start_pos() <<" "
00941       << SI[ge.end_seq()]+ge.end_pos() <<" "
00942       << ge.start_seq() <<" " 
00943       << gap_cost(ge.start_pos(), ge.end_pos(), ge.start_seq()) <<endl;
00944 }
00945 
00946 #line 283 "create_gml.lw"
00947   return 0;
00948 }
00949 

Generated on Tue Nov 16 15:18:16 2004 for SCIL by doxygen1.2.16