/*
 * Overview (review notes)
 *
 *  - DataBase derives from the Berkeley DB C++ class Db and is constructed
 *    with DB_CXX_NO_EXCEPTIONS; the code in this file checks return codes
 *    (0, DB_NOTFOUND) instead of catching exceptions.
 *  - QueryResult is a small value class with the fields res, richr, orig,
 *    richo and sco.
 *  - DataBaseInterface opens seven DataBase handles (main, alpha, numindex,
 *    wordsindex, sentence, corr, transword) and implements entry insertion,
 *    lookups, word correlation and string formatting helpers on top of them.
 *
 * NOTE(review): in this copy of the file the original line breaks have been
 * collapsed and text between angle brackets is missing: the header names of
 * the bare include directives, template arguments (for example those of
 * TQMap and TQDict), and parts of addEntry, correlation, simple and
 * formatRegExp. Restore those spans from the upstream source before
 * building. The comments added in front of the functions describe only what
 * the surviving text shows.
 */
/* DBSE 3 (c) 2000-2003 Andrea Rizzi License: GPLv2 */ #include #include "database.h" #include #include #include #include #include #define i18n (const char*) /* Stores the file name (dbpath, a dot, dblang and the suffix .db) and the database name dbname; constructs the Db base with (0, DB_CXX_NO_EXCEPTIONS). Nothing is opened here. */ DataBase::DataBase(TQString dbpath,TQString dbname, TQString dblang) : Db(0,DB_CXX_NO_EXCEPTIONS) { filename=dbpath+"."+dblang+".db"; database=dbname; } /* Calls Db::open with the stored file name and database name (local 8-bit encoding), the given type and flags, and mode 0644; when the Berkeley DB version is 4.1 or later an extra leading NULL argument is passed. Records type in mytype whether or not the open succeeded and returns the Db::open result. */ int DataBase::open(DBTYPE type,unsigned int flags) { int ret; ret = Db::open( #if (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR > 0) || (DB_VERSION_MAJOR >= 5) NULL, #endif (const char*)filename.local8Bit(),(const char *)database.local8Bit(),type,flags,0644); mytype=type; return ret; } /* Returns 0 unless the database was opened as DB_RECNO; otherwise opens a cursor, reads with DB_LAST and returns the number held by the DBItemNum key. NOTE(review): the return codes of cursor() and get() are not checked and no cur->close() call appears in this function -- confirm whether the cursor is leaked. */ unsigned int DataBase::getLast() { if(mytype!=DB_RECNO) return 0; Dbc *cur; cursor(0,&cur,0); DBItemNum index; DBItemMainKey key; cur->get(&index,&key,DB_LAST); return index.getNum(); } /* QueryResult constructors: the one-argument form sets res; the three-argument form sets res and richr to r, orig and richo to o, and sco to s; the default form sets res to the empty string. */ QueryResult::QueryResult(TQString r) { res=r; } QueryResult::QueryResult(TQString r,TQString o,int s) { res=r; richr=r; orig=o; richo=o; sco=s; } QueryResult::QueryResult() { res=""; } /* Opens the seven databases (main, alpha, numindex, wordsindex, sentence, corr, transword) through openMyDataBase with hard-coded file prefixes under dir and the hard-coded language code it; numindex and transword use DB_RECNO, the others DB_BTREE. Stores sets in settings and clears _stopNow. A handle is 0 when openMyDataBase returned 0. */ DataBaseInterface::DataBaseInterface(TQString dir, DBSESettings *sets) { //FIXME Better db names!! 
main = openMyDataBase(dir+"/testm","main","it",DB_BTREE); alpha = openMyDataBase(dir+"/testa","alpha","it",DB_BTREE); numindex = openMyDataBase(dir+"/testn","numindex","it",DB_RECNO); wordsindex = openMyDataBase(dir+"/testw","wordsindex","it",DB_BTREE); sentence = openMyDataBase(dir+"/tests","sentence","it",DB_BTREE); corr = openMyDataBase(dir+"/testc","corr","it",DB_BTREE); transword = openMyDataBase(dir+"/testt","transword","it",DB_RECNO); // kdDebug(0) << main << endl; // kdDebug(0) << alpha << endl; settings=sets; _stopNow=false; } /* Closes and deletes main, numindex, alpha, wordsindex and sentence when they are non-null. NOTE(review): corr and transword are opened in the constructor but are not closed or deleted here -- confirm whether that is intentional. */ DataBaseInterface::~DataBaseInterface() { if(main){ main->close(0); delete main; } if(numindex){ numindex->close(0); delete numindex; } if(alpha){ alpha->close(0); delete alpha; } if(wordsindex){ wordsindex->close(0); delete wordsindex; } if(sentence){ sentence->close(0); delete sentence; } } /* Allocates a DataBase for (prefix, name, l) and tries to open it with type tt. If that fails, asks the user once (the answer is kept in a function-local static) whether to create it and, on yes, retries with DB_CREATE; returns 0 when that creation fails. NOTE(review): when the user answers no, control reaches the final return and the unopened object is returned; on creation failure the object is not deleted before returning 0. */ DataBase *DataBaseInterface::openMyDataBase(const TQString& prefix,const TQString& name,const TQString& l,DBTYPE tt) { DataBase *aDb = new DataBase(prefix,name,l); if(aDb==0){ return 0; } else { if(aDb->open(tt)!=0) { kdDebug(0) << "Database '"<< name <<"'do not exist, I try to create it.." << endl; //ask only the first time. static bool create=( KMessageBox::questionYesNo(0,"Database do not exist. Do you want to create it now?", i18n("Create Database"), i18n("Create"), i18n("Do Not Create"))==KMessageBox::Yes); if(create) if(aDb->open(tt,DB_CREATE)!=0) { kdDebug(0) << "...cannot create!!"<< endl; return 0; } else { kdDebug(0) << "...done!" << endl; return aDb; } } } return aDb; } /* * query functions. 
* */ /* Builds a DBItemMainKey from query and returns the pair (key, data). NOTE(review): with the line breaks lost it is unclear whether main->get(&k,&d) is live code or part of the preceding line comment -- confirm against the upstream source. filter is not referenced in the surviving text. Every fifth call runs kapp->processEvents(100). */ DataBaseInterface::MainEntry DataBaseInterface::get(const TQString& query,SearchFilter *filter) { static int counter=1; counter++; DBItemMainKey k(query); DBItemMainData d; //int r= main->get(&k,&d); // kdDebug(0) << "MAINDB->GET returned: " << r << endl; if(counter%5==0) kapp->processEvents(100); // kdDebug(0) << "events processed" << endl; return qMakePair(k,d); } /* * put functions * * */ /* Adds the pair original and translated to the databases. If original is not yet in main, a new id (numindex->getLast()+1) is stored in numindex and added to the alpha index under simple(original) and to the words index under each of words(original). In the sentence loop st[i] is added under the key so[i] with info->ref(). If translated is not empty, md.removeRef(info->ref()) is called before md.addTranslation(translated, info->ref()); correlationDiff is filled but not used yet (see the FIXME). Returns true when the final main->put returns 0. NOTE(review): part of this function is missing from this copy (after the It exists debug message), so the existing-entry branch and the construction of so and st are not visible. */ bool DataBaseInterface::addEntry(TQString original,TQString translated,InputInfo *info) { DBItemMainKey mk(original); DBItemMainData md; TQMap correlationDiff; bool newentry=false; //try to get kdDebug(0) << "Inserting the pair:" << endl; kdDebug(0) << "ORIGINAL:" << original << endl; kdDebug(0) << "TRANSLATED:" << translated << endl; if(main->get(&mk,&md)==DB_NOTFOUND) { kdDebug(0) << "new entry" << endl; newentry=true; //This is a new entry, create index entry DBItemNum *nind; int newid=numindex->getLast()+1; nind=new DBItemNum(newid); numindex->put(nind,&mk); delete nind; md.clear(); md.setIndexnumber(newid); //Update secondary index alpha DBItemMainKey ka(simple(original)); DBItemMultiIndex in; if(alpha->get(&ka,&in)==DB_NOTFOUND) in.clear() ; //alpha->get(&ka,&in); in.addEntry(newid); alpha->put(&ka,&in); kdDebug(0) << "Updating the word index " << endl; //Update words index TQStringList ws=words(original); for(TQStringList::iterator it = ws.begin(); it!=ws.end(); ++it) { DBItemMainKey word(*it); DBItemMultiIndex win; if(wordsindex->get(&word,&win)==DB_NOTFOUND) win.clear(); win.addEntry(newid); wordsindex->put(&word,&win); } kdDebug(0) << "new entry preparation DONE" << endl; } else { kdDebug(0) << "It exists!" <1 ) //we already hav a database for single string. { kdDebug(0) << "inside sentence loop" << endl; for(int i=0; i< so.count() ; i++) { DBItemMainKey sk(so[i]); DBItemMainData sd; if(sentence->get(&sk,&sd)==DB_NOTFOUND&&!newentry) kdDebug(0) << "Warning: new sentence for old entry, do we changed sentence definition? 
" << endl; kdDebug(0) << "here alive" << endl; // if(clean) sd.removeRef(info->ref()); kdDebug(0) << "now alive" << endl; sd.addTranslation(st[i],info->ref()); kdDebug(0) << "still alive" << endl; sentence->put(&sk,&sd); } } kdDebug(0) << "Fuzzy sentence archive updated" << endl; //Add that translation, link to ref for information on that translation if(!translated.isEmpty()) { //loop on all translations to update correlation TQStringList tmpTranslations=md.getTranslations(); for(TQStringList::iterator otIt=tmpTranslations.begin(); otIt!=tmpTranslations.end();++otIt) { TQStringList wt=words(*otIt); for(TQStringList::iterator it = wt.begin(); it!=wt.end(); ++it) { if(correlationDiff.contains(*it)) correlationDiff[*it]--; else correlationDiff[*it]=-1; } } //clean so that we have only one translation per catalog. md.removeRef(info->ref()); md.addTranslation(translated,info->ref()); tmpTranslations=md.getTranslations(); for(TQStringList::iterator otIt=tmpTranslations.begin(); otIt!=tmpTranslations.end();++otIt) { TQStringList wt=words(*otIt); for(TQStringList::iterator it = wt.begin(); it!=wt.end(); ++it) { if(correlationDiff.contains(*it)) correlationDiff[*it]++; else correlationDiff[*it]=1; } } //FIXME: use the correlationDIff map somehow } //finally put! 
return (main->put(&mk,&md)==0); } /* Not implemented: looks original up in main, but the body of the only branch is commented out and the function always returns false. */ bool DataBaseInterface::removeEntry(TQString original) { DBItemMainKey mk(original); DBItemMainData md; //FIXME implement remove //try to get if(main->get(&mk,&md)==DB_NOTFOUND) { /* //This is a new entry, create index entry DBItemNum *nind; int newid=numindex->getLast()+1; nind=new DBItemNum(newid); numindex->put(nind,&mk); delete nind; md.clear(); md.setIndexnumber(newid); //Update secondary index alpha DBItemMainKey ka(simple(original)); DBItemMultiIndex in; if(alpha->get(&ka,&in)==DB_NOTFOUND) in.clear() ; //alpha->get(&ka,&in); in.addEntry(newid); alpha->put(&ka,&in); //Update words index TQStringList ws=words(original); for(TQStringList::iterator it = ws.begin(); it!=ws.end(); it++) { DBItemMainKey word(*it); DBItemMultiIndex win; if(wordsindex->get(&word,&win)==DB_NOTFOUND) win.clear(); win.addEntry(newid); wordsindex->put(&word,&win); } //Update sentence index TQStringList so=sentences(original); TQStringList st=sentences(translated); if(so.count()==st.count() && st.count() >1 ) //we already hav a database for single string. { for(int i=0; i< so.count() ; i++) { DBItemMainKey sk(so[i]); DBItemMainKey sd(st[i]); //should be a list i.e. main data? 
sentence->put(&sk,&sd); } } */ } return false; } /* Looks simple(word) up in wordsindex; for every listed record number it reads the key from numindex, obtains the translations through get(), and adds the weight wei to res for each translation word whose count within that translation equals nocck (the value of words(key string).contains(sword)). Words whose accumulated value divided by 10000*sqrBG exceeds minSign are stored in final. NOTE(review): part of this function is missing from this copy, including its end. NOTE(review): k, d and the uint values inserted into the dictionaries are allocated with new; no delete appears in the surviving text and setAutoDelete is commented out. */ TQMap DataBaseInterface::correlation(TQString word,SearchFilter *filter,bool notify, float minSign) { TQDict res; // res.setAutoDelete(true); TQMapfinal; DBItemMultiIndex::IndexList il; unsigned int tot=0; unsigned int background=0; unsigned int nocck; TQString sword=simple(word); DBItemMainKey *k = new DBItemMainKey(sword); DBItemMultiIndex *d = new DBItemMultiIndex(); if(wordsindex->get(k,d)!=DB_NOTFOUND) { il=d->getList(); kdDebug(0) << il.count()<::iterator it=il.begin();it!=il.end();++it) { numindex->get(*it,k); // TQValueList trad=exactMatch(k->getString(),filter); MainEntry e=get(k->getString(),filter); TQStringList trad=e.second.getTranslations(); nocck=words(k->getString()).contains(sword); for( TQStringList::iterator it2=trad.begin();it2!=trad.end();++it2) { TQStringList w=words(*it2); unsigned int numWords = w.count()*10+1; unsigned int wei=100000/sqrt(numWords); //weight (is the best one?) background+=(numWords-nocck)*wei; TQDict count; //count.setAutoDelete(true); //FIXME:SET AUTODELETE FOR ALL DICTS for(TQStringList::iterator it1=w.begin();it1!=w.end();it1++) { uint *ip; if(!(ip=count[*it1])) count.insert(*it1,new uint(1)); else (*ip)++; } for(TQStringList::iterator it1=w.begin();it1!=w.end();it1++) { uint *ip; if(*(count[*it1])==nocck) //add only if same number of entry (it cuts articles) if(!(ip=res[*it1])) res.insert(*it1,new uint(wei)); else (*ip)+=wei; } } } unsigned int sqrBG=sqrt((1.0*background+1)/10000); for(TQDictIterator it(res) ; it.current(); ++it) { float sign=1.0*(*(it.current()))/(10000.0*sqrBG); if(sign >minSign){ final[it.currentKey()]=sign; kdDebug(0) << it.currentKey() <<" Score:" << 1.0*(*(it.current()))/10000 << "/" <)(.*)()"),"\\3"); //remove enclosing tags //Try to get rid of regexps. 
// res=res.replace(TQRegExp("(('|-|_|\\s|[^\\w%])+)")," "); //strip non-word char // res=res.replace(TQRegExp("(('|-|_)+)")," "); //strip non-word char // res=res.replace(TQRegExp("[^\\w\\s%]"),""); //strip non-word char TQString r; TQChar c; bool wasSpace=true; uint len=res.length(); for(uint i=0; i "as" // kdDebug(0) << res << endl; return res; } /* Splits s into sentences at one of . ; ? ! : followed by a space, the end of the string, or a backslash-n sequence followed by a newline character. The text in front of each separator is appended, with stripWhiteSpace() applied, when it is not empty. Loops until the remaining string is empty or no separator is found. */ TQStringList DataBaseInterface::sentences(TQString s) { TQString str=s; TQStringList list; // kdDebug(0) << s << endl; int pos; do { TQRegExp re("((\\.|;|\\?|\\!|:)( |$|\\\\n\\n))"); pos=re.search(str); if(!str.left(pos).isEmpty()) list.append(str.left(pos).stripWhiteSpace()); kdDebug(0) << str.left(pos) << endl; str=str.remove(0,pos+re.cap(1).length()); } while(!str.isEmpty() && pos != -1); return list; } /* Returns the separators found in s, in order: each match of one of . : ? ! ; followed by a space, the end of the string, or a backslash-n sequence plus newline is appended (capture 1), and the search resumes one character after the start of the match. */ TQStringList DataBaseInterface::sentencesSeparator(TQString s) { TQString str=s; TQStringList list; // kdDebug(0) << s << endl; int pos; do { TQRegExp re; re.setPattern("([.:?!;]( |$|\\\\n\\n))"); pos = re.search(str); TQString separator=re.cap(1); if(pos!=-1){ list.append(separator); } str=str.remove(0,pos+1); } while(!str.isEmpty() && pos != -1); return list; } /* True when s equals s.upper(). */ bool DataBaseInterface::isUpper(TQChar s) { return s==s.upper(); } /* True when s equals s.lower(). */ bool DataBaseInterface::isLower(TQChar s) { return s==s.lower(); } /* Returns a copy of _s adjusted to the shape of t: the first character is made upper or lower case to match the first word character of t (after the tag-stripping replace), the whole string is upper-cased when t equals t.upper(), an ampersand is inserted at the position returned by s.find(accel,false) for the character that follows the first ampersand in t (at position 0 when nothing is found, and not at all when that character is itself an ampersand), and two formatRegExp passes are applied last. NOTE(review): s[0] and the index returned by noTagT.find are used without checking for an empty string or a -1 result. NOTE(review): the false passed to s.find may bind to an index parameter rather than a case-sensitivity flag -- confirm against the TQString::find signature. */ TQString DataBaseInterface::format(TQString _s,TQString t) { //FIXME use settings //FIXME use regexp TQString s=_s; TQString noTagT=t.replace(TQRegExp("(<(.*)>)(.*)()"),"\\3"); TQChar first=noTagT[noTagT.find(TQRegExp("\\w"))]; bool firstCapital=isUpper(first); /* bool dotsAtEnd=(t.find("...")+3==t.length()); bool gtgtAtEnd=(t.find(">>")+2==t.length()); bool ltltAtEnd=(t.find("<<")==t.length()-2); bool columnAtEnd=(t.find(":")+1==t.length()); */ bool allupper=(t.upper()==t); if(firstCapital) s[0]=s[0].upper(); else s[0]=s[0].lower(); //if(dotsAtEnd) // s+="..."; /*if(gtgtAtEnd) s+=">>"; if(ltltAtEnd) s+="<<"; if(columnAtEnd) s+=":"; */ if(allupper) s=s.upper(); int pos=t.find(TQRegExp("&")); if(pos>=0) { TQChar 
accel=t[t.find(TQRegExp("&"))+1]; if(accel!='&') { pos=s.find(accel,false); if(pos<0) pos=0; s.insert(pos,"&"); } } s=formatRegExp(s,t,".*(\\.\\.\\.|:|>>|<<|\\.|\\?)$", "^(.*)$", "\\1@CAP1@"); s=formatRegExp(s,t,"(<(.*)>).*(\\.\\.\\.|:|>>|<<|\\.|\\?)*()$", "^(.*)$", "@CAP1@\\1@CAP3@@CAP4@"); return s; } /* If the regular expression tre matches t exactly, each placeholder @CAPn@ in stringSearch and stringReplace is replaced by entry n of capturedTexts() for that match, and a TQRegExp is then built from the resulting stringSearch. NOTE(review): the rest of this function and the start of the following one are missing from this copy. */ TQString DataBaseInterface::formatRegExp(TQString _s, TQString t, TQString tre,TQString stringSearch,TQString stringReplace) { TQString s=_s; TQRegExp templateRegExp(tre); //TQString stringSearch = "(.*)!@CAP1@$"; // use @CAP1.. fot caps in templates //TQString stringReplace = "\\1@CAP1@"; // use \1, \2 for caps in str and @CAP1 fot caps in template if(templateRegExp.exactMatch(t)) { TQStringList caps=templateRegExp.capturedTexts(); int i=0; for(TQStringList::iterator capit=caps.begin();capit!=caps.end();++capit) { TQString phRegExp="(?!\\\\)@CAP"+TQString::number(i)+"@"; //kdDebug(0) << "phRegExp: " << phRegExp << endl; //kdDebug(0) << "cap[" << i << "]: "<< *capit<< endl; stringReplace = stringReplace.replace(TQRegExp(phRegExp),*capit); stringSearch = stringSearch.replace(TQRegExp(phRegExp),*capit); i++; } // kdDebug(0) << "stringSearch " << stringSearch << endl; // kdDebug(0) << "stringReplace " << stringReplace << endl; TQRegExp stringSearchRegExp = TQRegExp(stringSearch); // kdDebug(0) << "before: "<get(k,d); return d->getList(); } /* Reads the key stored under record number i in numindex and returns get() for that key string with a null filter. The return code of numindex->get is not checked. */ DataBaseInterface::MainEntry DataBaseInterface::getFromIndex( uint i ) { DBItemMainKey k; numindex->get(i,&k); return get(k.getString(),0); //FIXME: this is a BUG right now but the filter should be removed } /* Looks query up in the sentence database and returns the pair (key, data); the return code of sentence->get is not checked. Every fifth call runs kapp->processEvents(100). */ DataBaseInterface::MainEntry DataBaseInterface::getSentence( const TQString & query ) { static int counter=1; counter++; DBItemMainKey k(query); DBItemMainData d; sentence->get(&k,&d); if(counter%5==0) kapp->processEvents(100); return qMakePair(k,d); } /* Returns the index list stored under query in wordsindex, or an empty list when the lookup returns DB_NOTFOUND. */ DBItemMultiIndex::IndexList DataBaseInterface::getWordIndex( const TQString & query ) { DBItemMainKey k = DBItemMainKey(query); DBItemMultiIndex d = DBItemMultiIndex(); 
if(wordsindex->get(&k,&d)!=DB_NOTFOUND){ return d.getList(); } else { TQValueList tmpList; return tmpList; } } //#include "database.moc.cpp"