// Poisson sampler `poisson`, tree-node record `DT`, and RandomForest member
// functions (the class itself is presumably declared in RF.h - confirm).
//
// NOTE(review): this text looks damaged. The #include directives carry no
// header names and several loops are truncated (e.g. `for(i=0; if)minF=f;`,
// `for(i=0; iisRF=true;`, `for(i=1; ipredict[ret])`), which is consistent
// with everything between a '<' and the next '>' having been stripped, and
// the original line breaks are gone. Restore the file from version control
// before building or editing; the notes below describe only what is still
// visible.
//
// poisson(int Lambda)
//   Multiplies uniform draws u = (rand()%10000)/10000 into p (starting at 1.0)
//   while p >= exp(-Lambda), counting the draws in k. k is capped at 11 and
//   k-1 is returned, so the result never exceeds 10. For a negative Lambda the
//   loop body never runs and the function returns -1. srand(clock()) is called
//   on every invocation.
//
// struct DT
//   One tree node: height, featureId, left/right children, split info
//   (terminate, dpoint, feature, result), sorted per-feature records for
//   sparse data (sortedData, sortedResult), per-feature records for dense
//   data (count, record, max), and the retained samples (dataRecord,
//   resultRecord, size).
//
// RandomForest::RandomForest(mTree, feature, s, forg, noC, eval, b, t)
//   Seeds rand() from clock(), stores b in bagging, mTree in activeTree and
//   maxTree, t in tThresh and feature in f; allocates allT (mTree longs),
//   sparse (f ints) and DTrees (mTree DecisionTree pointers, via malloc).
//   lastT starts at -2, which fit() treats as "never fitted"; lastAll starts
//   at 0. The loops that fill sparse, compute minF and construct each tree are
//   damaged; only the minF clamp to f and `isRF=true` remain visible.
//
// RandomForest::fit(data, result, size)
//   First call (lastT == -2): only sets lastT to -1. Later calls: compares the
//   previous counters (lastT/lastAll) with the current ones (localT/localAll)
//   using a pooled two-sample t statistic with v = lastAll+localAll-2 degrees
//   of freedom and boost::math::students_t; q is the upper-tail probability at
//   |t| (q = 1 when the pooled deviation sp is 0).
//     q <= tThresh : the local counters are added to lastT/lastAll.
//     t < 0        : lastT/lastAll are replaced by the local counters.
//     otherwise    : stale = floor((newAcc-lastAcc)/lastAcc*maxTree), then the
//                    counters are replaced.
//   Rotate(stale) is then called and each tree is fitted on newData/newResult
//   of size*times rows. The code that accumulates localT/localAll and builds
//   the per-tree sample is damaged. `double c = cdf(dist, t)` is computed but
//   not used in the visible code. The /* ... */ region is disabled code.
//
// RandomForest::fitThenPredict(trainData, trainResult, trainSize, testData, testSize)
//   Calls fit() on the training set and mallocs testResult (testSize longs).
//   The prediction loop and the return statement are missing: the text runs
//   straight into the body of Rotate.
//
// RandomForest::Rotate (signature lost)
//   Returns immediately when a (damaged) condition `... >= 0` holds. Otherwise
//   stale is limited to maxTree by std::min and negated, and while stale > 0
//   an index minIndex is chosen (selection loop damaged; currentMin starts at
//   2147483647). That tree's dataRecord/resultRecord are copied into
//   newData/newResult with an extra column f set to 0; the tree is passed
//   through Stablelize(), Free() and delete, replaced by a new DecisionTree
//   using minF+rand()%(f+1-minF) as its feature-count argument, refitted on
//   the copied rows, and its lastAll/lastT are set to size and to the number
//   of copied rows it predicts correctly (lastT2). Where stale is decremented
//   and where newData/newResult are released is not visible here.
//
// RandomForest::Test(data, result) / RandomForest::Test(data)
//   Tally one prediction per tree in predict[noClasses] (a variable-length
//   array) and return ret, the class index chosen by the damaged comparison
//   loop over predict. The two-argument overload also increments allT[i] when
//   tree i's prediction equals result.
#include "RF.h" #include #include #include #include #include #include #include long poisson(int Lambda) { int k = 0; long double p = 1.0; long double l = exp(-Lambda); srand((long)clock()); while(p>=l) { double u = (double)(rand()%10000)/10000; p *= u; k++; } if (k>11)k=11; return k-1; } struct DT{ int height; long* featureId; DT* left = nullptr; DT* right = nullptr; // split info bool terminate; double dpoint; long feature; long result; // Sparse data record double** sortedData; // for each feature, sorted data long** sortedResult; // Dense data record long*** count = nullptr;// for each feature, number of data belongs to each class and dense value double** record = nullptr;// for each feature, record each dense data long* max = nullptr;// number of dense value of each feature //long* T; // number of data in each class in this node double** dataRecord = nullptr;// Record the data long* resultRecord = nullptr;// Record the result long size = 0;// Size of the dataset }; RandomForest::RandomForest(long mTree, long feature, int* s, double forg, long noC, Evaluation eval, bool b, double t){ srand((long)clock()); bagging = b; activeTree = mTree; maxTree = mTree; allT = new long[mTree]; tThresh=t; lastT = -2; lastAll = 0; long i; f = feature; sparse = new int[f]; for(i=0; if)minF=f; DTrees = (DecisionTree**)malloc(mTree*sizeof(DecisionTree*)); for(i=0; iisRF=true; } } void RandomForest::fit(double** data, long* result, long size){ long i, j, k, l; double** newData; long* newResult; long localT = 0; int stale = 0; if(lastT==-2){ lastT=-1; }else{ for(i=0; i=0){ double lastSm = (double)lastT/lastAll; double localSm = (double)localT/localAll; double lastSd = sqrt(pow((1.0-lastSm),2)*lastT+pow(lastSm,2)*(lastAll-lastT)/(lastAll-1)); double localSd = sqrt(pow((1.0-localSm),2)*localT+pow(localSm,2)*(localAll-localT)/(localAll-1)); double v = lastAll+localAll-2; double sp = sqrt(((lastAll-1) * lastSd * lastSd + (localAll-1) * localSd * localSd) / v); double q; double t = 
lastSm-localSm; if(sp==0){q = 1;} else{ t = t/(sp*sqrt(1.0/lastAll+1.0/localAll)); boost::math::students_t dist(v); double c = cdf(dist, t); q = cdf(complement(dist, fabs(t))); } if(q<=tThresh){ lastT += localT; lastAll += localAll; }else if(t<0){ lastT = localT; lastAll = localAll; }else{ double newAcc = (double)localT/localAll; double lastAcc= (double)lastT/lastAll; stale = floor((newAcc-lastAcc)/(lastAcc)*maxTree); lastT = localT; lastAll = localAll; } }else{ lastT = localT; lastAll = localAll; } } Rotate(stale); for(i=0; ifit(newData, newResult, size*times); } /*for(i=0; iretain = 10*size; //if(backupTrees[i]==nullptr) continue; long times; //times = poisson(posMean); //if(times==0)continue; times=1; newData = (double**)malloc(sizeof(double*)*size*times); newResult = (long*)malloc(sizeof(long)*size*times); long c = 0; for(j = 0; jfit(newData, newResult, size*times); }*/ } long* RandomForest::fitThenPredict(double** trainData, long* trainResult, long trainSize, double** testData, long testSize){ fit(trainData, trainResult, trainSize); long* testResult = (long*)malloc(testSize*sizeof(long)); for(long i=0; i=0)return; else{ stale = std::min(stale, maxTree); stale*=-1; while(stale>0){ long currentMin = 2147483647; for(i = 0; iDTree->size; newData = (double**)malloc(sizeof(double*)*size); newResult = (long*)malloc(sizeof(long)*size); for(j = 0; jDTree->dataRecord[j][k]; } newData[j][f] = 0; newResult[j] = DTrees[minIndex]->DTree->resultRecord[j]; } DTrees[minIndex]->Stablelize(); DTrees[minIndex]->Free(); delete DTrees[minIndex]; DTrees[minIndex] = new DecisionTree(f, sparse, forget, minF+rand()%(f+1-minF), noClasses, e); DTrees[minIndex]->isRF=true; DTrees[minIndex]->fit(newData, newResult, size); for(j=0; jTest(newData[j], DTrees[minIndex]->DTree)==newResult[j])lastT2++; } DTrees[minIndex]->lastAll=size; DTrees[minIndex]->lastT=lastT2; } } } long RandomForest::Test(double* data, long result){ long i; long predict[noClasses]; for(i=0; iTest(data, DTrees[i]->DTree); 
predict[tmp]++; if(tmp==result)allT[i]++; } long ret = 0; for(i=1; ipredict[ret])ret = i; } return ret; } long RandomForest::Test(double* data){ long i; long predict[noClasses]; for(i=0; iTest(data, DTrees[i]->DTree)]++; } long ret = 0; for(i=1; ipredict[ret])ret = i; } return ret; }