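/* 数据仓库与数据挖掘 -- running page header of the source document ("Data Warehouse and Data Mining"); not part of the program */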
iGet = getc(fp); } i++;
iAttribute = (int *)malloc(sizeof(int)*i); for (int k = 0; k
iAttribute[k] = (int)malloc(sizeof(int)); iAttribute[k] = 1; }
while (EOF != iGet)
{
if ('\\n' == iGet) {
j++; }
iGet = getc(fp); } j++;
iInput = (int **)malloc(sizeof(int*)*j); iSamples = (int *)malloc(sizeof(int)*j); for (a = 0;a < j;a++) {
iInput[a] = (int *)malloc(sizeof(int)*i); iSamples[a] = (int)malloc(sizeof(int)); iSamples[a] = a; }
a = 0;
fclose(fp);
fp=fopen(\ iGet = getc(fp); while(EOF != iGet) {
if ((',' != iGet)&&('\\n' != iGet)) {
iInput[a][b] = iGet - 48; b++; }
if (b == i) {
a++;
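/* 数据仓库与数据挖掘 -- running page header of the source document ("Data Warehouse and Data Mining"); not part of the program */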
b = 0; }
iGet = getc(fp); }
fp1 = fopen(\
build_tree(fp1,iSamples,iAttribute,0); fclose(fp); return 0; }
/*
 * build_tree - recursively partition a set of samples on one attribute at a
 * time and print the resulting nodes to fp.
 *
 * fp:         output stream; every fprintf in this function writes to it.
 * iSamples:   list of row indices into the file-level iInput table. The scan
 *             loops stop at the first -1 entry or after j entries, whichever
 *             comes first.
 * iAttribute: per-attribute state array. The slot of the attribute chosen in
 *             this call is overwritten with -1 before recursing, and the same
 *             array is handed to every recursive call.
 * level:      recursion depth. The visible text only forwards it as
 *             level + 1; it may also have been an fprintf argument in text
 *             that was lost -- TODO confirm against the original source.
 *
 * Relies on names defined outside this block: MAX, i, j, iInput,
 * check_samples, check_attribute_null, choose_attribute, get_attributes.
 *
 * NOTE(review): this listing was damaged when it was extracted from a
 * document. The first for-loop has lost everything after "k", and each
 * fprintf call has lost its format string and arguments. The code below is
 * kept exactly as found; restore it from the original before compiling.
 */
void build_tree(FILE * fp, int* iSamples, int* iAttribute,int level)// {
/* NOTE(review): the opening brace of the function body sits behind the line
 * comment on the signature line above, so it is commented out as written. */

/* iTest_Attribute: index returned by choose_attribute for this node.
 * iAttributeValue: values of that attribute, filled by get_attributes; the
 *                  loop below treats -1 as the end marker.
 * k, l, m:         cursors over attribute values, over iSamples, and over
 *                  the subset being built. */
int iTest_Attribute = 0; int iAttributeValue[MAX]; int k = 0; int l = 0; int m = 0;
/* Subset of iSamples whose chosen attribute equals the current value;
 * allocated per value and terminated with -1. */
int *iSamples1;
/*
 * The next physical line holds the rest of build_tree, in this order:
 *   1. Reset iAttributeValue[] to -1 (loop header truncated, see above).
 *   2. Stop conditions: print and return when check_samples(iSamples) is 0
 *      or check_attribute_null(iAttribute) is 1. Both helpers are defined
 *      elsewhere; presumably "all samples share one class" and "no attribute
 *      left to split on" -- verify against their definitions.
 *   3. Pick the split attribute with choose_attribute, mark it with -1 in
 *      iAttribute, and collect its values with get_attributes.
 *   4. For each value until the -1 marker: count the matching samples,
 *      allocate m + 1 ints, copy the matching row indices, append -1,
 *      print, and recurse with level + 1.
 *
 * NOTE(review), visible in the code as written:
 *   - "(-1 != iAttributeValue[k]) && (k < MAX)" reads iAttributeValue[k]
 *     before checking k, so a full array is read one element past its end.
 *   - The malloc result is used without a NULL check, and iSamples1 is never
 *     freed in this function.
 *   - iAttribute[iTest_Attribute] is never restored, so the -1 set for one
 *     subtree is still in place when sibling subtrees are built.
 *   - The "-1 == iSamples1[0]" branch returns from the whole function, which
 *     skips any remaining attribute values.
 *
 * The same physical line then begins choose_attribute, whose definition
 * continues past this block; that text is left untouched.
 */
for (k = 0; k { iAttributeValue[k] = -1; } if (0 == check_samples(iSamples)) { fprintf(fp,\ return; } if (1 == check_attribute_null(iAttribute)) { fprintf(fp,\ return; } iTest_Attribute = choose_attribute(iSamples,iAttribute); iAttribute[iTest_Attribute] = -1; get_attributes(iSamples,iAttributeValue,iTest_Attribute); 数据仓库与数据挖掘 k = 0; while ((-1 != iAttributeValue[k])&&(k < MAX)) { l = 0; m = 0; while ((-1 != iSamples[l])&&(l < j)) { if (iInput[iSamples[l]][iTest_Attribute] == iAttributeValue[k]) { m++; } l++; } iSamples1 = (int *)malloc(sizeof(int)*(m+1)); l = 0; m = 0; while ((-1 != iSamples[l])&&(l < j)) { if (iInput[iSamples[l]][iTest_Attribute] == iAttributeValue[k]) { iSamples1[m] = iSamples[l]; m++; } l++; } iSamples1[m] = -1; if (-1 == iSamples1[0]) { fprintf(fp,\ return; } fprintf(fp,\ = %d\\n\ build_tree(fp,iSamples1,iAttribute,level+1); k++; } } int choose_attribute(int* iSamples, int* iAttribute) { int iTestAttribute = -1; int k = 0; int l = 0; int m = 0; 数据仓库与数据挖掘 int n = 0; int iTrue = 0; int iFalse = 0; int iTrue1 = 0; int iFalse1 = 0; int iDepart[MAX]; int iRecord[MAX]; double dEntropy = 0.0; double dGainratio = 0.0; double test = 0.0; for (k = 0;k iDepart[k] = -1; iRecord[k] = 0; } k = 0; while ((l!=2)&&(k<(i - 1))) { if (iAttribute[k] == -1) { l++; } k++; } if (l == 1) { for (k = 0;k<(k-1);k++) { if (iAttribute[k] == -1) { return iAttribute[k]; } } } for (k = 0;k < (i-1);k++) { l = 0; iTrue = 0; iFalse = 0; if (iAttribute[k] != -1) { 数据仓库与数据挖掘 while ((-1 != iSamples[l])&&(l < j)) { if (0 == iInput[iSamples[l]][i-1]) { iFalse++; } if (1 == iInput[iSamples[l]][i-1]) { iTrue++; } l++; } for (n = 0;n m = 0; while((iDepart[m]!=-1)&&(m!=MAX)) { if (iInput[iSamples[n]][iAttribute[k]] == iDepart[m]) { break; } m++; } if (-1 == iDepart[m]) { iDepart[m] = iInput[iSamples[n]][iAttribute[k]]; } } while ((iDepart[m] != -1)&&(m!=MAX)) { for (n = 0;n if (iInput[iSamples[n]][iAttribute[k]] == iDepart[m]) { if (1 == iInput[iSamples[n]][i-1]) { iTrue1++; } if (0 == 
iInput[iSamples[n]][i-1]) { iFalse1++; } iRecord[m]++; }