ITEEDU

4.7 实例研究:用数组计算平均值、中数和模

下面要举一个更大的例子。计算机常用于编译和分析调查结果,图4.17的程序用数组response

初始化调查的99个答复(用常量变量response表示),每个答复是1到9的数值。程序计算99

个值的平均值、中数和模。

// Fig. 4.17: fig04_lT.cpp
 // This program introduces the topic of survey data analysis.
 // It computes the mean, median, and mode of the data.
 #include <iostream.h>
 #include <iomanip.h>
 void mean( const int[],int );
 void median( int []
 voidmode(int[],int[],int);
 void bubbleSort( int[ ] , int );
 voidprintArray(constint[],int);
 int main()
 {
     const int responseSize = 99;
     intfrequency[10] ={ 0 },
         response[ responseSize ] =
          {6,7,8,9,8,7,8,9,8,9,
,8,9,5,9,8,7,8,7,8,
,7,8,9,3,9,8,7,8,7,
,8,9,8,9,8,9,7,8,9,
,7,8,7,8,7,9,8,9,2,
,8,9,8,9,8,9,7,5,3,
,6,7,2,5,3,9,4,6,4,
,8,9,6,8,7,8,9,7,8,
,4,4,2,5,3,8,7,5,6,
,5,6,1,6,5,7,8,7};
    mean( response, responseSize );
    median( response, responseSize );
    mode( frequency, response, responseSize );
    return O;
}
 void mean( const int answer[], int arraySize )
 {
     int total = 0;
   cout << "********\n Mean\n********\n";
    for ( int j = 0; j < arraySize; j++ )
       total += answer[ j ];
    cout << "The mean is the average value of the data\n"
        << "items. The mean is equal to the total of\n"
        << "all the data items divided by the number\n"
        << "of data items (" << arraySize
        << "}. The mean value for\nthis run is:"
        << total <<" / "<< arraySize <<" ="
        << setiosflags{ ios::fixed ] ios::showpoint }
        << setprecision( 4 ) << ( float } total / arraySize
        << "\n\n";
 }
 void median( int answer[], int size }
 {
       cout << "\n********\n Median\n********\n"
           << "The unsorted array of responses is";
      printArray( answer, size );
        bubbleSort( answer, size );
        cout << "\n\nThe sorted array is";
        printArray( answer, size );
        cout << "\n\nThe median is element" << size / 2
            <<" of\nthe sorted" << size
            << "element array.\nFor this run the median is"
            << answer[ size / 2 ] << "\n\n";
 }
 void mode( int freq[ ], int answer[], int size )
 {
      int rating, largest = 0, modeValue = 0;
      cout << "\n********\n Mode\n********\n";
      for ( rating = 1; rating <= 9; rating++ )
         freq[ rating ] = 0;
       for ( int j = 0; j < size; j++ }
              ++freq[ answe[ j ] ];
       cout << "Response"<< setw( 11 ) << "Frequency"
           << setw( 19 ) << "Histogram\n\n" << setw( 55 )
           << "1   1   2   2\n" << setw( 56 )
           << "5   0   5   0   5\n\n";
      for ( rating = 1; rating <= 9; rating++ ) {
        cout << setw( 8 ) << rating << setw(11)
            << freq[ rating ] << "        ";
      if ( freq[ rating ] > largest ) {
          largest = freq[ rating ];
         modeValue = rating;
      }
      for (int h = 1; h <= freq[ rating ]; h++ )
         cout << '*';
         cout << '\n';
  }
  cout << "The mode is the most frequent value.\n"
      << "For this run the mode is "<< modeValue
      << "which occurred" << largest <<" times." << endl;
 }
 void bubbleSort( int a[], int size )
 {
      int hold;
      for (int pass = 1; pass < size; pass++ )
      for ( int j - 0; j < size - 1; j++ )
           if ( a[ j ] > a[ j + 1 ] ) {
              hold = a[ j ];
                   a[ j ]  =a[ j + 1 ];
             a[ j + 1 ] = hold;
          }
 }
 void printArray( const int a[ ], int size )
 {
      for ( int j = 0; j < size; j++ )
          if ( j % 20 == 0 )
           cout << endl;
          cout << setw( 2 ) << a[ j ]
      }
 }
图4.17调查数据分析程序

平均值是99个数的算术平均值。函数mean计算99个元素的和除以99所得的平均值。

中数是“中间值”,函数median调用bubbleSort函数排序(按升序)答复数组,并选出数组的中间元素answer[responseSize/2]。注意,如果元素个数为偶数,则中数为中间两个元素的平均值,函数median目前没有提供这个功能。调用函数printArray输出response数组。

模是99个答复中最经常出现的值。函数mode计算每种答复的个数,然后选择个数最多的值。

这个版本的函数mode不处理连接(见练习4.14)。函数mode还产生直方图,帮助用图形确定模。图4.18包含这个程序的示例输出。这个例子包括数组问题中通常需要的最常见操作,包括将数组传递给函数。

* * * * * * * *
Mena
* * * * * * * *
The mean is the average value of the data
items,The mean is equal to the total of
all the data items divided by the number
of data items (99). The mean value for
this run is: 681 / 99 = 6.8788

* * * * * * * *
Mena
* * * * * * * *
The unsorted array of responses is
6 7 8 9 8 7 8 9 8 9 7 8 9 5 9 8 7 8 7 8
6 7 8 9 3 9 8 7 8 7 7 8 9 8 9 8 9 7 8 9
6 7 8 7 8 7 9 8 9 2 7 8 9 8 9 8 9 7 5 3
5 6 7 2 5 3 9 4 6 4 7 8 9 6 8 7 8 9 7 8
7 4 4 2 5 3 8 7 8 6 4 5 6 1 6 5 7 8 7

The sorted array is
1 2 2 2 3 3 3 3 4 4 4 4 4 5 5 5 5 5 5 5
5 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7
7 7 7 7 7 7 3 7 7 7 7 7 7 8 8 8 8 8 8 8
8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
The median is element 49 of
The sorted 99 element array.
For this run the median is 7
* * * * * * * *
Mode
* * * * * * * *
Response Frequency Histogram
1 1 2 2
5 0 5 0 5
1 1 *
2 3 * *
3 4 * * *
4 5 * * * * *
5 8 * * * * * * * *
6 9 * * * * * * * * *
7 23 * * * * * * * * * * * * * * * * * * * * * * * *
8 27 * * * * * * * * * * * * * * * * * * * * * * * * * * * *
9 19 * * * * * * * * * * * * * * * * * * * *
The mode is the most frequent value
For this run the is 8 which occurred 27 times.

图 4.18 调查数据分析程序的运行结果

4.8 查找数组:线性查找与折半查找

程序员经常要处理数组中存放的大量数据,可能需要确定数组是否包含符合某关键值(key value)的值。寻找数组中某个元素的过程称为查找(searching)。本节介绍两个查找方法:简单的线性查找(liner search)方法和更复杂的折半查找(binary search)方法。

练习4.33和练习4. 34要求用递归法实现线性查找与折半查找。

图4.19的线性查找比较数组中每个元素与查找键(search key)。由于数组没有特定的顺序,很可能第一个就找到,也可能要到最后一个才找到。因此,平均起来,程序要比较数组中一半的元素才能找到查找键值。要确定哪个值不在数组中,则程序要比较查找键与数组中每一个元素。

// Fig. 4.19: fig04_19.cpp
 // Linear search of an array
 #include < iostream.h>
 int linearSearch( const int [], int, int );
 int main()
 {
    const int arraySize = 100;
   int a[ arraySize ], searchKey, element;
   for(int x = 0; X < arraySize; x++ )  // create some data
       a[ x ] = 2 * x;
   cout << "Enter integer search key:" << endl;
   cin >> searchKey;
   element = linearSearch( a, searchKey, arraySize );
   if ( element != -1 )
     cout << "Found value in element " << element << endl;
   else
     cout << "Value not found" << endl;
   return 0;
 }
 int linearsearch( const int arraay[], int key, int sizeofArray )
 {
    for (int n = 0; n < sizeofArray; n++ )
     if(  arraay[ n ] == key )
        return n;
    return -1;
 }

输出结果:

Enter integer search key:

36

found value in element 18

Enter integer search key:

37

Value not found

图4.19数组的线性查找

线性查找方法适用于小数组或未排序数组。但是,对于大数组,线性查找是低效的。如果是排序数组,则可以用高速折半查找。

折半查找算法在每次比较之后排除所查找数组的一半元素。这个算法找到数组的中间位置,将其与查找键比较。如果相等,则已找到查找键,返回该元素的数组下标。否则将问题简化为查找—半数组。如果查找键小于数组中间元素,则查找数组的前半部分,否则查找数组的后半部分。如果查找键不是指定子数组(原始数组的一部分)中的中间元素,则对原数组的四分之—重复这个算法。查找一直继续,直到查找键等于指定子数组中的中间元素或子数组只剩一个元素且不等于查找键(表示找不到这个查找键)为止。

在最糟糕的情况下,查找1024个元素的数组只要用折半查找进行十次比较。重复将1024除2(因为折半查找算法在每次比较之后排除所查找效组的一半元素)得到值512、256、128、64、32、16、8、4、1和1。数值1024(210)除2十次之后即变成1。除以1就是折半查找算法的一次比较。

1048576(220)个元素的数组最多只要20次比较就可以取得查找键。十亿个元素的数组最多只要30次比较就可以取得查找键。这比线性查找的性能大有提高,后者平均要求比较数组元素个数一半的次数。对于十亿个元素的数组,这是5亿次比较与30次比较的差别。折半查找所需的最大比较次数可以通过大于数组元素个数的第一个2的次幂的指数确定。

性能提示4.6

折半查找所带来的巨大的性能提高是有代价的,比起一次查找一个项日,排序数组的成本要高得多。如乘数组需要多次高速查找.则排序数组的开销是值得的。

图4.20显示了函数binarySearch的迭代版本。函数取4个参数,一个整数数组b、一个整数searehKey、low数组下标和high数组下标。如果查找键不符合于数组的中间元素,则调整low数组下标和high数组下标,以便查找更小的子数组。如果查找键小于中间元素,则high数组的下标设置为middle-1,继续查找low到middle-1的元素。如果查找健大于中间元素,则low数组的下标设置为middle+1,继续查找middle+1到high的元素。程序使用15个元素的数组。大于数组元素个数的第一个2的次幂是16(24),因此寻找查找健最多只要四次比较。函数printHeader输出数组下标,函数printRow输出折半查找过程中的每个子数组。每个子数组的中间元素标上星号(*),表示用这个元素与查找键比较。

 // Fig. 4.20: fig0420.cpp
 // Binary search of an array
 #include <iostream.h>
#include <iomanip.h> int binarySeareh( int [], int, int, int, int ); void printHeader( iht , void printRow( int [], int, int, int, int ); int main() { const iht arraySize = 15; int a[ arraySize ], key, result; for ( int i = 0; i < arraySize; i++ ) a[ i ]= 2 * i; // place some data in array cout << "Enter a number between 0 and 28: "; cin >> key; printHeader( arraySize ); result = binarySearsh( a, key, 0, arraySize - 1, arraySize ); if ( result != -1 ) cout << '\n' << key << "found in array element" << result << endl; else cout << '\n' << key << "not found" << endl; return 0; } // int binarysearch( int b[ ], int searchKey, int low, int high, int size ) { int middle; while ( low <= high ) { middle = ( low + high ) / 2; printRow( b, low, middle, high, size ); if ( searchKey == b[ middle ] ) // match return middle; else if ( searchKey < b[ middle ] ) high = middle - 1; // search low end of array else low = middle + 1; // search high end of array } return -1; // searchKey not found // Print a header for the output void printHeader( int size ) { cout << "\nSubscripts:\n"; for (int i = 0; i < size; i++ ) cout << setw( 3 ) << i << ' '; cout << '\n'; for ( i = 1; i <= 4 * size; i++ ) cout << '-'; cout << endl; } // Print one row of output showing the current // part of the array being processed. void printRow( int b[ ], int low, int mid, int high, int size ) { for ( int i = 0; i < size; i++ ) if ( i < low || i > high ) cout << " "; else if ( i == mid ) // mark middle value cout << setw( 3 ) << b[ i ] << '*'; else cout << setw( 3 ) << b[ i ] << ' '; cout << endl; }

输出结果:

Enter a number between 0 and 28:25

Subscripts:
    O    1    2    3    4    5    6    7    8    9    10    11    12    13    14
   ------------------------------------------------------------------------------
    O    2    4    6    8    10   12   14*  16  18   20    22    24    26    29
                                           16   18   20    22*   24    26    28
                                                                  24    26*   28
                                                                 24*
25 not found

Enter a number between 0 and 28: 8
    Subscripts:
    O    1    2    3    4    5    6    7    8    9    lO    ll    12    13    14
   ------------------------------------------------------------------------------
    O    2    4    6    8    lO   12   14*  16   18   20    22    24    26    28
    O    2    4    6*   8    10   12
                       8    lO*  12
                        8*
8  found  in  array  element  4

Enter  a  number  between  O  and  28:  6

Subscrlpts:
    O    l    2    3    4    5    6    7    8    9    10    11    12    13    14
   ------------------------------------------------------------------------------
    O    2    4    6    8   10   12   14*  16  18   20    22    24    26    28
    O    2    4    6*   8   10   12
6  found  ln  array  element  3
图4.20排序数组的折半查找过程