A Robust, Optimization-Based Approach for Approximate Answering of Aggregate Queries
Surajit Chaudhuri, Gautam Das, Vivek Narasayya
April 2001
Technical Report MSR-TR-2001-37
Microsoft Research, Microsoft Corporation, One Microsoft Way, Redmond, WA 98052
Abstract
The ability to approximately answer aggregation queries accurately and efficiently is of great benefit for decision support and data mining tools. In contrast to previous sampling-based studies, we treat the problem as an optimization problem whose goal is to minimize the error in answering queries in the given workload. A key novelty of our approach is that we can tailor the choice of samples to be robust even for workloads that are similar but not necessarily identical to the given workload. Finally, our techniques recognize the importance of taking into account the variance in the data distribution in a principled manner. We show how our solution can be implemented on a database system, and present results of extensive experiments on Microsoft SQL Server 2000 that demonstrate the superior quality of our method compared to previous work.
Contents
1 Introduction ... 1
2 Related Work ... 2
3 Architecture for Approximate Query Processing ... 3
3.1 Preliminaries ... 3
3.2 Our Architecture ... 4
3.3 Error Metrics ... 5
4 The Special Case of a Fixed Workload ... 5
4.1 Problem Formulation ... 6
4.2 Fundamental Regions ... 6
4.3 Solutions for FIXEDSAMP ... 7
5 Lifting Workload to Query Distributions ... 8
6 Rationale for Stratified Sampling ... 10
7 Solution for Single-Table Selection Queries with Aggregation ... 11
7.1 Solution for COUNT Aggregate ... 11
7.1.1 Stratification ... 11
7.1.2 Allocation ... 11
7.2 Solution for SUM Aggregate ... 13
7.2.1 Stratification ... 13
7.2.2 Allocation ... 14
7.3 Pragmatic Issues ... 14
7.3.1 Identifying Fundamental Regions ... 14
7.3.2 Handling Large Number of Fundamental Regions ... 15
7.3.3 Obtaining Integer Solutions ... 15
7.3.4 Obtaining an Unbiased Estimator ... 15
7.4 Putting it All Together ... 16
8 Extensions for More General Workloads ... 16
8.1 Extensions for a Heterogeneous Mix of Queries ... 16
8.2 GROUP BY Queries ... 17
8.3 JOIN Queries ... 18
8.4 Extensions for Other Aggregates ... 18
9 Implementation and Experimental Results ... 19
9.1 Implementation ... 19
9.2 Experimental Setup ... 19
9.2.1 Hardware/OS ... 19
9.2.2 Databases ... 20
9.2.3 Workloads ... 20
9.2.4 Parameters ... 20
9.2.5 Error Metric ... 20
9.3 Results ... 20
9.3.1 Quality vs. Sampling Fraction ... 20
9.3.2 Quality vs. Overlap between Training Set and Test Set ... 21
9.3.3 Automatically Determining the Lifting Parameters δ and γ ... 22
9.3.4 STRAT vs. FIXED ... 22
9.3.5 Quality vs. Data Skew ... 23
9.3.6 Comparison of Time for Building Samples ... 23
9.3.7 Comparison on a Real Data Set ... 23
9.3.8 Results for L2 Metric ... 24
10 Summary ... 24
Acknowledgement ... 25
References ... 25
Appendix A: Review of Random Sampling Techniques ... 26
Uniform Sampling ... 26
Stratified Sampling ... 26
Multivariate Stratified Sampling ... 28
Weighted Sampling ... 28
Appendix B: Counter-Example for Congress ... 28
Appendix C: Proofs for COUNT and SUM Aggregates ... 30
Proof of Lemma 3 ... 30
Proof of Lemma 1 ... 38
Proof of Lemma 5 ... 40
Appendix D: Binomial Distributions and Chernoff Bounds ... 41

Introduction
In recent years, decision support applications such as On Line Analytical Processing (OLAP) and data mining for analyzing large databases have become popular. A common characteristic of these applications is that they execute aggregation queries on large databases, which can often be expensive and resource intensive. 
Therefore, the ability to obtain approximate answers to such queries accurately and efficiently can greatly benefit the scalability of these applications. One approach to address this problem is to use precomputed samples of the data instead of the complete data to answer the queries. While this approach can give approximate answers very efficiently, it is easy to see that identifying an appropriate precomputed sample that avoids large errors on an arbitrary query is virtually impossible, particularly when we take into account the fact that queries may involve selections, GROUP BY and join. To minimize the effects of this problem, previous studies have proposed using the workload to guide the process of selecting samples [1,8,14]. The hope is that by picking a sample that is tuned to the given workload, we can ensure acceptable error at least for queries in the workload. Despite recognizing the importance of workload information in picking samples of the data, previous studies suffer from three significant drawbacks. First, although the proposed solutions have intuitive appeal, the lack of a rigorous problem formulation leads to solutions that are difficult to evaluate theoretically. Second, they do not attempt to formally deal with uncertainty in the expected workload, i.e., when incoming queries are similar but not identical to queries in the given workload. Third, most previous studies ignore the variance in the data distribution of the aggregated column(s). As the following example shows, ignoring data variance can lead to extremely poor quality of answers for aggregate functions such as SUM: Example 1. Consider a relation R containing two columns and four records {<1, 10>, <2, 10>, <3, 10>, <4, 1000>}. Assume that we are allowed to use a sample S of two records from R to answer the query Q: SELECT SUM(Revenue) FROM R. We answer a query by running it against S and scaling the result by a factor of two (since we are using a 50% sample). 
Consider a sample S1 = {<1, 10>, <3, 10>}. The estimated answer for Q using S1 is 40, which is a severe underestimate of the actual answer (1030). Now consider another sample S2 = {<1,10>, <4,1000>}. The estimated answer for Q using S2 is 2020, which is a significant overestimate. Thus, large variance in the aggregate column can lead to large relative errors. In contrast to most previous sampling-based studies, in this paper, we formulate the problem of precomputing a sample as an optimization problem, whose goal is to pick a sample that minimizes the error for the given workload. We show that when the actual workload is identical to the given workload (we refer to such a workload as fixed), we can achieve dramatically smaller errors using a deterministic solution to the optimization problem. Of course, such a solution is not resilient when the actual workload happens to deviate from the given workload. We therefore introduce a generalized model of the workload (lifted workload) that makes it possible to tune the choice of the sample so that approximate query processing using the sample is effective not only for workloads that are identical to the given workload, but also for workloads that are similar to the given workload (i.e., queries that select regions of the data that overlap significantly with the data accessed by the queries in the given workload) a more realistic scenario. We formulate selection of the sample for such a workload as a stratified sampling problem with the goal to minimize error in estimation of aggregates. Our formulation makes the problem amenable to exploiting known techniques in stratified sampling and optimization. As a consequence, we have developed a robust approach to the problem of approximate query processing of SPJ queries with GROUP BY and aggregation. We have implemented our solutions on Microsoft SQL Server 2000, addressing the pragmatic issues that are central to an effective solution that can be deployed in a commercial DBMS. 
The benefits of our systematic approach are amply demonstrated not only by theoretical results, but also experimentally on synthetic as well as on a deployed enterprise data-warehouse in our organization. We begin by discussing related work in Section 2. We present an overview of our architecture for approximate query processing in Section 3. Our deterministic solution for the special case of a fixed workload is presented in Section 4. We describe a model for lifting a given workload in Section 5. We formulate the problem of approximate query answering using stratified sampling in Section 6. We present our solution to the optimization problem for single-table selection queries with aggregation in Section 7, and describe extensions necessary for a broader class of queries in Section 8. We describe our implementation and experimental results in Section 9, and summarize in Section 10. Related Work There have been several previous approaches for building precomputed samples for approximate query processing [1,8,14], primarily based on randomized techniques. We discuss these studies here from a theoretical/conceptual point of view. In Section 9, we provide an extensive experimental comparison of these methods with our solutions. For a review of well-known classical sampling techniques, see Appendix A. First, as mentioned in the introduction, a shortcoming common to most previous work is that they assume a fixed workload, i.e., their techniques do not cope with uncertainty in the expected workload. Both [8] and [14] present a seemingly intuitive idea based on weighted sampling. Each record in the relation R to be sampled is tagged with a frequency, i.e. the number of queries in the workload such that the record must be selected to answer the query. 
Once the tagging is done, an expected number of k records are selected in the sample, where the probability of selecting a record t with frequency ft is $k \cdot f_t / \sum_{u \in R} f_u$. Thus, records that are accessed more frequently have a greater chance of being included inside the sample. However, as the following example shows, this approach can lead to poor quality. Consider a set of k queries {Q1, …, Qk} (where k is also the size of the sample) in the workload that reference disjoint partitions of records in R. Let a few queries reference large partitions and most queries reference very small partitions. Then, by the weighted sampling scheme described above, most records in the sample will come from the large partitions. Therefore, with high probability, there will be no records selected from many of the small partitions. Thus the relative error in answering most of the queries will be large. A better strategy is to select one record from each partition and thereby answer all queries without error. Furthermore, [14] does not address the issue of variance of data in the aggregate column. However, a novelty of the paper is that it tackles the issue of maintaining and continuously refreshing a sample of records of R after a new query has been processed. The paper [8] attempted to address the problem of internal variance of data in the aggregate column (see Example 1). The basic idea is that outliers of the data (i.e. the records that contribute to high variance in the aggregate column) are collected into a separate index, while the remaining data is sampled using a weighted sampling technique. Queries are answered by running them against both the outlier index as well as the weighted sample, and an estimated answer is composed out of both results. 
This method too is easily seen to result in worse quality than our approach, since the concept of an outlier index + a (weighted) sample can be viewed as a special type of our approach using stratified sampling (see Appendix A), where the outliers form their own stratum that is sampled in its entirety. The idea of separately handling outliers has also appeared in the context of applying exploratory data analysis methods on data cubes [3,4]. The congressional sampling paper [1] has the most principled approach of the three papers. The authors advocate a stratified sampling strategy called Congress that tries to simultaneously satisfy a set of GROUP BY queries. Some key concepts of our paper (e.g. the concept of fundamental regions that we discuss later) have been influenced by it. However, their approach is still ad-hoc in the sense that even though they try to reduce the error, their scheme does not minimize the error for any of the well-known error metrics (see Appendix B for an example). Moreover, in our paper, we have addressed several issues ignored in [1] such as lifted workloads, internal variance in the data, and building a single sample for a heterogeneous mix of queries. There has been a large body of work on approximately answering a query by sampling on the fly rather than exploiting a precomputed sample and it offers another alternative approach. In general, on-the-fly sampling can be expensive, particularly in the presence of join and GROUP BY without extensive availability of statistics (histograms) and/or enhancements to the database engine. A notable example of on-the-fly sampling technique is in [16] to approximately answer SPJ queries with aggregation. In this work, they also identify enhancements to database engine needed to support progressive refinement of the approximate answer. 
In addition to sampling based methods, there have been other data reduction based approaches to approximate query processing, such as histograms [18,24] and wavelets [7,30,31]. As noted in [30], a general problem with histogram-based approaches is that they incur high storage overhead and construction cost as the dimensionality of the data increases. In [30,31], the authors argued the effectiveness of wavelets for handling aggregations over (high-dimensional) OLAP cubes. More recently, [7] showed how SQL operators can be applied directly on wavelet coefficients to efficiently produce approximate answers. There is an opportunity to exploit workload (specifically the lifted workload model) to enhance the effectiveness of wavelet-based techniques. More extensive theoretical and experimental comparisons of data reduction based approaches and sampling based approaches are necessary to identify their relative strengths and weaknesses. For example, although [7] compares their approach to sampling, the comparison is limited to uniform sampling. Note that most of the sampling techniques based on workload, including our work, do not use uniform sampling. Architecture for Approximate Query Processing Preliminaries We present an overview of our architecture for approximate query processing on a relational database. We consider queries with selections, foreign-key joins and GROUP BY containing aggregation functions such as COUNT, SUM, and AVG. We assume that a pre-designated amount of storage space is available for selecting samples from the database. These samples, possibly in conjunction with other base relations, will be used for answering the queries approximately but efficiently. The techniques for selecting samples can be randomized (e.g., we may sample uniformly at random) or deterministic (e.g., we may select the best sample that minimizes the total error in the approximate answers). 
As with previous sampling-based studies [1,8,14], we have taken the approach of exploiting the available workload (provided as an input) to find samples that work well for queries in the given workload. A workload W is specified as a set of pairs of queries and their corresponding weights: i.e., W = {<Q1, w1>, …, <Qq, wq>}, where weight wi indicates the importance of query Qi in the workload. Without loss of generality, we can assume that the weights are normalized, i.e., Σi wi = 1. In practice, such a workload may be obtained using profiling tools, available on most modern DBMSs, for logging queries that execute on the server. Our Architecture Our architecture for approximate query processing is summarized in Figure 1. The inputs are a database and a workload W. For simplicity, we present our architecture for the case of a single relation R. There are two components in our architecture: (1) an offline component for selecting a sample of records from relation R, and (2) an online component that (a) rewrites an incoming query to use the sample (if appropriate) to answer the query approximately and (b) reports the answer with an estimate of the error in the answer. The novelty of this paper is in the first component. We present a method for automatically lifting a given workload, i.e., quantifying a generalized model of the workload. Our motivation stems from the fact that it is unrealistic to assume that incoming queries in the future will be identical to the given workload W. The key to lifting the workload is the ability to compute a probability distribution pW of incoming queries, i.e., for any incoming query Q, pW(Q) is the probability of Q. The subscript indicates that the distribution depends on W. Our algorithm then selects a sample that is resilient enough for such a lifted workload. We also show how we can select a sample that minimizes the error of answering queries in the (lifted) workload. This step is labeled "Build Samples" in the figure. 
An incoming query is rewritten to run against the samples instead of the base relation. For a multi-relation query, in addition to the samples, we may also reference other base relations to answer the query. As in previous work [1,8,14], we assume that each record in the sample also contains an additional column known as the ScaleFactor. The value of the aggregate column of each record in the sample is first scaled up by multiplying with the ScaleFactor, and then aggregated. We note that alternative schemes, as in [1], are possible where the ScaleFactor column is maintained in a separate relation from the sample. Such schemes incur reduced update and storage overhead at the expense of increased run time overhead. The techniques described in this paper are applicable independent of the specific scheme used. In addition to the approximate answer, we can also report the variance (or even a confidence interval) for the approximate answer. Error Metrics We define the error metrics used to determine the quality of an approximate answer to an aggregation query. Suppose the correct answer for a query Q is y while the approximate answer is y′. We focus on relative error instead of absolute error, since the former is usually a fairer measure across queries. Relative error is defined as $E(Q) = |y - y'| / y$. The squared error is defined as $SE(Q) = (|y - y'| / y)^2$. Now consider a GROUP BY query Q that induces g groups in the data. Suppose the correct answer for the ith group is yi while the approximate answer is y′i. The squared error in answering the query is $SE(Q) = \frac{1}{g} \sum_{i=1}^{g} ((y_i - y'_i) / y_i)^2$. This error measure for a GROUP BY query has also been considered by [1,8]. In other words, a GROUP BY query can be treated as g SELECT queries, each of weight 1/g. Given a probability distribution of queries pW, the mean squared error for the distribution, MSE(pW), is defined as $MSE(p_W) = \sum_{Q} p_W(Q) \cdot SE(Q)$, where pW(Q) is the probability of query Q. 
The root mean squared error (RMSE), also known as the L2 error, is defined as $RMSE(p_W) = \sqrt{MSE(p_W)}$. Other error metrics are possible, e.g., using the L1 metric (defined as the mean error over all queries in the workload) or the L∞ metric (defined as the maximum error over all queries). In this paper, although we optimize for the MSE due to its long tradition in statistics, we can easily extend our techniques to optimize for the L1 metric. In fact, while our algorithms minimize MSE, we found that these solutions also do very well for the L1 metric. Since most previous work in this area reports the L1 metric, our experimental comparisons also report the L1 metric. The Special Case of a Fixed Workload In this section, we present a problem formulation and solution for the special case of a fixed workload, i.e., when the incoming queries are identical to the given workload. The motivation for presenting this case is to underscore the benefit of our approach of treating approximate query answering as an optimization problem. In fact, as shown below, this problem formulation allows us to use an effective deterministic scheme rather than the conventional randomization schemes considered in previous work. For simplicity, we describe the problem for the case of single table selection queries containing the COUNT or SUM aggregate. Problem Formulation We frame the optimization problem FIXEDSAMP for the case of a fixed workload W. Recall that MSE(pW) (Section 3.3) is the mean squared error for the probability distribution of queries pW. MSE(W) is equivalent to MSE(pW) where a query Q has a probability of occurrence of 1 if Q ∈ W and 0 otherwise. As described in Section 3.2, we need to associate additional column(s) with each record in the sample to allow scaling the values obtained by running the query on the sample. Observe that the problem formulation is general in the sense that it allows both randomized as well as deterministic solutions. 
Before presenting our solution to FIXEDSAMP, we first define the key concept of fundamental regions of a relation induced by a workload. Fundamental regions are important because they play a crucial role in determining an appropriate sample for the given workload. In fact, the concept of fundamental regions is also important in the context of our randomized sampling scheme that appears in Section 7. Fundamental Regions For a given relation R and workload W, consider partitioning the records in R into a minimum number of regions F = {R1, R2, …, Rr} such that for any region Rj, each query in W selects either all records in Rj or none. These regions are the fundamental regions of R induced by W. For example, consider a relation R (with aggregate column C) containing nine records (with C values 10, 20, …, 90), as shown in Figure 2. Let W consist of two queries, Q1 (which selects records with C values between 10 and 50) and Q2 (which selects records with C values between 40 and 70). These two queries induce a partition of R into four fundamental regions, labeled R1, …, R4. The concept of "finest partitioning into groups" in [1] is similar to the concept of fundamental regions. In general the total number of fundamental regions r depends on R and W and is upper-bounded by min(2^|W|, n) where n is the number of records in R. The algorithmic and implementation details of how to identify fundamental regions efficiently are discussed in Section 7.3.1. Solutions for FIXEDSAMP We present a deterministic algorithm called FIXED for solving FIXEDSAMP. Briefly, the algorithm has three steps. The first step identifies all fundamental regions. The second step selects the sample by picking exactly one record from each important fundamental region. The third step assigns appropriate values to additional columns in the sample records. We elaborate on these steps below. 
Step 1 (Identify Fundamental Regions): The first step is to identify the fundamental regions in R induced by the given workload W. Let r be the number of fundamental regions. After Step 1, two cases arise that need to be separately processed: Case A (r ≤ k) and Case B (r > k). Case A (r ≤ k): For this case our algorithm selects a sample that can answer queries without any errors. Details are as follows. Step 2A (Pick Sample Records): We select the sample by picking exactly one record from each fundamental region. Thus for the example in Figure 2, we may pick the records with C values 10, 40, 60, and 80, i.e. one record from each fundamental region. Step 3A (Assign Values to Additional Columns): The idea is that each sample record can be used to summarize all records from the corresponding fundamental region, without incurring any error. More precisely, for a workload consisting of only COUNT queries, we need a single additional column in the sample records (called RegionCount), in which we store the count of the number of records in that fundamental region. This allows us to answer a COUNT query without any errors by running it against the sample and simply summing up the RegionCount column of records selected from the sample by the query. For example, if the queries in Figure 2 were COUNT queries, the sample records chosen in Step 2A will contain an extra RegionCount column with values 3, 2, 2, and 2 respectively. Likewise, for a workload consisting only of SUM queries, we need a single additional column in the sample (called AggSum) that contains the sum of the values in the aggregate column for records in that fundamental region. For example, if the queries in Figure 2 were SUM queries, the sample records chosen in Step 2A will contain an extra AggSum column with values 60, 90, 130, and 170 respectively. If the workload contains a mix of COUNT and SUM queries, we need both the RegionCount and the AggSum columns. 
Note that if we include both these columns, we can also answer AVG queries. Case B (r > k): This is a more difficult case. Our algorithm selects a sample that tries to minimize the errors in queries. Step 2B (Pick Sample Records): Since r > k, we select k regions and then pick one record from each of the selected regions. Our heuristic for selecting k regions is to sort all r regions by their importance and then select the top k. The importance of region Rj is defined as fj·nj², where fj is the sum of the weights of all queries in W that select the region, and nj is the number of records in the region. The intuition is that fj measures the weights of the queries that are affected by Rj while nj² measures the effect on the (squared) error by not including Rj. While more complicated measures of importance are possible, in our experiments we found that the above heuristic does very well. We then pick exactly one record from each selected fundamental region. Step 3B (Assign Values to Additional Columns): Next, for the selected sample records, we determine the values of the RegionCount and AggSum columns. We could of course naively do exactly what was done in Step 3A, i.e. we store in the RegionCount and AggSum columns of each sample record the count and sum of the records of the corresponding fundamental region. However, note that the extra column values of a sample record are not required to have any obvious relationship with some characteristic of the corresponding fundamental region; all we care about is that they contain appropriate values so that the error for the workload is minimized. Thus, we view the problem of assigning values to the RegionCount and AggSum columns of the k records selected in Step 2B as the following optimization problem. We have 2*k unknowns: {RC1, …, RCk} and {AS1, …, ASk}. It is straightforward to express MSE(W) as a quadratic function of these 2*k unknowns. 
We minimize this function by partially differentiating with respect to each variable and setting each result to zero. This gives rise to 2*k simultaneous (sparse) linear equations, which we solve using an iterative technique (based on the Gauss-Seidel method [15]). In our experiments (Section 9) we see that FIXED is significantly more accurate than all randomized schemes for the given workload. We note that the disadvantage of this deterministic method is that a per-query (probabilistic) error guarantee is not possible. Observe that if the incoming query is not identical to a query in the given workload (a realistic scenario), using FIXED can result in unpredictable errors. Therefore, our goal is to incorporate a measure of robustness in our solution by optimizing for a more generalized model of the workload that would allow incoming queries to be similar but not necessarily identical to the given workload. Lifting Workload to Query Distributions As mentioned earlier, we would like our approximate query processing scheme to not only perform well for incoming queries that exactly match one of the queries in the given workload, but also be resilient to the situation when an incoming query is similar but not identical to queries in the workload. In this section we tackle one aspect of the problem, i.e., defining this notion of similarity. More formally, we show how given W, we can define a lifted workload pW, i.e., a probability distribution of incoming queries. Intuitively, for any query Q (not necessarily in W), pW(Q) should be related to the amount of similarity (dissimilarity) of Q to the workload: high if Q is similar to queries in the workload, and low otherwise. In Sections 7 and 8 we show how to leverage such a probability distribution in our approximate query processing solution. Our notion of similarity between queries is not concerned with syntactic similarity of query expressions. 
Rather, we say that two queries Q′ and Q are similar if the records selected by Q′ and Q have significant overlap. We focus on the case of single-table selection queries with aggregation containing either the SUM or COUNT aggregate (this intuition is refined for GROUP BY and join queries in Section 8). Let us consider the simplest case when the workload W consists of exactly one query Q on relation R. Let RQ be the records selected by Q. Our objective is to define the distribution p{Q} (i.e., pW, where W = {<Q, 1>}). Since for the purposes of lifting, we are only concerned with the set of records selected by a query and not the query itself, we make a change in notation for convenience: instead of mapping queries to probabilities, p{Q} maps subsets of R to probabilities. For all R′ ⊆ R, p{Q}(R′) denotes the probability of occurrence of any query that selects exactly the set of records R′. For the moment, assume two parameters δ (½ ≤ δ ≤ 1) and γ (0 ≤ γ ≤ ½) have been specified. Informally, these parameters define the degree to which the workload W influences the query distribution. More formally, for any given record inside (resp. outside) RQ, the parameter δ (resp. γ) represents the probability that an incoming query will select this record. Given these two parameters, we can now derive p{Q}(R′) for any R′ ⊆ R (i.e. the probability of occurrence of any query that exactly selects R′). Figure 3 shows a Venn diagram of R, RQ and R′, where n1, n2, n3, and n4 are the counts of records in the regions indicated. Equation 1 shows the derivation of p{Q}(R′). Note that when n2 or n4 are large (i.e., the overlap is large), p{Q}(R′) is high (i.e. queries that select R′ are likely to occur), whereas when n1 or n3 are large (i.e. the overlap is small), p{Q}(R′) is low (i.e. queries that select R′ are unlikely to occur). Once p{Q} has been defined, pW can be easily derived, as shown in Equation 2. 
$$p_{\{Q\}}(R') = \delta^{n_2}\,(1-\delta)^{n_1}\,\gamma^{n_3}\,(1-\gamma)^{n_4} \qquad (1)$$ $$p_W(R') = \sum_{i=1}^{q} w_i \, p_{\{Q_i\}}(R') \qquad (2)$$ Let us now discuss the problem of setting the parameters δ and γ. As mentioned earlier, the parameters define the degree to which the workload W influences the query distribution pW. We elaborate on this issue by analyzing the effects of (four) different boundary settings of these parameters. δ → 1 and γ → 0: implies that incoming queries are identical to workload queries. δ → 1 and γ → ½: implies that incoming queries are supersets of workload queries. δ → ½ and γ → 0: implies that incoming queries are subsets of workload queries. δ → ½ and γ → ½: implies that incoming queries are unrestricted. Using the above scenarios as guidelines, it may be possible for skilled database administrators to analyze their workload patterns, and manually set the parameters to values that best model their workloads. However, we also present a simple automated approach for parameter setting. The basic idea is to split the available workload into two sets, the training workload and the test workload. The parameters are selected using a two-dimensional grid search approach (based on [26]) such that the lifted training workload (under these settings) most closely fits the test workload. The implementation details of this method appear in Section 9.1. The grid search approach is effective and scalable with data size for low dimensional optimization problems such as ours, and our experiments (Section 9) indicate the approach is promising. We are also investigating alternative approaches such as randomized search and gradient descent. The above represents a simple first attempt at lifting workloads in a rigorous manner. It is similar to kernel density estimation techniques for estimating probability distributions from samples [20,25]. The problem of automatically setting parameters δ and γ is similar to the problem of bandwidth selection in kernel density estimation. 
We are investigating whether known techniques for bandwidth estimation (bootstrap, cross-validation [25]) can be adapted in a scalable manner. Other methods for lifting a workload need to be studied in the future, e.g., modeling the query distribution as a mixture of Gaussians. In fact, the problem of lifting a workload is really orthogonal to the problem of approximate query processing, and we expect it to find applications in other areas. In the next few sections, we develop an approximate query processing scheme, which will attempt to minimize the MSE of the lifted workload, i.e., for pW (which depends on W, δ and γ). Rationale for Stratified Sampling We now state the problem of identifying an appropriate sample as a formal optimization problem. For simplicity, we state the problem when the workload contains queries that reference a single relation R. The formulation can be easily extended for multi-table queries (see Section 8).  In the above formulation, pW is any probability distribution function derived from the given workload W. For example, the lifting model presented in Section 5 can be used to obtain pW. In this section we show why uniform sampling cannot be effectively applied to SAMP, and justify our approach of adapting stratified sampling [13,19] to solve this problem. Stratified sampling is a well-known generalization of uniform sampling where a population is partitioned into multiple strata and samples are selected uniformly from each stratum, with important strata contributing relatively more samples (for a review of classical sampling results from statistics, please see Appendix A). Now consider the following selection query with aggregation on relation R defined in Example 1 (Section 1). Q1 = SELECT COUNT(*) FROM R WHERE ProductId IN (3,4). Recall that R is the relation {<1, 10>, <2,10>, <3, 10>, <4,1000>}. 
We define the population of a query Q (denoted by POPQ) on a relation R as a set of size |R| that contains the value of the aggregated column that is selected by Q, or 0 if the record is not selected. By this definition, POPQ1 = {0, 0, 1, 1}. Observe that POPQ1 has a mix of 1s and 0s and thus has a non-zero variance. Thus, a uniform sampling of POPQ1 would be a poor choice for this problem since it would incur non-zero error. However, if we partition R into two strata {<1, 10>, <2,10>} and {<3, 10>, <4,1000>}, we effectively partition POPQ1 into two strata {0, 0} and {1, 1}. Each stratum now has zero variance, and a stratified sampling strategy that selects at least one sample from each stratum will estimate Q1 with zero error. Note however, that this particular stratification may not work well for a different COUNT query whose population has a different distribution of 1s and 0s. For example, consider a query Q2 = SELECT COUNT(*) FROM R WHERE ProductId IN (1,2,3). POPQ2 = {1, 1, 1, 0} and is different from POPQ1. As can be seen by this example, each query defines its own population of the same relation R, and therefore the challenge is to adapt stratified sampling so that it works well for all queries. An effective scheme will need to stratify the relation such that the expected variance over all queries in each stratum is small, and allocate more samples to strata with larger expected variances. For SUM queries, stratification is also governed by the additional problem of variance in the aggregate column. For example, consider query Q3 = SELECT SUM(Revenue) FROM R WHERE ProductID IN (1,4). POPQ3 = {10, 0, 0, 1000} and therefore has large variance. Thus, a stratified sampling scheme partitions R into r strata containing n1, …, nr records (where Σnj = n), with k1, …, kr records uniformly sampled from each stratum (where Σkj = k). As mentioned in Section 3.2, the scheme also associates a ScaleFactor with each record in the sample. 
Queries are answered by executing them on the sample instead of R. For a COUNT query, the ScaleFactor entries of the selected records are summed, while for a SUM(y) query the expression y*ScaleFactor is summed. If we also wish to return an error guarantee with each query, then instead of ScaleFactor, we have to keep track of each nj and kj individually for each stratum. Solution for Single-Table Selection Queries with Aggregation We now present STRAT, our solution to the problem SAMP (Section 6) for workloads consisting of single-table selection queries with aggregation. We present STRAT for queries containing the COUNT aggregate, and then describe the extensions necessary to deal with the more challenging SUM aggregate. In Section 8, we show how to extend STRAT for aggregation queries with join, nested subqueries and GROUP BY. Our solution consists of three steps. The first step, which we refer to as stratification, is determining (a) how many strata r to partition relation R into, and (b) the records from R that belong to each stratum. At the end of this step we have r strata R1, …, Rr containing n1, …, nr records such that Σnj = n. The second step, called allocation, determines how to divide k (the number of records available for the sample) into integers k1, …, kr across the r strata such that Σkj = k. The third step, referred to as the sampling step, uniformly samples kj records from stratum Rj to form the final sample of k records. The sample so created is then used at runtime to approximately answer queries. The heart of the algorithm is in the first two steps, which are designed to minimize the errors in approximately answering queries in the lifted workload (pW). The third step is straightforward, and can be accomplished with one scan of relation R. 
Solution for COUNT Aggregate Stratification It may appear that the problem of stratification of R for a given workload W of COUNT queries is intractable since when r is not known, there are an exponential number of ways of stratifying R. However under large population assumptions (i.e., when n, the number of records in R, is large), the following lemma says that it is enough to partition R into fundamental regions (see Section 4.2 for the definition of fundamental regions) and treat each region as a stratum. Lemma 1: Consider a relation R with n records and a workload W of COUNT queries. In the limit when n tends to infinity, the fundamental regions F = {R1, R2, …, Rr} represent an optimal stratification. The proof is described in Appendix C. Allocation Once the stratification has been done, the key remaining challenge is how to allocate the k records across the r fundamental regions (strata). Our main idea is to treat this problem as an optimization problem whose goal is to minimize the error over queries in pW. Observe that this is a significant point of departure compared to most previous work in this area, where this allocation step is done in an intuitive but informal manner. We assume that k1, …, kr are unknown variables such that Σkj = k. We leverage the following two results to express MSE(pW) as a function of these variables and then select values for these variables that minimize MSE(pW). First, using Equation 2 (Section 5), it is easy to see that MSE(pW) can be expressed as a weighted sum of the MSE of each query in the workload (as stated by the following lemma). Lemma 2: $MSE(p_W) = \sum_{i=1}^{q} w_i \, MSE(p_{\{Q_i\}})$. Next, for any Q ∈ W, we express MSE(p{Q}) as a function of the kj's. Although obtaining a concise yet exact expression for this function is more difficult, under large population assumptions the following lemma (one of the principal results of this paper) shows how to obtain a succinct approximation for MSE(p{Q}). 
In our experiments, we have found that this formula for MSE(p{Q}) has yielded excellent approximation even when n is relatively small. Lemma 3: For a COUNT query Q in W, let  EMBED Equation.3  EMBED Equation.3  Then  EMBED Equation.3  The proof is described in Appendix C. Now that we have an (approximate) formula for MSE(p{Q}), we can express MSE(pW) as a function of the variables k1, …, kr, using the result from the following corollary, which is obtained by combining Lemmas 2 and 3. Corollary 1: $MSE(p_W) = \sum_{j=1}^{r} \alpha_j / k_j$, where each αj is a function of n1, …, nr, δ, and γ. Intuitively, αj captures the "importance" of a region; it is positively correlated with nj as well as the frequency of queries in the workload that access Rj. Once we have expressed MSE(pW) as a function of the unknown kj's, we are ready to minimize it. Lemma 4: $\sum_{j=1}^{r} \alpha_j / k_j$ is minimized under the constraint $\sum_{j=1}^{r} k_j = k$ by setting $k_j = k \cdot \sqrt{\alpha_j} \,/\, \sum_{i=1}^{r} \sqrt{\alpha_i}$. Proof: We first eliminate one of the variables, say kr, by replacing it with k − (k1 + … + k_{r−1}). If we partially differentiate $\sum_{j=1}^{r} \alpha_j / k_j$ with respect to k1, …, k_{r−1} respectively and set each derivative to zero, this results in r − 1 equations. These equations can be easily solved to prove the lemma. ∎ Lemma 4 provides us with a closed-form and computationally inexpensive solution to the allocation problem since αj depends only on δ, γ and the number of records in each fundamental region. The proof exploits a technique similar to other well-known methods for minimizing functions of the form $\sum_{j} \alpha_j / k_j$ that arise in different contexts (e.g. [2,13]). Note that an admissible solution in our case requires that each kj is an integer > 0. We discuss this issue in Section 7.3.3. For now, we assume that STRAT completes its allocation by dividing k into k1, …, kr according to Lemma 4. Solution for SUM Aggregate We now highlight the extensions to the above solution required for queries containing only the SUM aggregate. 
The key difference arises due to the fact that for SUM, we also need to take into account the variance of the data in the aggregated column (see Example 1 in Section 1). The first effort to deal with variance in data for approximate query processing was the outlier-indexing technique presented in [8]. We use a more general and principled approach that adapts techniques from statistics for dealing with large variance. We note that both the stratification and allocation steps for the SUM are sufficiently different from COUNT, and need to be revisited. Before we get into the details of our solution for SUM, we discuss an interesting scenario in which our solution (and in fact, most sampling based solutions) will fail to work. Consider a relation R that has a mix of positive and negative numbers, and furthermore suppose a subset R′ exists whose SUM is close to zero (i.e. the negative values cancel the positive values), but whose variance is large. Even though a query Q that selects R′ may have a small probability of occurrence in the lifted distribution, if not answered exactly, its relative error can become infinite. Most sampling methods cannot handle such queries, and these queries need to be recognized and processed separately. As we shall show, this problem does not arise if the values in R are all strictly positive or strictly negative. The solution that we present is optimized only for such databases, and in principle can fail for certain kinds of queries on more general databases. However, we note that in our experiments our solution has worked consistently well for all kinds of databases. Stratification If we use the same stratification as in the COUNT case, i.e., strata = fundamental regions, we may get poor solutions for SUM since each stratum now may have large internal variance in the values of the aggregate column. 
Therefore, we use a bucketing technique where we further divide fundamental regions with large variance into a set of finer regions, each of which has significantly lower internal variance. We then treat these finer regions as the strata. For the step of further dividing a fundamental region, we borrow from statistics literature an approximation algorithm for the optimal Neyman Allocation technique (see Appendix A) for dividing a given population into a number of (say h) strata, such that each stratum has significantly lower internal variance. If a density distribution of the population is available (say f(y)), this algorithm computes the cumulative of the function $\sqrt{f(y)}$ and chooses the strata boundaries so that they make equal intervals on this cumulative scale. We use this algorithm to divide each fundamental region into h strata, thus generating a total of h*r finer fundamental regions, which become our strata. In our implementation, we build a histogram for each fundamental region, which approximates the density distribution, after which the stratification into h strata is easily accomplished. This can be done by a single scan of R. We use an equi-width histogram in the above step, although other kinds of histograms are also possible. We set the value h to 6 as suggested in [13]. Let yj be the average of the aggregate column values of all records in new stratum Rj. For the remainder of this section we make the following simplifying assumption. Since the variance of the values within any stratum Rj is small (due to stratification), we assume that each value within the stratum can be approximated as yj. Thus R may be viewed as being partitioned into h*r strata, where each stratum Rj has nj records, each with the same aggregate column value yj. 
Although clearly an approximation, such a view of R makes the subsequent mathematical analysis considerably simpler (and at the same time does not sacrifice much of the accuracy of the approximate query answering procedure). Allocation The structure of the allocation step is similar to COUNT, i.e., it is expressed as an optimization problem with h*r unknowns k1, …, k_{h*r}. However, there is a key difference. Unlike COUNT, here the specific values of the aggregate column yj in each region influence MSE(p{Q}). The following lemma shows how to express MSE(p{Q}) as a function of the kj's (the lemma assumes that the aggregate column values of R are either all positive or all negative). Lemma 5: For a SUM query Q in W, let  EMBED Equation.3  EMBED Equation.3  Then  EMBED Equation.3  The proof is described in Appendix C. As with COUNT, MSE(pW) for SUM is functionally of the form $\sum_{j} \alpha_j / k_j$, although the exact value of αj is different from COUNT (each αj depends on the same parameters n1, …, n_{h*r}, δ, and γ, and additionally on the numbers y1, y2, …, y_{h*r}). We can therefore use the same procedure for minimization as in Lemma 4. Pragmatic Issues Identifying Fundamental Regions During the offline process of building a sample, we use a technique that we refer to as tagging to identify fundamental regions in relation R for a workload W consisting of selection queries. Tagging (logically) associates with each record t ∈ R an additional column called TagColumn (of type varchar) that contains the list of queries in W that reference t. In our implementation, rather than adding TagColumn to R, we separate this column out into a different relation R′ for two reasons. First, from a pragmatic standpoint, users do not want to change the schema of their database if avoidable. Second, we found that it is significantly faster (3X-5X in our experiments) to update the TagColumn in a separate relation R′. Records in R′ have a one-to-one correspondence with records in R. 
This is done by including the key column(s) of R in R′. When a query Q ∈ W is executed, for each record in R required to answer Q, we append the query-id of Q to TagColumn of the corresponding record in R′. When R′ is sorted by TagColumn, records belonging to the same fundamental region appear together. We experimentally evaluate the overhead of tagging in Section 9. We note that techniques reported in [14] can be used to further reduce the cost of tagging records. Also, for selection-only queries, we can use a bit vector representation for TagColumn (instead of varchar) where the number of bits is equal to the number of queries in the workload. In this representation, bit i is set if query Qi requires this record to answer the query. However, this representation is not possible for queries with GROUP BY since the tag also needs to encode the group. Finally, the following efficient method that requires only a single scan of R is also possible for single-table queries. For each record we tag it with all queries in the workload that select it. We can check if a query Q selects the record by applying the conditions in the WHERE clause in Q. Handling Large Number of Fundamental Regions To build the expression for MSE(pW), for each query Q in W the algorithm has to visit each fundamental region. If there are q queries in W and r fundamental regions, the product q*r can become quite large. We handle this scalability issue by eliminating regions of low importance immediately after they have been identified. We used a simple heuristic that removes regions with small fj*nj² values, where fj represents the (weighted) number of queries that access this region. The intuition is that fj measures the number of queries that are affected by Rj, while the expected error by not including the region is proportional to nj². 
For SUM queries, we used a similar technique, where the importance of each region is fj*Yj², where Yj is the sum of the values of the aggregate column within the region. Our experiments show that this heuristic for pruning does not significantly affect quality. Obtaining Integer Solutions In Sections 7.1 and 7.2 we presented a solution to the optimization problem in which the kj's (number of records allocated to region Rj) could be fractional. In reality, however, we are required to pick an integral number of records from each region during the sampling step. In general if most of the kj's are greater than 1, then the following simple rounding scheme works adequately. We round down each kj to ⌊kj⌋. The leftover fractions are accumulated, and redistributed in a greedy manner to the regions that increase the MSE the least. We are also investigating randomized rounding schemes (as discussed in [17]). Finally, we observe that no past work has addressed this important issue. Obtaining an Unbiased Estimator If many kj's are small (< 1), then after the rounding is performed the allocation algorithm may assign some regions with no samples. Moreover, fundamental regions that have been pruned out for scalability reasons as discussed above will also not receive any samples. This introduces a bias in the estimates, i.e. the expected value of the answer may no longer be equal to the true answer. However, the bias is small because the regions that do not get any samples are those that affect the MSE the least. Nevertheless, if we wish to construct an unbiased estimator, we need to make sure that every stratum has at least one sample allocated to it. A simple way of doing this is to merge (i.e., take the union of) the fundamental regions that are allocated some samples with the fundamental regions that are not allocated any samples, thus creating super-regions that become the eventual strata. 
Let R1, …, Ra be the fundamental regions with non-zero allocated samples (say k1, …, ka), and let Ra+1, …, Rr be the remaining fundamental regions (i.e., with no allocated samples). We can employ a simple round-robin merging procedure, where Ra+1 is merged with R1, Ra+2 is merged with R2, and so on (if a < r − a we cycle back to R1), until we have exhausted all the fundamental regions that have no allocated samples. At the end of this process we have a super-regions, say R1, R2, …, Ra, that become our eventual strata (where super-region Rj gets allocated kj samples). Since all the fundamental regions in the relation are part of some super-region, and each super-region has one or more sample records assigned to it, we overcome the problem of biased estimators. As future work we are exploring more principled merging approaches. In such approaches the key factors to account for in merging two fundamental regions are: (a) the internal variance in the values of the fundamental regions, (b) the frequency with which a fundamental region is included by queries in the workload, and (c) the mean value of the aggregate column in a region. The last factor is important because if two fundamental regions with widely differing means are merged, it can cause the internal variance of the merged super-region to be high. Putting it All Together Figure 4 summarizes the key steps in STRAT and analyzes their complexity. The tagging step (Step 1) is I/O bound and dominates the running time of STRAT in practice (see Section 9); its running time is dependent on the number of queries in the workload. Steps 2-3 identify the fundamental regions in the relation for the given workload W and can be accomplished in time O(n*log(n)) where n is the size of the relation. Thus, Steps 1-3 constitute the stratification step of STRAT. 
Steps 4-5 constitute the allocation step (which is CPU bound) and run in time O(q*h*u), where q is the number of queries in W and u is the number of fundamental regions remaining after pruning. Finally, Step 6 is the sampling step that actually generates the sample(s) from the source relations, and can be done in one scan of each source relation.  Extensions for More General Workloads Extensions for a Heterogeneous Mix of Queries Let us consider a workload that contains a mix of COUNT and SUM(y) queries. The lifting model (see Section 5) can be extended for such workloads easily. We need to make sure that each term MSE(p{Q}) is set up appropriately to reflect the type of query Q in the workload since, as explained above, analysis for COUNT and SUM differ. Once these expressions are set up, minimizing the resulting MSE(pW) is straightforward. Now consider a mix of queries such as SUM(x), SUM(y), SUM(x*y+z), etc. (where x, y, z are different columns from the same relation). We cannot directly apply the technique described in Section 7.2.1 for further stratifying each fundamental region to reduce variance because that technique works only for one-dimensional data. That is, if we stratify with the objective of reducing variance of x, the resulting stratification may not reduce variance of y. What we need is a solution to the multivariate stratified sampling problem (see Appendix A). Although several works in statistical literature have developed algorithms for this problem, for our purposes we found that the following simple (yet efficient) heuristic was adequate. First we treat each expression (such as x*y+z) that appears in a query in the workload as a new (derived) column x. Thus our workload is reduced to queries of the form SUM(x) where x is either one of the original columns or a derived column. Let X = {x1, x2, …, xm} be the set of (original or derived) columns that appear in the workload. 
We associate a weight wi with each column xi (where wi is the sum of weights of all queries that aggregate on xi). Essentially, the weight defines the importance of the column. Next, we stratify each fundamental region into h strata by using a simple variant of the h-means clustering algorithm [21]. This algorithm takes as inputs n multi-dimensional points and a parameter h, and produces h clusters such that the sum of the squared distances of each point from its cluster center is minimized (actually the h-means algorithm only produces a local optimum). In our application we treat each record as an m-dimensional point, and the output clusters are the strata. Our implementation differs from the classical h-means algorithm in the following ways. We use a skewed notion of distance, i.e., the (squared) distance between two points t1 = (x1,1, x1,2, …, x1,m) and t2 = (x2,1, x2,2, …, x2,m) is defined as $\sum_{i=1}^{m} w_i \,(x_{1,i} - x_{2,i})^2$. Thus the more important dimensions play a more dominant role in the distance. The intuition is that by minimizing the sum of such squared distances between points and their cluster centers, we will be able to significantly reduce the sum of the variance along all dimensions for all strata. Since the number of dimensions m can become potentially large, we adopt a simple dimensionality reduction technique, where we discard all but a few of the most important columns. As part of our ongoing work, we are exploring other alternative dimensionality reduction techniques that take into account the correlations between the columns. The other complication is that the h-means algorithm may perform a large number of iterations over the dataset before it converges to a local optimum. 
We avoid this problem by (a) first selecting a small uniform sample of the records in each fundamental region, (b) then running the h-means algorithm for a small constant number of iterations on this sample, and (c) finally assigning each record of the fundamental region to its closest cluster center found in step (b). We can also extend our techniques described in Section 7 to handle cases when the workload consists of single-table selection queries with aggregation but where each query can potentially reference a different relation. Although it may appear that we first need to partition the available memory for the sample across the tables and then pick samples from each table, our techniques are general enough to solve this problem in one step (as in Section 7 for single-table queries). In other words, once the relations have been tagged to get the entire set of fundamental regions across all relations, we can set up MSE(pW) similar to the single-table case and minimize it. The fact that the space requirement for each record of different relations is different must be taken into consideration in the allocation step. We can also extend in a straightforward manner our techniques to handle cases when the workload consists of aggregation queries with nested sub-queries. GROUP BY Queries We first show how workloads containing GROUP BY queries can be lifted (see Section 5 for how a workload containing pure selection queries can be lifted). Consider a GROUP BY query Q with weight w in the workload. Let Q partition R into g groups: G1, …, Gg. Within each group Gj, let Sj be the set of records selected. We adopt the following simple lifting model: replace Q in the workload with g separate selection queries (each of weight w/g) that select S1, …, Sg respectively, and use the techniques in Section 5 for lifting the resultant workload. Once we know how to lift a workload containing GROUP BY queries, adapting our algorithm for handling such a workload is straightforward. 
The tagging step (see Section 7.3.1) treats each GROUP BY query Q as a collection of g selection queries with aggregation, and tags the records with the group that they belong to. During the tagging process, for GROUP BY columns of integer data types, we append a double <c, v> in addition to the query-id, where c is the column id of the GROUP BY column and v is the value of that column in record t. For non-integer data types, we treat the value of the GROUP BY column as a string and use a string hashing function to generate an integer value. As described in Section 7.3.1, when R is sorted on TagColumn, all records belonging to the same fundamental region appear together. JOIN Queries Our algorithm can be naturally extended to a broad class of queries involving foreign key joins over multiple relations. Let us say that a relation is a fact relation in the schema if it references (i.e., contains the foreign keys of) one or more reference relations but is not referenced by any other relation. A relation is a dimension relation if it does not contain foreign keys of any other relation. Thus, a relation that is neither a fact relation nor a dimension relation must be referenced by one or more relations and must contain foreign keys of one or more relations. We define a star query to be a query that (a) is over the join of exactly one source relation and a set of dimension relations each of which is referenced by the source relation, (b) has Group By and aggregation over a column of the source relation, and (c) may have selections on source and/or dimension relations. Star queries are widely used in the context of decision support queries. In this section, we discuss how the technique that we have proposed in this paper can be extended for star queries in a straightforward manner. Our approach to handling star queries is as follows. We intend to obtain a sample only over the source relation. 
When a query is posed, we can then use the sample over the source relation to join the dimension relations in their entirety with the sample to compute the aggregate (with appropriate use of ScaleFactor). This method is reasonable because typically the source relation is a large fact relation (where sampling helps), while the other relations are smaller dimension relations. Let us now consider how to pick a sample over the source relation. First, note that our model for lifting (see Section 5) will be based on subsets of the source relations selected in the workload, much like selection queries with aggregation. However, note that a record t in the source relation is deemed useful for a query Q in the workload only if t contributes to at least one answer record of Q, i.e., t must successfully join with other dimension relations and satisfy all the selection conditions in the query as well. For example, consider a query Q: SELECT Sum(Sales) FROM Sales S, Product P WHERE S.ProductId = P.ProductId AND P.ProductType = 'Clothes' AND S.Month = 'Jan'. In this query Sales is the fact relation and Product is the dimension relation. During the tagging step of Sales for query Q, we only tag the records from Sales that join with Product and satisfy both the selection conditions in Q. The tagging step itself is no different from the technique used for single relation queries described in Section 7.3.1. For a workload that consists of star queries over multiple source relations, the technique described in Section 8.1 for selection queries (with aggregation) over multiple relations is used. We note that an alternative approach is to compute join synopses as in [2], which results in reduced run-time cost at the expense of increased storage requirements due to additional columns from the join. Once again, allocation of k between the different synopses can be done by setting up MSE(pW) and minimizing it. 
Extensions for Other Aggregates In principle, a sample created using any algorithm (including STRAT) can be used to answer a query containing any aggregate function. However, since samples chosen by STRAT are optimized for workloads involving COUNT and SUM queries, the errors may be more for queries that involve other aggregates. Observe that a query Q involving AVG(y) can be estimated at run-time without bias as SUM(y)/COUNT. Optimizing for workloads that contain AVG queries is therefore more difficult since MSE(p{Q}) is more complicated to compute. In view of this difficulty, in our implementation we used a simple heuristic of treating an AVG query (with weight w) as a pair of SUM and COUNT queries (each with weight w/2). Implementation and Experimental Results We have implemented STRAT and FIXED on Microsoft SQL Server 2000 and conducted experiments to evaluate their effectiveness. We compared their quality and performance with the following previous work: (a) uniform random sampling (USAMP) (b) weighted sampling (WSAMP) [8,14], (c) outlier indexing combined with weighted sampling (OTLIDX) [8], and (d) congressional sampling (CONG) [1]. We describe the implementation of the previous work, our experimental setup, the results of the experiments, and draw conclusions. Implementation The key implementation aspects of FIXED and STRAT have been discussed in Sections 4, 7 and 8. We now briefly describe our grid search approach (Section 5) for automatically determining the appropriate values of  and  for a workload W. We divide the workload into two equal halves called the training and test set respectively. We divide the two-dimensional space 0.5 (  ( 1, 0 (  ( 0.5 into a grid in which each dimension is divided into a fixed number of intervals. For each point (, ) in the grid, we compute a sample for the training set and estimate the error for the test set. We pick the grid point with the lowest error for the test set as our setting for  and . 
Our implementation scales well with data size since we can obtain samples for multiple grid points in one scan of the relation. We now briefly describe our implementation of the previous work. For uniform sampling (USAMP), each record is accepted with probability equal to the sampling fraction. We generate a uniform random sample in one scan of the relation R using the reservoir sampling technique [29]. For weighted sampling (WSAMP) [8,14] the probability of accepting a record is proportional to the frequency with which the record is selected by queries in the workload. We calculate this frequency for each record using the tagging technique described in Section 7.3.1. The key difference is that rather than keeping track of the list of queries that select the record, we only need a single counter (an integer) for the TagColumn to keep track of the frequency. For the outlier-indexing method (OTLIDX), we implemented the technique described in [8]. The paper does not address the following issue: for a given sample size, how many records of the sample to allocate for the outlier index, and how many to the weighted sample? To give OTLIDX the best possible choice of alternative settings, we tried different strategies for partitioning the sample for different databases and workloads: 25% for outliers and 75% for the weighted sample, 50%-50%, and 75%-25%. We use the 50%-50% strategy since it performed well for most workloads. We also implemented the Congress algorithm (CONG) described in the paper [1]. The algorithm takes as input a set G of GROUP BY columns and builds a sample for answering queries on any subset of G (including the empty set ∅). For each subset of G, it determines the best allocation for each of the finest groups in the relation. The final allocation for a group is proportional to the maximum allocation for that group over all subsets of G. 
Since the algorithm for Congress that takes into account selections in the workload is not publicly available, in our experiments we only evaluate Congress for workloads consisting of pure GROUP BY queries (i.e., no selections). Experimental Setup Hardware/OS All experiments were run on a machine having an x86 550 MHz processor with 256 MB RAM and an internal 18GB hard drive running Microsoft Windows 2000. Databases We used the popular TPC-R benchmark [27] for our experiments. One of the requirements of the benchmark, however, is that the data is generated from a uniform distribution. Since we were interested in comparing the alternatives across different data distributions, we used the publicly available program [11] for generating TPC-R databases with differing data skew. For our experiments we generated 100MB TPC-R databases by varying the Zipfian [32] parameter z over values 1, 1.5, 2, 2.5, and 3. We report a few relevant characteristics of the data in the aggregation column used. First, the ratio of the maximum to the minimum value in the aggregation column was varied between approximately 9000 and 250000 for the different databases. Second, there is no correlation between values in the aggregation column (picked from the Zipfian distribution) and their frequency in the data. Workloads We generated several workloads over the TPC-R schema using an automatic query generation program. The program has the following features that can be turned on: (i) aggregations on the fact table (lineitem), (ii) foreign-key joins between the fact table and a dimension table (part or supplier), (iii) grouping and (iv) selection. We experimented with three classes of workloads containing aggregation: (a) W-SEL (Selections, Foreign-Key Joins), (b) W-GB (Group By, Foreign-Key Joins), and (c) W-SEL-GB (Selections, Group By, Foreign-Key Joins). Thus, e.g., W-SEL-GB-100 indicates a workload from the W-SEL-GB class containing 100 queries. 
The selection conditions were on the following columns: l_shipdate, l_orderkey, l_tax, l_discount, p_partkey, p_size, p_retailprice, s_acctbal, s_suppkey. As in [1], we used the grouping columns l_shipdate, l_returnflag and l_linestatus. The aggregate column was l_extendedprice, and the aggregation expressions used were COUNT and SUM. For each workload, we used the first half of the workload as the training set that was used to determine the sample, and the second half as the test set. We controlled the degree of similarity between the training and test set using the following two parameters: (a) the set of columns on which conditions are allowed in the training set and in the test set, and (b) for each column on which a selection is defined, control the range of the selection condition. Parameters We varied the following parameters in our experiments: (a) Skew of the data, z (b) The sampling fraction f was varied between: 0.1% - 10%. (c) Workload size was varied between 25 - 800 queries. All numbers reported are the average over multiple runs. Error Metric As with previous work, we report the average relative error over all queries in the workload, i.e., L1 metric (Section 3.3). We have found in our experiments that similar trends also hold for the RMSE (L2) error metric (see Section 3.3). Results Quality vs. Sampling Fraction We compare the quality (errors) of the various techniques for the COUNT and SUM aggregates as the sampling fraction is varied keeping the workload (W-SEL-GB-100) and data skew (z = 2) fixed. As we see from Figures 5 and 6, for the test set (for COUNT and SUM aggregates respectively), the errors for STRAT are relatively low even with as little as 1% sampling whereas errors with other methods (USAMP, WSAMP, OTLIDX) are significantly higher. The key point to note for the SUM aggregate is that STRAT is able to achieve better quality than OTLIDX by taking into account the variance in the data values in a more principled way.  
EMBED Excel.Chart.8 \s  EMBED Excel.Chart.8 \s   EMBED Excel.Chart.8 \s  EMBED Excel.Chart.8 \s  Next, we compare the quality of the various alternatives for the training set itself. We see the effectiveness of our stratification algorithm from Figure 7 (for the COUNT aggregate) and Figure 8 (for the SUM aggregate), where STRAT gives errors close to 0 once the sample size exceeds the number of fundamental regions induced by the workload. For comparisons with CONG, we consider workloads with only GROUP BY queries (i.e., no selection). Figure 9 (test set) shows that for the COUNT aggregate, STRAT performs best among all methods. We note that CONG also does significantly better than the other methods. The reason STRAT is more accurate than CONG is that despite attempting to account for all groups, CONG still allocates too many records to large groups and not enough for small groups, whereas STRAT is able to balance the allocations better. For GROUP-BY queries with the SUM aggregate, we see from Figure 10 (test set) that once again STRAT performs best among all methods. However, OTLIDX appears to perform better than CONG since unlike STRAT and OTLIDX, CONG does not take into account the data variance when allocating samples. Quality Vs. Overlap between Training Set and Test Set We vary the degree of overlap of the minimum and maximum values of the range from which selection conditions are generated. The degree of overlap is an informal measure of correlation. For example, a degree of overlap of 0% (negative correlation) implies that for each column in a selection condition, the ranges of values from which selection conditions can be chosen for the test set and the training set are disjoint, whereas 100% overlap (positive correlation) implies that the ranges are the same. From Figure 11 we see that for small overlap, as expected, STRAT (δ = 0.90, γ = 0.01) gives higher errors than other methods. 
However, for moderate to large overlaps, STRAT is significantly better.  EMBED Excel.Chart.8 \s  EMBED Excel.Chart.8 \s   EMBED Excel.Chart.8 \s  EMBED Excel.Chart.8 \s  Automatically Determining the Lifting Parameters ( and ( For a given workload W-SEL-GB-100 and sampling fraction of 1%, Figure 12 shows how the error for the test set varies with ( and ( (see Sections 5, 9.1). We see that the error varies gradually, which indicates that our grid search approach is promising. STRAT vs. FIXED We compare STRAT (with ( = 0.99 and ( = 0.01) with FIXED for the given workload (i.e., on the training set) to illustrate the benefits of our deterministic solution. Note that the setting of ( and ( imply that we expect queries that are very similar to the given workload. We use the W-SEL-GB-100 workload, sampling fraction f = 0.2%, and varied the data skew; we report errors for the COUNT aggregate. We found that across all data skew values, FIXED gives significantly lower error (difference in error varied between 13%-29%) since the deterministic method is able to exploit the greater freedom it has in optimizing the samples (see Section 4). We note that (a) unlike STRAT, FIXED has the drawback that we cannot report a standard error for the estimate, (b) for higher sampling fractions STRAT also approaches near-zero errors (Figure 7). Quality vs. Data Skew In this experiment we compared the quality of the different methods as the skew of the data (z) is varied between 1 and 3, keeping the workload (W-SEL-GB-100) and sampling fraction (1%) fixed, for the SUM aggregate. We find (see Figure 13) that for moderately skewed to highly skewed data (z > 1), STRAT gives significantly lower errors than other methods (by about 20%). For low skew data (z = 1), the other methods are comparable to STRAT.  
EMBED Excel.Chart.8 \s  EMBED Excel.Chart.8 \s  Comparison of Time for Building Samples We compare the time to build the sample for WSAMP, OTLIDX, and STRAT for three different workloads of 100 queries each. We report numbers for the 100 MB database, data skew z = 2. Figure 14 shows that the additional time (relative to WSAMP) taken by STRAT to tag the database (Section 7.3.1) for the given workload is small. The difference between the tagging for WSAMP and STRAT is that in STRAT we additionally need to record the query-id information (and for GROUP BY queries, the group information). Finally, for a 1% sample, we report that the time to actually pick the sample after tagging was 15 sec, 70 sec, and 36 sec respectively for WSAMP, STRAT and OTLIDX for the W-SEL-GB-100 workload. Thus, the total time to build a sample is dominated by the time taken to tag the relation for the given workload. Comparison on a Real Data Set We compare the quality of various approaches on a real data warehouse within our organization, used to track sales of products. We used ( = 0.90 and (=0.01 for STRAT. We used a portion of the database of approximately 0.84 million rows; training and test sets of 25 real queries used by the application each. These queries typically contained 3-6 GROUP BY columns and 2-5 selection conditions per query. Figures 15 and 16 show (respectively for the test set and the training set), STRAT performs consistently better than other methods for this real data set. Results for L2 Metric We present results using the L2 metric (RMSE) of our experiments for the W-SEL-GB-100 workload for the test set for the COUNT aggregate (Figure 17) and the SUM aggregate (Figure 19). For corresponding numbers with the L1 metric, please see Figures 6 and 7 respectively. As we see from the figures, the results have similar trends as those reported with the L1 metric. The only difference is that the errors are relatively smaller (for all methods) using L2.  
EMBED Excel.Chart.8 \s  EMBED Excel.Chart.8 \s   EMBED Excel.Chart.8 \s   EMBED Excel.Chart.8 \s  Summary In this paper, we present a comprehensive solution to the problem of picking precomputed samples for approximately answering aggregate queries and show how it can be implemented in a commercial database system. Our solution not only works well for the given workload, but via a novel technique for lifting a workload, our solution can be tuned to work well even for workloads that are similar but not identical to the given workload. Our solution is robust since it also handles the problems of data variance, heterogeneous mixes of queries, GROUP BY and foreign-key joins. We are trying to generalize our algorithms for workloads containing joins other than foreign-key joins. We will also compare our methods against other approaches to approximate query processing that are not based on precomputed samples (such as online sampling, histograms, and wavelet based approaches). Acknowledgement We are thankful to Venkatesh Ganti for his thoughtful comments on this paper. References Acharya S., Gibbons P. B., Poosala V. Congressional Samples for Approximate Answering of Group-By Queries. Proc. of ACM SIGMOD, 2000. Acharya S., Gibbons P. B., Poosala V., Ramaswamy S. Join Synopses for Approximate Query Answering. Proc. of ACM SIGMOD, 1999. Barbar D., and Sullivan M. Quasi-Cubes: Exploiting Approximations in Multidimensional Databases. SIGMOD Record, Vol. 26 No. 3, Sep. 1997. Barbar D. and Wu. X. Using Approximations to Scale Exploratory Data Analysis in Datacubes. Proceedings of the 1999 ACM SIGKDD Int'l Conference on Knowledge Discovery and Data Mining, San Diego, CA, Aug. 1999. Bethel J. Sample Allocation in Multivariate Surveys. Survey Methodology, 1989. Causey B. D. Computational Aspects of Optimal Allocation in Multivariate Stratified Sampling. SIAM J. of Scientific and Statistical Computing, Vol. 4 No 2, 1983. Chakrabarti K., Garofalakis M., Rastogi R., Shim K. 
Approximate Query Processing Using Wavelets. Proc. of VLDB 2000. Chaudhuri S., Das G., Datar M., Motwani R., Narasayya V. Overcoming Limitations of Sampling for Aggregation Queries. Proc. of IEEE Conf. on Data Engineering, 2001. Chaudhuri S., Das G., Narasayya V. A Robust, Optimization- Based Approach for Approximate Answering of Aggregation Queries. Proc. of ACM SIGMOD, 2001. Chaudhuri S., Motwani R., Narasayya V. Random Sampling Over Joins. Proc. of ACM SIGMOD, 1999. Chaudhuri S., Narasayya V. Program for TPC-D Data Generation with Skew.  HYPERLINK "http://research.microsoft.com/dmx/" http://research.microsoft.com/dmx/ Chromy J. W. Design Optimization with Multiple Objectives. Proc. of Survey Research Section, American Statistical Association, 1987. Cochran W.G. Sampling Techniques. John Wiley & Sons, New York, Third edition, 1977. Ganti V., Lee M. L., Ramakrishnan R. ICICLES: Self-tuning Samples for Approximate Query Answering. Proc. of VLDB, 2000. Golub G., Loan C. Matrix Computations. Johns Hopkins University Press, 1989. Hellerstein J., Haas P., Wang H. Online Aggregation. Proc. of ACM SIGMOD, 1997. HochBaum B. Approximation Algorithms for NP-Hard Problems. PWS Publishing, 1997. Ioannidis Y., Poosala V. Histogram Based Approximations of Set-Valued Query Answers. Proc. of VLDB 1999. Lohr, S. Sampling: Design and Analysis. Duxbury Press, 1999. Marron J. S. Smoothing Methods for Learning from Data. Proc. of ACM SIGKDD Int'l Conference on Knowledge Discovery and Data Mining, 1998, Tutorial. Mitchell T. Machine Learning. McGraw Hill, 1997. Motwani R. and Raghavan P. Randomized Algorithms. Cambridge University Press, 1995. F. Olken. Random Sampling from Databases. Ph.D. Dissertation, Computer Science, UC Berkeley, 1993. Poosala V., Ganti V. Fast Approximate Answers to Aggregate Queries on a Data Cube. Proc. of the 1999, Intl. Conf. on Scientific and Statistical Database Management. Silverman, B. W. Density Estimation. Chapman and Hall. 1986. Thisted R. A. 
Elements of Statistical Computing. Chapman and Hall. 1988. TPC Benchmark R. Decision Support. Revision 1.1.0.  HYPERLINK "http://www.tpc.org" http://www.tpc.org. Valliant R., and J. Gentle. An Application of Mathematical Programming to a Sample Allocation Problem. Computational Statistics and Data Analysis, 25, 1997, 337-360. Vitter J. Random Sampling with a Reservoir. ACM Transactions on Mathematical Software, 11(1), 1985. Vitter J., Wang M. Approximate Computation of Multidimensional Aggregates of Sparse Data using Wavelet. Proc. of ACM SIGMOD, 1999. Vitter J., Wang M., Iyer B. Data Cube Approximation and Histogram via Wavelets. Conf. on Information and Knowledge Management, 1998. Zipf G.E. Human Behavior and the Principle of Least Effort. Addison-Wesley Press Inc, 1949. Appendix A: Review of Random Sampling Techniques Classical sampling techniques are usually applied to estimate aggregates of a single given population (such as the average per capita income of a country). The reason sampling is used is because the population is usually too large to allow exact aggregate computation. Given certain constraints (such as an upper bound on the number of samples), the challenge in such problems is to design a sampling strategy that is as accurate as possible. We present relevant results on two well-known techniques, uniform sampling and stratified sampling (these results are from [13,19]). We also briefly discuss weighted sampling, which may be view as an approximation of stratified sampling. Uniform Sampling Consider a large population, i.e. a set of real numbers R = {y1,, yn}. Let the average be y, the sum be Y and the variance be S2. Suppose we uniformly sample k numbers. Let the mean of the sample be . Lemma 6: The quantity is an unbiased estimator for y, i.e.  EMBED Equation.3 The quantity EMBED Equation.3 is an unbiased estimator for Y, i.e.  
EMBED Equation.3  The variance (also known as standard error) in estimating y is  EMBED Equation.3  The variance in estimating Y is  EMBED Equation.3  The relative squared error in estimating Y is  EMBED Equation.3  The above squared error formulas assume that n is much larger than k (more accurate formulas that are sensitive to the finiteness of n are also known). As can be seen, the squared errors of the estimates depend directly on the variance in the data, and inversely on the number of samples. Stratified Sampling One can often do better than uniform sampling by exploiting available (partial) knowledge of the population. For example, information on last years per capita income distribution may be available, and can be assumed to be highly correlated with this years distribution. One can identify regions of high variance in last years data, and design a sampling strategy where one samples more from such regions. Such a scheme can be a highly accurate estimator for the current population. One such strategy is known as stratified sampling. Here the current population is partitioned into r strata, with the jth stratum Rj containing nj numbers that have sum Yj and variance S2j. Suppose we uniformly sample k1,, kr numbers from each of the R1,, Rr strata respectively. Let the means of the respective samples be 1, , r. Lemma 7: Define as follows  EMBED Equation.3  The quantity is an unbiased estimator for y, i.e.  EMBED Equation.3  The quantity EMBED Equation.3 is an unbiased estimator for Y, i.e.  EMBED Equation.3  The variance in estimating y is  EMBED Equation.3  The variance in estimating Y is  EMBED Equation.3  The relative squared error in estimating Y is  EMBED Equation.3  As with uniform sampling, the error formulas here assume that each nj is much larger than the corresponding kj. Stratified sampling can be better than uniform sampling because different strata can be designed to reduce the variance. 
The issues in stratified sampling are: how to select r (the number of strata), how to partition the population into r strata, and how to allocate a total of k samples over all strata so as to minimize the error. If perfect information about the population is available, then the more strata, the better (limited only by the fact that k samples have to be distributed among r strata). Answering these questions requires the availability of (partial) information about the current population. Lemma 8: Given a population R = {y1, …, yn}, k and r, the optimal way to form r strata and allocate k samples among all strata is to: Sort R and select strata boundaries so that  EMBED Equation.3  is minimized. For the jth stratum, set the number of samples kj as follows (this is also known as the Neyman Allocation)  EMBED Equation.3  However, the above lemma does not answer the question of how to select r. If we have complete knowledge of the current population, it is easy to see that the more strata the better. However, we usually have only partial knowledge of the population (e.g. we may have last year's population distribution, which does not fully correlate with this year's population), so stratified sampling starts getting inaccurate beyond r = 6 [13]. An efficient procedure described in [13] to approximate the optimal stratification is as follows: if a density distribution of the population is available (say f(y)), compute the cumulative of the function  EMBED Equation.3  and choose the strata boundaries so that they make equal intervals on this cumulative scale. Multivariate Stratified Sampling Now suppose the population R = {y1, …, yn} is a set of m-dimensional vectors, i.e. each yi = [yi,1, yi,2, …, yi,m]. Suppose we want to estimate the means along each dimension. Given k and r, we wish to form r strata and allocate k samples among all strata such that the sum of the squared errors along each dimension is minimized. 
This multivariate generalization of the Neyman Allocation has been tackled in several works [5,12,28]. In our application we encounter a variation of this problem, which we solve using a simple clustering based heuristic (see Section 8.1). Weighted Sampling Weighted sampling can be viewed as an approximation of stratified sampling. Each number in the entire population has to be examined in order to select the sample. Assume that we are given a parameter k, and that each yt in the population has been assigned a weight wt (indicating its importance). Each yt is selected to be included in the sample with probability  EMBED Equation.3 . When weights are the same, this reduces to an approximation of uniform sampling. To ensure that exactly k records are picked, we can use the reservoir sampling technique described in [29]. Appendix B: Counter-Example for Congress We describe a scenario in which the algorithm Congress [1] is sub-optimal. Consider two GROUP BY-COUNT queries Q1 and Q2 over a relation. Let Q1 define only one group, g1 (i.e. the entire relation R), while Q2 defines two groups, a large group g21 with n21 records (where n21 ≈ n) and a very small group g22 with the remaining n22 = n − n21 records. As the paper [1] assumes, let the expected query distribution be such that each GROUP BY query is equally likely (but the selection conditions may vary, under the assumption that for each query the per-group selectivity is the same for all groups). Let k (k > 0) be the number of records to be selected in the sample. Assume a large population, i.e. that even n22 is large compared to k. Congress divides R into two strata R1 and R2 (essentially identical to g21 and g22 respectively). It allocates k samples between the two as follows:  EMBED Equation.3   While this allocation has seemingly intuitive appeal, it does not minimize any of the well-known error metrics for the expected query distribution (such as MSE, L1, or L∞). 
For example, suppose we wanted to minimize the MSE of the expected query distribution. Let k1 and k2 be the (unknown) allocation of the samples in the two strata R1 and R2. MSE of queries like Q1 is proportional to  EMBED Equation.3  MSE of queries like Q2 is proportional to  EMBED Equation.3  Thus overall MSE is proportional to  EMBED Equation.3  Using simple differentiation techniques, the overall MSE is minimized if  EMBED Equation.3   Clearly these values are not the same as that allocated by Congress. Instead of MSE, suppose we wanted to minimize the L1 error of the expected query distribution. L1 error of queries like Q1 is proportional to  EMBED Equation.3  L1 error of queries like Q2 is proportional to  EMBED Equation.3  Thus overall L1 error is proportional to  EMBED Equation.3  Using simple differentiation techniques, the overall L1 error is minimized if  EMBED Equation.3   Once again, these values are not the same as that allocated by Congress. Finally, consider the L( metric. L( error of queries like Q1 is proportional to  EMBED Equation.3  L( error of queries like Q2 is proportional to  EMBED Equation.3  Thus overall L( error is proportional to  EMBED Equation.3  It is easy to see that the optimal solutions are  EMBED Equation.3   Once again, these values are not the same as that allocated by Congress. Thus, the above simple example shows that the allocation scheme suggested by Congress is sub-optimal for several common error metrics. Appendix C: Proofs for COUNT and SUM Aggregates We first prove Lemma 3 then prove Lemmas 1 and 5. Proof of Lemma 3 We assume that (, (, r, k1, k2, , kr are all constants, while n, n1, n2, , nr may vary. Q will always represent a fixed COUNT query from the workload, whereas Q will always represent an incoming query drawn randomly from the distribution p{Q}. The sets RQ ( R and R ( R will represent the sets of records selected by Q and Q respectively. 
Recall from Section 5 that the distribution p{Q} actually maps subsets of R to probabilities. Thus, in the rest of the proof we will sometimes view a query simply as the subsets of records it selects (e.g. Q and R may be used interchangeably). Since we view queries as subsets of records, we see that MSE(p{Q})is the expected value of SE(R) for a subset R randomly drawn from the distribution p{Q} (recall the exact definitions of squared error and mean squared error from Section 3.3). That is, Our task is to expand and simplify the RHS of Equation (3) and show that it is approximately equal to ApproxMSE(p{Q}). We first provide an intuitive explanation of why the lemma is true. Later we provide the formal details of the proof. Assume each nj is large (the proof for the more general case where we assume only n to be large appears later). Let Q be a query randomly drawn from the distribution p{Q}. Due to the nature of our lifting model, the number of records selected by Q in each fundamental region follows a binomial distribution. Since each nj is large, the corresponding binomial distributed is sharply concentrated around its mean value. Consequently an overwhelming number of queries from the distribution p{Q} will select approximately (*nj (resp. (*nj) records from Rj, where Rj is a fundamental region inside (resp. outside) RQ. Thus, MSE(p{Q}) can be approximated as the MSE of all such queries since the contribution from the other queries is negligible. Consider the jth term in the left summation in the numerator. It represents the expected squared error in estimating the count of (R(Rj), i.e., in estimating the sum of the portion of POPQ that corresponds to Rj (see Section 6 for a definition of POPQ). This may be derived using Lemma 7 presented in Appendix A. Similarly the right summation in the numerator represents the expected squared error in estimating the count of (R((R\ RQ)). Thus the numerator represents the expected squared error in estimating the count of R. 
Dividing by the denominator represents the expected relative squared error in estimating the count of R. We now provide the formal details of the proof. We first partition the set of all 2n subsets of R into a (large) number of groups as follows. Consider r integers m1, m2, , mr, such that 0 d" mj d" nj. These integers define a group G(m1, m2, & , mr) as follows: That is, G(m1, m2, & , mr) contains all subsets of R that select exactly m1 records from R1, m2 records from R2, and so on. Clearly, the number of groups is  EMBED Equation.3  We can also derive the size of each group (i.e., the number of subsets in each group):  Consider a subset R that belong to any given group, say G(m1, m2, , mr). We show that the squared error SE(R) may be derived as follows. Consider the jth term in the numerator. It represents the expected squared error in estimating the count of (R(Rj), i.e., in estimating the sum of the portion of POPQ that corresponds to Rj (see Section 6 for a definition of POPQ). This portion of POPQ may be viewed as a population of size nj, with mj 1s and nj ( mj 0s. It is easy to derive the variance of such a population to be  EMBED Equation.3 . Since each region has kj samples allocated to it, we can use Lemma 7 from Appendix A to show that the entire numerator represents the squared error in estimating the count of R. The denominator represents the expected count of R. Thus the ratio represents the relative squared error in estimating the count of R. Our task is to show that the expected value of SE(R) approaches ApproxMSE(p{Q}) in the limit when n tends to infinity. Note that SE(R) equals ApproxMSE(p{Q}) if we replace each mj by (*nj or (*nj, depending on whether Rj is inside or outside RQ. It is easy to see that SE(R) is the same for each subset R in the group G(m1, m2, , mr). Let us denote this as SE(m1, m2, , mr). Our next task is to expand p{Q}(R) for any given subset R of group G(m1, m2, , mr). 
Using Equation 1 in Section 5, we can derive p{Q}(R) as follows.  Note that p{Q}(R) is the same for each subset R in the group G(m1, m2, , mr). Let p(m1, m2, , mr) denote the probability that a randomly drawn subset R belongs to G(m1, m2, , mr). Using Equations (4) and (6), it is easy to see that By rearranging factors we get Observe that in the above equation the fundamental regions inside RQ are treated differently from the fundamental regions that are outside RQ. In the interest of uniformity, we adopt the following notation. Let each fundamental region Rj be associated with a parameter j, such that if Rj is inside RQ then j =  and if Rj is outside RQ then j = . Equation (7) may then be rewritten as The quantity  EMBED Equation.3  is the probability that a random subset R will select exactly mj records from Rj. Let us denote this as p(mj).  If we view mj as a random variable, p(mj) is a binomial distribution with a mean of *nj (see Appendix D). Thus p(m1, m2, & , mr) is simply a product of different binomial distributions. Let C be the cartesian product  EMBED Equation.3  Equation (3) may be rewritten as Using Equations (5) and (8), we get Let us label a fundamental region Rj as small if 0 < nj <  EMBED Equation.3 , and large otherwise (i.e., if  EMBED Equation.3 d" nj d" n). For each large fundamental region Rj, define quantity j as  EMBED Equation.3  Let C1 ( C be the cartesian product defined as  EMBED Equation.3 , where if Rj is small then lj = 0 and uj = nj, and if Rj is large then lj = (1  j)jnj and uj = (1+ j)jnj. Let C2 be the set of vectors defined as C \ C1. Define MSE1 and MSE2 as follows:  Clearly we have EMBED Equation.3  We shall first show that in the limit when n tends to infinity, MSE2 goes to 0. 
If we examine Equation (5), we can derive a crude (but simple) upper bound for SE(m1, m2, , mr) as follows:   Thus we get Using Chernoff bounds (see Appendix D), the above is  EMBED Equation.3  It is quite easy to show that as n tends to infinity, each of the above terms goes to zero. To see this, consider the jth term. If we take the natural log of the numerator, we get an expression that is linear in ln(n), whereas if we take the natural log of the denominator, we get an expression that is quadratic in ln(n). Thus when n tends to infinity, the denominator grows asymptotically more rapidly than the numerator. Thus we conclude that  Next, we turn our attention to MSE1. We shall show that when n tends to infinity, the ratio  EMBED Equation.3  Recall the definition of ApproxMSE(p{Q}) from the statement of the lemma. Using j instead of  and , we can rewrite it as follows. Using Equations (5), (9) and (11), we get  We first show that we only need be concerned with large fundamental regions. Dividing the numerator and denominator by n2, we get For small fundamental regions Rj, we know that in the limit when n tends to infinity, both nj/n and mj/n tend to zero. Thus in the limit the above expression reduces to  Multiplying the numerator and denominator by n2, we get back We know that for each large fundamental region, (1 j)jnj d" mj d" (1+ j)jnj. We first calculate an upper bound for the limit. The RHS of Equation (12) can be upper-bounded as For each large Rj, it is easy to see that  Thus, in the limit the jth terms in the numerator will approach the corresponding jth terms in the denominator of Equation (13). Thus we have  We next calculate a lower bound for RHS of Equation (14). The RHS can be lower-bounded as   For each large fundamental region, we note that  As for the remaining portion of Equation (15), in the limit the jth terms in the numerator will approach the corresponding jth terms in the denominator of Equation (15). 
Equation (15) thus reduces to  Combining Equations (10), (14) and (16), we have  EMBED Equation.3  This concludes the proof of Lemma 3. % Proof of Lemma 1 Let G = {G1, G2& .., Gs} represent an optimal stratification of R. Consider the stratification H = {H1, H2, & .Hu} = {Rj(Si | 1d" j d" r, 1 d" i d" s}. Since stratifying an optimal stratification any further does not reduce the MSE, H also represents an optimal stratification. We first describe the simple case where the workload consists of a single query Q. Let us use the notation MSEE(p{Q}) to denote MSE(p{Q}) for a stratification E. We shall show that the minimum value of MSEH(p{Q}) is asymptotically the same as the minimum value of MSEF(p{Q}). MSEH(p{Q}) can be asymptotically calculated according to Lemma 3 (even though Lemma 3 was proven for the stratification F, it can be extended for any stratification that represents a further stratification of F, such as H). Let h1, h2, , hu be the optimal allocation of the k samples in the strata H1, H2, ., Hu respectively. By Lemma 3, we get In minimizing MSEH(p{Q}) we can ignore the denominator. Thus is equivalent to minimizing the following expression.  EMBED Equation.3  Using techniques similar to those in Lemma 4, we see that this gets minimized when the hjs take on the following values. Plugging these values back in MSEH(p{Q}), we see that the minimum value of MSEH(p{Q}) is asymptotically equal to Now let us consider the stratification F. Let k1, k2, , kr be the optimal allocation of the k samples in the fundamental regions R1, R2, , Rr respectively. Similar to above, we see that MSEF(p{Q}) gets minimized when the kjs take on the following values. Plugging these values back, we can similarly derive the minimum value of MSEF(p{Q}) to be asymptotically equal to Since H represents a further stratification of F (i.e. 
each stratum of H is wholly contained within some stratum of F), it is easy to see that asymptotically  The above arguments can be extended to the case when the workload contains more than one query. This concludes the proof of Lemma 1.% Proof of Lemma 5 As in the earlier proof of Lemma 3, we assume that (, (, r, k1, k2, , kr are all constants, while n, n1, n2, , nr may vary. Q will always represent a fixed COUNT query from the workload, whereas Q will always represent an incoming query drawn randomly from the distribution p{Q}. Also recall that we view R as being partitioned into h*r strata, where each stratum Rj has nj records, each with the same aggregate column value yj (which is positive). Since the proof is very similar to that of Lemma 3, we only highlight the important differences. Our notation will be similar to that used in Lemma 3. The equation corresponding to Equation (5) may be easily derived as As before, let us define MSE1 and MSE2 such that We shall first show that in the limit when n tends to infinity, MSE2 goes to 0. If we examine Equation (17), we can derive a crude (but simple) upper bound for SE(m1, m2, , mh*r) as follows: Important: Note that in the above derivation (especially in the first simplification), it is critical that all the yjs are of the same sign (i.e., either all positive or all negative). Using arguments similar to Lemma 3, we can conclude that  The rest of the proof is very similar to that of Lemma 3 and we omit the straightforward details. It can be shown that when n tends to infinity, the ratio  EMBED Equation.3  Thus we have  EMBED Equation.3  This concludes the proof of Lemma 5. % Appendix D: Binomial Distributions and Chernoff Bounds Consider a coin with bias  (i.e., when tossed the probability of head is ). Let m be the number of heads that occur when the coin is tossed independently n times. If we view m as a random variable, its probability distribution is as follows.  This is the well-known binomial distribution [22]. 
The expected value of m is  EMBED Equation.3  The variance of m is  EMBED Equation.3  As n gets large, it is known that the probability distribution gets tightly concentrated around its mean. This is quantified by the following Chernoff bound (where c is a constant) [22]  EMBED Equation.3   A conference version of this paper appeared in SIGMOD 2001 [8].  Appendix A contains a review of several classical sampling results from statistics.  In general, we allow a small constant number of additional columns with each record (e.g., see deterministic solution in Section 4).  This notation makes it convenient to give a single probability to the (infinite) set of queries that only syntactically differ in their WHERE clauses, yet select the same R. Note that the domain of p{Q} is finite, i.e. the power set of R. PAGE  PAGE 6  EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3  (17)  EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3  (16)  EMBED Equation.3   EMBED Equation.3   EMBED Equation.3  (14)  EMBED Equation.3   EMBED Equation.3  (13)  EMBED Equation.3  (12)  EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3  (11)  EMBED Equation.3  (10)  EMBED Equation.3  (15)  EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3  (9)  EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3   EMBED Equation.3  (8)  EMBED Equation.3  (7)  EMBED Equation.3   EMBED Equation.3  (6)  EMBED Equation.3  (5)  EMBED Equation.3   EMBED Equation.3  (4)  EMBED Equation.3   EMBED Equation.3  (3) Minimizing L( Minimizing L1 Minimizing MSE Congress Figure 17: L2 error for COUNT aggregate Figure 18: L2 error for SUM aggregate Figure 15. Error vs. Sampling Fraction (Test Set) Figure 16. Error vs. Sampling Fraction (Training Set) Figure 13. 
Variation in Data Skew: SUM Aggregate Figure 14. Comparison of running time to build sample Figure 11. Varying overlap between training set and test set Figure 12. Error vs. lifting parameters for test set Figure 9. GROUP BY only workload. COUNT Aggregate Test Set Figure 10. GROUP BY only workload. SUM Aggregate Test Set Figure 7. COUNT Aggregate Training Set Figure 8. SUM Aggregate Training Set Figure 5. COUNT Aggregate Test Set Figure 6. SUM Aggregate Test Set Figure 4. Algorithm STRAT For each query Q ( W, tag records in R used to answer query Q using the tagging algorithm described in Section 7.3.1 Let R1,, Ru be the fundamental regions after pruning out unimportant fundamental regions (see Section 7.3.2). For SUM queries, further divide each fundamental region Rj into h finer regions using the algorithm in Section 7.2.1. For each query Q ( W, compute (j of each (finer) region Rj referenced in Q, according to the formulas in Section 7.1.2 and 7.2.2. At the end of this step, we have computed an (j for each Rj. Solve the optimization problem of distributing k records to regions using the technique in Section 7.1.2. Let kj be the number of records allocated to region Rj. Perform stratified sampling to pick kj records from region Rj and generate a sample of R. Problem: SAMP Input: R, pW (a probability distribution function specified by W), and k Output: A sample of k records, (with the appropriate additional column(s)) such that the MSE(pW) is minimized (2) (1) Figure 3 R RQ R n4 n3 n2 n1 Figure 2. Fundamental Regions R4 80 90 R3 60 70 R2 40 50 R1 10 20 30 Q2 Q1 R Problem: FIXEDSAMP Input: R, W, k Output: A sample of k records (with appropriate additional columns) such that MSE(W) is minimized. Database Tables Samples Workload Build Samples Offline Online Answer set with error estimate Rewrite and Execute Incoming Query Figure 1. 
Architecture for Approximate Query Processing hmvwķįįyķįj}UmHnHujUmHnHumHnHu&j>*B*UmHnHphu0JmHnHuj0JUmHnHu mHnHuCJ jCJU5CJ OJQJ\^JaJ6CJ]aJCJaJ j0JU OJQJ^JCJ*UVhs   $ & Fxa$ $a$  lv0 M d  q  V'  V'  V' $d 7$8$H$]^a$d 7$8$H$]^ $ & Fxa$      * + , - . / 0 1 2 N O P Q R S ɽɯɽכɽɍɽyɽ&j>*B*UmHnHphujqUmHnHu&j>*B*UmHnHphujwUmHnHujUmHnHumHnHu mHnHu0JmHnHuj0JUmHnHu&j>*B*UmHnHphu,       + , - G H I J K L M N O k l m n q r  ïޡÍj_UmHnHu&j>*B*UmHnHphujeUmHnHu&j>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHujkUmHnHu.      ( ) * + . / B C D ^ _ ` a b c d e f ɽɯɽכɽɍɽyɽ&j>*B*UmHnHphujSUmHnHu&j>*B*UmHnHphujYUmHnHujUmHnHumHnHu mHnHu0JmHnHuj0JUmHnHu&j>*B*UmHnHphu,          : ; < = > ? f g h ïޡÍjA UmHnHu&j >*B*UmHnHphujG UmHnHu&j>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHujMUmHnHu.      N O P j k l n o p q r s ɽɯɽכɽɍɽyɽ&j >*B*UmHnHphuj5 UmHnHu&j >*B*UmHnHphuj; UmHnHujUmHnHumHnHu mHnHu0JmHnHuj0JUmHnHu&j >*B*UmHnHphu,   &'(*+,-./KLMNST^_`z{|~ïޡÍj#UmHnHu&j>*B*UmHnHphuj)UmHnHu&j >*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHuj/ UmHnHu. 
-;P+^+SXR^ V'  V'  V'   45689:;<=YZ[\ablmnɽɯɽכɽɍɽyɽ&j>*B*UmHnHphujUmHnHu&j>*B*UmHnHphujUmHnHujUmHnHumHnHu mHnHu0JmHnHuj0JUmHnHu&j>*B*UmHnHphu, -./IJKMNOPQRnoïޡÍjUmHnHu&j>*B*UmHnHphuj UmHnHu&j>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHujUmHnHu.opqvw  $%&()*+,-IJKLQRqrsɽɯɽכɽɍɽyɽ&jv>*B*UmHnHphujUmHnHu&j|>*B*UmHnHphujUmHnHujUmHnHumHnHu mHnHu0JmHnHuj0JUmHnHu&j>*B*UmHnHphu,;<=WXY[\]^_`|}ïޡÍjUmHnHu&jj>*B*UmHnHphujUmHnHu&jp>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHujUmHnHu.}~  $%&()*+,-IJKLOP\]^xɽɯɽכɽɍɽyɽ&jX>*B*UmHnHphujUmHnHu&j^>*B*UmHnHphujUmHnHujUmHnHumHnHu mHnHu0JmHnHuj0JUmHnHu&jd>*B*UmHnHphu,xyz|}~ 012LMNPQRSTUqrïޡÍjUmHnHu&jL>*B*UmHnHphujUmHnHu&jR>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHujUmHnHu.rstwx!"#$)*567Qɽɯɽכɽɍɽyɽ&j: >*B*UmHnHphujUmHnHu&j@>*B*UmHnHphujUmHnHujUmHnHumHnHu mHnHu0JmHnHuj0JUmHnHu&jF>*B*UmHnHphu,QRSUVWXYZvwxy~ïޡÍj"UmHnHu&j.">*B*UmHnHphuj!UmHnHu&j4!>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHuj UmHnHu.$%/01KLMOPQRSTpqrsxyɽɯɽכɽɍɽyɽ&j%>*B*UmHnHphuj$UmHnHu&j"$>*B*UmHnHphuj#UmHnHujUmHnHumHnHu mHnHu0JmHnHuj0JUmHnHu&j(#>*B*UmHnHphu,;<=WXY[\]^_`|}~ïޡÍj'UmHnHu&j'>*B*UmHnHphuj&UmHnHu&j&>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHuj%UmHnHu.56;<=>XYZ\]^_`a}~׻ׅ׳wj)UmHnHu&j)>*B*UmHnHphuj(UmHnHujUmHnHumHnHu jg0JmHnHu jd0JmHnHu mHnHu0JmHnHuj0JUmHnHu&j (>*B*UmHnHphu'_N=\Hjlmn{ d x7$8$H$ $ & Fxa$ V'  V'  V'  V' 5678=>efgɽɯɽכɽɍɽyɽ&j+>*B*UmHnHphuju+UmHnHu&j*>*B*UmHnHphuj{*UmHnHujUmHnHumHnHu mHnHu0JmHnHuj0JUmHnHu&j)>*B*UmHnHphu, #$+,-GHIKLMNOPlmnoqryz{ïåޗÃujc.UmHnHu&j->*B*UmHnHphuji-UmHnHu0JH*mHnHu&j,>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHujo,UmHnHu. 
678:;<=>?[\]^ȼȮȼȼȌȼxȼjjQ1UmHnHu&j0>*B*UmHnHphujW0UmHnHu&j/>*B*UmHnHphuj]/UmHnHujUmHnHumHnHuj0JUmHnHu&j.>*B*UmHnHphu0JmHnHu mHnHu)#$%&9:;UVWYZ[\]^z{|}мКx&j3>*B*UmHnHphujE3UmHnHu&j2>*B*UmHnHphujK2UmHnHu&j1>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHu/3456^_`z{|~ïޡÍj36UmHnHu&j5>*B*UmHnHphuj95UmHnHu&j4>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHumHnHujUmHnHuj?4UmHnHu*%&'ABCEFGHIJfghiyz{ϵ߯ׯכύ߯ׯykj!9UmHnHu&j8>*B*UmHnHphuj'8UmHnHu&j7>*B*UmHnHphu mHnHuj-7UmHnHujUmHnHumHnHu0JmHnHuj0JUmHnHu&j6>*B*UmHnHphu*GHIcdeghijkln{!!b#j#''N'ɽɯɽɽɍɽ{{s5CJ\aJ6CJ]aJCJaJ jUj;UmHnHu&j:>*B*UmHnHphuj:UmHnHujUmHnHumHnHu&j9>*B*UmHnHphu0JmHnHu mHnHuj0JUmHnHu-{/$'))..133578C<?BeEI J.JL]O^O`OaObOcO$d a$ d 91&+DY d x7$8$H$N'W'Y'`'((((8)9)r)s)))'*(*q**\,k,L.g.3344777777777777888888888<<j?k????@ AAEEF FI.J8LCLpL}LMؽددCJaJj;CJEHUjK> CJUV jCJUCJH* 6CJ]CJj0JCJUaJ B*CJph CJH*aJCJaJ6CJ]aJCMM N NNNNNNNNN N!N2N3N5NXNYNNNNNN^O_OmOqOOPPPPRRS&S'S(S)S_S`SaSSSTTU VVVVVVVV~XX&Y'YHYIYXYfYrYYYYYYj~> UV jCJUCJH*jCJH*U 5CJH*\CJaJjUmHnHu jSCJ 6CJ]CJH*CJ 5CJ\EcOdOeOfOgOhOiOjOkOlOmOnOoOpOqOOTXXYYZ-ZZ[[(\ d 7$8$H$ d x7$8$H$$d a$YYYYZZZ(Z)Z*Z+ZZZ\ZZZZZZZZZZZ [ [ [ [[[[[[[[[[\\\\\(\)\<\=\>\?\G\H\I\r\\\\\\\\\\챨j~> UVjECJEHUj1~> UV 5CJH*\jBCJEHUj~> UVCJH*jd@CJEHUj~> UV 6CJ]CJ jCJUj>CJEHU;(\@\n\\\\ _1_aaaaaaadeeeeeeeee d 7$8$H$ $d xa$ d x7$8$H$$d a$\\\] ]R]S]]]^^O^R^^^^^__ _1___``aaaabb#b&b'b(b)bbbbbbbbbbbbbbboddeeeeeeeeeeGfHfKfLfjUmHnHuCJaJj56U\]aJ jCJ 5CJH*\ 5CJ\ jU 6CJ] jCJH*CJH*CJ jCJUjHCJEHUAeehhiikbl2mnomttwlz}*RJ d 7$8$H$ $d xa$ d x7$8$H$$d a$LfRfSfofpfffffffwgxggggg^h_hdhehwhhiiii5i6i7i8i:i;iCiDiiikkkkllllllmmmm m"m(m*m2m>mBmDmFmJmn n!n"n5noooo?oWpbp+q6qqqrrvs|sstttmtstttut5\ 5CJH*\CJH* 6CJ] 5CJ\CJH*CJXutwtxtztttttuuuuuu uuuuuuuuuuvvvv v v v:v;vXvYvZvvvvvvvvvvw wwwwwx`xlxpxvxxxxxzzzzzz{{%{&{-{.{7{8{>{?{c{f{g{h{{{||}||(R 5CJ\CJH*CJH*5\CJ 6CJ]Z$%&' XZrtz|JL;<?NOŠÊҊӊԊ֊׊؊ڊۊ܊<=@UVW[\]ًڋۋߋ 45STXjkl jCJjCJH*UCJH* 5CJ\ 5CJH*\CJ 6CJ]TԌՌ֌׌،ΎЎHJ\^(*Bߜȿ쵪짡 jCJ 5CJH*\ 6CJ] 5CJ\CJjVMCJEHUaJjz~> CJUVjJCJUaJj= 
CJUVaJjCJUaJjCJUaJmHnHuCJaJjCJUaJmHnHu5،D4ٕBߜZ  & F& $d x^a$ $d xa$ $d 7$8$H$a$ d 7$8$H$ߜIJ(%TVwyԡ֡HNFG7@PS{}æЦҦ>@RSTYZ[klmnqrz{|ɨʨHS jSCJCJH* 5CJ\ 5CJH*\ 6CJ]CJ jCJUUSstuz{|کɫfgpqvwȬ$%&+,-9:LMNOxЭѭŮƮǮ"Oǯȯ;HI%-IJbcƱDZ z{ 5CJ\ 5CJH*\ jSCJCJH* 6CJ]CJX ک"?O% gLX T~$d a$$a$x $d xa$%&'@K GJKLM!$gopqǵȵɵ*:45ILTUlmrst jCJ jPEHUj~> UV jU 5CJ\ jSCJCJH* 5CJH*\ 6CJ]CJNt.1234UVW[\]ʸ˸̸߸ 246Ⱥʺ̺PR¾·¾¾ȱȜ¾¾Ȝ¾Ⱦ jaCJ j[EHUjB~> UV 5CJ\ 5CJH*\CJH* 6CJ]CJ5 jXEHUj3i> UV jTEHUjCSc> UV jU jREHUjCc> UV=лһԻ )*+,OPcdefqr׼ؼܼݼ޼ !"&'+ijRT468Z\žԨ jdCJ jaCJ jfEHU jcEHUj~> UV j`EHUjq~> UV j:^EHUjM~> UV jU 5CJ\5\CJH* 6CJ]CJ 5CJH*\=\`bPQdefgXZ_`adef~em ^m$%Z[|}~TUCJ\ 6CJ\]6CJ jkCJUjF= CJUV jCJUCJH* 6CJ] jhEHUjM~> UV jU jgCJCJF3^m:dx0;$SXpq [w/O$d a$$a$$xa$x $d xa$KLM0;()*DGILx{}$%89:<OPQRXYlmnop»jM~> UV 5CJH*\5 jsEHUj3i> UV jzoEHUj09s> UV j?mEHUjCc> UV jU 5CJ\CJH*CJ 6CJ]BPRT  dk'_`pqjkKTWXdejk[\]^_ 5CJH*\ jCJ 5CJ\ 6CJ]CJH* jaCJCJ jU j]vEHUQ_`apqrQRST[w /OWXYlp9<#$%14:;6CJ jCJ jCJ 6CJ]CJCJH*CJH*XOITVWXYZ[\]^_`aZ8?8d  $d xa$ d 7$8$H$ d x7$8$H$HIJKLMVWtuwxTUruvwz=@ABCCc`abcde4566CJ 5CJH*\jUmHnHu 5CJ\ 6CJ]CJCJH*T689:?@B%/noWX  3456z|GQJ[ 5CJH*6CJjxCJEHUju> UV jCJUCJH* 6CJ]CJRJ[ 7"1X&&%&&&9*D*//001#$d a$d  $d xa$ BCIJTUZ[pqxy%&,/cd!"GHuvQRhq`e   # Q \ lnTY]doy}&+V[kr  6CJ\] 6CJ]CJH*CJ\CJ6CJY7""1DFLNZ\bdXL^!!B$C$%&&&]'q'((9*D*++X+\+`+h+,---- -----!-*-,-2-4-A-C-L-N-W--------- j6] jCJ OJQJ^JCJH* 6CJ]6CJCJCJ 5CJH*R-//8090T0V000Z1[111111 222444444444444444444444444444;5H5t99V:a:;;;;m<o<Ľ jgCJ jdCJ jUjku> UV jUjku> UV 5CJ\ jeCJUjku> UVCJ j{Ujku> UV jUjUmHnHu jCJUCJH* 6CJ]CJ911 2444444T6t99m<<<<<<<!=>/>|AAMC$Pa$$d a$$a$$xa$ $d xa$o<p<<<<<<<<<<<<<<<<<<<<<<<<====!=====>/>F>G>S>T>>>>>t?u?|AABBCCMCOCPChCĿסכסכסכווו 6CJ] jgCJ jdCJ jg jdCJ jUj8ou> UV jgUjRou> UVjUmHnHuCJ 5CJ\ j Ujq}= UV jUjou> UV jU7hCiCjClCCCCCCCCCaDbDFGGGGG2I?I@IIIgIhIqIuI%J&JJJKKKKK0K1K2K4KLKMKNKOKPKRKSKkKݿݿݿݿݴj5CJU\mHnHu jUjlu> UV jtUjlu> UVjUmHnHuCJH*H* jgCJ jdCJ 6CJ] 5CJ\CJ jUjmu> UV jU jUj = UV0MCCCCCCFG2IIIKPKKKKOObOmOOqPPQRR$ & F%a$ & F( & F & 
Fd  $d xa$$d a$kKlKmKnKoKpKKKKKKOObOmOOO'PTPPPQWQQR,R{RR SpSSSUTTTTUUUJUKULUnUoUUUVVqVVVV3WFWnWWWX&XCXhXXX Y 5CJKHOJQJUV\^JaJ jU j.U-j pu> 5CJKHOJQJUV\^JaJBR7SSrTTrUULVVWbWWX[XX!YuYYZZ[q[\|\\]]^ & Fd $ & F%a$tYuYYYY)ZZZZZ[&[8[9[Y[Z[[[m[n[[["\B\\\]M]]]]^```/`j`{``` a aaa'a(aLaMakalaaaaaaaaaaaaaaaaaaaa bbƽjE8= CJUVjQCJEHUj3= UV jCJUCJ 5CJ\CJH*CJH*jCJU jCJU 6CJ]CJE^``aaa6bNbbbbb c$c%cGd[dgggggh d x7$8$H$ d 7$8$H$ $d `a$$d a$ & Fd  $d xa$bbb6b7bJbKbLbMbNbjbxbbbbbbbbbbbbbbbbb c c c!c"c#c$c%cRcSchcicccGd^fqffffffffúⲩ⡘═H*6]6CJ]aJCJaJaJjCJEHUj4= UVj CJEHUj4= UVj CJEHUj4= UV 6CJ]joCJEHUj4= UVCJ jCJUCJ jCJUjjCJEHU3ffffffffggg g!g0g4g=g>gCgDggggggggggggggggggggggggggg hhhhhhh1h2h3h4hZh[hnh증jCJEHUj7= CJUV jCJUjCJEHUj<9= UVCJ jCJU jWEHUj6= UV jU 6CJ]CJ 5\aJaJH*H*6]:hZhrhhhhhi)i*ikll:mSmTmFpgprr2tt & Fd  $d `a$ $ & F'd a$ $d xa$ d x7$8$H$ d 7$8$H$$d a$nhohphqhrhhhhhhhhhhhhhhhhhii%i&i'i(i)i*iminioiiiiHjIjjjjj]k^k_kkkkkllllll!l¹䱨ۋCJH* 5CJ\5CJ5\H*6]CJaJjtCJEHUj == UVjCJEHUj<= UVjCJEHUj== UV 6CJ]CJCJ jCJUjCJEHUj4= UV6!l"l#l&l'l,l-lGlHl]l^lllllllllllm'm8m:m;mNmOmPmQmmmnnooooooooooFpgpppppppppppppppppppqq!q"q4q5qJqjU'CJEHUjF= CJUVj$CJEHUj>= CJUVj"CJEHUj== CJUV jCJUCJCJH* 6CJ]FJqKqrrvswsssssssssstt-t.t/t0ttttttuuuuuuuuuuuuvvv*v,v0vPvRvVv\v^vvvvvvvvvvvv{x|x~xxxxxy&y'y-y.yIyJyLyQyRyTyqyryyyyyji> UVjz)CJEHUj2~> UV jCJUCJH*CJ 6CJ]Stuyyyyy {O{{{{||2|4|5|6|{||"}j}}} d x7$8$H$ d 7$8$H$ $d xa$d $d a$ & Fd yyyyyOzRzUzVz\z]zzzzzzz{{ { { {{"{#{7{8{K{L{M{N{O{R{d{e{y{z{{{{{{{{{{{{{{{{|||ƽ쳪쥝쓊j5CJEHUj~> CJUV6CJ]aJCJaJj2CJEHUj~> CJUVj0CJEHUj~> CJUV jCJH*CJH* 6CJ]jUmHnHuCJ jCJUj+CJEHU6||.|/|0|1|2|3|5|{||||||||||| } }}} }!}#}$}<}=}Q}R}e}f}g}h}x}y}}}}}}}}}}}}}~~j4i> UVj@CJEHUj~> CJUVj=CJEHUj~> CJUVj;CJEHUj~> CJUVCJH* 6CJ]CJaJjCJUmHnHuj]8CJEHUjh> UVCJ jCJU3}}~~~~`~~~~Roр10 $d xa$ & Fx & Fd  d x7$8$H$$d a$~~~~~w~x~~~~~~~~~~~~~~~~~  :;NOPQoЀр췮줛쓊߅5CJCJaJjOCJEHUjXi> UVj{LCJEHUj~> CJUVjICJEHUjS~> CJUVj|GCJEHUj~> CJUVCJH* jCJH*jCJUmHnHuCJ jCJUjaDCJEHU1"#%&')*+012Ձ؁ghk12knopsɃʃ̓/0)*+ąŅȅɅʅ^_` 
%&'(*12345EFOPjUmHnHu jCJCJH* 6CJ] jgCJCJ jdCJTˆ؈ڈԊՊ#$%'()./0VXZ^`blnpŒČƌ&(*HJNPRpr΍ЍVWZ[ jRCJUjBqc> CJUVCJH* jCJU jCJ 6CJ]CJCJH*R΍VXYZȑÒJKLMNO۔ܔ $d xa$Ŏǎ\]ӏՏ()*+,-.0rs ,-KMYbdg|}~ÒĒےݒ jgCJ jdCJjoTCJEHUjB~> UV jCJU j-CJ jCJCJCJH* 6CJ]M689:;=>?DEFjknӓԓד./02349:;BCDEFHIJOPQޔߔCD>BX\hj– jCJUCJCJH* 6CJ]]ܔݔޔ–@DHJ™ę4xz|~Țʚ̚ $d xa$–Ė02XZ\^ޗ2468:@BJLbdf,.0248:<FHJ,.0246~",<>@FHnprtܛj;\CJEHUje> UV jYCJUje> CJUVCJH* 6CJ]jVCJEHUj3Jj> UVCJ jCJUG̚ΚКҚr6vžĞƞJLMNOPQSTU $d xa$ܛޛ>@fhrt*,.0JLjlnĝƝ؝ڝޝ   npvx乲 jbCJUjbh> CJUV jCJ jI`CJUj e> CJUVCJH* 6CJ]CJ jCJUjB^CJEHUj> UVFƞȞnp+-./02349:;JKQRUVpؠ٠-.vw֢ע ȿjQoCJEHUjvMj> UVjfCJEHRUj~> UVjUmHnHu jueCJUj#e> CJUV jCJUCJCJH* 6CJ]DUbcdefghij_vxy֢ $d xa$ $&)*KLefghinopqr  (*.02402df  IJ*+efJ jqCJUj=i> CJUVjUmHnHuCJH* jCJUCJH*CJ 6CJ]P   $d xa$  dlnprtIKLM $d xa$PrI4LDZ & F $d xa$JNPr:<BDNP\^`prxzIJʮͮήϮҮRUVWX[^abcdgBCDFGHMNOrsΰѰҰӰ԰װ45H jUjUmHnHu 6CJ] jCJCJH*CJ 5CJ\UHIJKLM˱̱ BCqrsuvw|}ƲDzʲ˲ѲҲ"#$IJĴƴԵֵ/0235689:<=>C jgCJ jdCJ 5CJ\CJH* 6CJ]jUmHnHuCJ jU jtEHUj[|> UVLDZȱɱʱ˱=>?@ABEFGHI³óijĴȴʴֵ $d xa$ֵWոָ׸Z3LY $d xa$ & FCDE_`bcdfghmnoLMNOlmrstWXŸȸɸ׸ظxz{|}34GHIJYZmnopr jzCJUj=i> CJUVjzwCJEHUjvMj> UV jCJUCJCJH* 6CJ]OYqr-/0123ghɾʾ7$a$ d  7$8$H$ d 7$8$H$ & Fd  $d xa$rҽԽ-.2J_~žƾǾȾ;ξXfno78CJH* 6CJ]CJj0JCJU j0JU jEHUj~> UV jUjCJUaJj4e> CJUVaJj|CJUaJj֪e> CJUVaJjCJUaJjUmHnHu6CJ]aJCJaJ47`a78!h]h!&`#$abuvwx 345689LMNO/ jEHUja|> UV jEHUjGs> UV jTEHUj9e> UV jEH|UjT"t> UV joEHUjܔi> UV jфEHUjoe> UV jU0J mHnHu0J j0J UCJCJH*5/012  *+,-45HIJKKL_`abdexyz{ž jREHUj> UV j(EHUjm> UV jEHUji> UV jEHUj[|> UV jEHUj[|> UV jEHUjY|> UV jtEHUjIa|> UV jU jEHUjY> UV034JKcd OPmnPQdefgnož jEHUjMj> UV jHEHUj UV j1EHUj+Nj> UV jEHUjmNj> UV jEHUjNj> UV jEHdUjFOj> UV jEHUji> UV jU j#EHUji> UV078JKcd|}opm89LMNOKL_`abdexyz{}~,ž jEHUj@i> UV j>EHUj9e> UV jjEHUj> UV jdEHUj> UV jEHUjmt> UV j=EHUj+> UV jEHUjܔi> UV jU jEHUjɘi> UV0,-./pqno+,?@ABDEXž jEHUji> UV jEHUji> UV jr EHUj"Jj> UV j EHUjLJj> UV jEHUj4d> UV j|EHUje> UV jEEHUjJj> UV jU jEHUje> UV0mn*+CD"#VWef~  *+:;DEF$d a$XYZ[#$789:WXklmnfgz{|} 
()*+:;DFRSno{ž 5CJ\ 5CJH*\ j5CJH*\ 5CJ\ j*EHUjd> UV j'EHUjlc> UV j$EHUjrc> UV jY!EHUjd> UV jEHUjd> UV jU jEHUjfTc> UV3Fno345kl[\]$a${|45jkl[\]'(),34:; !,-^_- 6CJ] 6CJ\ jaCJ6CJCJH* 5CJ] jCJCJ5CJ 5CJ\ 5CJ\ 5CJH*\Kp/-.<jk$a$ & F h(d^`($d\$a$$a$-.5<AFGH{|$&')/124;=>@FHIKTVWZ[]_`hsx}5CJCJ6H*65H* 5CJ\CJ 5CJ\ 5CJH*\ 6CJ]CJ56CJ\]5CJ 56CJ]J$%(),/0347: "$ !a$" !:;<?@CFGJKNQTUXY\]_`s$a$$a$  "#*+JK_`pq 5CJ\ 6CJ] 5CJ\5\ 5CJ\ 6CJ]CJ 56CJ]#  "#*+JK_`pq$a$/ =!"#$%9 0001hP/ =!8"8#$%0 `'6 001hP/ =!8"8#$%0 `'301hP/ =!8"8#$%0 `'6 001hP/ =!8"8#$%0 `'}DyK _Toc511732115}DyK _Toc511732115}DyK _Toc511732116}DyK _Toc511732116}DyK _Toc511732117}DyK _Toc511732117}DyK _Toc511732118}DyK _Toc511732118}DyK _Toc511732119}DyK _Toc511732119}DyK _Toc511732120}DyK _Toc511732120}DyK _Toc511732121}DyK _Toc511732121}DyK _Toc511732122}DyK _Toc511732122}DyK _Toc511732123}DyK _Toc511732123}DyK _Toc511732124}DyK _Toc511732124}DyK _Toc511732125}DyK _Toc511732125}DyK _Toc511732126}DyK _Toc511732126}DyK _Toc511732127}DyK _Toc511732127}DyK _Toc511732128}DyK _Toc511732128}DyK _Toc511732129}DyK _Toc511732129}DyK _Toc511732130}DyK _Toc511732130}DyK _Toc511732131}DyK _Toc511732131}DyK _Toc511732132}DyK _Toc511732132}DyK _Toc511732133}DyK _Toc511732133}DyK _Toc511732134}DyK _Toc511732134}DyK _Toc511732135}DyK _Toc511732135}DyK _Toc511732136}DyK _Toc511732136}DyK _Toc511732137}DyK _Toc511732137}DyK _Toc511732138}DyK _Toc511732138}DyK _Toc511732139}DyK _Toc511732139}DyK _Toc511732140}DyK _Toc511732140}DyK _Toc511732141}DyK _Toc511732141}DyK _Toc511732142}DyK _Toc511732142}DyK _Toc511732143}DyK _Toc511732143}DyK _Toc511732144}DyK _Toc511732144}DyK _Toc511732145}DyK _Toc511732145}DyK _Toc511732146}DyK _Toc511732146}DyK _Toc511732147}DyK _Toc511732147}DyK _Toc511732148}DyK _Toc511732148}DyK _Toc511732149}DyK _Toc511732149}DyK _Toc511732150}DyK _Toc511732150}DyK _Toc511732151}DyK _Toc511732151}DyK _Toc511732152}DyK _Toc511732152}DyK _Toc511732153}DyK _Toc511732153}DyK _Toc511732154}DyK _Toc511732154}DyK _Toc511732155}DyK 
_Toc511732155}DyK _Toc511732156}DyK _Toc511732156}DyK _Toc511732157}DyK _Toc511732157}DyK _Toc511732158}DyK _Toc511732158}DyK _Toc511732159}DyK _Toc511732159}DyK _Toc511732160}DyK _Toc511732160}DyK _Toc511732161}DyK _Toc511732161}DyK _Toc511732163}DyK _Toc511732163}DyK _Toc511732164}DyK _Toc511732164}DyK _Toc511732165}DyK _Toc511732165}DyK _Toc511732166}DyK _Toc511732166}DyK _Toc511732167}DyK _Toc511732167}DyK _Toc511732168}DyK _Toc511732168}DyK _Toc511732169}DyK _Toc511732169}DyK _Toc511732170}DyK _Toc511732170}DyK _Toc511732171}DyK _Toc511732171}DyK _Toc511732172}DyK _Toc511732172}DyK _Toc511732173}DyK _Toc511732173}DyK _Toc511732174}DyK _Toc511732174}DyK _Toc511732175}DyK _Toc511732175}DyK _Toc511732176}DyK _Toc511732176}Dd8J  C A?  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~      !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|~x#$Root Entrys Fܶf%Data }H-WordDocumentrObjectPoolufܶf_1048660911dFffOle CompObjfObjInfo  #&'*-./2569<=@CFIJKLMNOPSVWX[^_cfilopsx{|}~ FMicrosoft Equation 3.0 DS Equation Equation.39q\ kf t f uu " () FMicrosoft Equation 3.0 DS EqEquation Native x_1048498573 F`af`afOle CompObj fuation Equation.39qI E(Q)=y"y'y FMicrosoft Equation 3.0 DS Equation Equation.39qObjInfo Equation Native  e_1048498687FPʲfPʲfOle  CompObj fObjInfoEquation Native v_1048498845F@HͲf@HͲfZؾ SE(Q)=y"y'y() 2 FMicrosoft Equation 3.0 DS Equation Equation.39q؊ SE(Q)=1gy i "y iOle CompObjfObjInfoEquation Native  'y i () 2i " FMicrosoft Equation 3.0 DS Equation Equation.39qk0Wl MSE(p W )=p(Q)SE(Q) Q _1048498993xIF@ϲf@*ҲfOle CompObjfObjInfoEquation Native _1048499100F0tԲf0tԲfOle !CompObj "f" FMicrosoft Equation 3.0 DS Equation Equation.39qf RMSE(p W )= MSE(p W ) ObjInfo!$Equation Native %_1036314029 S$F0ֲf0ֲfOle ( FMicrosoft Equation 3.0 DS Equation Equation.39q p {Q} (R')= n 2 (1") n 1  n 3 (1") n 4CompObj#%)fObjInfo&+Equation Native ,_1048506490A)F /ٲf /ٲfOle 
0CompObj(*1fObjInfo+3Equation Native 4 FMicrosoft Equation 3.0 DS Equation Equation.39q p W (R')=w i p {Q i } (R') i=1q "_1048499229@.F ۲f ۲fOle 7CompObj-/8fObjInfo0: FMicrosoft Equation 3.0 DS Equation Equation.39q MSE(p W )=w i MSE(p {Q i } ) i=1q " FMicrosoft Equation 3.0 DS EqEquation Native ;_10466926253F6f6fOle >CompObj24?fuation Equation.39qS ApproxMSE(p {Q} )= FMicrosoft Equation 3.0 DS Equation Equation.39qObjInfo5AEquation Native Bo_1046696771P8FffOle DCompObj79EfObjInfo:GEquation Native H_1047121715Z=FffPԢ n j  2 k j 1"() R j "R Q  " +n j  2 k j 1"() Rj"R\R Q  " n jR j "R Q  " +n jR j "R\R Q  " () 2 FMicrosoft Equation 3.0 DS Equation Equation.39q 'lim n!" MSE(p {Q} )ApproxMSE(p {Q} Ole QCompObj<>RfObjInfo?TEquation Native U)=1 FMicrosoft Equation 3.0 DS Equation Equation.39q MSE(p W )= j k j1d"jd"r "_1048499522EBFffOle YCompObjACZfObjInfoD\Equation Native ]_1048499533GFgfgfOle `ObjInfoFHaXx>w;  j k j1d"jd"r " FMicrosoft Equation 3.0 DS Equation Equation.39qEquation Native bt_1048499569,NKFffOle dCompObjJLefObjInfoMgEquation Native hc_1048499612PFmfmfOle jG3<5 k j =k 1d"jd"r " FMicrosoft Equation 3.0 DS Equation Equation.39qpJ k j =k  j  i1CompObjOQkfObjInfoRmEquation Native n_1036404475UFpfpfd"id"r " () FMicrosoft Equation 3.0 DS Equation Equation.39q"p  f(y)  FMicrosoft Equation 3.0 DS EqOle qCompObjTVrfObjInfoWtEquation Native u>_1047738672iZFffOle vCompObjY[wfObjInfo\yuation Equation.39q`0 n j  2 k j y j  2 1"() R j "R Q  " +n j  2 k j y j  2 1"() Rj"R\R Equation Native z|_1047911630_FffOle CompObj^`fQ  " n j y jR j "R Q  " +n j y jR j "R\R Q  " () 2 FMicrosoft Equation 3.0 DS Equation Equation.39qObjInfoaEquation Native _1047882692d!FffOle  d w i x 1,i "x 2,i () 1d"id"m' "  2 !FMicrosoft Excel ChartBiff8Excel.Chart.89qOh+'0@CompObjcfbObjInfoWorkbookeg)SummaryInformation(h      !"%?'()*+,-./0123456789:;<=>BCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @\pVivek Narasayya Ba=  
=9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1nArial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)                + ) , *     `D Chart1selGB_20_100_0.resAdHoc`iZR3  @@  u SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100u a  @M\\research\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` ;$` e%` e&` e'` v(` v)3dScM23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&(Q4$% MP+3O&'Q4FAA3OQt 3 b#M43*#M! M4% M3Oz&%Q .Sampling Fraction (%)'4% / MZ3O$N&&Q :Relative Error (L1 Metric)'4523  NM43" R 03OR 8% Mp73O&)Q4444% O M3O3&$Q IError Vs. Sampling Fraction: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44e????????????????@@@@@@@@$@$@$@$@ezPPVn?/H?Cp?;]?ҌE?LT?$ӡn?ިo?P&?!!?ދ?C+j? ĭ?PC?֌ ra?*b?"p?'.+=?2Pl?Ωd?OYM]? ?gA(?ْU?"J %?镲 q?\7V?$I?e> $ @  dMbP?_*+%"??UT````````` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? 
+پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA    _      _~ $@zPPVn?/H?Cp?;]?_~ 4@ҌE?LT?$ӡn?ިo?_~ ?P&?!!?ދ?C+j?~ ? ĭ?PC?֌ ra?*b?~ @"p?'.+=?2Pl?Ωd?~ @OYM]? ?gA(?ْU?~ $@"J %?镲 q?\7V?$I?       ~ $@bL{)?2=a?}vuŌ?ˠDt?~ 4@.&????Eb?~ ? 
v?ߢ?|DL$zY?ɐck?~ ?9τ&?Zc!?yTE?!!3?~ @f+/?O9&?Nwx6?yrM?~ @OYM]?@3?Nwx?Ӻ ?~ $@@I?}͑?מY?Է?8;TDXNXPNVVVVV4BVVVVVV-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`- - - -. . . . .~ /$@/1~٭?/+H3?/z6>W?/ފ5?~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?$4,4BVVVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV(  v  <NMM? ]`  @"??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`l  @"l??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`\  @"\??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`  @"??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! 
M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`  @"??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee >@;;;7 H\t  viveknar Vivek NarasayyaMicrosoft Excel@[M@8&՜.+,0 PXp x Microsoft Corp. selGB_20_100_0.resAdHocChart1  WorksheetsChartsDocumentSummaryInformation84_1047882698qk!FufufOle PRINTjm&^0 !FMicrosoft Excel ChartBiff8Excel.Chart.89qOh+'0@H\t  viveknar Vivek NarasayyaMicrosoft Excel@[M@a&  + F   -''  Arialw@ 8 UwUw0- Arialw@\ UwUw0----- Arialw@ UwUw0-Arialw@  UwUw0-------"System 0-'- ---  !!---'--- -- - $MMM----'-- - I@ MM---'-- - -@- M MM---'-- -  -- M -MMee==---'-- -  !!---'-- - 8---'-- - 8- -  ??11ccgg{- ??8c8ckk- w??0c0cfft.?.?XX}c}c--  $~1~---'---  8 $---'---  8? $?T? *?---'---  81 $1F1---'---  8c $cxc0Nc---'---  8g $R g|gR---'---  8{ $f{q{f---'---  8- -  1r---'-- - 8 ---'-- - 8? T*---'-- - 8 2---'-- - 88c Mx#N---'-- - 8k  V---'-- - 8 nq---'-- - 8w-  -   $b1b---'-- -  8  $---'-- -  8?  $?T*?---'-- -  8  $---'-- -  80c  $cxENEc---'-- -  8f  $Q {{Q---'-- -  8t  $_q_---'-- -  8-  -  4q-r1r1---'-- - 8- -  ---'-- - 8.?- FW)-.?*.?CT.?C*.?T---'-- - 8X- pB-XCXmXmXC---'-- - 8}c- {gM-}chN}cx}cN}chx---'-- - 8-  -  ---'-- - 8- p-q  q---'-- - 8---'-- - 8---'-- - ---------'-- -  : F2 L*Error Vs. 
Sampling Fraction: SUM Aggregate-)-%-%<))))%%))-09-))%)%%12 (W-SEL-GB-100 Test Set, z=2)A--)40%%%(%%-%"'%----'-- - ----'-- - -----'-- - -  2 |0% 2 D0.2%% 2 ZD0.4%% 2 D0.6%% 2 %D0.8%%---'-- - ----'-- - -  2 0.1%% 2 0.2%% 2 0.5%% 2 1% 2 Q2% 2 5% 2 a10%%---'-- - -------'-- - =Qe '2 coSampling Fraction (%)-%<))))%%));----'-- - --------'-- - g:j  Arialw@s i UwUw0- 2 ]zRelative Error '-  Arialw@s j UwUw0- 2 + (L1 Metric)r- ----'-- - --- - A---'---  ?---'---  ?-  & &&\- $\k&\5M&\2 USAMP!%---'-- -  ?---'-- -  ?- v vv\-  kgM2 ^WSAMP.%---'-- - ?---'-- - ?-  \-  $\kM\2 STRAT!---'---  ?---'---  ?-   \- - (nL-\M\%k\%M\k2 OTLIDX$ !---'-- - ?---'-- - ----'-- - -- -   !!-- ' - '  'CompObjbObjInfoloWorkbookA%SummaryInformation(npHI @\pVivek Narasayya Ba=d/+< =89X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1nArial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)                + ) , *     `D Chart3selGB_20_100_0.resAdHoc`iZR3  @@  u SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100u a  @M\\research\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` v$` v%` v&` v'` v(` v)3d[#23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&(Q4$% MP+3O&'Q4FASm3OL J 3 b#M43*#M! M4% N ;M3Oz&%Q .Sampling Fraction (%)'4% v} MZ3O$N&&Q :Relative Error (L1 Metric)'4523  NM43" A *3OB % Mp73O&)Q4444% O M3O$&$Q GError Vs. 
Sampling Fraction: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44e????????????????@@@@@@@@$@$@$@$@e1~٭?+H3?z6>W?ފ5?Ct ?.T ?[ ?$z?v~k?uX?1>^?r0C?K."?}A?Ĵo?I?*dq?}?QۆQ /5B? @  dMbP?_*+%"??UT````````` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 
3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA    _      _~ $@zPPVn?/H?Cp?;]?_~ 4@ҌE?LT?$ӡn?ިo?_~ ?P&?!!?ދ?C+j?~ ? ĭ?PC?֌ ra?*b?~ @"p?'.+=?2Pl?Ωd?~ @OYM]? ?gA(?ْU?~ $@"J %?镲 q?\7V?$I?       ~ $@bL{)?2=a?}vuŌ?ˠDt?~ 4@.&????Eb?~ ? v?ߢ?|DL$zY?ɐck?~ ?9τ&?Zc!?yTE?!!3?~ @f+/?O9&?Nwx6?yrM?~ @OYM]?@3?Nwx?Ӻ ?~ $@@I?}͑?מY?Է?8;TDXNXPNVVVVV4BVVVVVV-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`- - - -. . . . .~ /$@/1~٭?/+H3?/z6>W?/ފ5?~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?$4,4BVVVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV(  v  <NMM? ]`0  @"0??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`  @"??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! 
M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`p  @"p??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`  @"??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`  @"??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee >2@;;;7 DocumentSummaryInformation84_1047882725s!F`'f`'fOle CompObjrub՜.+,0 PXp x Microsoft Corp. selGB_20_100_0.resAdHocChart3  WorksheetsCharts !FMicrosoft Excel ChartBiff8Excel.Chart.89qObjInfoWorkbooktv1SummaryInformation(wDocumentSummaryInformation84Oh+'0@H\t  viveknar Vivek NarasayyaMicrosoft Excel@[M@~&՜.+,0 PXp x Microsoft Corp. 
@\pVivek Narasayya Ba=T^* =G9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1iArial1Arial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)                + ) , *     `D Chart2selGB_20_100_0.resAdHoc`iZR3  @@  u SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100u a  @M\\research\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` v$` v%` v&` v'` v(` g )远3dT#23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&'Q4$% MP+3O&&Q4FA?S3OX 8 3 b#M43*#M! M4% E 6M3Oz&$Q .Sampling Fraction (%)'4%  MZ3O$N&%Q :Relative Error (L1 Metric)'4523  NM43" D (3OC z% Mp73O&(Q4444% Pk 3M3O&)Q MError Vs. Sampling Fraction: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44e????????????????@@@@@@@@$@$@$@$@ebL{)?2=a?}vuŌ?ˠDt?.&????Eb? v?ߢ?|DL$zY?ɐck?9τ&?Zc!?yTE?!!3?f+/?O9&?Nwx6?yrM?OYM]?@3?Nwx?Ӻ ?@I?}͑?מY?Է?e> Է @  dMbP?_*+%"??UT````````` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? 
YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA    _      _~ $@zPPVn?/H?Cp?;]?_~ 4@ҌE?LT?$ӡn?ިo?_~ ?P&?!!?ދ?C+j?~ ? ĭ?PC?֌ ra?*b?~ @"p?'.+=?2Pl?Ωd?~ @OYM]? ?gA(?ْU?~ $@"J %?镲 q?\7V?$I?       ~ $@bL{)?2=a?}vuŌ?ˠDt?~ 4@.&????Eb?~ ? 
v?ߢ?|DL$zY?ɐck?~ ?9τ&?Zc!?yTE?!!3?~ @f+/?O9&?Nwx6?yrM?~ @OYM]?@3?Nwx?Ӻ ?~ $@@I?}͑?מY?Է?8;TDXNXPNVVVVV4BVVVVVV-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`- - - -. . . . .~ /$@/1~٭?/+H3?/z6>W?/ފ5?~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?$4,4BVVVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV(  v  <NMM? ]`  @"??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`P  @"P??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`@  @"@??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`  @"??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! 
M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`  @"??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee >@;;;7 selGB_20_100_0.resAdHocChart2  WorksheetsCharts !FMicrosoft Excel ChartBiff8Excel.Chart.89qOh+'0@H\t  _1047882729sz!F0:f0:fOle CompObjy|bObjInfoWorkbook{}SummaryInformation(~DocumentSummaryInformation84_1047883522!FKfKf      !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGJMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @\pVivek Narasayya Ba=h<L =9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1iArial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)                + ) , *     `D Chart4zselGB_20_100_0.resAdHoc`iZR3  @@  u SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100u a  @M\\research\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` v$` v%` v&` v'` v(` v)@3dH&23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&(Q4$% MP+3O&'Q4FA83O] ' 3 
b#M43*#M! M4% S+ M3Of&%Q &Sampling Rate (%)'4% }MZ3O$b&&Q 8Relative Error (L1 Metric)'4523  NM43" O  3OO % Mp73O&)Q4444% )QN M3O$&$Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44e????????????????@@@@@@@@$@$@$@$@e9EGr?bqm?Dt?&`6?&pnj?=Kez?f?rfB?le?*;.R?>VP?g#M)?͏O?QG?W\9?>'I?JiW?ė"n?() ,?Ry=?(\?"?K8? ]?oG8-x?]k?iUMu>&b?e> & @  dMbP?_*+%"??UT````````` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 
3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA    _      _~ $@zPPVn?/H?Cp?;]?_~ 4@ҌE?LT?$ӡn?ިo?_~ ?P&?!!?ދ?C+j?~ ? ĭ?PC?֌ ra?*b?~ @"p?'.+=?2Pl?Ωd?~ @OYM]? ?gA(?ْU?~ $@"J %?镲 q?\7V?$I?       ~ $@bL{)?2=a?}vuŌ?ˠDt?~ 4@.&????Eb?~ ? v?ߢ?|DL$zY?ɐck?~ ?9τ&?Zc!?yTE?!!3?~ @f+/?O9&?Nwx6?yrM?~ @OYM]?@3?Nwx?Ӻ ?~ $@@I?}͑?מY?Է?8;TDXNXPNVVVVV4BVVVVVV-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`- - - -. . . . .~ /$@/1~٭?/+H3?/z6>W?/ފ5?~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?$4,4BVVVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV(  v  <NMM? ]`  @"??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`   @" ??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! 
M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`!  @"!??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]``!  @"`!??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`P"  @"P"??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee >.@;;;7 viveknar Vivek NarasayyaMicrosoft Excel@[M@J&՜.+,0 PXp x Microsoft Corp. 
selGB_20_100_0.resAdHocChart4  WorksheetsChartsOle CompObjbObjInfoWorkbookL !FMicrosoft Excel ChartBiff8Excel.Chart.89qOh+'0@H\t  viveknar Vivek NarasayyaMicrosoft Excel@[M@(՜.+,0 Pxw @\pVivek Narasayya Ba= _n =9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)                + ) , *     $  `fChart1-#selGB_20_100_0.resAdHoc`iZR  3  @@  3 SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100PURE GBCONGNEG CORR (Test Set)POS CORR (Test Set)z=2 Running Time W-SEL-100W-GB-100TaggingSampling" fa  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` B` C` D` E` F` G3dm23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D$% MP+3O&FQ4$% MP+3O&EQ4FAys3O=G e 3 b#M43*#M! M4%  M3Oz&CQ .Sampling Fraction (%)'4% /8MZ3O$N&DQ :Relative Error (L1 Metric)'4523  NM43"  83O <% Mp73O&GQ4444% R M3O3&BQ BError Vs Sampling Fraction: COUNT Aggregate W-GB-100 Test Set, Z=2'44e????????????????????@@@@@@@@@@$@$@$@$@$@eog_y?t??M^i?k3 ??i^`V?C?pa?r?GL ?TO? a*V?nJy?EИ?fH?-ex?RG?ŭ?B?  R? 
Rr%?A !?!<8b-?3?^SH?d]?3O> bG?;?%\#?Zc!?_>Y1\>'2?3Y?e> 3Y @  dMbP?_*+%"??U} I }  }  }  ````````` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 
3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA    _      _~ $@zPPVn?/H?Cp?;]?_~ 4@ҌE?LT?$ӡn?ިo?_~ ?P&?!!?ދ?C+j?~ ? ĭ?PC?֌ ra?*b?~ @"p?'.+=?2Pl?Ωd?~ @OYM]? ?gA(?ْU?~ $@"J %?镲 q?\7V?$I?       ~ $@bL{)?2=a?}vuŌ?ˠDt?~ 4@.&????Eb?~ ? v?ߢ?|DL$zY?ɐck?~ ?9τ&?Zc!?yTE?!!3?~ @f+/?O9&?Nwx6?yrM?~ @OYM]?@3?Nwx?Ӻ ?~ $@@I?}͑?מY?Է?8;TDXNXPNVVVVV4BVVVVVV-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`- - - -. . . . 
.~ /$@/1~٭?/+H3?/z6>W?/ފ5?~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?$4,4BVVVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV``a`b`c`d`e`f`g`h`l`m`n`o`p`q`r`s`t`` ` ` ` `a a a a a a~ b$@bog_y?bt?b?bM^i?bk3 ?~ c4@c?ci^`V?cC?cpa?cr?~ d?dGL ?dTO?d a*V?dnJy?dEИ?~ e?efH?e-ex?eRG?eŭ?eB?~ f@f  R?f Rr%?fA !?f!<8b-?f3?~ g@g^SH?gd]?g3O>g bG?g;?~ h$@h%\#?hZc!?h_>Y1\>h'2?h3Y?l l l l lm m m m m m~ n$@nA?n|ԛ?n>?ni&?nwٯ;?~ o4@o'p?oN?o|?o/.Ui?o~Ϛ?~ p?pYe?p5?p+ٱA?p Pj?p>9 ?~ q?q5؀?q2?qy76?q$F?qn2d?r@PL@rt|8c?r6w\&?r?r)Wx?~ s@sݔZ ?s j֍?sZQf?soe2?st_lW?~ t$@tؼZ`?t9?t'eRC>t^?tI)?(2TBPhhhhhhhBPhhhh^h````````````````````        ~ $@%z?x҆?txH?N ^?~ 4@}?s?*.?5x_?~ ?l\?Ȳ`?4?~ ?g?\Z {?DKO?6?~ @ ??B˺?Qv0b?~ @TTJg??l g?0*D?~ $@ t?Ӝ?4?#G?,h|((BVVVVVVV((BVVVVVV`````````````            *@.@@H@@=@ *d@.@$@L@d@=@ *@.@@J@@=@       @d@@  .@.@.@   @$@@  M@@S@Q@   @d@@ B@B@B@V,R^<<<FFBFB8( (  v  <NMM? ]`l  @"l??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`  @"??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`  @"??3` v` v` v` v` v` vB3d23 M NM4 3Q:. 
USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`  @"??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`  @"??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee ~v  <NMM?_ n]`<  @"<??3` %` &` '` (` )` *)3d23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D $% MP+3O&'Q4$% MP+3O&(Q4FA(M3O 3 b#M43*#M! M4% b JUM3Oj&)Q &Sampling Rate (%)'4% r.MZ3O&&*Q BAvg. Relative Error (L1 Metric)'4523  NM43" Y &3OY &% Mp73O&%Q4444% DN" lM3O(&&Q >Error Vs Sampling Rate: COUNT Aggregate W-GB-100 Test Set, Z=2'44eee ~v  <NMM?o- ~]`  @"??3` v+` v,` v-` v.` v/` v0+3d2L23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% MP+3O&/Q4$% MP+3O&.Q4FA  3O.B r 3 b#M43*#M! M4%  ?UM3Oj&,Q &Sampling Rate (%)'4% qtMZ3O&&-Q BAvg. 
Relative Error (L1 Metric)'4523  NM43" a 3Oa % Mp73O&0Q4444% ^N.M3O$&+Q |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44eee ~v   <NMM? ] `  @"??3` v2` v3` v4` v5` v6` v7п;3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&6Q4$% MP+3O&5Q4FA+9 3O7|  3 b#M43*#M! M4%  MM3Of&3Q &Sampling Rate (%)'4% kIMZ3O&4Q 8Relative Error (L1 Metric)'4523  NM43"  u3O t% Mp73O&7Q4444% O;M3O$&2Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-NEG Test Set, z=2)'44eee ~v   <NMM?0Z P] `   @" ??3` v8` v9` v:` v;` v<` v=?:3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&<Q4$% MP+3O&;Q4FA^>R 3OCn / 3 b#M43*#M! M4%  FM3Oj&9Q &Sampling Rate (%)'4% lydMZ3O&:Q 8Relative Error (L1 Metric)'4523  NM43"  f3O % Mp73O&=Q4444% ]J3M3O&&8Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-POS Test Set, z=2)'44eee xp   6NMM?p ] `   @" ??3` v1` v>` v?` v@` vA?3d23 M NM4 3Q ;  WSAMP TaggingQ ;Q ;Q3_4E4 3Q ;  WSAMP SamplingQ ;Q ;Q3_4E4 3Q ;  STRAT TaggingQ ;Q ;Q3_4E4 3Q ;  STRAT SamplingQ ;Q ;Q3_4E4 3Q ;  OTLIDX TaggingQ ;Q ;Q3_4E4 3Q ; "OTLIDX SamplingQ ;Q ;Q3_4E4D $% MP+3O&AQ4$% MP+3O&@Q4FAT;3O=  3 b#M43*#M! M4%  7KM3O;&>Q Workload'4% j:MZ3Oq&>Q (Running Time (sec)'4523  NM43"  +3O +% Mp73O&?Q4444% K ZM3OA(&1Q x:Comparison of Running Time for Creating Sample (f=1%, z=2)'44eee >]@  7 SummaryInformation(DocumentSummaryInformation84_10368116331!FPfPfOle Xp x Microsoft Corp. selGB_20_100_0.resAdHocChart1  WorksheetsCharts !FMicrosoft Excel ChartBiff8Excel.Chart.89qPRINT|;CompObjbObjInfoWorkbook?   ?   ''  Arialw@6 UwUw0- Arialw@ UwUw0----- Arialw@ UwUw0-Arialw@q UwUw0-------"System 0-'- -- }!!---'--- - - $----'-- - R[9 9rr---'-- - R[-  ---'-- -  -  -99rr__uu---'-- - }!!---'-- - ---'-- - - -  ??Z0Z0FF\- *//]0]0FF\- <00FF\800FF-\- f00FF\- - $/2---'-- -  ? 
$*?T?*---'-- -  Z0 $0EEZ0oZ0E---'-- -   $oo---'-- -  F $Fn[F1Fn---'-- -   $---'-- -  \ $\q\G\---'-- -  *- -   ?/---'---  / D---'---  ]0 rEH---'---   u---'---  F [v1---'---   ---'---  \ qG---'---  <- -   $'/QQ'---'-- -   $---'-- - 0  $0E0---'-- -   $---'-- - F  $F[1F---'-- -   $---'-- - \  $\qG\---'-- - 8-  - P2"-8#8M/8M8#/---'-- - - ----'-- - 0- H-00E00E---'-- - - ----'-- - F- ^0-F1F[F1F[---'-- - - 4-11---'-- - -\- EtF--\G-\Bq-\BG-\q---'-- - f- - ~2P-fQf{/f{fQ/fQf{---'-- - - ----'-- - 0- H-00E00E0000---'-- - - ----'-- - F- ^0-F1F[F1F[FFFF---'-- - - ----'-- - \- tF-\G\q\G\q\\\\---'-- - ---'-- - ---'-- - --------'-- -  ]:A ?2 LK%Error Vs Sampling Rate: SUM Aggregate-)-%-%<)))0%%-09-))%)%%(2 ZW-GB-100 Test Set, z=2A40%%%(%%-%"'%----'-- - ---'-- - ----'-- -   2 t}0% 2 E0.2%% 2 E0.4%% 2 JE0.6%% 2 E0.8%% 2 }1%---'-- - ---'-- -   2 0.1%% 2 w0.2%% 2 0.5%% 2 1% 2 42% 2 5% 2 710%%---'-- - ------'-- - 7 !2 ISampling Rate (%)-%<)))0%%;----'-- - -------'-- - ;k  Arialw@i U UwUw0- 2 {Relative Error -  Arialw@i V UwUw0- 2 M (L1 Metric)r- ----'-- - -- - S[---'---  QZ---'---  QZ-   @- $@M@3@2 USAMP %---'-- -  QZ---'-- -  QZ- B BB@-  OM532 +WSAMP,%---'-- - QZ---'-- - QZ-  @-  $@M3@2 xSTRAT ---'---  QZ---'---  QZ-   @- - P2-@3@M@3@M2 OTLIDX" ---'-- - QZ---'-- - QZ- ) ))@- - 9P2-)@3)@6M)@63)@M)@@)@6@ 2 CONG " "---'-- - QZ---'-- - ---'-- - - -  }!!-- '  '  '      !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvzyvu}~ @\pVivek Narasayya Ba=o~ 
=<X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1iArial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)                + ) , *     $  `fChart2#selGB_20_100_0.resAdHoc`iZR  3  @@  3 SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100PURE GBCONGNEG CORR (Test Set)POS CORR (Test Set)z=2 Running Time W-SEL-100W-GB-100TaggingSampling" fa  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` vB` vC` vD` vE` vF` vG远3d23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% M 3O&FQ4$% M 3O&EQ4FA 3OW>Y 3 b#M43*#M! M4% 1 GM3Of&CQ &Sampling Rate (%)'4% w MZ3O$N&DQ :Relative Error (L1 Metric)'4523  NM43" 6 k53O6 s% M,3O&GQ4444% P| M3O$&BQ |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44e????????????????????@@@@@@@@@@$@$@$@$@$@eA?|ԛ?>?i&?wٯ;?'p?N?|?/.Ui?~Ϛ?Ye?5?+ٱA? Pj?>9 ?5؀?2?y76?$F?n2d?Q?t|8c?6w\&??)Wx?ݔZ ? j֍?ZQf?oe2?t_lW?ؼZ`?9?'eRC>^?I)?e> I @  dMbP?_*+%"??U} I }  }  }  ````````` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? 
(|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? 
l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA    _      _~ $@zPPVn?/H?Cp?;]?_~ 4@ҌE?LT?$ӡn?ިo?_~ ?P&?!!?ދ?C+j?~ ? ĭ?PC?֌ ra?*b?~ @"p?'.+=?2Pl?Ωd?~ @OYM]? ?gA(?ْU?~ $@"J %?镲 q?\7V?$I?       ~ $@bL{)?2=a?}vuŌ?ˠDt?~ 4@.&????Eb?~ ? v?ߢ?|DL$zY?ɐck?~ ?9τ&?Zc!?yTE?!!3?~ @f+/?O9&?Nwx6?yrM?~ @OYM]?@3?Nwx?Ӻ ?~ $@@I?}͑?מY?Է?8;TDXNXPNVVVVV4BVVVVVV-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`- - - -. . . . .~ /$@/1~٭?/+H3?/z6>W?/ފ5?~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?$4,4BVVVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV``a`b`c`d`e`f`g`h`l`m`n`o`p`q`r`s`t`` ` ` ` `a a a a a a~ b$@bog_y?bt?b?bM^i?bk3 ?~ c4@c?ci^`V?cC?cpa?cr?~ d?dGL ?dTO?d a*V?dnJy?dEИ?~ e?efH?e-ex?eRG?eŭ?eB?~ f@f  R?f Rr%?fA !?f!<8b-?f3?~ g@g^SH?gd]?g3O>g bG?g;?~ h$@h%\#?hZc!?h_>Y1\>h'2?h3Y?l l l l lm m m m m m~ n$@nA?n|ԛ?n>?ni&?nwٯ;?~ o4@o'p?oN?o|?o/.Ui?o~Ϛ?~ p?pYe?p5?p+ٱA?p Pj?p>9 ?~ q?q5؀?q2?qy76?q$F?qn2d?r@PL@rt|8c?r6w\&?r?r)Wx?~ s@sݔZ ?s j֍?sZQf?soe2?st_lW?~ t$@tؼZ`?t9?t'eRC>t^?tI)?(2TBPhhhhhhhBPhhhh^h````````````````````        ~ $@%z?x҆?txH?N ^?~ 4@}?s?*.?5x_?~ ?l\?Ȳ`?4?~ ?g?\Z {?DKO?6?~ @ ??B˺?Qv0b?~ @TTJg??l g?0*D?~ $@ t?Ӝ?4?#G?,h|((BVVVVVVV((BVVVVVV`````````````            *@.@@H@@=@ *d@.@$@L@d@=@ *@.@@J@@=@       @d@@  .@.@.@   @$@@  M@@S@Q@   @d@@ B@B@B@V,R^<<<FFBFB8( (  v  <NMM? ]`@7  @"@7??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% M 3O&Q4$% M 3O&Q4FAR7y 3O7 $ 3 b#M43*#M! 
M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % M,3O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`7  @"7??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% M 3O&Q4$% M 3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % M,3O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`8  @"8??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% M 3O&Q4$% M 3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % M,3O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`8  @"8??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% M 3O&Q4$% M 3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% M,3O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`9  @"9??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% M 3O&#Q4$% M 3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! 
M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % M,3O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee ~v  <NMM?_ n]`:  @":??3` %` &` '` (` )` *)3d23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D $% M 3O&'Q4$% M 3O&(Q4FA(M3O 3 b#M43*#M! M4% b JUM3Oj&)Q &Sampling Rate (%)'4% r.MZ3O&&*Q BAvg. Relative Error (L1 Metric)'4523  NM43" Y &3OY &% M,3O&%Q4444% DN" lM3O(&&Q >Error Vs Sampling Rate: COUNT Aggregate W-GB-100 Test Set, Z=2'44eee ~v  <NMM?o- ~]`:  @":??3` v+` v,` v-` v.` v/` v0+3d2L23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% M 3O&/Q4$% M 3O&.Q4FA  3O.B r 3 b#M43*#M! M4%  ?UM3Oj&,Q &Sampling Rate (%)'4% qtMZ3O&&-Q BAvg. Relative Error (L1 Metric)'4523  NM43" a 3Oa % M,3O&0Q4444% ^N.M3O$&+Q |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44eee ~v   <NMM? ] `;  @";??3` v2` v3` v4` v5` v6` v7п;3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% M 3O&6Q4$% M 3O&5Q4FA+9 3O7|  3 b#M43*#M! M4%  MM3Of&3Q &Sampling Rate (%)'4% kIMZ3O&4Q 8Relative Error (L1 Metric)'4523  NM43"  u3O t% M,3O&7Q4444% O;M3O$&2Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-NEG Test Set, z=2)'44eee ~v   <NMM?0Z P] `;  @";??3` v8` v9` v:` v;` v<` v=?:3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% M 3O&<Q4$% M 3O&;Q4FA^>R 3OCn / 3 b#M43*#M! M4%  FM3Oj&9Q &Sampling Rate (%)'4% lydMZ3O&:Q 8Relative Error (L1 Metric)'4523  NM43"  f3O % M,3O&=Q4444% ]J3M3O&&8Q GError Vs. 
Sampling Rate: SUM Aggregate (W-SEL-GB-100-POS Test Set, z=2)'44eee xp   6NMM?p ] `$%  @"$%??3` v1` v>` v?` v@` vA?3d23 M NM4 3Q ;  WSAMP TaggingQ ;Q ;Q3_4E4 3Q ;  WSAMP SamplingQ ;Q ;Q3_4E4 3Q ;  STRAT TaggingQ ;Q ;Q3_4E4 3Q ;  STRAT SamplingQ ;Q ;Q3_4E4 3Q ;  OTLIDX TaggingQ ;Q ;Q3_4E4 3Q ; "OTLIDX SamplingQ ;Q ;Q3_4E4D $% M 3O&AQ4$% M 3O&@Q4FAT;3O=  3 b#M43*#M! M4%  7KM3O;&>Q Workload'4% j:MZ3Oq&>Q (Running Time (sec)'4523  NM43"  +3O +% M,3O&?Q4444% K ZM3OA(&1Q x:Comparison of Running Time for Creating Sample (f=1%, z=2)'44eee >f@  7 SummaryInformation(DocumentSummaryInformation84_1047883602!FefefOle Oh+'0@H\t  viveknar Vivek NarasayyaMicrosoft Excel@[M@`~X՜.+,0 PXp x Microsoft Corp. selGB_20_100_0.resAdHocChart2  WorksheetsCharts !FMicrosoft Excel ChartBiff8Excel.Chart.89qOh+'0@H\t  viveknar Vivek NarasayyaMicrosoft Excel@PRINT|.CompObjbObjInfoWorkbookؿK U <   y''   Arialw@ UwUw0- Arialw@M$ UwUw0----- Arialw@ UwUw0---------"System 0-'- y-- X!!---'--- y-- $----'--- 6 }}00IIbb---'--- y6-  ---'---  y-  -}}00IIbb))AA)))::))---'--- X!!---'--- /---'--- ,--  ==7- @@^=^=}}7- X""==%%dd{7<^^|=|=7--  $ss---'---  , $~~---'---  ,= $=yR=(=y---'---  , $---'---  , ${y{---'---  ,7 $7L7"7---'---  ,- -  3 ---'-- - ,@ U+---'-- - ,^= sRI(---'-- - ,} h---'-- - , y---'-- - ,7 L"---'-- - ,X-  -   $CmmC---'-- -  ,"  $ 77 ---'-- -  ,=  $=R(=---'-- -  ,%  $::---'-- -  ,d  $OyyyO---'-- -  ,{7  $7fL"7f---'-- -  ,<- -  T&-<'<Q<Q<'---'--- ,^- vH-^I^s^s^I---'--- ,|=- Uf'-|=g(|=R|=(|=gR---'--- ,- ----'--- ,- x-yy---'--- ,7- O!-7"7L7"7L---'--- ,---'--- /---'--- y--------'--- = :2 M"Error vs. 
Overlap: COUNT aggregate'# -  #*-**# ## #  +2 R(z=2, f=1%,W-SEL-GB-100)" " 37''#-* ----'--- y---'--- y----'--- y  2 L0$ 2 0.1  2 Y0.2  2 0.3  2 0.4  2 r0.5  2 %0.6  2 0.7  2 0.8  2 >0.9  2 L1$ ---'--- y---'--- y  2 J0% 4 2 J[20% 4 2 J40% 4 2 J60% 4 2 JT80% 4 2 J100% 4---'--- y-------'--- _ <2 #Degree of overlap between test set * # ##  ## - #  2 and training set ## ### ----'--- y------'--- \^  Arialw@ UwUw0- 2 TlRelative Error #-  Arialw@ UwUw0- 2 * (L1-Metric)r- ----'--- y-- - 6---'---  5---'---  5-   O- $2 [USAMP$!!)!---'-- -  5---'-- -  5- B BOB-  R22 '[WSAMP2!!)!---'-- - 5---'-- - 5-  O-  $2 |[STRAT!$!---'---  5---'---  5-   O-- -2 [OTLIDX'$!---'--- 5---'--- y---'--- y- - X!!-- ' y '  '      !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstyz{|}~ @\pVivek Narasayya Ba=  =w9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1.Times New Roman1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1xArial1Arial1Arial1Arial1Arial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)"Yes";"Yes";"No""True";"True";"False""On";"On";"Off"                + ) , *     $  P 8 P x "  A 
`Chart3'selGB_20_100_0.resAdHoc`iZR3  @@   SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100PURE GBCONGNEG CORR (Test Set)POS CORR (Test Set)z=2 Running Time W-SEL-100W-GB-100TaggingSamplingf=1%, z=2, W-SEL-GB, SUMMSSalesSUM (Test Set)SUM (Training Set) L2 metric"V aa  @M\\research\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` g xr` gxs` g t` gxu` Qv?3dJ/y23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&rQ4$% MP+3O&sQ4FA83O& 3 b#M43*#M! M4%  M 3O*&vQ j3Degree of overlap between test set and training set'4% `zGMZ3OL&uQ :Relative Error (L1-Metric)'4523  NM43"  3O % Mp73OQ4444% Y M3O&tQ z;Error vs. Overlap: COUNT aggregate (z=2, f=1%,W-SEL-GB-100)'44e????????333333?333333?333333?333333?????????e唀?͎T?ʃ9?bc?S ?>?O?'*V?v=T?'vU?'?m3?Z`?%9?Yk(?Z?M(D?D?qS?.o?9`?ŐL*?n?gx?e> gx @  dMbP?_*+%M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??U} I }  }  }  ````````` ` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? 
>?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA&   _        _           _~ $@zPPVn?/H?Cp?;]?~ $@9?8d˩?h4 ;?n!a?_~ 4@ҌE?LT?$ӡn?ިo?~ 4@1zn+?\u??ލA?ݰm?_~ ?P&?!!?ދ?C+j?~ ? ?T?g?9?~ ? ĭ?PC?֌ ra?*b?~ ?Xm_u?J\߇?o?, ƈ?~ @"p?'.+=?2Pl?Ωd?~ @^hHK?`;O?.KR?4fS?~ @OYM]? ?gA(?ْU?~ @PۆQ?Ɋ?s(CUL?@"?~ $@"J %?镲 q?\7V?$I?~ $@/kb?{24?P3ͅ?ɍ"k ?   &    "~ $@bL{)?2=a?}vuŌ?ˠDt?"~ 4@.&????Eb?"~ ? v?ߢ?|DL$zY?ɐck?"~ ?9τ&?Zc!?yTE?!!3?  ~ @f+/?O9&?Nwx6?yrM?   ~ @OYM]?@3?Nwx?Ӻ ?    ~ $@@I?}͑?מY?Է?~ $@rsB??N?2Yх?:0BTDXNXPN>ttPh||| `!`"`#`$`%`-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`" ~ 4@ ib ? 7O,? vnT ? 
6jA?!uCU?!x=jc?!VcEl?""~ "?"Yy?"Jͫ?"6Ĉ?"5'.?"#~ #@#̩M?#Hn)?#1p(P?#6W?/ފ5?"/~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?0 ||||||Ph|VVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV``a`b`c`d`e`f`g`h`l`m`n`o`p`q`r`s`t`` ` ` ` `a a a a a a~ b$@bog_y?bt?b?bM^i?bk3 ?~ c4@c?ci^`V?cC?cpa?cr?~ d?dGL ?dTO?d a*V?dnJy?dEИ?~ e?efH?e-ex?eRG?eŭ?eB?~ f@f  R?f Rr%?fA !?f!<8b-?f3?~ g@g^SH?gd]?g3O>g bG?g;?~ h$@h%\#?hZc!?h_>Y1\>h'2?h3Y?l l l l lm m m m m m~ n$@nA?n|ԛ?n>?ni&?nwٯ;?~ o4@o'p?oN?o|?o/.Ui?o~Ϛ?~ p?pYe?p5?p+ٱA?p Pj?p>9 ?~ q?q5؀?q2?qy76?q$F?qn2d?r@PL@rt|8c?r6w\&?r?r)Wx?~ s@sݔZ ?s j֍?sZQf?soe2?st_lW?~ t$@tؼZ`?t9?t'eRC>t^?tI)?(2TBPhhhhhhhBPhhhh^h````````````````````````              ~ $@%z?x҆?txH?N ^?    ~ 4@}?s?*.?5x_? ~ $@IDA?fd?_,?N ^?~ ?l\?Ȳ`?4?    ~ ?g?\Z {?DKO?6? ~ $@) h?5#?R8ߡ?E&?~ @ ??B˺?Qv0b? ~ 4@=?V?k)?ʤ6?~ @TTJg??l g?0*D? ~ ?M(#?|?5^?A C?dwW?~ $@ t?Ӝ?4?#G? ~ ?Ϲ[?ڱ?P,cy?"?" ~ @9`?ŐL*?n?gx?" ~ @9??EИ?$(~k?o?" ~ $@{O崧?e?F =b?JaL?4>ZZ~|DDh||```````````````````````` (            *@.@@H@@=@ *d@.@$@L@d@=@ *@.@@J@@=@&        @d@@  @d@@ @d@@ .@.@.@  @$@@  @$@@" M@@S@Q@"  @d@@" B@B@B@" (    "~ 9@"k? $>?b ?0!?  ~ I@W? (G`?gA(?uHg`?    ~ Y@K."?}A?Ĵo?I? ~ 9@3ۃ?פ`?γ?ti?~ i@x^*6u?!g? ?E`o`? ~ I@g?b?9*7QKs?IaL?~ y@4?V?t5=((?"070? ~ Y@nt?ZӼc?Ĵo?;KTo?~ @om?ʉv?JPB?m? ~ i@n!a?vۅ:?'3? x#?" ~ y@8L?QH2w??!3?/HM?" ~ @%A ?Wel?i2 ?Q0c ?4l Dl^^^T|^Z^ZDh|``@``,`,`,`,`,`,```,`,`,`,`,`,```,`,        ~ 4@n?S?;On?tV?~ ?Zd;?~  R@L7A`?/$?~ ?V-?&1? +?"~?~ @|?5^?S?bX9?Zd;?~ @v?J +?~ =@V-?~ $@n?S?Cl?x?      ~ 4@S㥛?~jt?~jt?~  T@~ ?"~?K7?%C?S?~ ?MbX9?Zd;?7A`?S㥛?~ @Zd;O?"~?w/?d;O?~ @x&1?~ ?/$?ʡE?~ $@oʡ?uV?Mb?~jt?      4@X@V@n?ˡE?~ ?? +?V-?B`"?.((BVRVVRV(BRVVVRV(B@`,`,`,`,```````~ ?#~j?K?v/?~jt?~ @V-?rh|?Mb?v/?~ @rh|?rh|?y&1|?Zd;?~ $@v/?S㥛?~ ?Mb?    
~ 唀?͎T?ʃ9?bc?~ 4@S ?>?O?'*V?~ D@v=T?'vU?'?m3?~ N@Z`?%9?Yk(?Z?~ T@M(D?D?qS?.o?~ ?9`?ŐL*?n?gx?vVVVRBVVVVV  (  v  <NMM? ]`  @"??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`@  @"@??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`0  @"0??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`  @"??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`p   @"p ??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! 
M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee ~v  <NMM?_ n]`   @" ??3` %` &` '` (` )` *)3d23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D $% MP+3O&'Q4$% MP+3O&(Q4FA(M3O 3 b#M43*#M! M4% b JUM3Oj&)Q &Sampling Rate (%)'4% r.MZ3O&&*Q BAvg. Relative Error (L1 Metric)'4523  NM43" Y &3OY &% Mp73O&%Q4444% DN" lM3O(&&Q >Error Vs Sampling Rate: COUNT Aggregate W-GB-100 Test Set, Z=2'44eee ~v  <NMM?o- ~]`   @" ??3` v+` v,` v-` v.` v/` v0+3d2L23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% MP+3O&/Q4$% MP+3O&.Q4FA  3O.B r 3 b#M43*#M! M4%  ?UM3Oj&,Q &Sampling Rate (%)'4% qtMZ3O&&-Q BAvg. Relative Error (L1 Metric)'4523  NM43" a 3Oa % Mp73O&0Q4444% ^N.M3O$&+Q |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44e????????????????????@@@@@@@@@@$@$@$@$@$@eA?|ԛ?>?i&?wٯ;?'p?N?|?/.Ui?~Ϛ?Ye?5?+ٱA? Pj?>9 ?5؀?2?y76?$F?n2d?Q?t|8c?6w\&??)Wx?ݔZ ? j֍?ZQf?oe2?t_lW?ؼZ`?9?'eRC>^?I)?e ~v   <NMM? ] `   @" ??3` v2` v3` v4` v5` v6` v7п;3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&6Q4$% MP+3O&5Q4FA+9 3O7|  3 b#M43*#M! M4%  MM3Of&3Q &Sampling Rate (%)'4% kIMZ3O&4Q 8Relative Error (L1 Metric)'4523  NM43"  u3O t% Mp73O&7Q4444% O;M3O$&2Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-NEG Test Set, z=2)'44eee ~v   <NMM?0Z P] `   @" ??3` v8` v9` v:` v;` v<` v=?:3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&<Q4$% MP+3O&;Q4FA^>R 3OCn / 3 b#M43*#M! M4%  FM3Oj&9Q &Sampling Rate (%)'4% lydMZ3O&:Q 8Relative Error (L1 Metric)'4523  NM43"  f3O % Mp73O&=Q4444% ]J3M3O&&8Q GError Vs. 
Sampling Rate: SUM Aggregate (W-SEL-GB-100-POS Test Set, z=2)'44eee xp   6NMM?p ] `  @"??3` v1` v>` v?` v@` vA?3d23 M NM4 3Q ;  WSAMP TaggingQ ;Q ;Q3_4E4 3Q ;  WSAMP SamplingQ ;Q ;Q3_4E4 3Q ;  STRAT TaggingQ ;Q ;Q3_4E4 3Q ;  STRAT SamplingQ ;Q ;Q3_4E4 3Q ;  OTLIDX TaggingQ ;Q ;Q3_4E4 3Q ; "OTLIDX SamplingQ ;Q ;Q3_4E4D $% MP+3O&AQ4$% MP+3O&@Q4FAT;3O=  3 b#M43*#M! M4%  7KM3O;&>Q Workload'4% j:MZ3Oq&>Q (Running Time (sec)'4523  NM43"  +3O +% Mp73O&?Q4444% K ZM3OA(&1Q x:Comparison of Running Time for Creating Sample (f=1%, z=2)'44eee ~v   <NMM?P ] `  @"??3` JB` C` D` JEA3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&EQ4$% MP+3O&DQ4FAc k3OE 3 b#M43*#M! M4% x u@M3O(&CQ R'Number of queries in workload W-SEL-GB'4% i MZ3O6&CQ L1 Error'4523  NM43" k t3Ok s% Mp73OQ4444% V M3O0&BQ x:Error Vs. Workload Size z=2, f=1%, Test-Set, SUM aggregate'44eee ~v   <NMM?0@] `D  @"D??3` hF` hG` hH` I` JQ3dh23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&HQ4$% MP+3O&IQ4FAF= 3O5  3 b#M43*#M! M4%  CIM3Ox&JQ &Sampling Rate (%)'4% dMZ3Oj&JQ &Error (L1 metric)'4523  NM43"  w3O % Mp73O&FQ4444% V ;M3O(&GQ LError Vs. Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-NEG-100, Test Set)'44eee ~v  <NMM?]`  @"??3` hK` hL` hM` hN` hOQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&NQ4$% MP+3O&MQ4FA4 3O,x R 3 b#M43*#M! M4% A :M3Oj&LQ &Sampling Rate (%)'4% dxMZ3Ob&LQ &Error (L1 metric)'4523  NM43"  >3O % Mp73O&OQ4444% VM3O$&KQ LError Vs. Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-POS-100, Test Set)'44eee ~v  <NMM? M]`  @"??3` hQ` hR` hS` T` UQ3d6{23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&SQ4$% MP+3O&TQ4FAg< 3O_ 3 b#M43*#M! 
M4% K WM3O&UQ &Sampling Rate (%)'4% R~MZ3O*f&UQ :Relative Error (L1 metric)'4523  NM43"  w3O % Mp73O&RQ4444% )VW ;M3O(&QQ KError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Test Set)'44eee ~v  <NMM?f i]`  @"??3` hV` hW` hX` hYQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&YQ4$% MP+3O&XQ4FAY 3O  3 b#M43*#M! M4%  :M3Oj&WQ &Sampling Rate (%)'4% RYMZ3O&Q&WQ :Relative Error (L1 metric)'4523  NM43"  >3O % Mp73OQ4444% )VW ;M3O(&VQ OError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Training Set)'44eee ~v  <NMM? xx]`  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` hZ` h[` h\` h]` h^Q3d]23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&^Q4$% MP+3O&]Q4FA` 3Oq  3 b#M43*#M! M4% K CIM3Ox&\Q &Sampling Rate (%)'4% @lMZ3O([&\Q :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&ZQ4444% FV M3O80&[Q FError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100, z=2, Test Set)'44eee ~v  <NMM?@P+]`  @"??3` h_` ha` hb` hc` hdQ3d23 M NM4 3Q: USAMPQ ;%Q ;%Q3_4E4 3Q: WSAMPQ ;%Q ;%Q3_4E4 3Q: STRATQ ;%Q ;%Q3_  NM  d4E4 3Q: OTLIDXQ ;%Q ;%Q3_  NM  Nd4E4D$% MP+3O&aQ4$% MP+3O&dQ4FA 3O. 3 b#M43*#M! M4% f+ CIM3Ox&cQ &Sampling Rate (%)'4% -lMZ3O([&cQ :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&_Q4444% 4VB M3O=2&bQ CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100,z=2, Test Set)'44eee ~v  <NMM? `pZ]`  @"??3` `` he` hf` hg` hhQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&hQ4$% MP+3O&gQ4FA{ 3O_ 3 b#M43*#M! M4% L M3O$&fQ \,Number of queries in training set (W-SEL-GB)'4% gM Z3O&`Q 8Relative Error (L1 Metric)'4523  NM43"  ?>3O >% Mp73OQ4444% 1V?M3O&&eQ x:Error Vs. 
Workload Size z=2, f=1%, Test-Set, SUM aggregate'44eee xp  6NMM?`p]`T  @"T??3` hk` hl` hm` hnQ3d23 M NM4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: OTLIDXQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_4E4D $% MP+3O&kQ4$% MP+3O&lQ4FAFgL 3O5] 3 b#M43*#M! M4% I OIM3OA&nQ Workload'4% dMZ3Oz&nQ (Running time (sec)'4523  NM43"  k3O k% Mp73OQ4444% VM3O&mQ .Cost of Tagging (z=2)'44eee xp  6NMM?p ]`  @"??3` hi` hj` ho` hp` hqQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&qQ4$% MP+3O&pQ4FAj{ 3OYu S 3 b#M43*#M! M4% ) ;M 3O%(&oQ GDegree of overlap in condition ranges between test set and training set'4% lMZ3O([&oQ :Relative Error (L1-Metric)'4523  NM43"  w3O % Mp73O&iQ4444% VC M3O!0&jQ ?Error vs. Correlation: COUNT aggregate (z=2, f=1%,W-SEL-GB-100)'44eee >@7 7 SummaryInformation(DocumentSummaryInformation84_1047883576]!Ffp؄fOle [M@$(՜.+,0 PXp x Microsoft Corp. selGB_20_100_0.resAdHocChart3  WorksheetsCharts !FMicrosoft Excel ChartBiff8  "2W^˂iP!i;v`!W^˂iP!i>@8 v xڕ=KAg61D(b6i&N4zF0AMccim%l~c晛{wfo$&o0iIl{Șa)6db(/&x CHɐHELN˵GFQA˖6ɴ+F^_: Dye _nC=/m<17m4^3nz*4vfi~m=PguŹq.q[ >/p1s޹V7F7u)jK喼znw۹i]4o_pP*Ro}UDdJ  C A? "2$+Fl@/S>v`!$+Fl@/@ PVYxcdd``g 2 ĜL0##0KQ* W)d3H1)fY+@m[W£S [֏>ٿw:M~gZ @myՓiUUY=HIzʼANkO,iڂ Dd J  C A? "22/EecS$Cv`!/EecSr8xڥ;KAg6j.1 ",|@ !́FAcpV X"l_蹳w񎽛vAh|bѵ1m1tGZ=,}/.; * P0DU#(pc0Z[WTF]z(^}c8D϶k,]ESd'5ė^~)rW^ gݚCZgqu jl4g3eoJ'jY^[lwNTm)u;g^,Sw[ן $8ՓVy_W >wXCx#YA1JO)OE-Ւ 3Ķ 4nDd 0J  C A? "2GPT3DQKAEv`!GPT3DQKALYkrxcdd``^ @c112BYL%bpuf<y3갛?] j Q! ~ Ay 1j.?sHE`%p?i`_ٞy@Q ks ~Q욊%) 0\>_A_˅*Uށ"Mw\ q `pjfdbR ,.I@XP- zDd( J  C A? "2 쏹5 o?;QE߫bHv`! 쏹5 o?;QE߫>@1~xڕR=KA^sa!Z(Bbx3 ,,--4?B,J,llT< `X3A`5`J B)cpN@:-eJ0[8ir}^k PkQwR'J񽊪4 )' 3ZwDZ{J[=6ZB:WbgI~(8~߭w w|KHXx\ǂ ˌ)WDZ[åPFߤXˢP3~QY: =ؒ5\˛/ ;mWy_yWYبzl9@V{9HEݞf?|[2 ycDd WP  S A? "2 #;tQFcJv`! #;tQFc@dhxڥ=O@ǟ;@[Q8`YI\(%H !111o aM'?8P﹖ "M}~wߪ P ߋ{íe)޻Aި}GkZWUy4< G|hYoĊ&b!7zS!J2UvWZ\\ 4u';c]7TTD?4q|q_W6/ IG^r~9Mu| /J~.,F3_Ofvq%?kj&5|b \g'|yy) u*WD _…!.A7w}#[C`5_iDd J   C A ? 
" 2'ڼhԫuU!Yv`!'ڼhԫuU  p(+xcdd``fe 2 ĜL0##0KQ* WôEA@RcgbR V~OĒʂT@D22Bar`fm wfjQ9A $37X/\!(?717k0@ddeĵɌ$cX bV5|)._ʯe|ʅ,_x" 3 9@W@\<2pjL̈́۸Aj(.c8]F7NĞܽ!0AY  Q! ~ Ay ĺEp?? aiF_fg`c4w 4.J1P0 30bQB;Ha0G NTh`ic 9&iH!{\ KьLLJ% 4?š;Dd 0  # A 2 \spq" wNĉA~^v`!\spq" wNĉA(+xڥS/CA]?V48!HmPфV4/.O8:9ps$gG_a_v|3|oB@"/0jT P"Dk!y^l:?/H{ZFAx*Œgk ؜ BFLf=h)NǤ&Ve;[/tj2w@'pPc"$S,'5<ȍkO|A2(:ə|c|!NTƢ_gH3f-#>(?:r__I/ɯwz xAۉS~'R :(:+EC!BuY48lO%nmLŔ˼9TO1; ќ, 5X-,e`wIE;+ݘw՚GDd0J  C A? "2Jq*3QX av`!}Jq*3QX2` kKxcdd`` @c112BYL%bput/ @2@penR~CP J7k30I] Hqda<y.\Ic3waq%CiG\ jFM [ DYd++&1~!&"&fA L>H? ٕ0~%~( 8> + ! v 0y{iI)$5ba\~bO}Dd8J  C A? "2T:p]OXK(?:r__I/ɯwz xAۉS~'R :(:+EC!BuY48lO%nmLŔ˼9TO1; ќ, 5X-,e`wIE;+ݘw՚Dd 0  # A2 \spq" wNĉAhv`!\spq" wNĉA(+xڥS/CA]?V48!HmPфV4/.O8:9ps$gG_a_v|3|oB@"/0jT P"Dk!y^l:?/H{ZFAx*Œgk ؜ BFLf=h)NǤ&Ve;[/tj2w@'pPc"$S,'5<ȍkO|A2(:ə|c|!NTƢ_gH3f-#>(?:r__I/ɯwz xAۉS~'R :(:+EC!BuY48lO%nmLŔ˼9TO1; ќ, 5X-,e`wIE;+ݘw՚%Ddl|J  C A? "28}J#lNA!c^kv`![8}J#lNA!:`0)xڕJAƿrxi!Z(S(O Z\ $,}<` Csaof,mj؂[su)& ( UGjkr͊elp0bǢ%r#4q޶l?X'n${Z.K6R`{BxvJ/)͔ԟt>t=K(ЇNv9|sܷUǯ>[PǮ”#} {XMߎw`f.4^nN~;Dd | J  C A ? "2W:$ն^kow$ymv`!qW:$ն^kow$ `0?xcdd``dd``baV d,FYzP1n:cbov`!Z Cm?>c @ F)(xڥOHQ{fwfvuVW7[ak.AFiDeשs]|jnS%7Re~G~k,Kg 9eNC\GniMSqҲy'˛v&n(~!aKn~Yޤ~M/9ug7nAy~c@ $o4ܜM>fiM{^J7ע{'NI(ś ff:͋l~f^@5UnX_O**m{ԉZY]Ncqt׸Z!O9 `_A\AFS "Je(JBl#]Dd J  C A ? "2'ڼhԫuUsv`!'ڼhԫuU  p(+xcdd``fe 2 ĜL0##0KQ* WôEA@RcgbR V~OĒʂT@D22Bar`fm wfjQ9A $37X/\!(?717k0@ddeĵɌ$cX bV5|)._ʯe|ʅ,_x" 3 9@W@\<2pjL̈́۸Aj(.c8]F7N(?:r__I/ɯwz xAۉS~'R :(:+EC!BuY48lO%nmLŔ˼9TO1; ќ, 5X-,e`wIE;+ݘw՚DdDJ  C A? "2hԼΊ񈌤8s.yv`!hԼΊ񈌤8sr (xcdd``db``baV d,FYzP1n:,56~) @ k77p10D3@0&dT20<i`7LYI9 @\Vo_ŀ΀H9@ڈfF%1r-n#d"N M 2 M-VK-WMcNPs?0sTUBL :BL8@|8?YS++D?2ą%Jb lgpw2@{p2 Bŧ?= psDoeE3C%]]RQQ k鮼 ;9b*- `pgdbR ,.I@X`W1- Ddl J  C A? "2< *wXT{ {v`! 
*wXT{p1B@.x[]lTE>.U1jdEbĴU Pm\iMƦ,B`tK,JTI_L4i1!U"jⓏ`L|IbBB;svޙ݅6ws97?wjA tV%O-K]Tx)l m٢BXyUߘV:;:^--?\WYUg,j=gaAކҴYTENb+KԿ[܀=d1k ~ ѪľǬ^µٳ(7%JiK͡HL]n^FUP+=\=uon'Ki{fLdG&Ki{ N , 8'djgִ] \µpm'\EU$\eU&\Skptۇ.µpMIup%\kp91rdnn(%dw.9`U[}ŕ8дkUG>}z }(FrԾznV{}5Ў{ xg}w%f)NJWt.3Cg6ܓ(w=lg?23(VH ӎ2v3{; 0/򃥜ЈOt :8cUeW=LOnΕL\$" Oq!aW sJ$y7nׇ73pQ[spi{WřaI-Q;jF'p8t8$a: o%gCke87`08/^f^kqk]Yrbahem)1i߮s9v{dpx8al4g佦%i; @j ۰y#zg_?%|-PՒO,ݩgt@66 ~7t8nHz׿"gbkW#LћF((J(վeU_}ꎒ|[f1:%'K⒪S%.ԯ_']g&|2AqPzcJ gJI$/QټG9w=XzGp!hhB/oSѣ4-fͼrP^~y =cKyƞl׳}Ns ٪O3/6ː56we. [?jA7w^7ĭ A< Ddl J  C A? "2 Ľ¥Ԗ"v`!Ľ¥Ԗ"1<-x[klU>3DM0 J(vK-Xں-ekۭŢMTG H LOIJB 1 j{޻;3wfcl{sν}VŰ?~B*/)j*%TEeb'تxxZ%2~#ZE6]nSc|S*/Eeɻ\Zx}y{^S|(P6<߉ehWbHU1#G{7P"/v>:yh-o+FV:rvR7a,qY5 Yo %J#H~5==2F$!3JiNyjȆ |K )YB@,L _BY* %XSk:DEDEDq2dB#d=B6)dBvA.5!&d얐{B㼂f&͜%G7eQgh?0.Ũ36b֖K^^[L^e2^ px)`x)&㥰)ov56 \W,p)pu:3Js9Վ& uLfi,2cOv;C!a7.ƅ݄v iYin!d3mOx;lX^¾y&  Z0H.!},4) ZQ8N.!=@ey Zp>.!,ct@p>\Bg}Da>{\B=uaecQ$GLH9v"lE)ħjRRmF҂\Ӏ<YG,ql8Eҁ|E.qK XFُ;бđ3/g:d^dwTf$ZzL.d[RLϋV ?@~A˅xyG9-- f!11I:f811Ej܀#'hTm؄'Q!ewP>|22G"Neд30y8zL+J?sUN' XeV;镨l6۞t;rE#%B|+Pry=܊iYPϝuy!33Y"WŐͻ2^ ܉٥($B}6/eG 潒^;2As'BdϦ 7 _fG+aǬ2^!UH#wY"ıӼ6cUfD=w"Dv \k}Cyq7UoMS0M>d^ծ;  ee)9ïԊ%x1::<mD74M 'Dmxw2rCfll~(e$Z~ p[ZGjb)}4ˢ e{vUGZcH4|%2Y]ދ5x{$"AOE/g(&Կ'mV;YjV~&`Q˻|AGȫ Wu&_j/(^ C B4Wqe73y/lT%ש2CrJ6Dc_(e5 i<y7og]4kkiq޷K[^Ʌ{bln@iri$ӭ焇Q5Y}%o !xf 5&CZ2jc`fqdj4ƓG͖atԨDr ;w{41{[<ƝdEt9J% .y3"+MtSQ>Ev @;v(NgL{ڠ"i49B⩉ط'W>;bӗYYR橩1SQsp3f'Ӽ(/E9pʓ%\th1/c"/ccX$)#q8-gce#RJq[ YlS<fVvƴ2b7;.=2,hG(q!wDX?Km̪7􈭠-Cl~,56k{6]T+B65v޾s1[/ 2@6;\C Ou Dd? J  C A? "24 /kpn} \v`! 
/kpn}b1=8.`x[]hW>3t-XV5%Ik VIvjcL4fS4 #j,BR > ( "[j s{vg&L3|{37D|s,.`?",izX%DtM&th>,=~V,?1͐4옟TizlOaqÞ_Z=+E֧gV+C@چS,K\fGFɖ`=jo$4m~\'}t_W5h}Z?i\FsD)z'X$Nf1o*"#ȾL)!R{,BYC&U;JDZ~hrV6Kؚݩ-XDZ;^B r5k]$忋wQ,vF2Yd$&q'9#$dWIvd7Ivdmo#xioH~d},r>*T@ m;v SFTk yk7K&,-pZd)B&KqLk d¢ k}5DugzWJ4&\Äkp,WC1LA5H @ @ @ @ ːm".Mv $dw5G.k3rfZV[m.l#XS(>mvH{[]Ⱦuf̽ʝ(Lk3d#d9!; 0m>wxLx'd  #dwC7\9~d?'d ׁik;![DD[~vVb6F} .+<)Zҧ1k"x%0d^do ;pM䓃%`w`2(7Xa`967XLa`B8 iFRf"%Mh.LJXB)7 |!Q3Wf]Q"Ef=D5j\FDsx{j"!EWTDt EPy2lٙ Y^ռ^!G`YٕV?`]fۈzށ(DU{_1u:B; =ȑv]o'y!.'Bi8k*DHJ"/[wz2^ۥ'B#9 "$@ڏcuv]o?y!gSO&k\}U!BrGH7p8f@=o"D(7`!9#WGƭhlnF=o"v ?W7TG(Y/]fˡ7-VX鉾L&c2?VFS Uyk1-]e ){a$a3%^9LohXv4#74nJlNM%FF# _ _ Y\+!~m9Ga$}_gr׷$R,C7_8G{ E?jڤmc{ɂ 7?wov;6;9|~6COL?o"y7"Gj%xKKpJ7yD/lJ9.i+\ ( ɏබ=\]u.sU4fְý:~?pPy5k4s։qqbY "Q#OԑTW;RYRf|—ef_Vj_X+0/dcFPWjWiU'kK$ݒ=#psj15,sv_tGSB1NAf9Zb'@; jI7X뽪μB\O5ymw@P{-6{gIoD[x(*m~UKChK)KeY*j5KF_ @[eK?KTO^j_^YNe\󒡼dL~ұI 1#Yel%їݚY0N=y٨~u66획i4Fzq(''!澷¬9L~Vҋ'0v]oJ[[QlA,u]u{dVL_i!ks>('jw[h Dd! J  C A? "2 ;e\u S[.v`!;e\u S[ 1AH.x[ohEmRr5*?-Ejc.5^3=kb?łxZ)@ABjOBP'h ڊٙw7{ڬٹ߽f̼/@;<)KY.y2 4LQj7/C҃`njrYĤxmQID갘.)M;?NÑqD QϠÞ1Z0:"3͒ᢶȩPtY~q!׷.@ zO~\'*V}ںQt6&Zf@ܿ_@?; ,Ilz cm->P-҂#GzZT[+'RU҆TCmh A~.jŚ1|KeWe~G_ ߁#Х0]5ZzG??E'ԑrxy K%,K4aӄ"a;G%.+aVv켄{aMd-QYs4PDoA-T'-ex5j_ ,%MpB`0Œd)anYRsykձq0"$^k;NF(Ev<W%m8Dr+GJīDJīDJīDJīDJIofH#[ w"HQ\U>S8G\: LK?l5M ٛ1PGEx&I B_k~N@>&5{:$fr1AFgIm3>3wL*}KHdc6Ov~φ,'FyxfH#8x;CiMQm7 <B'8\v`tي%.>12$$.|e7F,Fp}moa| kjH\IeVYyԳfKyP yH7OL.*b x\dwR#xhO3]z$'|-O**W |[_XiimVlp^:[cx\g7;*yT!W8ҋ-߀j >qT9vјϽ>~ ?—bWCn_o)\(hgf= K֫3 76? e3wIgZ1Н4Ԝd^4q.(vf5јt>3>|gbwFǦ4x8Ԉ-7ZvYn-xmV6et0c'lCуF Vf{$d1>d8ݗzت){d{;j7m_;^A^Q7+oB^r(z N=~&VGL4R~gkR =ئ{F==[x#qo#sND;F{z0y0ZQjhqFpiς"Dz+wP9[{V,8nkͦ#rv;_k!klm.t&ݼz/" t Dd J  C A? "2 V,y׀ۢdrv ڢv`! 
V,y׀ۢdrv<F`/x x[ol> !UtJZRٖQ8O[a 6>q1!El RI&\J[EBHV5R?}3-V~y7o)<SGҴ[Y.y+fl40Ek 3jF,_1zޛEw8xl1Y*?9a0,#kS ыݟAyaw?F-R2LLKka5z5$Y1~XXjA!+x|Z`|K~AZ֡m֌c_ns-+CcXܑƖM ZviFg=b[X,~j%8frthHuP3_ֿeZ~*ZxAa+Ct њf Bo&fl-o1\hCD"Ѧ.%ڇDhSǀߖ7s/|=g0J /xT\/E,ԍG" vתL8o=߅=>6A[in^X46s"hvU kXf;j'\1Cz up 5  \%U"\%U"\%U"\%U"\%%.mfIn^{&C$w䎓qES^?] \U \kpup]#\kpݔag, /\HH~dWѳ I97l|oVȳY}~yOA0p"Bf?dK^’XsL4XD q.G0D'XX0X >Qr#+=27,,gy̸HaaQ,FH <Q%}8x􋡔Ƈv&%k7%lT=p#ey;T%xa*/0f,`XPC$7,sZDHGDx,Gj% "!@TsDbO!r5&}+v~}yx$?q:w`/K*McOVծ8O: U+2>#[~[c9(H#_4b0:c1jBUSU׍|XUOXh~soa!6PN?*Mcj4b0 JB^y+Zø4܃5,dŵ[߻kJSF/ c Ř5,Xzq/mw|XgWWR۬gwWxʮf#;#O>Zr20(Rm|MS4WmK΄;ļcғw ;(p\MS4|Sw>WIj[ZdO~ZZ`~r,'nwEwEl%C1oc3cTK1)?q|5|׵W"͐;yՒ7ycnR6;n7ᛧ{WnjX]KnqƽSmF #^M uklJm\Il;?t ;MTc~ Yמvt̶on_W{%~o4NQ*7bz%HzY{YJ. ndwndݐхXo%tki,$鄯r$G:Z_zG&Յg?v_^kTKQҨK]e]c^/Ysa wAᗣ/K:boR=ܓ~$LJGݱSG0pieፎ,_㡽:7 --X'?1Y_AyK3/m 5Pi*[ϲf>֡lK\ϖ(V 9#:wF.%K1>_]"|囹|5|EKp+7,a8_GM~?9ưQ%0(m%Go௜k`2D+|; $mhD'8i ?[*iiiIT ҏ@13Ɛ|a3 g/Am0bxp[@zwa3 )HwaOQ3%#%#%#%)y˼-O<@kDF'DG{Dsl[0 J Ru߭d#3ugjB83ñ9q *O2X4ʘS*h1_5AYuAiX3تM<*"*"%\KkpM&j&j&j&j&ʭ ɝ%$wΑEHr/$GKS^;BʄLJ,e:95GI̗ef9υ^Ŏ,G[jr $0)/B#>u6/(? 0P1!1 2 ?u{8ïA&#QUwqM ~ APWq? Aѳ?WbBfYl|k>ZLN[\EzJ'S"ɉhXJ1aD$F R )62ƓƒbXnńe6 nX3/)l,s%j!򒒑8y3L5JV{yK&0>LvDڕ:(!QUpU FގU3 ^HT31f\PZE$"xƈj-"n#PT."z6BQm OxlĆEګ+v\Sq8'N}2ND|bi2 &u;.|/ c hXʗ1.#_<zųW%_MS"]U+zuݩhw|5|EK6v)S'b0, EZu+j:|%&MV# '=_|Lya 7W5o/|Ӛ" wT=y_VhQ|yG3Gyijw>N^iM#'<_u%|4|4oi|\<"1<Ưɹ<Ƣy@y4|Ӛ\|;~M·;uc)Y|M5|uz+wTtI݁g;J@yk{?}F C )ۛ7O:`Y5+{gUJթ|lԔUB7]9ޅރ@7kKJEI ~u湻u|gz#+4ouoѩh aa; C2^G`u756Gz#99%])3J>?|rׄJd {T;{¼V)hm./ v~(m!<]__O{vhCR&ߏӧ`% Zd1%?22έ`yQgNzkxB5 Euo=O^nc &-b×1/4?߼5{@=[_s'lۛ} 8#*wF ۧpm&kc+_&M ۻi~e ߲h.s+# }XK_#ʳp7AeѾ ^ Yaź0 c=o_ܛk Dd J  C A? "2 E񳧄kkZ v`! E񳧄kkZ42y,("o x[WnG.5xg)*:&.gdh$=ӻpgv[7Ik$(PBP@aJm)HPTA%m.̛y3{z˻yy_3[O` ,iӮf䵸AbaZ 1}F15m~+uI,Y_,}?vCH}8F يݞA#][xMQ}fgگࢎHj(}q \~o b A_A6 F m>l6k\|`2dޔW\Wtc#15V!l v}#lAcZD=ѡm9չsd; FzIWC1PRINT&CompObjbObjInfoWorkbook5I#d v<   y''  Arialw@" UwUw0-Arialw@E" ' UwUw0---- Arialw@@! 
UwUw0------------------- Arialw@ w UwUw0-------------"System 0-'- y-- X!!---'--- y---'--- X!!- - $]8o]- $o{o${8- 8o&gsHwUzL~{- $o]8- $o{o${8- o]]kppEE@No}- 2 _0.0j%%2 0.375%%%%8]]]iwwwP ^ 8FArialw@ k UwUw0- 2 F30.50-Arialw@ l UwUw0- 2 P0.625-Arialw@ m UwUw0- 2 M0.75-Arialw@ n UwUw0- 2 0.875-Arialw@ o UwUw0- 2 {1.07---  ff- $*- - $GGG- G- - $- - - $GGL-]G- G-L-L]]- - $]- ]]- $o{GL-]o{- - $ -  - - $ o{]]-  {o]]- - $)) O)- ) OO)- - $O o{o{O- O {oO- $X*X- $X- X- - $GG'G- G''G- - $''- ''- - $o{o{G'1o{- {oGG''1- - $1'1- 1''- - $o{15- {o155- - $515- 515- - $---- -- - $o{o{- {o- - $--)O-- -))OO- - $Oo{o{- OO{o- $X$X- - $5@>- 55>@- - $@>5@>- >@55- - $KpD@>- KKDpDp>@- - $@>pD@>- >@DpDp- - $  ( - (( - - $(KK(- (KK(- - $-- -- - -- - $- - $6h6$6- - $QY- QYY- - $YQY- YQY- - $KKQpDK- KQDpDpK- - $pDQpD- DpQDp- - $  (? -  ((?- - $?(K?- ?((KK- - $  )? -  ))?- - $??- ?--    -888HHLL 2 h0.% 2 +0.1%% 2 '0.2%% 2 "0.3%% 2 $0.4%% 2 0.5%%-----'-- FF 2 PError-)---'-- X!!-----'-- ;X7 2 iAGamma4%99%---'-- X!!----'-- X 2 *Delta0%%---'-- X!!--'-- y------'--  = 32 OError vs. 
Lifting parameters -)%%))))%%<%%%<2 #(SUM aggregate, W-SEL-GB-100, f=1%)-09%))%)%%A--)40%%%'%;---'-- y--'-- y--'-- y-  X!!--' y '  'Excel.Chart.89qOh+'0@H\t  Viveknar Vivek NarasayyaMicrosoft Excel@!@(՜.+,0  PXt | @\pVivek Narasayya Ba=D(4 =x< 9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)                + ) , *   `qChart1Sheet1OGSheet2BHSheet3`iZR3  @@  I 0.500.6250.750.8751.00.00.1250.250.375 OE  @M\\research\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` ` ` ` ` ?3d23 M NM4 3Q:  0.50Q ;Q ;Q3_4E4 3Q: 0.625Q ;Q ;Q3_4E4 3Q:  0.75Q ;Q ;Q3_4E4 3Q:  0.875Q ; Q ;Q3_4E4 3Q:   1.0Q ; Q ;Q3_4E4D$% MP+3O&Q4$% MP+3O&Q4FAW 3OQ 3 b#M!  O43*#M! M! M NM43 +MZ4%  tMj 3O(&Q Gamma'4% f|tM` 3O &Q Error'4%  LtMj 3OU&Q Delta'43?" :,dd44% X }M3O$&Q AError vs. Lifting parameters (SUM aggregate, W-SEL-GB-100, f=1%)'44e0.00.00.00.00.00.1250.1250.1250.1250.1250.250.250.250.250.250.3750.3750.3750.3750.3750.500.500.500.500.50e*g\?ET?f*?RAE?"6X8I?@S?CV?>?AC?U3k) ?g67?Y?l?cdU?]a?w-?qPi?E(?VDM(?u$?!gx?C_+?A?yq?*A*?e> *A @  dMbP?_*+%"??U                *g\?@S?g67?w-?!gx?   *g\? ET?f*?RAE?"6X8I? ET?CV?Y?qPi?C_+?   @S? CV?>?AC?U3k) ? f*?>?l?E(?A?   g67? Y?l?cdU?]a?  RAE? AC? cdU? VDM(? yq?   w-? qPi? E(? VDM(? u$?  "6X8I? U3k) ? ]a? u$? *A*?   !gx? C_+? A? yq? *A*?d(  v  <NMM?( P4K]`  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` ` ` ` ` @远3d'23 M NM4 3Q:  0.50Q ;Q ;Q3_4E4 3Q: 0.625Q ;Q ;Q3_4E4 3Q:  0.75Q ;Q ;Q3_4E4 3Q:  0.875Q ; Q ;Q3_4E4 3Q:   1.0Q ; Q ;Q3_4E4D$% MP+3O&Q4$% MP+3O&Q4FA@ 3O@ 3 b#M!  O43*#M! M! 
M NM43 +MZ4% , Mz<3O(&Q Gamma'4% Mp<3O &Q Error'4% m oMz<3OU&Q Delta'43?" :,dd44% P M3O$&Q AError vs. Lifting parameters (SUM aggregate, W-SEL-GB-100, f=1%)'44eee xp  6NMM? `"x]`   @" ??3`  `  `  `  ?3d23 M NM4 3Q:  0.0Q ; Q ; Q3_4E4 3Q: 0.125Q ; Q ; Q3_4E4 3Q:  0.25Q ; Q ; Q3_4E4 3Q: 0.375Q ; Q ; Q3_4E4 3Q:  0.50Q ; Q ; Q3_4E4D$% MP+3O& Q4$% MP+3O& Q4FA 3O 3 b#M!  O43*#M! M! M NM43 #M4%  9M@/3O(& Q Delta'4% 9M@/3O(& Q Error'4% T M@/3O7& Q Gamma'43?" :dd44% <K0 M3O-2& Q @Error vs. Lifting parameters (SUM aggregate, W-SEL-GB-100, f=1%)'44eee xp  6NMM? !i]`p  @"p??3`  ` 3d23 M NM4 3Q:  0.50Q ;Q ;Q3_4E4 3Q: 0.625Q ;Q ;Q3_4E4 3Q:  0.75Q ;Q ;Q3_4E4 3Q:  0.875Q ; Q ;Q3_4E4 3Q:   1.0Q ; Q ;Q3_4E4D $% MP+3O& Q4$% MP+3O&Q4FAuw 3Ouw 3 b#M!  O43*#M! M! M NM43 #M43?" :Zdd  3O % Mp73OQ44444eee >!@, ,, 7 @  dMbP?_*+%" ??U>@7 @  dMbP?_*+%"??U>@7 SummaryInformation(DocumentSummaryInformation8<_1036782254n!FPf@뗳fOle Microsoft Researchh Sheet1Sheet2Sheet3Chart1  WorksheetsCharts !FMicrosoft Excel ChartBiff8Excel.Chart.89qOh+'0@PRINT)CompObjbObjInfoWorkbook     !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~E!  
9   ''  Arialw@) UwUw0- Arialw@ z UwUw0----- Arialw@ UwUw0-Arialw@ UwUw0------"System 0-'- -- !!---'--- - - $ossoos----'-- - oo=o |o|osos---'-- - =- so soso---'-- -  - so o-[o[o|[|o[os[sooooLL**---'-- - !!---'-- - ^Z---'-- - ^Z- - F GG77vvS- M::%%vv S- G v vSNUUXXuvuvSF--  $1F[F1---'---  ^ZG $2G\G2---'---  ^Z7 $"7L7"---'---  ^Zv $vv.av---'---  ^ZS $ShS>S---'---  ^ZM- -  b8---'-- - ^Z: O%---'-- - ^Z% :---'-- - ^Zv .a---'-- - ^Z S !h>---'-- - ^ZG-  -   $2\\2---'-- -  ^Z  $---'-- -  ^Z  $---'-- -  ^Z v  $v"a"v---'-- -  ^ZS  $S h3>3S ---'-- -  ^ZN-  -  f8-N9NcNcN9---'-- - ^ZU- m?-U@UjUjU@---'-- - ^ZX- pB-XCXmXmXC---'-- - ^Zuv- _`-uv`auvuvauv`---'-- - ^ZS- k=-S>ShS>Sh---'-- - ^Z---'-- - ^Z---'-- - --------'-- -  ;9 92 K!Error Vs Data Skew: SUM Aggregate-)-%0%%-%%5-09-))%)%%32 (W-SEL-GB-100 Test Set, f=1%)A--)40%%%(%%-%'%;----'-- - ---'-- - ----'-- -   2 \0% 2 0.2%% 2 T0.4%% 2 0.6%% 2 K0.8%%---'-- - ---'-- -   2 1% 2 1.5%% 2 2% 2 H2.5%% 2 A3%---'-- - ------'-- - z 2 1 Data Skew (z)0%%-%%5"----'-- - ------'-- - a! 
Arialw@ s UwUw0- 2 W1Relative Error - Arialw@ t UwUw0- 2 % (L1 Metric)r- ----'-- - -- - >---'---  <---'---  <-   b- $b qb(Sb 2 USAMP# ' ---'-- -  <---'-- -  <- l llb-  {q]S2 RWSAMP0 ' ---'-- - <---'-- - <-  b-  $bqSb2 STRAT # ---'---  <---'---  <-   b- - $tR-bSb!qb!Sbq2 OTLIDX% #---'-- - <---'-- - ---'-- - - -  !!-- '  '  ' nm @\pVivek Narasayya Ba=5L\ =<X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1sArial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)                + ) , *     $  `HChart6: selGB_20_100_0.resAdHoc`iZR  3  @@  3 SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100PURE GBCONGNEG CORR (Test Set)POS CORR (Test Set)z=2 Running Time W-SEL-100W-GB-100TaggingSampling" Ha  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` vB` vC` vD` vE` vF?3d23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% M 3O&EQ4$% M 3O&DQ4FAt3Ot 3 b#M43*#M! M4% )G M3ON&CQ  Data Skew (z)'4% QMZ3O$N&CQ :Relative Error (L1 Metric)'4523  NM43"  [63O % M,3O&FQ4444% S M3O$&BQ ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44e????????@@@@@@@@@@@@e@r??6ɏ?yUg?4?k?BK8?tub?XR?K."?}A?Ĵo?I?3ۃ?ek}?`8 ?`>?ʊ?a0? 
$y?8H-?e> 8H @  dMbP?_*+%"??U} I }  }  }  ````````` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 
3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA    _      _~ $@zPPVn?/H?Cp?;]?_~ 4@ҌE?LT?$ӡn?ިo?_~ ?P&?!!?ދ?C+j?~ ? ĭ?PC?֌ ra?*b?~ @"p?'.+=?2Pl?Ωd?~ @OYM]? ?gA(?ْU?~ $@"J %?镲 q?\7V?$I?       ~ $@bL{)?2=a?}vuŌ?ˠDt?~ 4@.&????Eb?~ ? v?ߢ?|DL$zY?ɐck?~ ?9τ&?Zc!?yTE?!!3?~ @f+/?O9&?Nwx6?yrM?~ @OYM]?@3?Nwx?Ӻ ?~ $@@I?}͑?מY?Է?8;TDXNXPNVVVVV4BVVVVVV-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`- - - -. . . . 
.~ /$@/1~٭?/+H3?/z6>W?/ފ5?~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?$4,4BVVVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV``a`b`c`d`e`f`g`h`l`m`n`o`p`q`r`s`t`` ` ` ` `a a a a a a~ b$@bog_y?bt?b?bM^i?bk3 ?~ c4@c?ci^`V?cC?cpa?cr?~ d?dGL ?dTO?d a*V?dnJy?dEИ?~ e?efH?e-ex?eRG?eŭ?eB?~ f@f  R?f Rr%?fA !?f!<8b-?f3?~ g@g^SH?gd]?g3O>g bG?g;?~ h$@h%\#?hZc!?h_>Y1\>h'2?h3Y?l l l l lm m m m m m~ n$@nA?n|ԛ?n>?ni&?nwٯ;?~ o4@o'p?oN?o|?o/.Ui?o~Ϛ?~ p?pYe?p5?p+ٱA?p Pj?p>9 ?~ q?q5؀?q2?qy76?q$F?qn2d?r@PL@rt|8c?r6w\&?r?r)Wx?~ s@sݔZ ?s j֍?sZQf?soe2?st_lW?~ t$@tؼZ`?t9?t'eRC>t^?tI)?(2TBPhhhhhhhBPhhhh^h````````````````````        ~ $@%z?x҆?txH?N ^?~ 4@}?s?*.?5x_?~ ?l\?Ȳ`?4?~ ?g?\Z {?DKO?6?~ @ ??B˺?Qv0b?~ @TTJg??l g?0*D?~ $@ t?Ӝ?4?#G?,h|((BVVVVVVV((BVVVVVV`````````````            *@.@@H@@=@ *d@.@$@L@d@=@ *@.@@J@@=@       @d@@  .@.@.@   @$@@  M@@S@Q@   @d@@ B@B@B@V,R^<<<FFBFB8( (  v  <NMM? ]`|!  @"|!??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% M 3O&Q4$% M 3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % M,3O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`0  @"0??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% M 3O&Q4$% M 3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % M,3O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`1  @"1??3` v` v` v` v` v` vB3d23 M NM4 3Q:. 
USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% M 3O&Q4$% M 3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % M,3O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`*  @"*??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% M 3O&Q4$% M 3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% M,3O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`h*  @"h*??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% M 3O&#Q4$% M 3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % M,3O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee ~v  <NMM?_ n]`*  @"*??3` %` &` '` (` )` *)3d23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D $% M 3O&'Q4$% M 3O&(Q4FA(M3O 3 b#M43*#M! M4% b JUM3Oj&)Q &Sampling Rate (%)'4% r.MZ3O&&*Q BAvg. Relative Error (L1 Metric)'4523  NM43" Y &3OY &% M,3O&%Q4444% DN" lM3O(&&Q >Error Vs Sampling Rate: COUNT Aggregate W-GB-100 Test Set, Z=2'44eee ~v  <NMM?o- ~]`+  @"+??3` v+` v,` v-` v.` v/` v0+3d2L23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% M 3O&/Q4$% M 3O&.Q4FA  3O.B r 3 b#M43*#M! M4%  ?UM3Oj&,Q &Sampling Rate (%)'4% qtMZ3O&&-Q BAvg. 
Relative Error (L1 Metric)'4523  NM43" a 3Oa % M,3O&0Q4444% ^N.M3O$&+Q |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44eee ~v   <NMM? ] `   @" ??3` v2` v3` v4` v5` v6` v7п;3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% M 3O&6Q4$% M 3O&5Q4FA+9 3O7|  3 b#M43*#M! M4%  MM3Of&3Q &Sampling Rate (%)'4% kIMZ3O&4Q 8Relative Error (L1 Metric)'4523  NM43"  u3O t% M,3O&7Q4444% O;M3O$&2Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-NEG Test Set, z=2)'44eee ~v   <NMM?0Z P] `\  @"\??3` v8` v9` v:` v;` v<` v=?:3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% M 3O&<Q4$% M 3O&;Q4FA^>R 3OCn / 3 b#M43*#M! M4%  FM3Oj&9Q &Sampling Rate (%)'4% lydMZ3O&:Q 8Relative Error (L1 Metric)'4523  NM43"  f3O % M,3O&=Q4444% ]J3M3O&&8Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-POS Test Set, z=2)'44eee xp   6NMM?p ] `L  @"L??3` v1` v>` v?` v@` vA?3d23 M NM4 3Q ;  WSAMP TaggingQ ;Q ;Q3_4E4 3Q ;  WSAMP SamplingQ ;Q ;Q3_4E4 3Q ;  STRAT TaggingQ ;Q ;Q3_4E4 3Q ;  STRAT SamplingQ ;Q ;Q3_4E4 3Q ;  OTLIDX TaggingQ ;Q ;Q3_4E4 3Q ; "OTLIDX SamplingQ ;Q ;Q3_4E4D $% M 3O&AQ4$% M 3O&@Q4FAT;3O=  3 b#M43*#M! M4%  7KM3O;&>Q Workload'4% j:MZ3Oq&>Q (Running Time (sec)'4523  NM43"  +3O +% M,3O&?Q4444% K ZM3OA(&1Q x:Comparison of Running Time for Creating Sample (f=1%, z=2)'44eee >H@H HH 7 SummaryInformation(DocumentSummaryInformation84_1047883157!F௳f௳fOle H\t  viveknar Vivek NarasayyaMicrosoft Excel@[M@9X՜.+,0 PXp x Microsoft Corp. 
selGB_20_100_0.resAdHocChart6  WorksheetsCharts     !#$%&),-.012369:;=>?@CHMPSVY\]`cdgjmrwz{| !FMicrosoft Excel ChartBiff8Excel.Chart.89qOh+'0@H\t  viveknar Vivek NarasayyaMicrosoft Excel@[M@'CompObjbObjInfoWorkbook$SummaryInformation(      !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklopTStuvwxyz{|}~ @\pVivek Narasayya Ba= =9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1.Times New Roman1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)"Yes";"Yes";"No""True";"True";"False""On";"On";"Off"                + ) , *     $  P 8 P x " `6Chart2#selGB_20_100_0.resAdHoc`iZR3  @@   SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100PURE GBCONGNEG CORR (Test Set)POS CORR (Test Set)z=2 Running Time W-SEL-100W-GB-100TaggingSamplingf=1%, z=2, W-SEL-GB, SUMMSSalesSUM (Test Set)SUM (Training Set) L2 metric" a(  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` hm` hn` ho` hp?3d23 M NM4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: OTLIDXQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_4E4D$% MP+3O&pQ4$% 
MP+3O&oQ4FAj|3O)? 3 b#M43*#M! M4% 5 M3O8&nQ Workload'4% nMZ3Ol&nQ (Running time (sec)'4523  NM43"  _3O `% Mp73OQ4444% S5M3Ox&mQ .Cost of Tagging (z=2)'44e W-SEL-100 W-SEL-100 W-SEL-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100W-GB-100W-GB-100W-GB-100e@@@d@d@$@@@@e>  @  dMbP?_*+%M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??U} I }  }  }  ````````` ` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 
3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA&   _        _           _~ $@zPPVn?/H?Cp?;]?~ $@9?8d˩?h4 ;?n!a?_~ 4@ҌE?LT?$ӡn?ިo?~ 4@1zn+?\u??ލA?ݰm?_~ ?P&?!!?ދ?C+j?~ ? ?T?g?9?~ ? ĭ?PC?֌ ra?*b?~ ?Xm_u?J\߇?o?, ƈ?~ @"p?'.+=?2Pl?Ωd?~ @^hHK?`;O?.KR?4fS?~ @OYM]? ?gA(?ْU?~ @PۆQ?Ɋ?s(CUL?@"?~ $@"J %?镲 q?\7V?$I?~ $@/kb?{24?P3ͅ?ɍ"k ?   &    "~ $@bL{)?2=a?}vuŌ?ˠDt?"~ 4@.&????Eb?"~ ? v?ߢ?|DL$zY?ɐck?"~ ?9τ&?Zc!?yTE?!!3?  ~ @f+/?O9&?Nwx6?yrM?   ~ @OYM]?@3?Nwx?Ӻ ?    ~ $@@I?}͑?מY?Է?~ $@rsB??N?2Yх?:0BTDXNXPN>ttPh||| `!`"`#`$`%`-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`" ~ 4@ ib ? 7O,? vnT ? 
6jA?!uCU?!x=jc?!VcEl?""~ "?"Yy?"Jͫ?"6Ĉ?"5'.?"#~ #@#̩M?#Hn)?#1p(P?#6W?/ފ5?"/~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?0 ||||||Ph|VVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV``a`b`c`d`e`f`g`h`l`m`n`o`p`q`r`s`t`` ` ` ` `a a a a a a~ b$@bog_y?bt?b?bM^i?bk3 ?~ c4@c?ci^`V?cC?cpa?cr?~ d?dGL ?dTO?d a*V?dnJy?dEИ?~ e?efH?e-ex?eRG?eŭ?eB?~ f@f  R?f Rr%?fA !?f!<8b-?f3?~ g@g^SH?gd]?g3O>g bG?g;?~ h$@h%\#?hZc!?h_>Y1\>h'2?h3Y?l l l l lm m m m m m~ n$@nA?n|ԛ?n>?ni&?nwٯ;?~ o4@o'p?oN?o|?o/.Ui?o~Ϛ?~ p?pYe?p5?p+ٱA?p Pj?p>9 ?~ q?q5؀?q2?qy76?q$F?qn2d?r@PL@rt|8c?r6w\&?r?r)Wx?~ s@sݔZ ?s j֍?sZQf?soe2?st_lW?~ t$@tؼZ`?t9?t'eRC>t^?tI)?(2TBPhhhhhhhBPhhhh^h````````````````````````              ~ $@%z?x҆?txH?N ^?    ~ 4@}?s?*.?5x_? ~ $@IDA?fd?_,?N ^?~ ?l\?Ȳ`?4?    ~ ?g?\Z {?DKO?6? ~ $@) h?5#?R8ߡ?E&?~ @ ??B˺?Qv0b? ~ 4@=?V?k)?ʤ6?~ @TTJg??l g?0*D? ~ ?M(#?|?5^?A C?dwW?~ $@ t?Ӝ?4?#G? ~ ?Ϲ[?ڱ?P,cy?"?" ~ @9`?ŐL*?n?gx?" ~ @9??EИ?$(~k?o?" ~ $@{O崧?e?F =b?JaL?4>ZZ~|DDh||```````````````````````` (            *@.@@H@@=@ *d@.@$@L@d@=@ *@.@@J@@=@&        @d@@  @d@@ @d@@ .@.@.@  @$@@  @$@@" M@@S@Q@"  @d@@" B@B@B@" (    "~ 9@"k? $>?b ?0!?  ~ I@W? (G`?gA(?uHg`?    ~ Y@K."?}A?Ĵo?I? ~ 9@3ۃ?פ`?γ?ti?~ i@x^*6u?!g? ?E`o`? ~ I@g?b?9*7QKs?IaL?~ y@4?V?t5=((?"070? ~ Y@nt?ZӼc?Ĵo?;KTo?~ @om?ʉv?JPB?m? ~ i@n!a?vۅ:?'3? x#?" ~ y@8L?QH2w??!3?/HM?" ~ @%A ?Wel?i2 ?Q0c ?4l Dl^^^T|^Z^ZDh|``@``,`,`,`,`,`,```,`,`,`,`,`,```,`,        ~ 4@n?S?;On?tV?~ ?Zd;?~  R@L7A`?/$?~ ?V-?&1? +?"~?~ @|?5^?S?bX9?Zd;?~ @v?J +?~ =@V-?~ $@n?S?Cl?x?      ~ 4@S㥛?~jt?~jt?~  T@~ ?"~?K7?%C?S?~ ?MbX9?Zd;?7A`?S㥛?~ @Zd;O?"~?w/?d;O?~ @x&1?~ ?/$?ʡE?~ $@oʡ?uV?Mb?~jt?      4@X@V@n?ˡE?~ ?? 
+?V-?B`"?.((BVRVVRV(BRVVVRV(B@`,`,`,`,``~ ?#~j?K?v/?~jt?~ @V-?rh|?Mb?v/?~ @rh|?rh|?y&1|?Zd;?~ $@v/?S㥛?~ ?Mb?~ @'?͎T?J" ?*nb?~ @9`?ŐL*?n?gx?xdVVVRV x (  v  <NMM? ]`  @"??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`  @"??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`  @"??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`X  @"X??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. 
Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`H  @"H??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee ~v  <NMM?_ n]`  @"??3` %` &` '` (` )` *)3d23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D $% MP+3O&'Q4$% MP+3O&(Q4FA(M3O 3 b#M43*#M! M4% b JUM3Oj&)Q &Sampling Rate (%)'4% r.MZ3O&&*Q BAvg. Relative Error (L1 Metric)'4523  NM43" Y &3OY &% Mp73O&%Q4444% DN" lM3O(&&Q >Error Vs Sampling Rate: COUNT Aggregate W-GB-100 Test Set, Z=2'44eee ~v  <NMM?o- ~]`0  @"0??3` v+` v,` v-` v.` v/` v0+3d2L23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% MP+3O&/Q4$% MP+3O&.Q4FA  3O.B r 3 b#M43*#M! M4%  ?UM3Oj&,Q &Sampling Rate (%)'4% qtMZ3O&&-Q BAvg. Relative Error (L1 Metric)'4523  NM43" a 3Oa % Mp73O&0Q4444% ^N.M3O$&+Q |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44e????????????????????@@@@@@@@@@$@$@$@$@$@eA?|ԛ?>?i&?wٯ;?'p?N?|?/.Ui?~Ϛ?Ye?5?+ٱA? Pj?>9 ?5؀?2?y76?$F?n2d?Q?t|8c?6w\&??)Wx?ݔZ ? j֍?ZQf?oe2?t_lW?ؼZ`?9?'eRC>^?I)?e ~v   <NMM? ] `   @" ??3` v2` v3` v4` v5` v6` v7п;3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&6Q4$% MP+3O&5Q4FA+9 3O7|  3 b#M43*#M! M4%  MM3Of&3Q &Sampling Rate (%)'4% kIMZ3O&4Q 8Relative Error (L1 Metric)'4523  NM43"  u3O t% Mp73O&7Q4444% O;M3O$&2Q GError Vs. 
Sampling Rate: SUM Aggregate (W-SEL-GB-100-NEG Test Set, z=2)'44e????????????????@@@@@@@@$@$@$@$@e%z?x҆?txH?N ^?}?s?*.?5x_?l\?R 3OCn / 3 b#M43*#M! M4%  FM3Oj&9Q &Sampling Rate (%)'4% lydMZ3O&:Q 8Relative Error (L1 Metric)'4523  NM43"  f3O % Mp73O&=Q4444% ]J3M3O&&8Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-POS Test Set, z=2)'44eee xp   6NMM?p ] ``   @"` ??3` v1` v>` v?` v@` vA?3d23 M NM4 3Q ;  WSAMP TaggingQ ;Q ;Q3_4E4 3Q ;  WSAMP SamplingQ ;Q ;Q3_4E4 3Q ;  STRAT TaggingQ ;Q ;Q3_4E4 3Q ;  STRAT SamplingQ ;Q ;Q3_4E4 3Q ;  OTLIDX TaggingQ ;Q ;Q3_4E4 3Q ; "OTLIDX SamplingQ ;Q ;Q3_4E4D $% MP+3O&AQ4$% MP+3O&@Q4FAT;3O=  3 b#M43*#M! M4%  7KM3O;&>Q Workload'4% j:MZ3Oq&>Q (Running Time (sec)'4523  NM43"  +3O +% Mp73O&?Q4444% K ZM3OA(&1Q x:Comparison of Running Time for Creating Sample (f=1%, z=2)'44eee ~v   <NMM?P ] `   @" ??3` JB` C` D` JEA3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&EQ4$% MP+3O&DQ4FAc k3OE 3 b#M43*#M! M4% x u@M3O(&CQ R'Number of queries in workload W-SEL-GB'4% i MZ3O6&CQ L1 Error'4523  NM43" k t3Ok s% Mp73OQ4444% V M3O0&BQ x:Error Vs. Workload Size z=2, f=1%, Test-Set, SUM aggregate'44eee ~v   <NMM?0@] `H   @"H ??3` hF` hG` hH` I` JQ3dh23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&HQ4$% MP+3O&IQ4FAF= 3O5  3 b#M43*#M! M4%  CIM3Ox&JQ &Sampling Rate (%)'4% dMZ3Oj&JQ &Error (L1 metric)'4523  NM43"  w3O % Mp73O&FQ4444% V ;M3O(&GQ LError Vs. Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-NEG-100, Test Set)'44e????????????????@@@@@@@@$@$@$@$@eIDA?fd?_,?N ^?IP?CA|?v稣?5x_?? t ?w-?Dͩd?%]3f?lwN?T-q?ak?9~߿y?^ؚ?J" ?Wm?3O % Mp73O&OQ4444% VM3O$&KQ LError Vs. Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-POS-100, Test Set)'44eee ~v  <NMM? 
M]`   @" ??3` hQ` hR` hS` T` UQ3d6{23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&SQ4$% MP+3O&TQ4FAg< 3O_ 3 b#M43*#M! M4% K WM3O&UQ &Sampling Rate (%)'4% R~MZ3O*f&UQ :Relative Error (L1 metric)'4523  NM43"  w3O % Mp73O&RQ4444% )VW ;M3O(&QQ KError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Test Set)'44e????????????@@@@@@@@$@$@$@$@eS㥛?~jt?~jt?(\?"~?K7?%C?S?MbX9?Zd;?7A`?S㥛?Zd;O?"~?w/?d;O?x&1??/$?ʡE?oʡ?uV?Mb?~jt?e ~v  <NMM?f i]`x   @"x ??3` hV` hW` hX` hYQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&YQ4$% MP+3O&XQ4FAY 3O  3 b#M43*#M! M4%  :M3Oj&WQ &Sampling Rate (%)'4% RYMZ3O&Q&WQ :Relative Error (L1 metric)'4523  NM43"  >3O % Mp73OQ4444% )VW ;M3O(&VQ OError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Training Set)'44e????????????@@@@@@@@$@$@$@$@eGz?Q?n?ˡE?? +?V-?B`"?#~j?K?v/?~jt?V-?rh|?Mb?v/?rh|?rh|?y&1|?Zd;?v/?S㥛?{Gzt?Mb?e ~v  <NMM? xx]`   @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` hZ` h[` h\` h]` h^Q3d]23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&^Q4$% MP+3O&]Q4FA` 3Oq  3 b#M43*#M! M4% K CIM3Ox&\Q &Sampling Rate (%)'4% @lMZ3O([&\Q :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&ZQ4444% FV M3O80&[Q FError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100, z=2, Test Set)'44eee ~v  <NMM?@P+]`  @"??3` h_` ha` hb` hc` hdQ3d23 M NM4 3Q: USAMPQ ;%Q ;%Q3_4E4 3Q: WSAMPQ ;%Q ;%Q3_4E4 3Q: STRATQ ;%Q ;%Q3_  NM  d4E4 3Q: OTLIDXQ ;%Q ;%Q3_  NM  Nd4E4D$% MP+3O&aQ4$% MP+3O&dQ4FA 3O. 3 b#M43*#M! M4% f+ CIM3Ox&cQ &Sampling Rate (%)'4% -lMZ3O([&cQ :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&_Q4444% 4VB M3O=2&bQ CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100,z=2, Test Set)'44eee ~v  <NMM? 
`pZ]`  @"??3` `` he` hf` hg` hhQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&hQ4$% MP+3O&gQ4FA{ 3O_ 3 b#M43*#M! M4% L M3O$&fQ \,Number of queries in training set (W-SEL-GB)'4% gM Z3O&`Q 8Relative Error (L1 Metric)'4523  NM43"  ?>3O >% Mp73OQ4444% 1V?M3O&&eQ x:Error Vs. Workload Size z=2, f=1%, Test-Set, SUM aggregate'44e9@9@9@9@I@I@I@I@Y@Y@Y@Y@i@i@i@i@y@y@y@y@@@@@e3ۃ?פ`?γ?ti?g?b?9*7QKs?IaL?nt?ZӼc?Ĵo?;KTo?n!a?vۅ:?'3? x#?8L?QH2w??!3?/HM?%A ?Wel?i2 ?Q0c ?e xp  6NMM?`p]`  @"??3` hi` hj` hk` hlQ3d23 M NM4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: OTLIDXQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_4E4D $% MP+3O&iQ4$% MP+3O&jQ4FAFgL 3O5] 3 b#M43*#M! M4% I OIM3OA&lQ Workload'4% dMZ3Oz&lQ (Running time (sec)'4523  NM43"  k3O k% Mp73OQ4444% VM3O&kQ .Cost of Tagging (z=2)'44eee >@7 DocumentSummaryInformation84_1047882966!F<ųf<ųfOle  CompObjb՜.+,0 PXp x Microsoft Corp. selGB_20_100_0.resAdHocChart2  WorksheetsCharts !FMicrosoft Excel ChartBiff8Excel.Chart.89qObjInfoWorkbooks¿SummaryInformation(DocumentSummaryInformation84Oh+'0@H\t  viveknar Vivek NarasayyaMicrosoft Excel@[M@?@ABCDEFGHIJKLMNOPQR8 U9 YZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @\pVivek Narasayya Ba= =)9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1.Times New 
Roman1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)"Yes";"Yes";"No""True";"True";"False""On";"On";"Off"                + ) , *     $  P 8 P x "  A `Chart4'selGB_20_100_0.resAdHoc`iZR3  @@   SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100PURE GBCONGNEG CORR (Test Set)POS CORR (Test Set)z=2 Running Time W-SEL-100W-GB-100TaggingSamplingf=1%, z=2, W-SEL-GB, SUMMSSalesSUM (Test Set)SUM (Training Set) L2 metric"V aa  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` hr` s` t` hu` hv@@3deL23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&uQ4$% MP+3O&tQ4FA-Z3O 3 b#M43*#M! M4% 3@M3Oz&sQ .Sampling Fraction (%)'4%  OMZ3O$N&sQ :Relative Error (L1 metric)'4523  NM43" z 3Oz % Mp73O&vQ4444% % M3O3&rQ OError Vs. Sampling Fraction: SUM Aggregate (Real data set, Workload = Test Set)'44e????????????@@@@@@@@$@$@$@$@eS㥛?~jt?~jt?(\?"~?K7?/$?S?MbX9?Zd;?K7A?S㥛?Zd;O?"~?/$?d;O?x&1??rh|?ʡE?oʡ?uV?MbX9?~jt?e> ~j @  dMbP?_*+%M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??U} I }  }  }  ````````` ` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? 
(|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? 
l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA&   _        _           _~ $@zPPVn?/H?Cp?;]?~ $@9?8d˩?h4 ;?n!a?_~ 4@ҌE?LT?$ӡn?ިo?~ 4@1zn+?\u??ލA?ݰm?_~ ?P&?!!?ދ?C+j?~ ? ?T?g?9?~ ? ĭ?PC?֌ ra?*b?~ ?Xm_u?J\߇?o?, ƈ?~ @"p?'.+=?2Pl?Ωd?~ @^hHK?`;O?.KR?4fS?~ @OYM]? ?gA(?ْU?~ @PۆQ?Ɋ?s(CUL?@"?~ $@"J %?镲 q?\7V?$I?~ $@/kb?{24?P3ͅ?ɍ"k ?   &    "~ $@bL{)?2=a?}vuŌ?ˠDt?"~ 4@.&????Eb?"~ ? v?ߢ?|DL$zY?ɐck?"~ ?9τ&?Zc!?yTE?!!3?  ~ @f+/?O9&?Nwx6?yrM?   ~ @OYM]?@3?Nwx?Ӻ ?    ~ $@@I?}͑?מY?Է?~ $@rsB??N?2Yх?:0BTDXNXPN>ttPh||| `!`"`#`$`%`-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`" ~ 4@ ib ? 7O,? vnT ? 6jA?!uCU?!x=jc?!VcEl?""~ "?"Yy?"Jͫ?"6Ĉ?"5'.?"#~ #@#̩M?#Hn)?#1p(P?#6W?/ފ5?"/~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?0 ||||||Ph|VVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV``a`b`c`d`e`f`g`h`l`m`n`o`p`q`r`s`t`` ` ` ` `a a a a a a~ b$@bog_y?bt?b?bM^i?bk3 ?~ c4@c?ci^`V?cC?cpa?cr?~ d?dGL ?dTO?d a*V?dnJy?dEИ?~ e?efH?e-ex?eRG?eŭ?eB?~ f@f  R?f Rr%?fA !?f!<8b-?f3?~ g@g^SH?gd]?g3O>g bG?g;?~ h$@h%\#?hZc!?h_>Y1\>h'2?h3Y?l l l l lm m m m m m~ n$@nA?n|ԛ?n>?ni&?nwٯ;?~ o4@o'p?oN?o|?o/.Ui?o~Ϛ?~ p?pYe?p5?p+ٱA?p Pj?p>9 ?~ q?q5؀?q2?qy76?q$F?qn2d?r@PL@rt|8c?r6w\&?r?r)Wx?~ s@sݔZ ?s j֍?sZQf?soe2?st_lW?~ t$@tؼZ`?t9?t'eRC>t^?tI)?(2TBPhhhhhhhBPhhhh^h````````````````````````              ~ $@%z?x҆?txH?N ^?    ~ 4@}?s?*.?5x_? ~ $@IDA?fd?_,?N ^?~ ?l\?Ȳ`?4?    ~ ?g?\Z {?DKO?6? ~ $@) h?5#?R8ߡ?E&?~ @ ??B˺?Qv0b? ~ 4@=?V?k)?ʤ6?~ @TTJg??l g?0*D? ~ ?M(#?|?5^?A C?dwW?~ $@ t?Ӝ?4?#G? ~ ?Ϲ[?ڱ?P,cy?"?" ~ @9`?ŐL*?n?gx?" ~ @9??EИ?$(~k?o?" 
~ $@{O崧?e?F =b?JaL?4>ZZ~|DDh||```````````````````````` (            *@.@@H@@=@ *d@.@$@L@d@=@ *@.@@J@@=@&        @d@@  @d@@ @d@@ .@.@.@  @$@@  @$@@" M@@S@Q@"  @d@@" B@B@B@" (    "~ 9@"k? $>?b ?0!?  ~ I@W? (G`?gA(?uHg`?    ~ Y@K."?}A?Ĵo?I? ~ 9@3ۃ?פ`?γ?ti?~ i@x^*6u?!g? ?E`o`? ~ I@g?b?9*7QKs?IaL?~ y@4?V?t5=((?"070? ~ Y@nt?ZӼc?Ĵo?;KTo?~ @om?ʉv?JPB?m? ~ i@n!a?vۅ:?'3? x#?" ~ y@8L?QH2w??!3?/HM?" ~ @%A ?Wel?i2 ?Q0c ?4l Dl^^^T|^Z^ZDh|``@``,`,`,`,`,`,```,`,`,`,`,`,```,`,        ~ 4@n?S?;On?tV?~ ?Zd;?~  R@L7A`?/$?~ ?V-?&1? +?"~?~ @|?5^?S?bX9?Zd;?~ @v?J +?~ =@V-?~ $@n?S?Cl?x?      ~ 4@S㥛?~jt?~jt?~  T@~ ?"~?K7?/$?S?~ ?MbX9?Zd;?K7A?S㥛?~ @Zd;O?"~?/$?d;O?~ @x&1?~ ?rh|?ʡE?~ $@oʡ?uV?MbX9?~jt?      4@X@V@n?ˡE?~ ?? +?2w-!?B`"?.((BVRVVRV(BRVVVRV(B@`,`,`,`,```````~ ?#~j?K?9v?~jt?~ @V-?rh|?Mb?v/?~ @rh|?rh|?y&1|?Zd;?~ $@v/?S㥛?~ ?Mb?    ~ 唀?͎T?ʃ9?bc?~ 4@S ?>?O?'*V?~ D@v=T?'vU?'?m3?~ N@Z`?%9?Yk(?Z?~ T@M(D?D?qS?.o?~ ?9`?ŐL*?n?gx?vVVVRBVVVVV  (  v  <NMM? ]`,(  @",(??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`|(  @"|(??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`l)  @"l)??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! 
M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`)  @")??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`*  @"*??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee ~v  <NMM?_ n]`*  @"*??3` %` &` '` (` )` *)3d23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D $% MP+3O&'Q4$% MP+3O&(Q4FA(M3O 3 b#M43*#M! M4% b JUM3Oj&)Q &Sampling Rate (%)'4% r.MZ3O&&*Q BAvg. Relative Error (L1 Metric)'4523  NM43" Y &3OY &% Mp73O&%Q4444% DN" lM3O(&&Q >Error Vs Sampling Rate: COUNT Aggregate W-GB-100 Test Set, Z=2'44eee ~v  <NMM?o- ~]`+  @"+??3` v+` v,` v-` v.` v/` v0+3d2L23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% MP+3O&/Q4$% MP+3O&.Q4FA  3O.B r 3 b#M43*#M! M4%  ?UM3Oj&,Q &Sampling Rate (%)'4% qtMZ3O&&-Q BAvg. Relative Error (L1 Metric)'4523  NM43" a 3Oa % Mp73O&0Q4444% ^N.M3O$&+Q |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44eee ~v   <NMM? 
] `,  @",??3` v2` v3` v4` v5` v6` v7п;3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&6Q4$% MP+3O&5Q4FA+9 3O7|  3 b#M43*#M! M4%  MM3Of&3Q &Sampling Rate (%)'4% kIMZ3O&4Q 8Relative Error (L1 Metric)'4523  NM43"  u3O t% Mp73O&7Q4444% O;M3O$&2Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-NEG Test Set, z=2)'44eee ~v   <NMM?0Z P] `,  @",??3` v8` v9` v:` v;` v<` v=?:3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&<Q4$% MP+3O&;Q4FA^>R 3OCn / 3 b#M43*#M! M4%  FM3Oj&9Q &Sampling Rate (%)'4% lydMZ3O&:Q 8Relative Error (L1 Metric)'4523  NM43"  f3O % Mp73O&=Q4444% ]J3M3O&&8Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-POS Test Set, z=2)'44eee xp   6NMM?p ] `-  @"-??3` v1` v>` v?` v@` vA?3d23 M NM4 3Q ;  WSAMP TaggingQ ;Q ;Q3_4E4 3Q ;  WSAMP SamplingQ ;Q ;Q3_4E4 3Q ;  STRAT TaggingQ ;Q ;Q3_4E4 3Q ;  STRAT SamplingQ ;Q ;Q3_4E4 3Q ;  OTLIDX TaggingQ ;Q ;Q3_4E4 3Q ; "OTLIDX SamplingQ ;Q ;Q3_4E4D $% MP+3O&AQ4$% MP+3O&@Q4FAT;3O=  3 b#M43*#M! M4%  7KM3O;&>Q Workload'4% j:MZ3Oq&>Q (Running Time (sec)'4523  NM43"  +3O +% Mp73O&?Q4444% K ZM3OA(&1Q x:Comparison of Running Time for Creating Sample (f=1%, z=2)'44eee ~v   <NMM?P ] `.  @".??3` JB` C` D` JEA3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&EQ4$% MP+3O&DQ4FAc k3OE 3 b#M43*#M! M4% x u@M3O(&CQ R'Number of queries in workload W-SEL-GB'4% i MZ3O6&CQ L1 Error'4523  NM43" k t3Ok s% Mp73OQ4444% V M3O0&BQ x:Error Vs. Workload Size z=2, f=1%, Test-Set, SUM aggregate'44eee ~v   <NMM?0@] `.  @".??3` hF` hG` hH` I` JQ3dh23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&HQ4$% MP+3O&IQ4FAF= 3O5  3 b#M43*#M! M4%  CIM3Ox&JQ &Sampling Rate (%)'4% dMZ3Oj&JQ &Error (L1 metric)'4523  NM43"  w3O % Mp73O&FQ4444% V ;M3O(&GQ LError Vs. 
Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-NEG-100, Test Set)'44eee ~v  <NMM?]`/  @"/??3` hK` hL` hM` hN` hOQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&NQ4$% MP+3O&MQ4FA4 3O,x R 3 b#M43*#M! M4% A :M3Oj&LQ &Sampling Rate (%)'4% dxMZ3Ob&LQ &Error (L1 metric)'4523  NM43"  >3O % Mp73O&OQ4444% VM3O$&KQ LError Vs. Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-POS-100, Test Set)'44eee ~v  <NMM? M]`/  @"/??3` hQ` hR` hS` T` UQ3d6{23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&SQ4$% MP+3O&TQ4FAg< 3O_ 3 b#M43*#M! M4% K WM3O&UQ &Sampling Rate (%)'4% R~MZ3O*f&UQ :Relative Error (L1 metric)'4523  NM43"  w3O % Mp73O&RQ4444% )VW ;M3O(&QQ KError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Test Set)'44eee ~v  <NMM?f i]`0  @"0??3` hV` hW` hX` hYQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&YQ4$% MP+3O&XQ4FAY 3O  3 b#M43*#M! M4%  :M3Oj&WQ &Sampling Rate (%)'4% RYMZ3O&Q&WQ :Relative Error (L1 metric)'4523  NM43"  >3O % Mp73OQ4444% )VW ;M3O(&VQ OError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Training Set)'44eee ~v  <NMM? xx]`,1  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` hZ` h[` h\` h]` h^Q3d]23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&^Q4$% MP+3O&]Q4FA` 3Oq  3 b#M43*#M! M4% K CIM3Ox&\Q &Sampling Rate (%)'4% @lMZ3O([&\Q :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&ZQ4444% FV M3O80&[Q FError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100, z=2, Test Set)'44eee ~v  <NMM?@P+]`2  @"2??3` h_` ha` hb` hc` hdQ3d23 M NM4 3Q: USAMPQ ;%Q ;%Q3_4E4 3Q: WSAMPQ ;%Q ;%Q3_4E4 3Q: STRATQ ;%Q ;%Q3_  NM  d4E4 3Q: OTLIDXQ ;%Q ;%Q3_  NM  Nd4E4D$% MP+3O&aQ4$% MP+3O&dQ4FA 3O. 3 b#M43*#M! 
M4% f+ CIM3Ox&cQ &Sampling Rate (%)'4% -lMZ3O([&cQ :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&_Q4444% 4VB M3O=2&bQ CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100,z=2, Test Set)'44eee ~v  <NMM? `pZ]`l2  @"l2??3` `` he` hf` hg` hhQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&hQ4$% MP+3O&gQ4FA{ 3O_ 3 b#M43*#M! M4% L M3O$&fQ \,Number of queries in training set (W-SEL-GB)'4% gM Z3O&`Q 8Relative Error (L1 Metric)'4523  NM43"  ?>3O >% Mp73OQ4444% 1V?M3O&&eQ x:Error Vs. Workload Size z=2, f=1%, Test-Set, SUM aggregate'44eee xp  6NMM?`p]`\3  @"\3??3` hk` hl` hm` hnQ3d23 M NM4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: OTLIDXQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_4E4D $% MP+3O&kQ4$% MP+3O&lQ4FAFgL 3O5] 3 b#M43*#M! M4% I OIM3OA&nQ Workload'4% dMZ3Oz&nQ (Running time (sec)'4523  NM43"  k3O k% Mp73OQ4444% VM3O&mQ .Cost of Tagging (z=2)'44eee xp  6NMM?p ]`3  @"3??3` hi` hj` ho` hp` hqQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&qQ4$% MP+3O&pQ4FAj{ 3OYu S 3 b#M43*#M! M4% ) ;M 3O%(&oQ GDegree of overlap in condition ranges between test set and training set'4% lMZ3O([&oQ :Relative Error (L1-Metric)'4523  NM43"  w3O % Mp73O&iQ4444% VC M3O!0&jQ ?Error vs. Correlation: COUNT aggregate (z=2, f=1%,W-SEL-GB-100)'44eee >@7 selGB_20_100_0.resAdHocChart4  WorksheetsCharts !FMicrosoft Excel ChartBiff8Excel.Chart.89qOh+'0@H\t  _1047882971!FOسfOسfOle CompObjbObjInfoWorkbookXSummaryInformation(DocumentSummaryInformation8"4_1047883789!F@Wf@Wf                           ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 :   > ? 
@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~  @\pVivek Narasayya Ba=,E =Z9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1.Times New Roman1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1sArial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)"Yes";"Yes";"No""True";"True";"False""On";"On";"Off"                + ) , *     $  P 8 P x "  A `Chart5z'selGB_20_100_0.resAdHoc`iZR3  @@   SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100PURE GBCONGNEG CORR (Test Set)POS CORR (Test Set)z=2 Running Time W-SEL-100W-GB-100TaggingSamplingf=1%, z=2, W-SEL-GB, SUMMSSalesSUM (Test Set)SUM (Training Set) L2 metric"8 aC  @M\\research\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` hr` hs` ht` hu3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&uQ4$% MP+3O&tQ4FAr3OL! 8 3 b#M43*#M! 
M4% Y GM3Of&sQ &Sampling Rate (%)'4% jMZ3O$N&sQ :Relative Error (L1 metric)'4523  NM43"  w3O % Mp73OQ4444% N M3O$&rQ OError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Training Set)'44e????????????@@@@@@@@$@$@$@$@eGz?Q?n?ˡE?? +?2w-!?B`"?#~j?K?9v?~jt?V-?rh|?Mb?v/?rh|?rh|?y&1|?Zd;?v/?S㥛?{Gzt?Mb?e> M @  dMbP?_*+%M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??U} I }  }  }  ````````` ` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? >?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? 
c]K? 9EGr? _=[? 뉺? 3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA&   _        _           _~ $@zPPVn?/H?Cp?;]?~ $@9?8d˩?h4 ;?n!a?_~ 4@ҌE?LT?$ӡn?ިo?~ 4@1zn+?\u??ލA?ݰm?_~ ?P&?!!?ދ?C+j?~ ? ?T?g?9?~ ? ĭ?PC?֌ ra?*b?~ ?Xm_u?J\߇?o?, ƈ?~ @"p?'.+=?2Pl?Ωd?~ @^hHK?`;O?.KR?4fS?~ @OYM]? ?gA(?ْU?~ @PۆQ?Ɋ?s(CUL?@"?~ $@"J %?镲 q?\7V?$I?~ $@/kb?{24?P3ͅ?ɍ"k ?   &    "~ $@bL{)?2=a?}vuŌ?ˠDt?"~ 4@.&????Eb?"~ ? v?ߢ?|DL$zY?ɐck?"~ ?9τ&?Zc!?yTE?!!3?  ~ @f+/?O9&?Nwx6?yrM?   ~ @OYM]?@3?Nwx?Ӻ ?    ~ $@@I?}͑?מY?Է?~ $@rsB??N?2Yх?:0BTDXNXPN>ttPh||| `!`"`#`$`%`-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`" ~ 4@ ib ? 7O,? vnT ? 
6jA?!uCU?!x=jc?!VcEl?""~ "?"Yy?"Jͫ?"6Ĉ?"5'.?"#~ #@#̩M?#Hn)?#1p(P?#6W?/ފ5?"/~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?0 ||||||Ph|VVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV``a`b`c`d`e`f`g`h`l`m`n`o`p`q`r`s`t`` ` ` ` `a a a a a a~ b$@bog_y?bt?b?bM^i?bk3 ?~ c4@c?ci^`V?cC?cpa?cr?~ d?dGL ?dTO?d a*V?dnJy?dEИ?~ e?efH?e-ex?eRG?eŭ?eB?~ f@f  R?f Rr%?fA !?f!<8b-?f3?~ g@g^SH?gd]?g3O>g bG?g;?~ h$@h%\#?hZc!?h_>Y1\>h'2?h3Y?l l l l lm m m m m m~ n$@nA?n|ԛ?n>?ni&?nwٯ;?~ o4@o'p?oN?o|?o/.Ui?o~Ϛ?~ p?pYe?p5?p+ٱA?p Pj?p>9 ?~ q?q5؀?q2?qy76?q$F?qn2d?r@PL@rt|8c?r6w\&?r?r)Wx?~ s@sݔZ ?s j֍?sZQf?soe2?st_lW?~ t$@tؼZ`?t9?t'eRC>t^?tI)?(2TBPhhhhhhhBPhhhh^h````````````````````````              ~ $@%z?x҆?txH?N ^?    ~ 4@}?s?*.?5x_? ~ $@IDA?fd?_,?N ^?~ ?l\?Ȳ`?4?    ~ ?g?\Z {?DKO?6? ~ $@) h?5#?R8ߡ?E&?~ @ ??B˺?Qv0b? ~ 4@=?V?k)?ʤ6?~ @TTJg??l g?0*D? ~ ?M(#?|?5^?A C?dwW?~ $@ t?Ӝ?4?#G? ~ ?Ϲ[?ڱ?P,cy?"?" ~ @9`?ŐL*?n?gx?" ~ @9??EИ?$(~k?o?" ~ $@{O崧?e?F =b?JaL?4>ZZ~|DDh||```````````````````````` (            *@.@@H@@=@ *d@.@$@L@d@=@ *@.@@J@@=@&        @d@@  @d@@ @d@@ .@.@.@  @$@@  @$@@" M@@S@Q@"  @d@@" B@B@B@" (    "~ 9@"k? $>?b ?0!?  ~ I@W? (G`?gA(?uHg`?    ~ Y@K."?}A?Ĵo?I? ~ 9@3ۃ?פ`?γ?ti?~ i@x^*6u?!g? ?E`o`? ~ I@g?b?9*7QKs?IaL?~ y@4?V?t5=((?"070? ~ Y@nt?ZӼc?Ĵo?;KTo?~ @om?ʉv?JPB?m? ~ i@n!a?vۅ:?'3? x#?" ~ y@8L?QH2w??!3?/HM?" ~ @%A ?Wel?i2 ?Q0c ?4l Dl^^^T|^Z^ZDh|``@``,`,`,`,`,`,```,`,`,`,`,`,```,`,        ~ 4@n?S?;On?tV?~ ?Zd;?~  R@L7A`?/$?~ ?V-?&1? +?"~?~ @|?5^?S?bX9?Zd;?~ @v?J +?~ =@V-?~ $@n?S?Cl?x?      ~ 4@S㥛?~jt?~jt?~  T@~ ?"~?K7?/$?S?~ ?MbX9?Zd;?K7A?S㥛?~ @Zd;O?"~?/$?d;O?~ @x&1?~ ?rh|?ʡE?~ $@oʡ?uV?MbX9?~jt?      4@X@V@n?ˡE?~ ?? +?2w-!?B`"?.((BVRVVRV(BRVVVRV(B@`,`,`,`,```````~ ?#~j?K?9v?~jt?~ @V-?rh|?Mb?v/?~ @rh|?rh|?y&1|?Zd;?~ $@v/?S㥛?~ ?Mb?    
~ 唀?͎T?ʃ9?bc?~ 4@S ?>?O?'*V?~ D@v=T?'vU?'?m3?~ N@Z`?%9?Yk(?Z?~ T@M(D?D?qS?.o?~ ?9`?ŐL*?n?gx?vVVVRBVVVVV  (  v  <NMM? ]`\:  @"\:??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`:  @":??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`;  @";??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`;  @";??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`<  @"<??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! 
M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee ~v  <NMM?_ n]`,=  @",=??3` %` &` '` (` )` *)3d23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D $% MP+3O&'Q4$% MP+3O&(Q4FA(M3O 3 b#M43*#M! M4% b JUM3Oj&)Q &Sampling Rate (%)'4% r.MZ3O&&*Q BAvg. Relative Error (L1 Metric)'4523  NM43" Y &3OY &% Mp73O&%Q4444% DN" lM3O(&&Q >Error Vs Sampling Rate: COUNT Aggregate W-GB-100 Test Set, Z=2'44eee ~v  <NMM?o- ~]`=  @"=??3` v+` v,` v-` v.` v/` v0+3d2L23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% MP+3O&/Q4$% MP+3O&.Q4FA  3O.B r 3 b#M43*#M! M4%  ?UM3Oj&,Q &Sampling Rate (%)'4% qtMZ3O&&-Q BAvg. Relative Error (L1 Metric)'4523  NM43" a 3Oa % Mp73O&0Q4444% ^N.M3O$&+Q |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44eee ~v   <NMM? ] `>  @">??3` v2` v3` v4` v5` v6` v7п;3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&6Q4$% MP+3O&5Q4FA+9 3O7|  3 b#M43*#M! M4%  MM3Of&3Q &Sampling Rate (%)'4% kIMZ3O&4Q 8Relative Error (L1 Metric)'4523  NM43"  u3O t% Mp73O&7Q4444% O;M3O$&2Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-NEG Test Set, z=2)'44eee ~v   <NMM?0Z P] `?  @"???3` v8` v9` v:` v;` v<` v=?:3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&<Q4$% MP+3O&;Q4FA^>R 3OCn / 3 b#M43*#M! M4%  FM3Oj&9Q &Sampling Rate (%)'4% lydMZ3O&:Q 8Relative Error (L1 Metric)'4523  NM43"  f3O % Mp73O&=Q4444% ]J3M3O&&8Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-POS Test Set, z=2)'44eee xp   6NMM?p ] `?  
@"???3` v1` v>` v?` v@` vA?3d23 M NM4 3Q ;  WSAMP TaggingQ ;Q ;Q3_4E4 3Q ;  WSAMP SamplingQ ;Q ;Q3_4E4 3Q ;  STRAT TaggingQ ;Q ;Q3_4E4 3Q ;  STRAT SamplingQ ;Q ;Q3_4E4 3Q ;  OTLIDX TaggingQ ;Q ;Q3_4E4 3Q ; "OTLIDX SamplingQ ;Q ;Q3_4E4D $% MP+3O&AQ4$% MP+3O&@Q4FAT;3O=  3 b#M43*#M! M4%  7KM3O;&>Q Workload'4% j:MZ3Oq&>Q (Running Time (sec)'4523  NM43"  +3O +% Mp73O&?Q4444% K ZM3OA(&1Q x:Comparison of Running Time for Creating Sample (f=1%, z=2)'44eee ~v   <NMM?P ] `D@  @"D@??3` JB` C` D` JEA3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&EQ4$% MP+3O&DQ4FAc k3OE 3 b#M43*#M! M4% x u@M3O(&CQ R'Number of queries in workload W-SEL-GB'4% i MZ3O6&CQ L1 Error'4523  NM43" k t3Ok s% Mp73OQ4444% V M3O0&BQ x:Error Vs. Workload Size z=2, f=1%, Test-Set, SUM aggregate'44eee ~v   <NMM?0@] `@  @"@??3` hF` hG` hH` I` JQ3dh23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&HQ4$% MP+3O&IQ4FAF= 3O5  3 b#M43*#M! M4%  CIM3Ox&JQ &Sampling Rate (%)'4% dMZ3Oj&JQ &Error (L1 metric)'4523  NM43"  w3O % Mp73O&FQ4444% V ;M3O(&GQ LError Vs. Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-NEG-100, Test Set)'44eee ~v  <NMM?]`A  @"A??3` hK` hL` hM` hN` hOQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&NQ4$% MP+3O&MQ4FA4 3O,x R 3 b#M43*#M! M4% A :M3Oj&LQ &Sampling Rate (%)'4% dxMZ3Ob&LQ &Error (L1 metric)'4523  NM43"  >3O % Mp73O&OQ4444% VM3O$&KQ LError Vs. Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-POS-100, Test Set)'44eee ~v  <NMM? M]`B  @"B??3` hQ` hR` hS` T` UQ3d6{23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&SQ4$% MP+3O&TQ4FAg< 3O_ 3 b#M43*#M! M4% K WM3O&UQ &Sampling Rate (%)'4% R~MZ3O*f&UQ :Relative Error (L1 metric)'4523  NM43"  w3O % Mp73O&RQ4444% )VW ;M3O(&QQ KError Vs. 
Sampling Rate: SUM Aggregate (Real data set, Workload = Test Set)'44eee ~v  <NMM?f i]` C  @" C??3` hV` hW` hX` hYQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&YQ4$% MP+3O&XQ4FAY 3O  3 b#M43*#M! M4%  :M3Oj&WQ &Sampling Rate (%)'4% RYMZ3O&Q&WQ :Relative Error (L1 metric)'4523  NM43"  >3O % Mp73OQ4444% )VW ;M3O(&VQ OError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Training Set)'44eee ~v  <NMM? xx]`\C  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` hZ` h[` h\` h]` h^Q3d]23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&^Q4$% MP+3O&]Q4FA` 3Oq  3 b#M43*#M! M4% K CIM3Ox&\Q &Sampling Rate (%)'4% @lMZ3O([&\Q :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&ZQ4444% FV M3O80&[Q FError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100, z=2, Test Set)'44eee ~v  <NMM?@P+]`LD  @"LD??3` h_` ha` hb` hc` hdQ3d23 M NM4 3Q: USAMPQ ;%Q ;%Q3_4E4 3Q: WSAMPQ ;%Q ;%Q3_4E4 3Q: STRATQ ;%Q ;%Q3_  NM  d4E4 3Q: OTLIDXQ ;%Q ;%Q3_  NM  Nd4E4D$% MP+3O&aQ4$% MP+3O&dQ4FA 3O. 3 b#M43*#M! M4% f+ CIM3Ox&cQ &Sampling Rate (%)'4% -lMZ3O([&cQ :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&_Q4444% 4VB M3O=2&bQ CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100,z=2, Test Set)'44eee ~v  <NMM? `pZ]`D  @"D??3` `` he` hf` hg` hhQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&hQ4$% MP+3O&gQ4FA{ 3O_ 3 b#M43*#M! M4% L M3O$&fQ \,Number of queries in training set (W-SEL-GB)'4% gM Z3O&`Q 8Relative Error (L1 Metric)'4523  NM43"  ?>3O >% Mp73OQ4444% 1V?M3O&&eQ x:Error Vs. Workload Size z=2, f=1%, Test-Set, SUM aggregate'44eee xp  6NMM?`p]`E  @"E??3` hk` hl` hm` hnQ3d23 M NM4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: OTLIDXQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_4E4D $% MP+3O&kQ4$% MP+3O&lQ4FAFgL 3O5] 3 b#M43*#M! 
M4% I OIM3OA&nQ Workload'4% dMZ3Oz&nQ (Running time (sec)'4523  NM43"  k3O k% Mp73OQ4444% VM3O&mQ .Cost of Tagging (z=2)'44eee xp  6NMM?p ]`E  @"E??3` hi` hj` ho` hp` hqQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&qQ4$% MP+3O&pQ4FAj{ 3OYu S 3 b#M43*#M! M4% ) ;M 3O%(&oQ GDegree of overlap in condition ranges between test set and training set'4% lMZ3O([&oQ :Relative Error (L1-Metric)'4523  NM43"  w3O % Mp73O&iQ4444% VC M3O!0&jQ ?Error vs. Correlation: COUNT aggregate (z=2, f=1%,W-SEL-GB-100)'44eee >@7 viveknar Vivek NarasayyaMicrosoft Excel@[M@ ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~  @\pVivek Narasayya Ba= =Z9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1.Times New Roman1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)"Yes";"Yes";"No""True";"True";"False""On";"On";"Off"                + ) , *     $  T 8 T x " `Chart1&selGB_20_100_0.resAdHoc`iZR3  @@   SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set 
Training SetSTRATSUM SUMf = 1% W-SEL-GB-100PURE GBCONGNEG CORR (Test Set)POS CORR (Test Set)z=2 Running Time W-SEL-100W-GB-100TaggingSamplingf=1%, z=2, W-SEL-GB, SUMMSSalesSUM (Test Set)SUM (Training Set) L2 metric" ga  @M\\research\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` hh` hi` hj`  k`  l3dY23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&jQ4$% MP+3O&iQ4FAj13Ob  3 b#M43*#M! M4%  GM3Of&hQ &Sampling Rate (%)'4% MZ3O$N&hQ :Relative Error (L2 metric)'4523  NM43"  ?3O % Mp73O&lQ4444% N M3O&kQ FError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100, z=2, Test Set)'44e????????????????@@@@@@@@$@$@$@$@e9?8d˩?h4 ;?n!a?1zn+?\u??ލA?ݰm? ?T?g?9?Xm_u?J\߇?o?, ƈ?^hHK?`;O?.KR?4fS?PۆQ?Ɋ?s(CUL?@"?/kb?{24?P3ͅ?ɍ"k ?e> ɍ" @  dMbP?_*+%M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??U} I }  }  }  ````````` ` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? 
>?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA&   _        _           _~ $@zPPVn?/H?Cp?;]?~ $@9?8d˩?h4 ;?n!a?_~ 4@ҌE?LT?$ӡn?ިo?~ 4@1zn+?\u??ލA?ݰm?_~ ?P&?!!?ދ?C+j?~ ? ?T?g?9?~ ? ĭ?PC?֌ ra?*b?~ ?Xm_u?J\߇?o?, ƈ?~ @"p?'.+=?2Pl?Ωd?~ @^hHK?`;O?.KR?4fS?~ @OYM]? ?gA(?ْU?~ @PۆQ?Ɋ?s(CUL?@"?~ $@"J %?镲 q?\7V?$I?~ $@/kb?{24?P3ͅ?ɍ"k ?   &    "~ $@bL{)?2=a?}vuŌ?ˠDt?"~ 4@.&????Eb?"~ ? v?ߢ?|DL$zY?ɐck?"~ ?9τ&?Zc!?yTE?!!3?  ~ @f+/?O9&?Nwx6?yrM?   ~ @OYM]?@3?Nwx?Ӻ ?    ~ $@@I?}͑?מY?Է?~ $@rsB??N?2Yх?:0BTDXNXPN>ttPh||| `!`"`#`$`%`-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`" ~ 4@ ib ? 7O,? vnT ? 
6jA?!uCU?!x=jc?!VcEl?""~ "?"Yy?"Jͫ?"6Ĉ?"5'.?"#~ #@#̩M?#Hn)?#1p(P?#6W?/ފ5?"/~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?0 ||||||Ph|VVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV``a`b`c`d`e`f`g`h`l`m`n`o`p`q`r`s`t`` ` ` ` `a a a a a a~ b$@bog_y?bt?b?bM^i?bk3 ?~ c4@c?ci^`V?cC?cpa?cr?~ d?dGL ?dTO?d a*V?dnJy?dEИ?~ e?efH?e-ex?eRG?eŭ?eB?~ f@f  R?f Rr%?fA !?f!<8b-?f3?~ g@g^SH?gd]?g3O>g bG?g;?~ h$@h%\#?hZc!?h_>Y1\>h'2?h3Y?l l l l lm m m m m m~ n$@nA?n|ԛ?n>?ni&?nwٯ;?~ o4@o'p?oN?o|?o/.Ui?o~Ϛ?~ p?pYe?p5?p+ٱA?p Pj?p>9 ?~ q?q5؀?q2?qy76?q$F?qn2d?r@PL@rt|8c?r6w\&?r?r)Wx?~ s@sݔZ ?s j֍?sZQf?soe2?st_lW?~ t$@tؼZ`?t9?t'eRC>t^?tI)?(2TBPhhhhhhhBPhhhh^h````````````````````````              ~ $@%z?x҆?txH?N ^?    ~ 4@}?s?*.?5x_? ~ $@IDA?fd?_,?N ^?~ ?l\?Ȳ`?4?    ~ ?g?\Z {?DKO?6? ~ $@) h?5#?R8ߡ?E&?~ @ ??B˺?Qv0b? ~ 4@=?V?k)?ʤ6?~ @TTJg??l g?0*D? ~ ?M(#?|?5^?A C?dwW?~ $@ t?Ӝ?4?#G? ~ ?Ϲ[?ڱ?P,cy?"?" ~ @9`?ŐL*?n?gx?" ~ @9??EИ?$(~k?o?" ~ $@{O崧?e?F =b?JaL?4>ZZ~|DDh||`````````````````````` (            *@.@@H@@=@ *d@.@$@L@d@=@ *@.@@J@@=@&        @d@@  @d@@  @$@@ .@.@.@ @d@@  @$@@" M@@S@Q@"  @d@@ B@B@B@      ~ 9@"k? $>?b ?0!?~ I@W? (G`?gA(?uHg`?~ Y@K."?}A?Ĵo?I?~ i@x^*6u?!g? ?E`o`?~ y@4?V?t5=((?"070?~ @om?ʉv?JPB?m?0d Dl^^^T|^Z84(BVVVVV``@``,`,`,`,`,`,```,`,`,`,`,`,```,`,        ~ 4@n?S?;On?tV?~ ?Zd;?~  R@L7A`?/$?~ ?V-?&1? +?"~?~ @|?5^?S?bX9?Zd;?~ @v?J +?~ =@V-?~ $@n?S?Cl?x?      ~ 4@S㥛?~jt?~jt?~  T@~ ?"~?K7?%C?S?~ ?MbX9?Zd;?7A`?S㥛?~ @Zd;O?"~?w/?d;O?~ @x&1?~ ?/$?ʡE?~ $@oʡ?uV?Mb?~jt?      4@X@V@n?ˡE?~ ?? +?V-?B`"?.((BVRVVRV(BRVVVRV(B@`,`,`,`,~ ?#~j?K?v/?~jt?~ @V-?rh|?Mb?v/?~ @rh|?rh|?y&1|?Zd;?~ $@v/?S㥛?~ ?Mb? <VVV (  v  <NMM? 
]`   @"??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`   @"??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`   @"??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`P   @"P??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`@   @"@??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! 
M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee ~v  <NMM?_ n]`   @"??3` %` &` '` (` )` *)3d23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D $% MP+3O&'Q4$% MP+3O&(Q4FA(M3O 3 b#M43*#M! M4% b JUM3Oj&)Q &Sampling Rate (%)'4% r.MZ3O&&*Q BAvg. Relative Error (L1 Metric)'4523  NM43" Y &3OY &% Mp73O&%Q4444% DN" lM3O(&&Q >Error Vs Sampling Rate: COUNT Aggregate W-GB-100 Test Set, Z=2'44eee ~v  <NMM?o- ~]`(   @"(??3` v+` v,` v-` v.` v/` v0+3d2L23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% MP+3O&/Q4$% MP+3O&.Q4FA  3O.B r 3 b#M43*#M! M4%  ?UM3Oj&,Q &Sampling Rate (%)'4% qtMZ3O&&-Q BAvg. Relative Error (L1 Metric)'4523  NM43" a 3Oa % Mp73O&0Q4444% ^N.M3O$&+Q |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44eee ~v   <NMM? ] `   @" ??3` v2` v3` v4` v5` v6` v7п;3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&6Q4$% MP+3O&5Q4FA+9 3O7|  3 b#M43*#M! M4%  MM3Of&3Q &Sampling Rate (%)'4% kIMZ3O&4Q 8Relative Error (L1 Metric)'4523  NM43"  u3O t% Mp73O&7Q4444% O;M3O$&2Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-NEG Test Set, z=2)'44e????????????????@@@@@@@@$@$@$@$@e%z?x҆?txH?N ^?}?s?*.?5x_?l\?R 3OCn / 3 b#M43*#M! M4%  FM3Oj&9Q &Sampling Rate (%)'4% lydMZ3O&:Q 8Relative Error (L1 Metric)'4523  NM43"  f3O % Mp73O&=Q4444% ]J3M3O&&8Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-POS Test Set, z=2)'44e????????????????@@@@@@@@$@$@$@$@e a5??fIZ?7ُ?.;1E?Tn?7̒?qh?t=?oŏ?>Ȳ`?4?g?\Z {?DKO?6? ??B˺?Qv0b?TTJg??l g?0*D? 
t?Ӝ?4?#G?e xp   6NMM?p ] `X   @"X ??3` v1` v>` v?` v@` vA?3d23 M NM4 3Q ;  WSAMP TaggingQ ;Q ;Q3_4E4 3Q ;  WSAMP SamplingQ ;Q ;Q3_4E4 3Q ;  STRAT TaggingQ ;Q ;Q3_4E4 3Q ;  STRAT SamplingQ ;Q ;Q3_4E4 3Q ;  OTLIDX TaggingQ ;Q ;Q3_4E4 3Q ; "OTLIDX SamplingQ ;Q ;Q3_4E4D $% MP+3O&AQ4$% MP+3O&@Q4FA;s3O=  3 b#M43*#M! M4%  7KM3O;&>Q Workload'4% jJMZ3Oq&>Q (Running Time (sec)'4523  NM43"  ;3O ;% Mp73O&?Q4444% K ZM3OA(&1Q x:Comparison of Running Time for Creating Sample (f=1%, z=2)'44e W-SEL-100 W-SEL-100 W-SEL-100 W-SEL-100 W-SEL-100 W-SEL-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100W-GB-100W-GB-100W-GB-100W-GB-100W-GB-100W-GB-100e@.@@M@@B@d@.@$@@S@d@B@@.@@Q@@B@e xp   6NMM?P ] `   @" ??3` vB` vC` vD` vEA3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&EQ4$% MP+3O&DQ4FAc k3OE 3 b#M43*#M! M4% x u@M3O(&CQ R'Number of queries in workload W-SEL-GB'4% i MZ3O6&CQ L1 Error'4523  NM43" k t3Ok s% Mp73OQ4444% V M3O0&BQ x:Error Vs. Workload Size z=2, f=1%, Test-Set, SUM aggregate'44eee ~v   <NMM?0@] `@   @"@ ??3` hF` hG` hH` I` JQ3dh23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&HQ4$% MP+3O&IQ4FAF= 3O5  3 b#M43*#M! M4%  CIM3Ox&JQ &Sampling Rate (%)'4% dMZ3Oj&JQ &Error (L1 metric)'4523  NM43"  w3O % Mp73O&FQ4444% V ;M3O(&GQ LError Vs. Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-NEG-100, Test Set)'44eee ~v  <NMM?]`   @" ??3` hK` hL` hM` hN` hOQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&NQ4$% MP+3O&MQ4FA4 3O,x R 3 b#M43*#M! M4% A :M3Oj&LQ &Sampling Rate (%)'4% dxMZ3Ob&LQ &Error (L1 metric)'4523  NM43"  >3O % Mp73O&OQ4444% VM3O$&KQ LError Vs. 
Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-POS-100, Test Set)'44eee xp  6NMM?ii]`   @" ??3` hP` hQ` hR` hSQ3d23 M NM4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_4E4 3Q: OTLIDXQ ;Q ;Q3_4E4D $% MP+3O&SQ4$% MP+3O&RQ4FAg 3O5] 3 b#M43*#M! M4% w OIM3OA&QQ Workload'4% dMZ3Oz&QQ (Running time (sec)'4523  NM43"  x3O x% Mp73OQ4444% VM3O&PQ .Cost of Tagging (z=2)'44e W-SEL-100 W-SEL-100 W-SEL-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100W-GB-100W-GB-100W-GB-100e@@@d@$@d@@@@e ~v  <NMM? M]`   @" ??3` hU` hV` hW` X` YQ3d6{23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&WQ4$% MP+3O&XQ4FAg< 3O_ 3 b#M43*#M! M4% K WM3O&YQ &Sampling Rate (%)'4% R~MZ3O*f&YQ :Relative Error (L1 metric)'4523  NM43"  w3O % Mp73O&VQ4444% )VW ;M3O(&UQ KError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Test Set)'44e????????????@@@@@@@@$@$@$@$@eS㥛?~jt?~jt?(\?"~?K7?%C?S?MbX9?Zd;?7A`?S㥛?Zd;O?"~?w/?d;O?x&1??/$?ʡE?oʡ?uV?Mb?~jt?e ~v  <NMM?f i]`   @" ??3` hZ` h[` h\` h]Q3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&]Q4$% MP+3O&\Q4FAY 3O  3 b#M43*#M! M4%  :M3Oj&[Q &Sampling Rate (%)'4% RYMZ3O&Q&[Q :Relative Error (L1 metric)'4523  NM43"  >3O % Mp73OQ4444% )VW ;M3O(&ZQ OError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Training Set)'44e????????????@@@@@@@@$@$@$@$@eGz?Q?n?ˡE?? +?V-?B`"?#~j?K?v/?~jt?V-?rh|?Mb?v/?rh|?rh|?y&1|?Zd;?v/?S㥛?{Gzt?Mb?e ~v  <NMM? xx]`   @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` h^` h_` h`` ha` hbQ3d]23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&bQ4$% MP+3O&aQ4FA` 3Oq  3 b#M43*#M! M4% K CIM3Ox&`Q &Sampling Rate (%)'4% @lMZ3O([&`Q :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&^Q4444% FV M3O80&_Q FError Vs. 
Sampling Rate: COUNT Aggregate (W-SEL-GB-100, z=2, Test Set)'44eee ~v  <NMM?@P+]`   @"??3` hc` hd` he` hf` hgQ3d23 M NM4 3Q: USAMPQ ;%Q ;%Q3_4E4 3Q: WSAMPQ ;%Q ;%Q3_4E4 3Q: STRATQ ;%Q ;%Q3_  NM  d4E4 3Q: OTLIDXQ ;%Q ;%Q3_  NM  Nd4E4D$% MP+3O&dQ4$% MP+3O&gQ4FA 3O. 3 b#M43*#M! M4% f+ CIM3Ox&fQ &Sampling Rate (%)'4% -lMZ3O([&fQ :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&cQ4444% 4VB M3O=2&eQ CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100,z=2, Test Set)'44eee >@7 SummaryInformation(+DocumentSummaryInformation8/4_1047883797!F%f%fOle 4Xp x Microsoft Corp. selGB_20_100_0.resAdHocChart1  WorksheetsCharts !FMicrosoft Excel ChartBiff8Excel.Chart.89qCompObj5bObjInfo7Workbook ߠSummaryInformation(8 a   @\pVivek Narasayya Ba=i+=<9X@"1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1.Times New Roman1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial1Arial"$"#,##0_);\("$"#,##0\)!"$"#,##0_);[Red]\("$"#,##0\)""$"#,##0.00_);\("$"#,##0.00\)'""$"#,##0.00_);[Red]\("$"#,##0.00\)7*2_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_).))_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)?,:_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)6+1_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)"Yes";"Yes";"No""True";"True";"False""On";"On";"Off"                + ) , *     $  T 8 T x " `Chart2&selGB_20_100_0.resAdHoc`iZR3  @@   SAMPLING-RATEUSAMPWSAMP OPTIMCountOPTIMSumCONGRESSOTLIDXCANNEDCOUNTCOUNT Test Set Training SetSTRATSUM SUMf = 1% W-SEL-GB-100PURE 
GBCONGNEG CORR (Test Set)POS CORR (Test Set)z=2 Running Time W-SEL-100W-GB-100TaggingSamplingf=1%, z=2, W-SEL-GB, SUMMSSalesSUM (Test Set)SUM (Training Set) L2 metric" ga  @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` hh` hi` hj` hk` X l3d#*23 M NM4 3Q: USAMPQ ;%Q ;%Q3_4E4 3Q: WSAMPQ ;%Q ;%Q3_4E4 3Q: STRATQ ;%Q ;%Q3_  NM  d4E4 3Q: OTLIDXQ ;%Q ;%Q3_  NM  Nd4E4D$% MP+3O&kQ4$% MP+3O&jQ4FA3On 3 b#M43*#M! M4%  bM3Of&iQ &Sampling Rate (%)'4% #MZ3O$N&iQ :Relative Error (L2 metric)'4523  NM43"  J3O % Mp73O&lQ4444% N M3O$&hQ CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100,z=2, Test Set)'44e????????????????@@@@@@@@$@$@$@$@ersB??N?2Yх?ib ?7O,?vnT ?6jA?uCU?x=jc?VcEl?Yy?Jͫ?6Ĉ?5'.?̩M?Hn)?1p(P?6 {t @  dMbP?_*+%M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??U} I }  }  }  ````````` ` ` `````````````````                               ! " # $ % & '( ) * + , - . /0 1 2 3 4 5 6 78 9 : ; < = > ?@ A B C D E F GH I J K L M N OP Q R S T U V WX Y Z [ \ ] ^ _~ $@zPPVn?/H?S'ݚ@ 8Kr@fM,?LqUw? ^? 1~٭? +H3? (|V@ ͪ @ rPL??0:?X_">'@XSh?s?\i?g@?$~.r?O??Za/?$W@?xF[D?\:xb@3 8ڵ@4u/3@5Le0@6[t @7F@89Knz A:?݉A;0@ÒB<[YVB=KA>l A?_$A@A):@Bc>q@CȮ"@D6 B\}@EDH@F[@G8*@HIbAJy7$AKM-8EALAMVj)̪ANZKջAOk_HȼAPQkSUo@RJVU5@~ S$KAT>UUUAUJVUգ@V*@~ Wм@XYKnz AZ?݉A[0@ÒB\[YVB]KA^l A__$A~ 4@n/i?LT? R @q߅@Li-?ިo?EaE? Ct ? .T ? YO@ s@ 2go?$z?4 '@qGR?`HZ?ʢb?oJy?Do?ڒUn2?Ɏ@?o.2? +پ?cT?D?qH/j?Ytw?^# @ !N @"UE @#2#2J@$52;A@%B 3m?&Z*oG8-@'xGj0@()X9v^ @*9}=@+WJ@,_Z'B@-H`?.@/sh\@015@2R@31е/+@4gGR@5a^Cn@6 հc1@7Vv+@89/ĢLA:aWA;)& B< -.k B=얽zA>x|A?lA@A +}@B!S_@C%@D<+i3@Ey@F4}vL@Gl@HIjAJ]lPoAAK̗(ALM4AMtUUA~ TAUj@V7@l@WXY/ĢLAZaWA[)& B\ -.k B]얽zA^x|A_lA~ ?oaxw?!!?GW:??ur?C+j?px? v~k? uX? n? 1>^? 
>?^S??Ң>a'@d?9]?Q?[v?,?ֈ`?-#??3?N*kg?<Y?iܷ?+Qr?Yک@ !I#@"!^@#`V (@$ @%y"?&e@'2go90@()W#"@*됛@+tZAU(@,3h @-~p?.ė"N@/tw-@01FjR@2Yd@3368@4O7P8@5c\@6̲'l@7a:@89iA:uA;;0˦A<:A=[NA>gC/A?['FA@Af?4@B>!;ocx@CO@a3@D343@E-o@FV;@G:: @HIPJыAJ?ykAKimPALJ"AMd["FANݘ>ݧ1AO/OgAP~ Q@R)!XUUP@SXq|@T:?q@~ U@Vާ@~ WZ@XYiAZuA[;0˦A\:A][NA^gC/A_['FA~ ?B"LQ.??gA(??ޫV&?X:%?*b?px? K."? T7? bc^G?  ^f? K;?H.!??Ң>a'@R?"4?rl=C8f?~?X xa?ao?-#?X?tm?TD[ʩ?p?zܷZ?6׿?Yک@ ! W^?"7DeZ @#]A_['FA~ @4BX?Y>-?֌ ra?Q?XQ ?e3$?px? *dq? Pl? ? QۆQa'@w~Q?4c= ?3?-A;БA<^.b/A=;gAāA>:_A?['FA@AYd@Bzpwf@CD+g_@D#Ӂ@EGtvc@F X @G:: @HI>0 (pAJ4DO}wAKALMN|<ƗAM'KAN,ԾYAO/OgAP~ Q@ROUU`@SOq`@T2y@UJ@Vާ*~@~ WZ@XY ًAZ>-A[БA\^.b/A];gAāA^:_A_['FA~ @OYM]?X?2Pl?=\r)?U&5?ْU?px? c]K? 9EGr? _=[? 뉺? 3?l<*??Ң>a'@"q]?2 n?H,?^N?@߾?[1й?-#?"1?h?{K9_콰?'?Ƿ?F0}?Yک@ !ڋh;@"^@#kF @$75?~ %?&>rk@'2go90@())"ê@*Zb+h:@+JiW @,۞ ?~ -?.7k @/tw-@01J@23@3_]u@44iSuס@5(&縿@6=%@7a:@89S!A:xғA;'/RR4vA<lԅ"A=< A>gPA?['FA@AP)b@B4`@CJ%rAÐ@Dh?RD@E @F%<m@G:: @HInWrAJ7"pAKlЗ.ĥAL 3AM|AN= \{AO/OgAP~ Q`@R[{8@S8n1@~ T@UJVU@VSB@~ WZ@XYS!AZxғA['/RR4vA\lԅ"A]< A^gPA_['FA~ $@"J %?HPs?\7V?KR%?6:8?&L?px? x&4I,)? "4? /? _ Į? l?/5B?S??Ң>a'@?a'?2F ?ڦx\?iVΘ?+pW?Ap-?-#?M=?L7A`?߃.?5 S"?sFZ*o?hr1ֱ?Yک@ !3X?""gx@#qJ[\S@$m_?~ %?&gE(@'2go90@()9#?*J`sI@+lk@,"S>U?~ -?.}6@/tw-@01='o'@2fIހ@3N$r)@4N`@5|~싴@6d˲@7a:@89vIA:A;LT}A<.(A=X†VA>ZɋA?['FA@A4%`@BI]@C{\v@D{@E0~C@F}@T@UkSU@VZ@Z@WXYvIAZA[LT}A\.(A]X†VA^ZɋA_['FA&   _        _           _~ $@zPPVn?/H?Cp?;]?~ $@9?8d˩?h4 ;?n!a?_~ 4@ҌE?LT?$ӡn?ިo?~ 4@1zn+?\u??ލA?ݰm?_~ ?P&?!!?ދ?C+j?~ ? ?T?g?9?~ ? ĭ?PC?֌ ra?*b?~ ?Xm_u?J\߇?o?, ƈ?~ @"p?'.+=?2Pl?Ωd?~ @^hHK?`;O?.KR?4fS?~ @OYM]? ?gA(?ْU?~ @PۆQ?Ɋ?s(CUL?@"?~ $@"J %?镲 q?\7V?$I?~ $@/kb?{24?P3ͅ?ɍ"k ?   &    "~ $@bL{)?2=a?}vuŌ?ˠDt?"~ 4@.&????Eb?"~ ? v?ߢ?|DL$zY?ɐck?"~ ?9τ&?Zc!?yTE?!!3?  ~ @f+/?O9&?Nwx6?yrM?   ~ @OYM]?@3?Nwx?Ӻ ?    ~ $@@I?}͑?מY?Է?~ $@rsB??N?2Yх?:0BTDXNXPN>ttPh||| `!`"`#`$`%`-`.`/`0`1`2`3`4`5`9`:`;`<`=`>`?`" ~ 4@ ib ? 7O,? vnT ? 
6jA?!uCU?!x=jc?!VcEl?""~ "?"Yy?"Jͫ?"6Ĉ?"5'.?"#~ #@#̩M?#Hn)?#1p(P?#6W?/ފ5?"/~ 04@0Ct ?0.T ?0[ ?0$z?~ 1?1v~k?1uX?11>^?1r0C?~ 2?2K."?2}A?2Ĵo?2I?~ 3@3*dq?3}?3QۆQVP?=g#M)?~ >?>͏O?>QG?>W\9?>>'I?~ ?@?JiW??ė"n??() ,??Ry=?0 ||||||Ph|VVVVVV4BVVVV@`A`L`M`N`O`P`Q`R`S`@@A@@"?@K8?@ ]?~ A$@AoG8-x?A]k?AiUMu>A&b?L L LM M M MMN N N N N~ O?O@r?O?6ɏ?OyUg?O4?~ P?Pk?PBK8?Ptub?PXR?~ Q@QK."?Q}A?QĴo?QI?~ R@R3ۃ?Rek}?R`8 ?R`>?~ S@Sʊ?Sa0?S $y?S8H-?LV(>BVVVV``a`b`c`d`e`f`g`h`l`m`n`o`p`q`r`s`t`` ` ` ` `a a a a a a~ b$@bog_y?bt?b?bM^i?bk3 ?~ c4@c?ci^`V?cC?cpa?cr?~ d?dGL ?dTO?d a*V?dnJy?dEИ?~ e?efH?e-ex?eRG?eŭ?eB?~ f@f  R?f Rr%?fA !?f!<8b-?f3?~ g@g^SH?gd]?g3O>g bG?g;?~ h$@h%\#?hZc!?h_>Y1\>h'2?h3Y?l l l l lm m m m m m~ n$@nA?n|ԛ?n>?ni&?nwٯ;?~ o4@o'p?oN?o|?o/.Ui?o~Ϛ?~ p?pYe?p5?p+ٱA?p Pj?p>9 ?~ q?q5؀?q2?qy76?q$F?qn2d?r@PL@rt|8c?r6w\&?r?r)Wx?~ s@sݔZ ?s j֍?sZQf?soe2?st_lW?~ t$@tؼZ`?t9?t'eRC>t^?tI)?(2TBPhhhhhhhBPhhhh^h````````````````````````              ~ $@%z?x҆?txH?N ^?    ~ 4@}?s?*.?5x_? ~ $@IDA?fd?_,?N ^?~ ?l\?Ȳ`?4?    ~ ?g?\Z {?DKO?6? ~ $@) h?5#?R8ߡ?E&?~ @ ??B˺?Qv0b? ~ 4@=?V?k)?ʤ6?~ @TTJg??l g?0*D? ~ ?M(#?|?5^?A C?dwW?~ $@ t?Ӝ?4?#G? ~ ?Ϲ[?ڱ?P,cy?"?" ~ @9`?ŐL*?n?gx?" ~ @9??EИ?$(~k?o?" ~ $@{O崧?e?F =b?JaL?4>ZZ~|DDh||`````````````````````` (            *@.@@H@@=@ *d@.@$@L@d@=@ *@.@@J@@=@&        @d@@  @d@@  @$@@ .@.@.@ @d@@  @$@@" M@@S@Q@"  @d@@ B@B@B@      ~ 9@"k? $>?b ?0!?~ I@W? (G`?gA(?uHg`?~ Y@K."?}A?Ĵo?I?~ i@x^*6u?!g? ?E`o`?~ y@4?V?t5=((?"070?~ @om?ʉv?JPB?m?0d Dl^^^T|^Z84(BVVVVV``@``,`,`,`,`,`,```,`,`,`,`,`,```,`,        ~ 4@n?S?;On?tV?~ ?Zd;?~  R@L7A`?/$?~ ?V-?&1? +?"~?~ @|?5^?S?bX9?Zd;?~ @v?J +?~ =@V-?~ $@n?S?Cl?x?      ~ 4@S㥛?~jt?~jt?~  T@~ ?"~?K7?%C?S?~ ?MbX9?Zd;?7A`?S㥛?~ @Zd;O?"~?w/?d;O?~ @x&1?~ ?/$?ʡE?~ $@oʡ?uV?Mb?~jt?      4@X@V@n?ˡE?~ ?? +?V-?B`"?.((BVRVVRV(BRVVVRV(B@`,`,`,`,~ ?#~j?K?v/?~jt?~ @V-?rh|?Mb?v/?~ @rh|?rh|?y&1|?Zd;?~ $@v/?S㥛?~ ?Mb? <VVV (  v  <NMM? 
]`   @"??3` v` e` e` v ` e ` ; E?3do23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAR7y 3O7 $ 3 b#M43*#M! M4%  lLM3Ox&Q &Sampling Rate (%)'4% h.MZ3O& Q 8Relative Error (L1 Metric)'4523  NM43"  I3O % Mp73O& Q4444% WH BM3O(& Q EError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM? *x]`   @"??3` v ` v` v` v` v` vпD?3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAJ5~ 3O8 = 3 b#M43*#M! M4%  BM3Oj&Q &Sampling Rate (%)'4% h~MMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  Z3O % Mp73O&Q4444% @I -M3O&& Q IError Vs. Sampling Rate: COUNT Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee ~v  <NMM?+ <i]`   @"??3` v` v` v` v` v` vB3d23 M NM4 3Q:. USAMPQ ;/5Q ;/5Q3_4E4 3Q:. WSAMPQ ;/5Q ;/5Q3_4E4 3Q:. STRATQ ;/5Q ;/5Q3_  NM  d4E4 3Q:. OTLIDXQ ;/5Q ;/5Q3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FAN)j 3O9 9 3 b#M43*#M! M4%  DM3Oj&Q &Sampling Rate (%)'4% irTMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  ^3O % Mp73O&Q4444% J/M3O&&Q CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Test Set, z=2)'44eee ~v  <NMM?< L]`8   @"8??3` v` v` v` v` v` vE?3d23 M NM4 3Q:: USAMPQ ;;AQ ;;AQ3_4E4 3Q:: WSAMPQ ;;AQ ;;AQ3_4E4 3Q:: STRATQ ;;AQ ;;AQ3_  NM  d4E4 3Q:: OTLIDXQ ;;AQ ;;AQ3_  NM  Nd4E4D$% MP+3O&Q4$% MP+3O&Q4FA6 3O.  3 b#M43*#M! M4%   LM3Of&Q &Sampling Rate (%)'4% hFMZ3O&Q 8Relative Error (L1 Metric)'4523  NM43"  o3O w% Mp73O&Q4444% OK9M3O$&Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100 Training Set, z=2)'44eee xp  6NMM?L< \i]`(   @"(??3` v` v ` v!` v"` v#H3d%23 M NM4 3Q:N USAMPQ ;OSQ ;OSQ3_4E4 3Q:N WSAMPQ ;OSQ ;OSQ3_4E4 3Q:N STRATQ ;OSQ ;OSQ3_  NM  d4E4 3Q:N OTLIDXQ ;OSQ ;OSQ3_  NM  Nd4E4D$% MP+3O&#Q4$% MP+3O&"Q4FAM/ 3O4ZH V 3 b#M43*#M! 
M4%  RZM3O[&!Q  Data Skew (z)'4% ]gMZ3O#&!Q Error'4523  NM43"  w3O % Mp73O& Q4444% K;M3O&&Q ?Error Vs Data Skew: SUM Aggregate (W-SEL-GB-100 Test Set, f=1%)'44eee ~v  <NMM?_ n]`x   @"x??3` %` &` '` (` )` *)3d23 M NM4 3Q:a USAMPQ ;bhQ ;bhQ3_4E4 3Q:a WSAMPQ ;bhQ ;bhQ3_4E4 3Q:a STRATQ ;bhQ ;bhQ3_  NM  d4E4 3Q:a OTLIDXQ ;bhQ ;bhQ3_  NM  Nd4E4 3Q:a  CONGQ ;bhQ ;bhQ3_4E4D $% MP+3O&'Q4$% MP+3O&(Q4FA(M3O 3 b#M43*#M! M4% b JUM3Oj&)Q &Sampling Rate (%)'4% r.MZ3O&&*Q BAvg. Relative Error (L1 Metric)'4523  NM43" Y &3OY &% Mp73O&%Q4444% DN" lM3O(&&Q >Error Vs Sampling Rate: COUNT Aggregate W-GB-100 Test Set, Z=2'44eee ~v  <NMM?o- ~]`   @"??3` v+` v,` v-` v.` v/` v0+3d2L23 M NM4 3Q:m USAMPQ ;ntQ ;ntQ3_4E4 3Q:m WSAMPQ ;ntQ ;ntQ3_4E4 3Q:m STRATQ ;ntQ ;ntQ3_  NM  d4E4 3Q:m OTLIDXQ ;ntQ ;ntQ3_  NM  Nd4E4 3Q:m  CONGQ ;ntQ ;ntQ3_4E4D$% MP+3O&/Q4$% MP+3O&.Q4FA  3O.B r 3 b#M43*#M! M4%  ?UM3Oj&,Q &Sampling Rate (%)'4% qtMZ3O&&-Q BAvg. Relative Error (L1 Metric)'4523  NM43" a 3Oa % Mp73O&0Q4444% ^N.M3O$&+Q |<Error Vs Sampling Rate: SUM Aggregate W-GB-100 Test Set, z=2'44eee ~v   <NMM? ] `   @"??3` v2` v3` v4` v5` v6` v7п;3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&6Q4$% MP+3O&5Q4FA+9 3O7|  3 b#M43*#M! M4%  MM3Of&3Q &Sampling Rate (%)'4% kIMZ3O&4Q 8Relative Error (L1 Metric)'4523  NM43"  u3O t% Mp73O&7Q4444% O;M3O$&2Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-NEG Test Set, z=2)'44e????????????????@@@@@@@@$@$@$@$@e%z?x҆?txH?N ^?}?s?*.?5x_?l\?R 3OCn / 3 b#M43*#M! M4%  FM3Oj&9Q &Sampling Rate (%)'4% lydMZ3O&:Q 8Relative Error (L1 Metric)'4523  NM43"  f3O % Mp73O&=Q4444% ]J3M3O&&8Q GError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100-POS Test Set, z=2)'44e????????????????@@@@@@@@$@$@$@$@e a5??fIZ?7ُ?.;1E?Tn?7̒?qh?t=?oŏ?>Ȳ`?4?g?\Z {?DKO?6? ??B˺?Qv0b?TTJg??l g?0*D? 
t?Ӝ?4?#G?e xp   6NMM?p ] `@   @"@??3` v1` v>` v?` v@` vA?3d23 M NM4 3Q ;  WSAMP TaggingQ ;Q ;Q3_4E4 3Q ;  WSAMP SamplingQ ;Q ;Q3_4E4 3Q ;  STRAT TaggingQ ;Q ;Q3_4E4 3Q ;  STRAT SamplingQ ;Q ;Q3_4E4 3Q ;  OTLIDX TaggingQ ;Q ;Q3_4E4 3Q ; "OTLIDX SamplingQ ;Q ;Q3_4E4D $% MP+3O&AQ4$% MP+3O&@Q4FA;s3O=  3 b#M43*#M! M4%  7KM3O;&>Q Workload'4% jJMZ3Oq&>Q (Running Time (sec)'4523  NM43"  ;3O ;% Mp73O&?Q4444% K ZM3OA(&1Q x:Comparison of Running Time for Creating Sample (f=1%, z=2)'44e W-SEL-100 W-SEL-100 W-SEL-100 W-SEL-100 W-SEL-100 W-SEL-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100W-GB-100W-GB-100W-GB-100W-GB-100W-GB-100W-GB-100e@.@@M@@B@d@.@$@@S@d@B@@.@@Q@@B@e xp   6NMM?P ] `   @"??3` vB` vC` vD` vEA3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&EQ4$% MP+3O&DQ4FAc k3OE 3 b#M43*#M! M4% x u@M3O(&CQ R'Number of queries in workload W-SEL-GB'4% i MZ3O6&CQ L1 Error'4523  NM43" k t3Ok s% Mp73OQ4444% V M3O0&BQ x:Error Vs. Workload Size z=2, f=1%, Test-Set, SUM aggregate'44eee ~v   <NMM?0@] `(   @"(??3` hF` hG` hH` I` JQ3dh23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&HQ4$% MP+3O&IQ4FAF= 3O5  3 b#M43*#M! M4%  CIM3Ox&JQ &Sampling Rate (%)'4% dMZ3Oj&JQ &Error (L1 metric)'4523  NM43"  w3O % Mp73O&FQ4444% V ;M3O(&GQ LError Vs. Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-NEG-100, Test Set)'44eee ~v  <NMM?]`   @"??3` hK` hL` hM` hN` hOQ3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&NQ4$% MP+3O&MQ4FA4 3O,x R 3 b#M43*#M! M4% A :M3Oj&LQ &Sampling Rate (%)'4% dxMZ3Ob&LQ &Error (L1 metric)'4523  NM43"  >3O % Mp73O&OQ4444% VM3O$&KQ LError Vs. 
Sampling Rate: COUNT Aggregate, (z=2, W-SEL-GB-POS-100, Test Set)'44eee xp  6NMM?ii]`   @"??3` hP` hQ` hR` hSQ3d23 M NM4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_4E4 3Q: OTLIDXQ ;Q ;Q3_4E4D $% MP+3O&SQ4$% MP+3O&RQ4FAg 3O5] 3 b#M43*#M! M4% w OIM3OA&QQ Workload'4% dMZ3Oz&QQ (Running time (sec)'4523  NM43"  x3O x% Mp73OQ4444% VM3O&PQ .Cost of Tagging (z=2)'44e W-SEL-100 W-SEL-100 W-SEL-100! W-SEL-GB-100! W-SEL-GB-100! W-SEL-GB-100W-GB-100W-GB-100W-GB-100e@@@d@$@d@@@@e ~v  <NMM? M]`   @"??3` hU` hV` hW` X` YQ3d6{23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&WQ4$% MP+3O&XQ4FAg< 3O_ 3 b#M43*#M! M4% K WM3O&YQ &Sampling Rate (%)'4% R~MZ3O*f&YQ :Relative Error (L1 metric)'4523  NM43"  w3O % Mp73O&VQ4444% )VW ;M3O(&UQ KError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Test Set)'44e????????????@@@@@@@@$@$@$@$@eS㥛?~jt?~jt?(\?"~?K7?%C?S?MbX9?Zd;?7A`?S㥛?Zd;O?"~?w/?d;O?x&1??/$?ʡE?oʡ?uV?Mb?~jt?e ~v  <NMM?f i]`   @"??3` hZ` h[` h\` h]Q3d23 M NM4 3Q: USAMPQ ;Q ;Q3_4E4 3Q: WSAMPQ ;Q ;Q3_4E4 3Q: STRATQ ;Q ;Q3_  NM  d4E4 3Q: OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&]Q4$% MP+3O&\Q4FAY 3O  3 b#M43*#M! M4%  :M3Oj&[Q &Sampling Rate (%)'4% RYMZ3O&Q&[Q :Relative Error (L1 metric)'4523  NM43"  >3O % Mp73OQ4444% )VW ;M3O(&ZQ OError Vs. Sampling Rate: SUM Aggregate (Real data set, Workload = Training Set)'44e????????????@@@@@@@@$@$@$@$@eGz?Q?n?ˡE?? +?V-?B`"?#~j?K?v/?~jt?V-?rh|?Mb?v/?rh|?rh|?y&1|?Zd;?v/?S㥛?{Gzt?Mb?e ~v  <NMM? xx]`   @M\\RESEARCH\112C2ndD<S odXXLetterPRIV0''''x\KhCvU x5XRX>@Draft00222222"dXX??3` h^` h_` h`` ha` hbQ3d]23 M NM4 3Q:  USAMPQ ;Q ;Q3_4E4 3Q:  WSAMPQ ;Q ;Q3_4E4 3Q:  STRATQ ;Q ;Q3_  NM  d4E4 3Q:  OTLIDXQ ;Q ;Q3_  NM  Nd4E4D$% MP+3O&bQ4$% MP+3O&aQ4FA` 3Oq  3 b#M43*#M! M4% K CIM3Ox&`Q &Sampling Rate (%)'4% @lMZ3O([&`Q :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&^Q4444% FV M3O80&_Q FError Vs. 
Sampling Rate: COUNT Aggregate (W-SEL-GB-100, z=2, Test Set)'44eee ~v  <NMM?@P+]`   @" ??3` hc` hd` he` hf` hgQ3d23 M NM4 3Q: USAMPQ ;%Q ;%Q3_4E4 3Q: WSAMPQ ;%Q ;%Q3_4E4 3Q: STRATQ ;%Q ;%Q3_  NM  d4E4 3Q: OTLIDXQ ;%Q ;%Q3_  NM  Nd4E4D$% MP+3O&dQ4$% MP+3O&gQ4FA 3O. 3 b#M43*#M! M4% f+ CIM3Ox&fQ &Sampling Rate (%)'4% -lMZ3O([&fQ :Relative Error (L2 metric)'4523  NM43"  w3O % Mp73O&cQ4444% 4VB M3O=2&eQ CError Vs. Sampling Rate: SUM Aggregate (W-SEL-GB-100,z=2, Test Set)'44eee >@7 Oh+'0@H\t  viveknar Vivek NarasayyaMicrosoft Excel@[M@2M)՜.+,0 PXp x Microsoft Corp. selGB_20_100_0.resAdHocChart2 DocumentSummaryInformation8<4_1036137450F0f0fOle ACompObjBf WorksheetsCharts FMicrosoft Equation 3.0 DS Equation Equation.39q!!| E[]=y FMicrosoft Equation 3.0 DS EqObjInfoDEquation Native E=_1036138565Fc5fc5fOle FCompObjGfObjInfoIEquation Native J1_1036137489Fffuation Equation.39q "n FMicrosoft Equation 3.0 DS Equation Equation.39q)  E["n]=Ole KCompObjLfObjInfoNEquation Native OEY FMicrosoft Equation 3.0 DS Equation Equation.39qZ E[("y) 2 ]=S 2 k_1036137716FFfFfOle QCompObjRfObjInfoTEquation Native Uv_1036137720FpQfpQfOle WCompObjXf FMicrosoft Equation 3.0 DS Equation Equation.39qt E[("n"Y) 2 ]=n 2 S 2 k FMicrosoft Equation 3.0 DS EqObjInfoZEquation Native [_1036137726F`Yf`YfOle ^CompObj_fObjInfoaEquation Native b_1036138200FPbfPbfuation Equation.39q  E"n"YY() 2 []=n 2 S 2 Y 2 k FMicrosoft Equation 3.0 DS Equation Equation.39qOle eCompObjffObjInfohEquation Native iqU '$ =n j  jj " n FMicrosoft Equation 3.0 DS Equation Equation.39q!T E[]=y_1036138812F0+lf0+lfOle kCompObjlfObjInfonEquation Native o=_1036138444FxfxfOle pCompObjqf FMicrosoft Equation 3.0 DS Equation Equation.39q" "n FMicrosoft Equation 3.0 DS Equation Equation.39qObjInfosEquation Native t1_1036139781F$f$fOle uCompObjvfObjInfoxEquation Native y_1036139737Fߏfߏf E[("y) 2 ]=1n 2 n j2 S j2 k jj " FMicrosoft Equation 3.0 DS Equation Equation.39qOle }CompObj~fObjInfoEquation Native " E[("n"Y) 2 ]=n j2 S j2 k jj " FMicrosoft 
Equation 3.0 DS Equation Equation.39q_1036139788F|f|fOle CompObjfObjInfoEquation Native _1036140000"FcfcfOle CompObjf|# E"n"YY() 2 []=1Y 2 n j2 S j2 k jj " FMicrosoft Equation 3.0 DS Equation Equation.39qObjInfo Equation Native [_1036140053 FffOle           R S              ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q T V U W X Y Z [ ] \ ^ _ ` b c e d f g h i j l k m o n p q r t s u v y w x z { | }  ~ ? n j S jj " FMicrosoft Equation 3.0 DS Equation Equation.39q@ k j =kn j S j n jCompObj  fObjInfoEquation Native _1048673842F`´fP]Ĵf S jj " [] FMicrosoft Equation 3.0 DS Equation Equation.39q\ kw t w uu " ()Ole CompObjfObjInfoEquation Native x_1048602886KF@ɴf@ɴfOle CompObjfObjInfo FMicrosoft Equation 3.0 DS Equation Equation.39q, k 1 =kmaxk,k2()maxk,k2()+max0,k2()()=k23(),k 2 =kEquation Native _10485006622F@ʹf@ʹfOle CompObjfmax0,k2()maxk,k2()+max0,k2()()=k13() FMicrosoft Equation 3.0 DS Equation Equation.39qObjInfoEquation Native C_1048500692 F0Dдf0DдfOle '؊ 1k 1 FMicrosoft Equation 3.0 DS Equation Equation.39qk 121k 1 +1k 2 (CompObj!fObjInfo"Equation Native _1048500866-%F p״f p״f) FMicrosoft Equation 3.0 DS Equation Equation.39qX\ 121k 1 +121k 1 +1k 2 ()Ole CompObj$&fObjInfo'Equation Native 5()=34()k 1 +14()k 2 FMicrosoft Equation 3.0 DS Equation Equation.39q k 1 =k 3   3  +1(),k_1048602844*F ٴf ٴfOle CompObj)+fObjInfo,Equation Native _1048501464/FffOle CompObj.0f 2 =k1 3  +1() FMicrosoft Equation 3.0 DS Equation Equation.39q/ 1 k 1$V00[-b~kMʓ2Lkx:_Z_a|{ÑNݎE+L{g+w9,].K~#˒w9,].K~#˒w9We3{l9m{\Xc#4a { { {sק-7rOx{awYFz ϵMՖ>h#IjKx 6֪Jf '3< C!1V oEbe 1mNe5&@r $wUďIyͨmq'yP$EP%U0nvQ0JġA"qH.!}SyH} Q>jGSS '}$؂=o'!(zXˁ>I0^SLZG̎ f?&Uc1"fv10Lg PG/ ή2: 0٨+Y6|# dL4cj[0cecx7.ŀ\'0bR Ue q Nɢ2.;{lSW\'lǘ܅1;]g6PVp(Gê6 0qz G|@ڬ.w1bLj}ףau1 nmV 'x=V"fu~QFS +l<+mIZ>Lݹgbcf?Cu:3Zd5c4!{^fiM\4bN4TpηА=Kkf@l1r':kj;I9zг-4dϙ*a6έ1K1\ w:W;wa~/7rh}tG௘BCʊ^CxRSxc~Yf}o}~whnX5 +l] J=c @ vÝX7773g5ZC\5ruБF|4W(  'H }}cp6={*7ex-_m߿9,Ҽq[,_RoWIQ1w(w*xA # vn#Q 
7tI8@/P~}9Çツo9wޘ& Gi_wp;6^X[4Zf֌ȯBTPET=e*Β2(Š(C u+_sr\+Y[ej,$^{ۏgp.r:; MlW?gOmg\x!Q^_t ;M,KXu,^:V˻Rޔ"+X_|HV.H^s}1?w!ήlwI?_ uz["rK8;ׂcPڄ&Y- ?U',㫞ݾ?/eXؘZGǖLkQ/\fk=sT>4YBWʝ"y(l2UH]ZݤC3+nhwǤ}ow܀CpXk]~<sZ0>ɑ=O{ljsZJ̴컷bvwwoXi4{jgJ"&BG:+˨(KYANӾ~m3VKRVǿ7^"Sh)^Ƥȃ{Fu:mv[Ûh5E;N`b^qu~/WrBc̢{DŽM9?nF5ȵZVOv.&81-ɾ_#RH/ףdQD DdU J   C A? "2׋%Cuc6v`!z׋%Cuc6&[n2L"HxZklTE>vP$iD&,G%*"1R[Zj"P"$"H4D@$& j4!D #|3sfݝnw!93sw9P(:G4%5pɄ"ȈAхAh*Ht+%`d$^Cqʚmj?i.Z#^a@+X{4);- X:}$óXzf ʡ!k=[.V1ֿ&p=@,m逪 iImX,\9lm(mC4ͳ3}4G_<0Lִt͈\8d[__LN2nmLFζӯu ֺEu Ǫ[ٶuj&sP2WJL)o.k=Q͖oeHН6Sѝa;Nq7Fw3Ct%X .,^_ލN:x˓ُ^Ew[xP5)>S̷k}dNqH̻}h=jt'.KHw$v$L㑮ܑp.qQ| 5h<3F=~ԮfkZk̃d_/fTlWy9)BWGߦG=Bo\z̜VG>ic:֨&-}\ 7ܴl.U$jF7Q!J)@ t~TV Gٮ։ Í4X9RnݴV *ni,ǧ Q;7Wej\m^I| ܹh@m@vG1x7qh0VZv2UDP ՠ6ִnmX7(cH_UvQbL)Rpgw::رp* d) ~s4>ѯ]s]BkP/$ߡU(~sRUja-^2c+؞CFnd t~/klY}JU#E*<ws}Sy Bu?Jx*#Z;H/9BOUYĜ `{Z'RN;lu?JE)آƒk]Rпf/7]۫e6hdFQ󣂷jPuX: G-85\5jO{+7VNe Ӆȝwiڅ}D:e1ݏtVո]|۫@#@Goՠ=QO>]*sϜGh*=d?>[[Eʅ[*rlȌ@G%~i4Kfcɜ}ms!ٝ1 d) ~s4>ѯO5=%QXdS ԫϜR*ô>uJǶ<݌ƶ>]`OEdcỌ0DWwR'aQQ+Njme'pk該syFazRoM|Ovm'8Zijk~-71esDrSvr3whNT+v0ms}m]e,ݴ:U9NSegʰ⠏f*݃ݽ g eԋD+D$}pco?*}FAp0ts ]]a8F"L]H/n3W[JQv~{G][cNq(~*pҍr 5MT[dW ,ظBo%[1;iLO)>+ExӪB4={3=LYk҅-޶B۔S?`9SSZ6̞v5YJ{vjMhY%FR RלI[=W{ 5! HzkoBOQ4Dd J ! C A? " 2XG=? hWZv`!XG=? hW2*B@.xZ]hU>3;(0%IlMMi&Of5شYvƦi,"@UT⻅o}KU{޻{g4;.ssϝ{G(@m 'GT-M/˴eh팦k:k?>-WrY"\q*͢2-=*0 \ExLևߩY\ ˩i3?-OG "UCW(|']^#K&2yXA_cY@za7zyeէ-FCإ?(}Jc=dw lqڠ nA#6Kϩ#HZ\\dr"f'U /WZV %W£޼.}&<,. ()iW ^aw*v\G-*Ze-#hK$h{VG+~):w)":_,ΐC3e9~!e?6ЧdcXZleI!e##m8z/hchЇVBOɆ2~ {2̆>. }BߨM_%|Ny// }BEw?!&I!蘿wRț}*cӈO27X! 2 y^ B ^)"GK@& =en5eKfs[F]Ųe3e[òF [.a20-rTJZF] hJ2t% wylV1%&q][%̍̍Ǫ.­1K`\m+ܪp*U*R9䕆me!l[;QIuKmXWϾIm0Iq)f-ҨtUk/֔rply:B1w14'q'BdLNgpnԈPGVOS89iN 'B)cF3:BYb8:iN,!tL ԈTGh`/9fϟߊ _WYr8=^9 ypwBHy"-x[ 9\4gKY3u:77 l) Y#U66a h|c Vaո+6ҙ:p}]]f:W,\UԞ;?F DeO~oQ0rH5KBBCm# Jkr/R@ Ae*h_ \ 0h?\vGl:͖fL+z}᦭j['pٟ+f: vM~aiymQ7SgsgKE;z3&斪}hkl_XDmk1%Hjgw],V1=aHh9VsBZ"6:*%V FjPȼ+w@$*}IjD~5R4%PBz/AƿW`GvD, ey"\u)OK#Ϡ1l%/e͍ ""Rmw4N34R}iZB4WbYk4F>]D@FQ^7wyqmz^qY/_A4'gEl=`aH:`d8#[ҌZ .ݳtw.T}3ռ_ IDdH J " C A? 
"!2 djg[KOv`!djg[KO8X-xXMlTU>k+[hQ 0HTB됁"%0)$ < + nX31nqO\ĺ%ĥ[4sO 3e&ws===VŬ0 rXY̒TlY&{&I'9Dc^c_b⩧sI)JӖ_q~a"׌a1lY$-RYˡM#tq!9ٚ]< P?FV9MXhC{U(w?zjZD~v^1IsrF?RwBNhވqk۱%9s"[&"idpDjo1*ڒ n6tJޯrtJ^@' _xţM;Zv4kc 9[rكȨ99$GbsmeCS%e³_lA5VƼ'ƫc%!!]JBۙ4S^+^!9} -|gƈナNկ,^8ӕɃ\5Lt=% o`֙<E}ӖQ-&Qe.}e^WkKsV>9v;[=OGOS;+Onk!uDtR6%<"I9B䞨sGeT&FOmy7(C˭7{~cݔҷ>iXo䩓sy',ykV䛽e.}o4n{f܉Iw~gVQ'mT=?mSۍ6Uڄ W7t Ddu J # C A? ""2F5̂QGZ&Εv`!F5̂QGZ&Εh.;-xxZklU>;;nT h!AVEIJVhХNaeV PbMؤ $h4$&1/"5YϽsٽ3wE;6s{{^17 ̃j0$2dط(^qǞ8H|D+OhzVGXwYR:X9]&_v,u{s ^ +2N"}c^sjbǠ5,ƥ!?BeĄx" ox/e#:$J-}.Ħޓ>KKD;u X)njETPWAJB9 ]Hm\;+dө \ӆ,ڞEx9 R,(N'm' 6II⤍$Ak+Ēpbq vm'^%q')"] ^q\c|Ysܛ!øsPc,Y^1TWE>\uT,GUM%1?g`S!' ' bp^$3F+jF3WH$aB4aHA0H06!Ka?aO& $a8ANi s[%ta1L:I88B:,EB>c$g\c:bo*e-<H{f(Wo.v& i~o ?dh8'B&3 lOI4o ~C|Lj{_@FD.!30Q߇B^۬zkYzK16bt>]K"okjK7f&`lc6aId%Yۋ A\u0O1G';,03`VWk s˜Zܣw?W`N1'ӜwQ6]'3% ־4^~&prbt0&}U0c0c8 >y+Q`~`PMdDu >,,t0\يv":}qx gJ+^mLwv`r[k[V,\t{}03Ho:Sk멱VUo!B9x^7XHڍнyv\0bZ,4 |RBrͬP{m=UeQ. 5~-: *2;z ‘0#;rX}b̳XH-R\8{oʽ~ugNjք~>R㽲DcC*1gǫՊBFlv ahY3z&ݿ3L6ՓKwvz`{l^(g?롑hvgR 9Bv ΝY{g*g"2߾ZÓp/RvةLT.sKݙԫ=wڹ%xZ29x&YB=BXM07?ztUyq=mZ`)ݼ[_Lj\gLBd ik{vzZ ؂BIkI4qr X=0X-Xӧ"+oMm9s o ;b:<-럧oIEuDs5Z񰈂j/飠nxΤr73mVw*նO[v@ص%g%Y|s@Q_g ,AgS"آn*q(q=,1/+:Ӂ|uV˳[u7W})7= ‘tui+RkEVݛW5~(e d"/3S_x' C;;eK7uS~i܌_i_N\(FJe2dWr: GF#ƠFgcMR4j̶7i_ wU*)ZQvl=g=OeRYArBα|HmH_(buY9ȹZ~Yl3/tE'sҧa_܊F DdH J $ C A? "#2(y|,v`!|(y|.8X-dJxZ[lTEޠFW!FKu%应fa˶۵HoJCC O<@7|1MH 4 "ag̿;n1=s1 3X['lʒa ^3 SV7`ңgBAcR+xmiQIt~nsco0,;g%jZd}ϰɨCTi4K^^"VCR]ߚb7g~Z寠U_VqK5!W(5MĘxK la k#Y6cSbO:sEFzK tI؆XUm cA԰k,c*X|*ZsDn#[#c lT6bMV(j8QS(N)Nb''1vƒKFsKX/avs]&2aFXi\c*Y9M4;O, "~.6Ewx=\(L\xЖɝ]&'vN LX\R , s3 3~GC8dq!6na83 q8Oi0Mä0Ȓ,ȓ< `> Q#!l>3a;ނ`Z@#5cFns!`R_M]r3zf7\\K%s-.9 f7\F\rr 49 q촸y.8(=®kd?d?/Һv6mݘKfJVf$reoXqQ4<=yzau%[%Y\?] 
xV.y+Y33 dt?9FKu'+h۫s%~d>V]Jf}Ї=s@^uW.0`p*o!.[(}ԏsʥP u\-z>#0*Qo d{F'ˠ7bZ,t F~YoAk쥝Sn弱[b΄,b!( ppbNQBl _ΊBS8JrlsM7Mׯϙ=#=`0/3gLJhԖesѷD@&}4H/D];T.}k6aۇXA@%HDH׭\hXZۑLdDt(_dCїD(#OE >Z-֯j^'E 2^֛;]$'?y/1g-)lolj֗)x-4nIB4.= {[mZBZk\KZK"9R\fD"Ϭ=eQp o!V~ $,)btS{~}wV;:kV3ONĘJ35Fϭ9>JL"NFiT[M[=1<1h{s?ϥ{욼բlL'2H>!LsS)q[leȜ&\$#VazPѩ;ڷakw`Vͯ\OYLIWuNO!3{.__RD+%R*R,Uv/8Bܮ/__BKK>e)~~6l;/<%dUD {ra0/{#+hϕ{koTS^m7&hEug~2:z;B1/w<+ʱݟ)7.,p# 7;.&T; U1/%kURK2d/v~٦K.=k˟n/{ DdH J % C A ? "$20 tM{dE#<՚ц rv`! tM{dE#<՚цB18X-dx[}TU?Η9fA_S$)cBEiM엮NꌻͮJ_nW(  #$A `#iFd4{߽gw|hwy9ǻc@wAY __aSJY,7f iBe>??e'fYzLˏz"~} /K oԳ{ >`@އԝyTFVϯ K 6I_Ru[d+_CN}['n5mshlR`zK(_I3_At՚$$$$%YO7e@\n5&䷛v-w  wA~G/k3rZVom.ͅm:wn#0/IhU  Q!P{!9f52ܐM@f #dǻ!nf_=B&y Ys zLKoY퀋YNr7dױf5 u#de'WEBrĄb<"4R:mt K/Ƹ֏Dža,,]Y)m'[K2" Ydo\x21| QFAK2#)J3Zfra]Y%a]|;˸DÑu1\Uޅ/$ ؎11eoPuaj A؏챟ޠt,9GqTG\DQ"7>p:Cn9eoP`m{z^|-z>ʮ/_D"/9uNh7;nf( *\}: Hiׁv^EhAvvhM.E#q\i~\%BrDM8FJésmB;o"lPUU"$wʾs1i׏vD`57[DH#4c}m'Nnk$BGdͫU"$ 8ڱ.v'cS? b}dL._3sl1`faX\a]ƐgHBV Åxx: όeZo(dPp ߭kXMV K Yzy]Q<mJ Wrɲ_H,gG"sA~/+/b~$E>a韡;%xS~k/a)xsU&HD ~y2ϑ>;I\ѷYE4WLUt< Ym,O*\MX?YZ6952EY| &` G5r oW; Y{lPR3p"?Q|>[dLW۴wQFIߤOd~@3}X*1˥\flhw6n aSeVoݼblJ%XaoQɫ\yF<V#_u'ܠStęzb=ߊT8av/_T>*b=_k:։sǮzӐ?~״M {!u[^k QxF(*mNUOcFEEY,esQ{0`vjf.%ZI^)_΍N*@y)(yj1͆a...g#'n_.zIιlS>vVq8FQ߷P ܆w*{"̛{bmi"6M(kO%_fppUN#We;en _7TЍq_: Dd J & C A!? 
"%24)Jc!ސ@v`!4)Jc!ސ12y,0x[]lU>;Y$&Y?4R.ERm6.-nBcqKץ?uA%H 7A$11 $5$>G5HHԸ{޳;3w:6ӹ{{ιM]p>dɥ ^ LsiT".7}U(?1%67֩1>tO~ÞorR#ΞK"sQaOguhJnEۙf˨Wbl^ˠw߻<׳n/Ġn$Ovm,*O:ZPg]#ZA>\B≷!,Ila$V˜i_1^+OZ}5;;+Tdnu`Nmom( wD X3Ob/^AC>5{Xn"_w*tfӕXSj1QDb"F1QLD)&bԖh=ޘ+S%Xa)r;A vK]"a+ s _= p{ӥiNا,[1䭡?Tè#"Ū~'c_ԓBL¨ yRHKRHKERHO<..EWJ'^ey xنmeyMI%^Yux$^GQG^f{tnOzIoH0&I~6e1#g,Vs~\h4G$n`ZC5@ͣ\ ~,s ҇.vyCœ1.C6B,33\ 3L8f&UYʳl3}VI5AMYBŞ9=b/VvդM09#9#ΰd Lba3"y0ü 6D aF9/;*oW$[Y7ZsD}w4LiV;uq|]q_*g [Un3-Dq%㭴wSU\-Vgp/H+f3bz,4#,$Wj a1 `bV>sBl ecg*XH-4cu 3b{6Xh?| o',$wJxe98fŬr)sBl M7&\`!?PN>V*A9g,vu7*XH-4Q.MnŬr(n4@O.ku{u}5o+ee)c .y,m ]&GG3tpoG=JgcS6[sc6*)ABǟ߶pdS|{skSKsC"‰tv:OO7zY6_7uM{ַaY$t6 ռ%"9׷?0K z[ح/,T,< L 4R˚"r# GZ ;`t[b¢ޯ[sǰd{5l9}xTi8sQqqTXR~Y}VRoz45=KȴV^;m˸Y6")&|=-K;&Ƨ٪v-R٧4⍦Zl7Nʼ>u턟~zP*@TFZ'*O=w!m7 ۷/u&z/o߱π||u3@VW-? 맅^J%^*=4xi;eK@~ VS/ԯ^Ld%K~zàr.LGr/{cLx#mO^qcu7j_ sV˗FG+wmG~2VaGiј(V*7#t-p5`;ngVw xG.j+2d[oe]?J@5ok"DyK #http://research.microsoft.com/dmx/yK Fhttp://research.microsoft.com/dmx/DyK http://www.tpc.orgyK (http://www.tpc.org/Dd@J ( C A#? "&2{m-/',Wv`!Om-/',b xcdd``6dd``baV d,FYzP1n:&B@?b u  UXRY7S?&meabM-VK-WMcjZ Č`u ,@RF\ X@20Zn>k@(\Px5`=SA*lK!2=R w8$+ `ss+1.] `p321)Wx ], 0O`q~eDdJ ) C A$? "'2gdZJ?u`yKgCv`!;dZJ?u`yKg  xcdd`` @c112BYL%bpu{JlVfOկfުmruwo{zfK*^J(zQ?Oqlafh}UEI}„>AG{N0^q:$3hd5%ס3ֽHΗ#^A- v`!9;`b>-" @q"xڕ;/Q=s5ƣQ BDCGa#ل0HH6$(JUx+(%}0;s~Ϝ3 `Ux@+!F # #aja5K];^yKʡB)W68qθ!Mzdc:C^BY]Ҫa2-#ߑ#{wn~/Ly7CqN tTf2 "MK'vAsT|o~$+n0?RFuXWndϣvs?41N8qQ_ޖ3R_V(F4Zkߘ|WĞuD<5?7G?UF5aT}?]Nm}<=BtReE+|tʹ9Xi<ލa 58Eةk<u|k@(\Pƕ|F0ɞ) %sbѕX‡)ĺJ.hlpc b;njLLJ% H  3X8eDdJ / C A*? ".2gNOC#iCv`!;NOC#i  xcdd`` @c112BYL%bpuqɎ "W K9 b;+KRsA<.he\~fODd J 2 C A,? "12WHdk%0͈\qv`!WHdk%0͈\qVxcdd``f 2 ĜL0##0KQ* W A?d- -,@=P5< %! 9/&*de-b YSAt/DP 27)?(UX f222d<ysR?} Ra[ +p~3j Q! ~ Ay  j?57HEԞ>d0 4~l?l ~$/Ă*bEF0/2߄<;%}P>l&VXⅉ3LvL \Υ]=|Ĥ\Y\ qAs%(s00#3X0="Dd HJ 3 C A-? 
"22QfZ$yx``v`!XQfZ$yx`n @"&xڥ=hA߼.{bxI-N5"86 6i&ZXi43Ay3o"J{ofcG@[F@ONZ `K`:&t%Ee a?$P񊲖x&ץaM L^'@$zwM=JUarxPZnoiγ+L:_XK )h*ǫlnkBhOwfV|?}RL ˱sUd~" tuݫ.Wy79䉿30Aߜ;>W$~uR[WJUdܘ6X3d\צ Hq׹!gyV:fhxko[{Tt˼ Q9^,O}E?YQO'ߑxNm]tsa{˥,^]]3zrw"N||}{(VyM}'ՒFLmENgߤDd40J 4 C A.? "32|n%{'NjlaX"v`!Pn%{'Njla pkxڝQJ@OD,"6~)"AX?k8D׷o7U&t=K(ЇNv9|sܷUǯ>[PǮ”#} {XMߎw`f.4^nN~Dd8J 7 C A0? "62 v$ye[)v`! v$ye[>` v xڕKP]~h_+qn8:K*5Vh%AUD_?h|b_>ݽ q@ F@,[c (7It,0]{v/ C$!9799X(׊lhn@X& 5.WDV{@W$_Y471ͮ1S|T_]|>'}YWRN:Q+&-^Yg6ӹ)]Ժ+vEƶ:2[yԸ4Y߾,v`!rT؋$ 1L xڥMhAggg"0M-"* BmFmB ^<GN粓Tv:vx"9K6~>\2Z?y3R].N=>d>zr YQZܫ3^h+V?Ѯΰ6!xŻMgqYdxwSFX(+]x?/jTŢy|; r|JuMYb880QO'9}!o=ӽBPdDd@J 9 C A2? "82c2nHDD ?0v`!72nHDD @ xcdd``f 2 ĜL0##0KQ* W$d3H1)fY;Р@=P5< %! 8 :@u!f0109Y@*F;3)V ZZ\  f22za%ļ?cʀ\F 1 jnsJ.hrC b;/LLJ% 4Tq f81PeDdJ : C A3? "92l?\^5J!2v`!l?\^5J!f `\xڥJQ9wFsh!QQ>@%"4(r20zڵqjDI `h{Μ"$ f@>X ! ) Ce- GqIj9Wqa-`B )-a5*11Iب/t|D dQמ2啕"ZYk}1οuK)ě*`VF^86GƤXVLo9Sa1knR?oQ{ ~.8;Q-0K\Ʉ=zzJ_FfTaRH׺ʈDWsC4}i㙫Z] s(F=s#=uku;qEw#lO@gn)i;?>Dd J ; C A4? ":2yX*W/FR܄L|c5v`!tyX*W/FR܄L@n$8BxڝkA߼͏֬iڊxR JB{hB IZh0.x_!uSBVyEe7^#)Ч&ɟyOcBDd J < C A5? ";2H'BC%:qW8v`!xH'BC%:qW 8FxڥkA߼$tED<"ESOc.z$[h l ك 7*GQ(R<Rx}3k&ԋnX>oo/o`76B!L$%јarC.F:`'H g:ZoW/.*]c4y8>ٺPZ':-[GΊՖ09`qGQ:p眍(w>ù58T~S/tתWUxs6\z]Ggjh=Aϥz~ =vzzrDsHg;x tȨ?D<3uS+A3k:rpV.ʗ.Wflo M]+捴ypYE!co{~yl{$752<7z?^ o]l6fJ'ɩz2CY]FCMsSˌ[swo}[+z@rlyOW󤸔ˣ#*Hn4v"$$xhV?f o2DdJ = C ObjInfo1Equation Native K_1048501644#'4FWffOle  FMicrosoft Equation 3.0 DS Equation Equation.39q{{t# 121 k 1 +1 k 2 ()CompObj35fObjInfo6Equation Native _10485016589FffOle CompObj8:fObjInfo;Equation Native ? 
FMicrosoft Equation 3.0 DS Equation Equation.39q#t| 121 k 1 +121 k 1 +1 k 2 ()()=34 k 1 +14 k 2_1048602932>F=f=fOle CompObj=?fObjInfo@ FMicrosoft Equation 3.0 DS Equation Equation.39q< k 1 =k3 23 3 23 +1(),k 2 =k13 23 +1()Equation Native _10485023557FCFififOle CompObjBDf FMicrosoft Equation 3.0 DS Equation Equation.39qv max1 k 1 ,1 k 2 ()ObjInfoEEquation Native _1048502475}HFffOle CompObjGIfObjInfoJEquation Native _1048602968<MFff FMicrosoft Equation 3.0 DS Equation Equation.39qh1<" max1 k 1 ,max1 k 1 ,1 k 2 ()()Ole CompObjLNfObjInfoOEquation Native w FMicrosoft Equation 3.0 DS Equation Equation.39q[H+) k 1 =k2,k 2 =k2 FMicrosoft Equation 3.0 DS Equation Equation.39q_1046704450TcRFPfPfOle CompObjQSfObjInfoT  !&),/0149<=>?@ABCDEFGHIJKLMNOPQRSVYZ]`abehijmpsvwz}~V n j +1() 1d"jd"r " FMicrosoft Equation 3.0 DS Equation Equation.39qz m j nEquation Native r_1048507970WF f fOle CompObjVXfObjInfoYEquation Native _1047153203J\FffOle   j 1"m j n j () FMicrosoft Equation 3.0 DS Equation Equation.39qf4 n j m j () j  mCompObj[] fObjInfo^ Equation Native _1046863632aFff j (1" j ) n j "m j FMicrosoft Equation 3.0 DS Equation Equation.39q  0..n 1 {}0..n 2 {}Ole CompObj`bfObjInfocEquation Native ...0..n r {} FMicrosoft Equation 3.0 DS Equation Equation.39q  n _1048674917fFpfpfOle CompObjegfObjInfohEquation Native 2_1048674948kFpq!fpq!fOle CompObjjl f FMicrosoft Equation 3.0 DS Equation Equation.39q  n  FMicrosoft Equation 3.0 DS Equation Equation.39qObjInfom"Equation Native #2_1046862348;xpF`,&f`,&fOle $CompObjoq%fObjInfor'Equation Native (w_1047060066uFP*fP*f[H0  j =lnn j  n j FMicrosoft Equation 3.0 DS Equation Equation.39q?hĿ  l 1 ..u 1 {}l 2 .Ole *CompObjtv+fObjInfow-Equation Native ..u 2 {}...l r ..u r {} FMicrosoft Equation 3.0 DS Equation Equation.39qx _10468651876zF@2f@2fOle 2CompObjy{3fObjInfo|5Equation Native 6#_1048502753F06f06fOle 7CompObj~8f FMicrosoft Equation 3.0 DS Equation Equation.39q 8(l d"1"1"2e "c8 j  2  j n j () largeR j  " []"n=1"1"2e "c jObjInfo:Equation 
Native ;&_10471540381 FNfNfOle T ln 2 n j () largeR j  " []"nd"1"1"2e "cmin,{}ln 2  n  () r []"n=1"1"2e "cmin,{}ln 2 n () r []"n=r1()n2e 0.5cmin,{}ln 2 n "r2()n2e 20.5c()min,{}ln 2 n +...+"1() j+1 rj()n2e j0.5c()min,{}ln 2 n +...+"1() r+1 rr()n2e r0.5c()min,{}ln 2 n FMicrosoft Equation 3.0 DS Equation Equation.39qCompObjUfObjInfoWEquation Native X_1047121725OF}Sf}Sffԫ MSE 2 ApproxMSEp Q{} ()!1 FMicrosoft Equation 3.0 DS Equation Equation.39q، 'lim n!Ole [CompObj\fObjInfo^Equation Native _" MSE(p {Q} )ApproxMSE(p {Q} )=1 FMicrosoft Equation 3.0 DS Equation Equation.39q ; H j_1048337298FGfGfOle cCompObjdfObjInfofEquation Native g_1046850262YFUfUfOle kCompObjlf  2  j 1" j ()h jH j "H " FMicrosoft Equation 3.0 DS Equation Equation.39q,h Em[]=  C  ObjInfonEquation Native oH_1046850356FZfZfOle qn FMicrosoft Equation 3.0 DS Equation Equation.39qpL Em"n() 2 []=1"()nCompObjrfObjInfotEquation Native u_1048489100 FffffOle xCompObjyfObjInfo{Equation Native | FMicrosoft Equation 3.0 DS Equation Equation.39q ""0,1[],p1"()n<m<1+()n()e"2e "c 2 n_1046849903FmfmfOle CompObjfObjInfo FMicrosoft Equation 3.0 DS Equation Equation.39q| p(m)=nm() m (1") n"mEquation Native _1047106780F.f.fOle CompObjf FMicrosoft Equation 3.0 DS Equation Equation.39qOȅ 'lim n!" MSE 2 =0 FMicrosoft Equation 3.0 DS Equation Equation.39qObjInfoEquation Native k_1047798356bF^~f^~fOle CompObjfObjInfoEquation Native _1046865209FP\fP\fzD SE(m 1 ,m 2 ,...,m h"r )=n j  2 y j  2 k j m j n j 1"m j n j () 1d"jd"h"r " m j y j1d"jd"h"r " () 2 d"n j m j y j  21d"jd"h"r " m j  2 y j  21d"jd"h"r " d"nm j y j  21d"jd"h"r " m j  2 y j  21d"jd"h"r " d"nm j y j  21d"jd"h"r " m j y j  21d"jd"h"r " d"n FMicrosoft Equation 3.0 DS Equation Equation.39qOle CompObjfObjInfoEquation Native r  MSE(p {Q} )=MSE 1 +MSE 2 FMicrosoft Equation 3.0 DS Equation Equation.39q. 
SE(m 1_1047742357F`Ef`EfOle CompObjfObjInfoEquation Native _1048338915FP⓵fP⓵fOle CompObjf ,m 2 ,...,m h"r )=n j  2 y j  2 k j m j n j 1"m j n j () 1d"jd"h"r " m j y j1d"jd"h"r " () 2 FMicrosoft Equation 3.0 DS Equation Equation.39qu minMSE F (p Q )=minMSE H (p Q ) FMicrosoft Equation 3.0 DS EqObjInfoEquation Native _1048574297"(F@f@fOle CompObjfObjInfoEquation Native 8_1048338761F0ɟf0ɟfuation Equation.39q` n j   j 1" j () R j "F " () 2 k j n jR j "F " () 2Ole CompObjfObjInfoEquation Native . FMicrosoft Equation 3.0 DS Equation Equation.39q# k j =kn j   j 1" j ()  n j   i 1" i () R i "F " ()_1048336832F f fOle CompObjfObjInfo FMicrosoft Equation 3.0 DS Equation Equation.39qH'. minMSE H (p Q )=H j   j 1" j () H j "H "Equation Native _1048337303FffOle CompObjf () 2 k j H j  H j "H " () 2 FMicrosoft Equation 3.0 DS Equation Equation.39q0  h j =kObjInfoEquation Native L_1048337285F𖷵f𖷵fOle H j   j 1" j ()  H i   i 1" i () H i "H " () FMicrosoft Equation 3.0 DS Equation Equation.39qCompObjfObjInfoEquation Native _1047121414F¾f¾f 'lim n!" MSE H (p {Q} )=H j  2 h j  j 1" j () H j "H "  j H j  H j "H " () 2Ole CompObjfObjInfoEquation Native  FMicrosoft Equation 3.0 DS Equation Equation.39q 'lim n!" MSE 2 ApproxMSE(p {Q} )e"1_1048603908F}õf}õfOle CompObjfObjInfo FMicrosoft Equation 3.0 DS Equation Equation.39q@H 'lim n!" 1"2e "c j ln 2 n j ()=1"'lim n j !" 2e "c j ln 2 n jEquation Native \_1048677526iF͵f͵fOle CompObjf     "#$%(+,-./0123456789:;<=>?@ABCDGJKLMNOPQRSTUVY\]^_`abcdefghijmpqrstuvwxyz{|} =1"0=1 FMicrosoft Equation 3.0 DS Equation Equation.39q| d"pm 1 ,m 2 ,...,m r () m 1 ,m 2 ObjInfoEquation Native  _1047119852FԵfԵfOle ,...,m r []"C 2  " n j  2 k j 1" j () j n j n j 1"1+ j () j n j n j () largeRj " n j  2 k j  j 1" j () largeRj " [] j n jlargeRj " 1+ j () j n jlargeRj " [] 2  FMicrosoft Equation 3.0 DS EqCompObjfObjInfoEquation Native _1047119562'Fݵfݵfuation Equation.39q 'lim n!" 
MSE 2 ApproxMSE(p {Q} )d"1 FMicrosoft Equation 3.0 DS Equation Equation.39qOle CompObjfObjInfo Equation Native !;8 'lim n!"  j ='lim n!" lnn j  n j ='lim n j !" lnn j  n j =0 FMicrosoft Equation 3.0 DS EqA6? "<2 ߉-S -bWp;v`!h ߉-S -bW 6xcdd``vfd``aaV d,FYzP1n:f! KA?H1j-Xzjx|K2B* R9 :@Ab 0Ky&LZF Z%h(ʹ01B`,:&T9&PfRm#N&br<Яb@ V` ^#.!.&a%ļ ?cTgn.# 5ws؄J.hqc b;+KRsA<.hHdX=Dd+HJ > C A7? "=2<ĮMs&L>v`!ĮMs&L6 @M"xڥAkA߼l6C[DR=R!xVirD(vm!0!sɹRz,"Tł̼%⡸˰ovA!2+VYaƺnZ_>+`517y:BPYJP96u2aQcXZ`䫈GH.?L3_V{$1]Hw.'oWa \=CyN7sd&Yw;5i~ķhž0҆^lmBSuykax-?ā=a9TK5t`-C!۝V)FTn.DdpJ ? C A8? ">2ǂ_>RdK@v`!ǂ_>RdK&F)xڝ?hAI/14C("Ec E:8"m P4VAR'ѭ888IJq)N;8T~w&yw罻 G 1 ?1b-Jt:U$9틛8D:#?ʐ.Qkgh/!|Wy*_?_*voR{]Nqv)Zc '(ְn|+`]ֵXv '\Ǧ֡,{"sA+7qMJZgyWZDk73ع&֙mև,m񶨭xo?d{y[\WzEveu6) FNJKT~%ΰq9Dd 8J @ C A9? "?2}q)(~J\sYDv`!Qq)(~J\sV@v xڥKkAǫ}Mb /xSoA7xl"I`<si OcɃCtUWUip8jThaQBEhƺDN[_h&cc:/0HqjjrUS;s+g @{Wc`q,ImP־D߀\"?͏Ycl̮~6:/GN6?h7CyTj8]vOALeMbkTLpwϑD޷ EoϻH"zۖ)",g^U OEMs, 򞫷B&9; JP1\̓Nܺt[;s p11`[\CU_fy>cC.͈}[V7o׹ő,㑋6]:A> ϣ'g*>3rhoR j`'sJ϶啤)cfŐ2DdJ A C A6? "@2 ߉-S -bWpGv`!h ߉-S -bW 6xcdd``vfd``aaV d,FYzP1n:f! KA?H1j-Xzjx|K2B* R9 :@Ab 0Ky&LZF Z%h(ʹ01B`,:&T9&PfRm#N&br<Яb@ V` ^#.!.&a%ļ ?cTgn.# 5ws؄J.hqc b;+KRsA<.hHdX=DdHJ B C A:? "A2/q޶b~{5= Iv`!q޶b~{5= @w"xڥSK+A_A8^xZZM,by yioaa%,B7D-Dܽ=L7|7 <#'Bƌ0b}זqj a#ifфcsnW^ut: {!5k^1numO·sB0)v/$H ڻDpoP5y\Yg$դDR#R}? "E2kY[ +[sTv`!kY[ +[sbe 8xڥ;KAg.3ÈX AKMl">%Bq{`a)i 䧤26>P9-=vy BC~ȕP"D#!Au\0/K|gYZI"\!K㓅Z*<4kIGŎPD9RYIyJrP7I?;їhǭYNӮ?wy0ydMZKI˫|Eqٚ ߠq瞭1`xDcKrQ~ A؏OZSk)no%ǩɓf9׆@S:nBz4L>*Ehѧp4:jbuBJѽq$ZV%9fBAU[ϖ<&DdJ G C A?? "F2*}Qcm]O$/Wv`!*}Qcm]O$ x`\xڥ?HQw$pE54D5Β`i A 5%4;545UFK)]Ǚbw<~wB2 L|lk!!"8$®_):άLÝ`dV(e1'j1cxr^!NEU`dvB(̭+g "BA͋4|gr=$$z?ܺ-r娨[RljWG|^SS<]B/zu'(;xuiʗb u?۟GOT_N/2 V|;/7>!rJ~u%?&t1gwyM~ Ȼujd]Z]lE6 !q*mWb15 E DdJ H C A@? "G20#XCYv`!0#XC\ FxڥTK@~͵bqͣQ`[" [(\ܜ:;8".ҔM}w/! q%x3ID!P 8z(.rXY\4Rh9;GM&73;C'`NYƅb"BX~xk+u#x>pLfhb\aۭѝfRMF,m{8Q]wHtn*d=<Dt=Dd|hJ J C AA? 
"I2i4m Ϙce=n[E^v`!=4m Ϙce=n[`@0| x]NA1bA!>P 8z(.rXY\4Rh9;GM&73;C'`NYƅb"BX~xk+u#x>pLfhb\aۭѝfRMF,m{8Q]wHtn*d=<Dt={Dd$ J K C AB? "J2xxE`'re`v`!xxE`'refHD 8xڝSMK@ݤ6TDCP(EB@<ꡭoVhiRŒ"=ߠ7E_)TnXy,B%aS 4Me"շ*s+@6@)~sZ296P^Wv$Z'27E} q1FxjpeHr9Lvmy/_N'pkgaWif?KbҕM]-/,҈u#1X]}Ϗ]sO&?K01hB^[ÓV Ӛ>H*v B|,lOWoA*1騮ջ 9=>Gjyȵ_3~yDd0J L C AC? "K2b sp<cv`!b sp< Hxcdd``> @c112BYL%bpuGldo * -eT[=ҐI)$5b2B10aQgDdTJ M C AD? "L24yS ev`!4yS   ȽXJkxcdd``d!0 ĜL0##0KQ* Wä2AA?H1Zc@øjx|K2B* R\``0I3uDd@J N C AE? "M28/kkKzRF; gv`!8/kkKzRF;V*HA"yxڭklU絳ϙcw)KK-jH hK)!B[S!X|`|4B O*,>ESAX)Zu;,+v~sι=svwg9ˢq%<^t=QU\̥iܦ]^mE7ܬ%׆8֢#TоóVs&i7~9V+e>#bN~.u\6˺ed=xTGxpR=ȸсtyN<.OY@za) u6W<i\MU6WNce=ZW9e{#*U,oU +ݚ+pnز[ g\x~X}OL.^Ds3jJ+)56WC:3wk}RuOmg{Z)շMi+Gi`o)׏)Yƹm[)Z 8M2ܝZld;#=3Z5or5Źlskn!R7|WnJ=r奤w3z6޻`c #{-9Sn9Syޯ)Hޭna;ՙRa{EHgHg r$)iUCSCe ój89lnC5P<I!U :$/}pb˦9x`p7g%yl 1W?/qC 󥅃?yBejB__1,0*0G[*-EyN:̣›!rtOA밀yǑ< ƏM;~rC2c @Ot|9q!>_Kuvl4ڏMҢ,Ӆs!$-Cdr8d#W׏<5jn!"͛䍹c.Ҽ_B! E߯5ODyF wE*\U2<>C3Weˈ&D'hqqB'XIqNg 3Fy0cT3FWtƨz'Xφ3a)0cܝq 3.UaFJgO+53ڭ0cq3aFHg'brK!&br F9aAa'g~gVd^8=$Q!%{ϹG<4BC)t^D|`02Nݨ"AcC蝱$hDK8#|JU<[,xD=E}8H'Os\hq,h? [eh7K|N|L 3d^OQ]^ Qh_ҡI+Wɴ=&_ȡ1ّ퓜^m(>zeH- ȅ̷ fЈ{_f }ÜЇ9/DcQiCo'@ۉc6+2i~r;_61_2#.@=~N ^7!K-<\hBq}c+)FR61y?#&: lȓɾ: ~LT&VsAIHsqVD皼 /^ E?V#:WDdJ O C AF? "N2ȂoS 2 Bov`!ȂoS 2 BP@xcdd``` 2 ĜL0##0KQ* W'd3H1)fY@<>%ĒʂT 3vL@(\P|30 dr8-'Hfpn#l.HE-ewg?.wBe!;HQݻ8.gvf%  s(D,a ˌↆ 70ZZǀ Lh~k` 0M?y5 ;6 J,,\ĺw7H: 2R^)4^Fw s%HEA%_ E`W ff ({\ 1*v0o8G321)Wx\tble{Dd J P C A ? "O2'ڼhԫuU5rv`!'ڼhԫuU  p(+xcdd``fe 2 ĜL0##0KQ* WôEA@RcgbR V~OĒʂT@D22Bar`fm wfjQ9A $37X/\!(?717k0@ddeĵɌ$cX bV5|)._ʯe|ʅ,_x" 3 9@W@\<2pjL̈́۸Aj(.c8]F7Nb$:*yW_=P} W9'ux)cڹc!jcԱdKRBXw gHS,}[OPC̅DdJ R C AF? "Q2ȂoS 2 Bwv`!ȂoS 2 BP@xcdd``` 2 ĜL0##0KQ* W'd3H1)fY@<>%ĒʂT 3vL@(\P|30 dr8-'Hfpn#l.HE-ewg?.wBe!;HQݻ8.gvf%  s(D,a ˌↆ 70ZZǀ Lh~k` 0M?y5 ;6 J,,\ĺw7H: 2R^)4^Fw s%HEA%_ E`W ff ({\ 1*v0o8G321)Wx\tble{Dd J S C A ? 
"R2'ڼhԫuU^zv`!'ڼhԫuU  p(+xcdd``fe 2 ĜL0##0KQ* WôEA@RcgbR V~OĒʂT@D22Bar`fm wfjQ9A $37X/\!(?717k0@ddeĵɌ$cX bV5|)._ʯe|ʅ,_x" 3 9@W@\<2pjL̈́۸Aj(.c8]F7Nce;pb.#{A*42gĹ~. Mb+N{ ĺ0I ֽv bUٽ DYd++&1^ y0MI?jO?Aw J_Y| ~.ƃ*_`? f|IE1+b$lƟBaʍ*Uކ] p3Pk_ 8\_X7;8( |' ypkp_bӃ?8b l D b3 /m|q^Tm`|+&+UrAk;.pq/ `pĤ\Y\ t00b܆JBDd mJ  C As? "2Gc}xkv;v`!Gc}xkv;"xڝ;KAg6Ŝ "bRJDW/V"be)V >.K.PXv~3v Da@؈`BB<š#ZTXq;Y|ib<VsͬSZB8ԅ*E0[UNYz Zcy` ! Eԇ ƹ(,^zD-{:umSWst[~3Tl|T?T<QTO}.];.J=>D?'7ߤUT򓊗T8O߿~cFE8/(<KD_H{?GG^$kŋY !Ad)W,D?HEDddJ  C A^? "2"R+7. v`!{"R+7. ^ @CIxcdd``fd``baV d,FYzP1n:L 8,56~) @ k'p00sC0&dT20, @201d++&1~-b g0j`\1m)g#c+HQ&3@penR~CH0.Ce 6{1̽$u8&0L;$<ao+F3R L`sAS8AtA``%#RpeqIj.| Ddp P  S Ar? "2Evv h=v`!Evv h=Z %@X":xڥkhUܹwwfvgwv7ͣIFZI*"XX tc(~p 6nRM5lD!Fl S4j)A[3wa;9doad>1MscmIGWYc=ޡdWp2?X<8@nCN?TϻDͨ l.+!KsKtKAk9uL] H_ 0o>ӾI lzܷ wU/BE Uvg~5Xf Q`~?03:?Y{kme]ֽbt}A< Xc~ 9_ zyz34 oz˸)A?G9ouʸac&>\9i Ǖ_N+͐lWfP2GNȱ-/0sTތ6Ԏ(ScBαwð^-`#h=`zJk{-'Xa@5J[oZ?7]*| gnocbW.`@M'%a< >W~1O6/{6F8Yyd1;K1O2|ik۽sEu)ϵ]gDy~AXϾK͐e^O9@~4cJ)֗$?#P\' \ɴ`= Z̾Y>:~ͱ=aϱ\y[=әL(.]y !1,+I _CLhzG$\o OlAT r"K@.wE?ǪA7 D O9S_=mf.,a[Vsxz&oOųF{e*iy ě=ݯAfŻDi4thELu$LWDd |J  C AZ? "2>oyYv`!>oyY  `h0[xڥJ@g6鿴AD ",@3Ilz_3hXf&ڴ-~tLJB!A(t5_-Ö$VA3 D#+Oҏb5§y F$ ?s=tͦRH#P&&2q6hsJN(r=@ϵн{ޅ;wXj;T(,@{9Cs"'b}wY3jtyK]]ـ3)߀_23]^oj[ ח-=K*:vqAcct!#DyFo|n׭zi#z!LW}G|c@Ic2z]:.sJ;dU̝(hRVFΨɤjR*$dSܟ_7y7J<\<'lEx>K$x Yվ^/VĿg:'zS-ACcXHn@},v]Y٠Sv_BxCÅ"%KLuΠf bDd |+J | C Ao? "2Qe:= zϕv`!Qe:= z`(0fxcdd``ed``baV d,FYzP1n:! KA?H1Z" l@P5< %! `35;aR&br<Яb@ V`$u mxT$Y P.f +73/XG&? #ÊF072FF0@*3a.@? e#aq#< (x*/Ǿ0 aS%ޘP 27)?(˖0i`< RaM%\KN{LLJ% H  73XmwDdHvdJ { C An? "2w5(HzIb1v`!w5(HzIbF@ (xڥMhA|l;I4)R?ZɃA^j)M/BZ֏ª`BAxBx ~4ED1ufv ˾ߛ?l3;@][cy6KqiiIl^T0d%7HQ`Vթqw 0Ov>XpCREc̩VGD`8jd|6;7"\ſ(V.__`+=nre^H,5e6Yʊd9 ޒK|KW IcyVk.k?W)GʵLebkffbj>Aidԙ> zwv3ΰ^Cv#I_e(r3Glx' hsv?arBnȾ6V}OF&1/numg"^:B Xp}>3l}<p5:NuC/DmuY ϻo>8\q|B#W gŵd!JwݹjHloտz'Dd VEJ z C Am? 
"2W_5؛v`!W_5؛8 xڥMhAW>[K~ꡨP=IA(mbUӊ-=zOz*VFD ċT M=IE%%t}7,(,:v*_,qтt;:CqYlSG,é ,@ _E1e(3۽8a ]5@7{gQ4񐸯4}>*+Xנv2>aZOb}R;MZzir_]E+2?Zۢ^7GT~Hw>p:.[uH7u9 cz2[%[}HƑOI+?YM\5h]jܗU_s ֹ͒/a|; gmoa|9:ܭxϹŻ~\@yZh=0bI[<[ўǼyrMq`qIt NG!~͌~- ~I|l䢱Iyϼ6+arߋ+Gߊ}6gx/erѰF^՘&߉mգ"hNő쪓"N%8OɩlNvLԀ_m9DdJ y C Al? "2Gouе5NL|s#Zv`!ouе5NL|s@ $'(xڥMhAc7ݤ6i!H-b[-Tc!-ִJNVDrOSAU<*DmS$ii83ofݭvÒ{޼7Dhi$n(`,-Lpż8X#If$ f\anˑ1q46`T#L '/0w;["{,ʗyϯ"9o sk^mX]u04#J̶Gw߼Nqߤdg隭֞ ˵_d[vP*mȱOx |NuDkI]1dZj'9'dHcLB&_ܝ/L͡p ԀH #;i1P>@C]Pg(3JNE7cSUDUp!V kmWq2m,tx&uE?~]*}_+ o$ =~|}X>K>\O߲q:ףl<zz6 QEW1>snC:#'l9FGL; ?ZK B/6 z+͘SNia1_dɯ  ?T >:Ddx gXJ x C Ak? " 2!'}?:+剓e~?v`!'}?:+剓e~X xڥkAgfwv7XQ "KBċxуmﶘҪkRċ*8$^K=DrZPQkqf4 K;o޼vvB#_ Zcial6ՇH-Hɬ;pP53rW%}&#}Rhab8C) ~P/;IRnuX\V1y{1{'M T2!;_]\#\ȁxuӾ:cq ]`W{=v,kg;,/Ӛ/.WhŵZh ku+K׷lz5Y/~2qYot}AyUD߼wV8}#<wǎsMnV?{Ij.2U  9-Ov@_8GDdJ u C Ah? " 21Q/tɁ{} v`!Q/tɁ{} P%wxڥMhAn6M6b!EJX̥~M0[@Œ'A OQP*% ^R $TR|:[ m’{y͛(`T c"B",5Mf*[/Q OĈ[Vh`/NZ &9"&#hePED<~4^;nB.L.q-7o9FK"oQ:_@'VI(G2CW=QX~Q1QuIR'T7A#VZ5XѭC㎏[q4+ow#{K5ooFba6uPN.`T[R7iD Me#c;ҸvW2׵C!9 ȼ>1h%8}줝tO_.SBԥ}}x-,Ern?WblbM3c;'<:CZ c)0fleY^nQXt݆*?Tڙ̖ EJX^X멤2s+'Dd J } C Ap? " 2O[,W\Zͪv`!O[,W\Z `pPxcdd``$d@9`,&FF(`T)EA@RcgbR v~ ĒʂT@D22Bab`b wfjQ9A $37X/\!(?717k0@ddeĕd1UyA|M8_ ר䄪7C &0Wy@*j3a6n? eb90D+ssB\qb/<@*6gbO T4ðĘCu+p c '#).KPob ǂ `WH cdH*4ss|=Ĥ\Y\ qAS>| (Ք*Dd\0J t C Ag? " 2Ypgdzؽdshlv`!`Ypgdzؽds`Љ-k.xڭkAߛl]Zc@RIjBxPPVEh+ $[1ŸB^zz^z/z"^*]쌙B0&Lv>wc0dp1d,c1d-um-O ٌ3|l@'Z|= F>a~{ءŨф Z;c|U9~NqQROY{=R\Nn`Kk]t*6Z+!>e^L{9IzxafٌijR bǚWءd\eAm9zvsg+QNSUlR6CŜgW; R4BŵUΙ7-O#~hw̲gi"~9?p)Cřn.[G%35y7+?> ??OKfkc 8ޞ!E짭~x0BU}od/y{m/>d؅Ũh-3ZbFdDd oJ v C Ai? " 23Kh|GZ6sv`!Kh|GZ6s"4 P_xڭ_L[U9]];J&0c):Q#$U7X,KKII|)1f%1)/DuS @qf{ιzNWe|}s{!d|5df uҊAٷŎs U`3Fke\cƸ^MICx[HZ'9LIIåtOF`mq=4v ct?݊#¦7k'Jyy<.4uH}9wYNlMDY9h)>X+s@ޖ]Һ![7kayH,~vavSN=tj-h߄}ڡ_W53_Awn!s/[P:\o覼(Mu},mt:w. 
sL%%GaC;zxhKkah =Q'uUX=T%/ Vx&t9scEcX}BQur>AfjPX3QKyQyh;9E-^`2BoiJhOר *lqa͊~g5P^k7y2"*[&(,&m=EUKk1kMǛ1GM7)<2'E#5ʖ U{7³.|R9"|QdGgbn1Ga6OE,RˊoN,R^}O {r 3R;|ܧ3yPwvʉ0X0Pܩf+X<, ](9w?8GK;<;'o2~tP;c(nV/Wq|7͚zyI=La_ƫ?۠H"Dd J r C Ae? "2` ϐ# $gv`!` ϐ# $`8Pxcdd``$d@9`,&FF(`T)A@RcgbR v~ ĒʂT@D22Bab`bm wfjQ9A $37X/\!(?717k0@ddeĕd1UxA|M8_ ר䄪7C &0Wx@*j3aVn? eb90D+ss\Ҹ| J| 36@<jf@u0sNp{la#E|  2yML4XPP_I!yQlI傦r.pNq;w+KRsA<.hgԻi-Dd J s C Af? "25&QSRϊwoIkv`!c5&QSRϊwoI+ 81xڭTkA~fٝlDC+QzЫEzd ]iRa !`%h/R7Aū" *_Ջ)뼝YS!77; |8rh"dLG ,ˣGjdta,"d)Ǡ9Z<A8Sk7p;Eu " # $ & % ' ( * ) + , - . 0 / 1 3 2 5 4 7 6 8 9 : ; < = ? @ B A D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { ~  1 ,m 2 ,...,m r () m 1 ,m 2 ,...,m r []"C 2  " n j n() 2 1k j m j n j 1"m j n j () 1d"jd"r " n j n() 2 1k j  j 1" j () 1d"jd"r " [] j n j n() 1d"jd"r " n j n() 1d"jd"r " [] 2 FMicrosoft Equation 3.0 DS Eq_1047154236F  f  fOle ~CompObjfObjInfouation Equation.39qfy 'lim n!" MSE 2 ApproxMSE(p {Q} )=pm 1 ,m 2 ,...,m r () m 1 ,m 2 ,...,m r []Equation Native _1047154112 FffOle CompObj  f"C 2  " n j  2 k j m j n j 1"m j n j () 1d"jd"r " n j  2 k j  j 1" j () 1d"jd"r " [] j n j1d"jd"r " m j1d"jd"r " [] 2 FMicrosoft Equation 3.0 DS Equation Equation.39qfZ ApproxMSE(p {Q} )=nObjInfo Equation Native v_1047107785F"f"fOle  j  2 k j  j 1" j () 1d"jd"r "  j n j1d"jd"r " () 2 FMicrosoft Equation 3.0 DS Equation Equation.39qCompObjfObjInfoEquation Native #_1048677675F5f5fT  FMicrosoft Equation 3.0 DS Equation Equation.39qJ(r1 d"1"2e "c j ln 2 n j () largeR j Ole CompObjfObjInfoEquation Native f " n j  2 k j 1" j () j 1"1+ j () j () largeRj " n j  2 k j  j 1" j () largeRj " [] j n jlargeRj " 1+ j () j n jlargeRj " [] 2 FMicrosoft Equation 3.0 DS Equation Equation.39qw MSE 2 _1047796077F?f?fOle CompObjfObjInfoEquation Native _1048516507Fp-Pfp-PfOle CompObj fd"pm 1 ,m 2 ,...,m r () m 1 ,m 2 ,...,m r []"C 2  " "n=n j m j () j  m j (1" j ) n j "m j 1d"jd"r " [] m 1 ,m 2 ,...,m r []"C 
2  " "n=1"n j m j () j  m j (1" j ) n j "m j 1d"jd"r " [] m 1 ,m 2 ,...,m r []"C 1  " []"n=1"n j m j () j  m j (1" j ) n j "m j l j <m j <u j  " 1d"jd"r " []"n FMicrosoft Equation 3.0 DS Equation Equation.39q# d"nm j1d"jd"r " m j  21d"jd"r " d"nm j1d"jd"r " m j1d"jd"r " d"nObjInfo!Equation Native 1_1048516483U$FpRfpRfOle CompObj#%fObjInfo&Equation Native q_1047108928)F@`f@`f FMicrosoft Equation 3.0 DS Equation Equation.39qUHmT SE(m 1 ,m 2 ,...,m r )=n j  2 k j m j n j 1"m j n j () 1d"jd"r " m j1d"jd"r " () 2 d"n j  2 k j m j n j 1"m j n j () 1d"jd"r " m j  21d"jd"r " d"n j m j1d"jd"r " m j  21d"jd"r "  FMicrosoft Equation 3.0 DS Equation Equation.39qu@ MSE 2 =pm 1 ,m 2 ,...,m r () m 1Ole CompObj(*fObjInfo+Equation Native  ,m 2 ,...,m r []"C 2  " "SEm 1 ,m 2 ,...,m r () FMicrosoft Equation 3.0 DS Equation Equation.39q_1046864879.F@@cf@@cfOle CompObj-/fObjInfo0Equation Native _1047153330@3F0ljf0ljfOle CompObj24fu MSE 1 =pm 1 ,m 2 ,...,m r () m 1 ,m 2 ,...,m r []"C 1  " "SEm 1 ,m 2 ,...,m r ()      !$'()*-012347:;<=>?@ABEHIJKLMNOPQRUXYZ[\]^_befghijmpqrstwz{| FMicrosoft Equation 3.0 DS Equation Equation.39qf/, MSE(p {Q} )=n j m j () j  m j (1" j ) n j "m j 1d"jd"r ObjInfo5Equation Native _1046864386_,8F 'of 'ofOle " [] m 1 ,m 2 ,...,m r []"C " n j  2 k j m j n j 1"m j n j () 1d"jd"r " m j1d"jd"r " () 2 []CompObj79fObjInfo:Equation Native _10467853326=Fxfxf FMicrosoft Equation 3.0 DS Equation Equation.39q{ MSE(p {Q} )=pm 1 ,m 2 ,...,m r () m 1 ,m 2 ,...,m r []"C " "SEm 1 ,m 2 ,...,m r () FMicrosoft Equation 3.0 DS Equation Equation.39q p(m 1 ,m 2 ,...,m r )=Ole CompObj<>fObjInfo?Equation Native p(m j1d"jd"r " ) FMicrosoft Equation 3.0 DS Equation Equation.39qf((4 p(m j )=n j m j (_1047153228BFffOle "CompObjAC#fObjInfoD%Equation Native &_1047153186GFffOle +CompObjFH,f) j  m j (1" j ) n j "m j FMicrosoft Equation 3.0 DS Equation Equation.39qfJ0 p(m 1 ,ObjInfoI.Equation Native /f_1047122425ELF֋f֋fOle 5m 2 ,...,m r )=n j m j () j  m j (1" j ) n j "m j 1d"jd"r " FMicrosoft 
Equation 3.0 DS Equation Equation.39qCompObjKM6fObjInfoN8Equation Native 9a_1047122324QF f fE`, p(m 1 ,m 2 ,...,m r )=n j m j () m j (1") n j "m j R j "R Q  " []n j m j () m j (1") n j "m j R j "R\R Q  " [] FMicrosoft Equation 3.0 DS Equation Equation.39qȇ p(m 1 ,Ole CCompObjPRDfObjInfoSFEquation Native Gm 2 ,...,m r )=n j m j () 1d"jd"r " [] m jRj"RQ" () (1") n j "m jRj"RQ" ()  m jRj"R\RQ" () (1") n j "m jRj"R\RQ" () [] FMicrosoft Equation 3.0 DS Equation Equation.39q p {Q} (_1046697062hVFffOle SCompObjUWTfObjInfoXVEquation Native W6_1046790074^[FxfxfOle `CompObjZ\afR')= m jRj"RQ" () (1") n j "m jRj"RQ" ()  m jRj"R\RQ" () (1") n j "m jRj"R\RQ" () FMicrosoft Equation 3.0 DS Equation Equation.39q SE(m 1 ,m 2 ,...,m r )=n j  2 k j m j n j 1ObjInfo]cEquation Native d_1046790015m`FffOle k"m j n j () 1d"jd"r " m j1d"jd"r " () 2 FMicrosoft Equation 3.0 DS Equation Equation.39qPȋ SE(R')=CompObj_alfObjInfobnEquation Native ol_1046704850eFЪfЪfn j  2 k j m j n j 1"m j n j () 1d"jd"r " m j1d"jd"r " () 2 FMicrosoft Equation 3.0 DS Equation Equation.39qOle uCompObjdfvfObjInfogxEquation Native yXԢ G(m 1 ,m 2 ,...,m r )=n j m j () 1d"jd"r " FMicrosoft Equation 3.0 DS Equation Equation.39q_1046703130jFffOle }CompObjik~fObjInfol G(m 1 ,m 2 ,...,m r )={R'|m j =R')"R j ,1d"jd"r} FMicrosoft Equation 3.0 DS Equation Equation.39qEquation Native _1046788530oFpfpfOle CompObjnpfObjInfoqEquation Native 1Table} ?SummaryInformation(th MSE(p {q} )=p {Q} (R')"SE(R') R'"R "Oh+'0 ( D P \ ht|"Type in the Title"Typ l yfJ q C Ad? 
"2UM4ȕ\J 12v`!)M4ȕ\J B3@kPdxڭ{LW=.b[&kӬXFl%"b%}$MMc%M?|Ħ h, Q4մj-ˊ@}"E:3wf8gE.sg.KSvbw8)R)+o;c14LvxyW9T կvd)GS8oUWQeNYQyKV( Q'oWmS޴o5E+`S> /5SmE7\>o.]̽MCAy]Zj[bIWɷZ]pm^V.C՗ZDsë#Or;xL(k1-:7 Z R-z~D\ZP8 {axؖE;ɤ^t k5_Ը|uXuII'u'u.MK-z<6!kC{cM]8X4%PV^dzEE/}&ӚҢx{huV+.+ߍV]$ŕo|5gׯyϲ޳]?ypx<gesd5/`\^[rJL~*p:O EqXE֦PV^]^egHGA]IrT2q}-/:45GcH#'01xF\RdwR1ыIŪs =Nsm*Op>?wD"A.1o_!6BϤAO@1\mع?"sZk"Lq#y  vݴV/3E|օKWថ%5SYJ/NSpGX# yi`^(c0WIOzIo m`^(c0WIOzx;k` -ʘc'^_\/@~.lac>d zy9Џ_j$GDt+y@ĺ1wBl =Z|oa zK(nO~煱O&OrT .*1_O%lsn`ňk~*C 祛Hj}ĽP=#C E'_\O:]c8-*~b|q 9 0v, `kˁ(~:!}Pq8)kB*ZnK^9C rUrpn(T9Ɠd& Ty./}0L;?)o w Z,*||p~=p']N+t0^b<춿A4>WSq; >?x γ|ci*±o1֗eZ=<>7 e!7 HM>WDdHhJ p C Ac? "2ڀ^d)g5KV|%v`!tڀ^d)g5KV @%@ 9Bxڭ]lTE~RvYڅR6Hx055 6%DQ ŨM 1 D_T /4yP$&,Ťv3g:lܦ9?g|ܽyѢC)?V"&JE2U~ jZٍ(*E>9u!T -> l-#3Ě$rx%_Xرg sX FhMn=+˵.ks, XNۃե{|/9!`_lhj+qe˶(۪)9D6L!fc&jOrE_Džu\].$v1KĨ uI[?4ֹ+N+]AX֝to~8S؍jE滞|{+zܾ^΃$;nZ=ڤ,2k- !e#Jb <|OnQ>ՙLfuw&6 _ 9r~852nɹQ XS~u#. lc&?IAy9ur;2Nm83y <# |;>Zy*3y_Թ7$u,;7_ۢ~G <39|^uowԚ}4ؑSl4ir:˷_A*AoɗW7$DdJ o C Ab? "2tOԗ EPcv`!HOԗ E+ بBFxڭ]L\EΝ.?wB)֘k&5vO--Bi ؘ`FnƇ>ԤM1$j$ƟEMZXbA@s2su7s *B"Q̟K=ʊ5!Go6\sT0Zg90cUm2L:tX}-P|ZC/ ͯH2|u֚ow?{\5BLE岇D$F_f?QizZ׫Z}!+z[Wlա!Iy~:h;|%L6UdA&2م0Yd-% WA6dLV<_>9O_D l_~_?/~a~^ȭVU*V+`NEAvZ촪GݾReWw%SVA̧x"Ql4v2teGvitjH?権8H&H%&/E ,/&|FTm ~4Ꚛjټ%K< FAՀTF7_]y0_!rpRkm_=5ٯ"?׬Qj_I58˪<"+"_E>k[) <_-RGbE䫲gm3NO;S6/ȓBzϒI!xR 7]7:cKb`A^axڱrzˍ|G 0[O5sG\"ڞߒ|<+2ps7~^vWg}u_>v~/n'o==hżBk@縚#k9b4#!!B% aqv@ϳWari,3{B{}.K>~Ž3[=Oz+V>jyE=#pg(5x ?9y{sE.TEK[%={נf0w]T9Ω"C};64xjѰu0Iv'(ļӹ5r-xDdJ n C Aa? "2yG7?FUuv`!MG7?Fb ) ?FxڭH]U=ܟ_SsǜAERZ6$l{t:${ciAH bCp\?Q8mk rS7zi~}+ !BE(2?0]aAW^\\V !dqmzlXAf55" {:mw˾$B،=Ņ?̠H59V~ިm+X6Y#E?!MMi*4掍K ^)O+~=~f\fz VK7WcDɦ }Tv"JelT9٨eSp=:߻/sW#WkÎ߲<7ퟗW%}ڪm|oe"/ ;)aE+v:/zbhDzvؘ<{2{@&էK[R5Zg pnrs6kG2!wK4k)O$5ُKepD|&V;h-Q#%d~UN_Zz+j\wOsڪS؞lU%a &hu]q%x>%a&[6Wm <n0yұy<_yNTJx.&4. <ؿ^ƫu4rij2Qgs/BYԁ|?gxev>G}2Ͻ gQ-QM6Vf x@+3oc4<6M4KfR㹂{jⲺ5ݟj\;uܪ^8y?NF$-f*d[kq?a I=>G~r8ü~>ן Q.YszlF'1σHš6a͡Ɏ3$wsEXo~npx"%:{e?u֑+sHi sT'k.˞5>V&ϟ3 _fXuB.^X/`\#u%hn !$j-?.zyDd8 J m C A`? 
"2W tM:oS/v`!W tM:oS/Z$7xڭ}h[U/˽&MӴ6m3aCE((s *vikQ!L[7;~DituhSqL a݇k|=M-9{=ӧbYji0ϳQL2ӱ#p֨]jAShzϚ#lؼmdn0^cI/C>E㾃Ѐ{ UZ4+lGf9B֯?loK֣1§Zm؃fm.tq G- _,>|o]+N 9➒Z>SZ93ޜ]RMԠ ^v#e٠-몶e![L <7K_/ؗ 븼_k_]~_t淋Y-|)m|rRsqUWwbIg;7Kz7 H C夏g;,Ni<: 6 qhۻ9/8G؍xΙ_/|g[V@]`xi}ٹZ,is_cuEnbއ1zj1kqt?<=W~ ZWrϙYGXo/[[3FfXY'<˳>fjhbG)<yzUHGʉj2cc&Q~VՏV*UTs Fe3*Oj2O^hs9_u fSDQ'xS@֟<y{kHw3TU}@ݨze{~B**}0mw/1g"Ûwwin-ɒNO"7Hi;c 4T3_a}q<?0}ľ4ΡT_tu&6Dd J k C A_? "2( ioP|Cv`!( ioP| $xڥkA6_&MT,H=A<66-ƩV(B)<ثG/PqRBq~ekW 6ay;of0q) v-hzb5l$a3،]fhdYqZ{E3@~HNxw[)f:Ǜ>.Q\|-|QC lMZ '_\igOv`OH%crO.rmiv*sJ=wL@MiJêCTr,$9S\$1?߿,΋MηTOgV:rskv݈y|LN{|Sy O+^v/%8y”[/-7IsA݄bfUqoT#p\rHaw̛DͷHI_7h+R^@|14BtV;ir9,Q~-mk׆\zgƪ]7U!U|_H|yTHN{ާi8Nó4:Iʵ />hW'[ֳ6ݒP͎yD܏kl!jv4bhOZt fLޞ;SHcvv?:e[wjd$-ԟlQ"ِs>Bo5s*u9_qd̻<'"")TU+F4kU̝Xq <{9(OVy܉e78_uqOPNRQ9?O+2g9{,>&%fj}|E7a5"Vf~W`y~. s&wO 'ԼU` dwNoJ$yqi=N&=C񥢇~A1R'uՉȱ%r1k|)C>"N"[ߴ%S1F,sP_"wW-㝓18+)D.?S_罤ϯîxާfϑl܀h*)d~\^hhQ2O5xc"&SQ,i1߉&- )"ԼXR2豈M{,TxDd( P i S A]? "2.Ͷ}(ޫ v`!Ͷ}(ޫ4@0JxڭmlSUo=owt:am hU2)ɔ@2d41j`T0(Ì $A?Ő,Fem]b(:V="vz}9<빧H$ɯ9T#xؕr9rT|*һ"߁!?pz OJTp=ڹ2Wť0_zԆM/p5p=qquezwB?QI:C~}0C޺c8P97Et=ctOg]ܧ@Y0֗kT Ə=U?.܇UP߉4 F E1ߝvexyx% _/q[E^aSp=q~$3}I5ZMZr~ IV7;Pۣ<$?mh"OSz4+}c~N9߿z%Z_5:Dޏ)g8 z9'r}K.KW?n-n?!2lF:G)k0.b45~z_۽ynǐvrYy _~YorRxo5w0 /FQԠtJ'5fu>ptJjί 歱Pˡ 5iJaj:șϤcMqv;;`BYGʡ> eh>C}mJ=2Ic:m(a>|BT'{UoQh trXYlϷsFӥaP}%3WH`/;(z/TB'ɷVb݉K Kl=IVDdL tP h S A\? "2bOM, .T[:>v`!6OM, .T[:" p xڥKA߼qzS)BDl4/3gPqWi'զE0`!bc'$M4 f~fG >}wޛ7&09ֶ[\~c5!.=ll~Gzrr'f\Uniy'P7t9 Q[Gbũo^9 <T!~(Hi8`ba\ԅ{lHM=A5ſxJ .k~iy-Yk/}ī4&fsΝ!uo;3|}_ԑDS</Q}1}tC .aV ~ft>< WG-_?mfo^v-nI{.NOC6cZKt(xgT|{À>-zN74CDd4P g S A[? "20:;Le^ v`!:;Le^" -@ EhxڥKhAggg٦kmA=xAP|6A/*J/J*Q"AmAzM=XDD(h7EhZ*6l)K37;#! !A&0oaIrZd2v+,9|gf#Zs=>T2ԌLրuȷ8m=#lChVtMEGIbc㓸mL{c`Ls󎓥elX*cA[Qc ?Oy<$D ~g%ig$A]VW?㍍wZd:Z-Ӟ TM0>,9"/Eޠ.1` f~7gOq4H#uYޜ}eQ02OڢB쾳2Cʩ?_!7Sp"'+1N?!1?M 21Hg(1q?'3x{f@N0g9Eg8[bXw:e?%so;\ܴ7ځv<~.{6.9Iv\crs"*8߽O9Jz(y[.PA|UPTA|%n ۵oV|zaA/~)C)P_k/Silǿ='kqgjy:{#D$kE}PZ O? 
@NݮZ!'B8 s.*1e*c5CnF_}E+~] F%wV(y6['Q_+GEn^D?}JGfw5b8Y$rLQicDwxBp8~3K5 9c{Mºs'˄o<gT 㚖āmqJsTźvZKWDd |J f C AZ? "2>oyYv`!>oyY  `h0[xڥJ@g6鿴ADKR 3Tb dRrGGOOOPуyfD zJc00҃A|C[߲y>-0=|f@̀ !q@QsgFםoq8Vf A*੖ķs/Ł8d<SlqĖV8Auރ%הLLJ% H X,M-]XDdDP d S AX? "2m^ !^Dd /1v`!m^ !^Dd /ĨG/Vxcdd`` @c112BYL%bpu>b6[`z8P͸ 1g㈁,bs;;ܘِEClI4qaOBKgemˀtyH~0[cs 78W1N{]=I)$5b2B16?0i]+7Dd hP c S AW? "2--& zfqov`!g--& zfqh -@ `E5xڭ[H\Gg̜hjUՍ릱BD()!4 &tC]#, I_ҾP m})$)MPЗj!%D&&DI̕Zי=XO`P A(Fr96J 1W QKQ.@* XBcoھFc>|D"Lf.|m3L1k}-ޔ'@%eVw+ʿdھ%Qwi?z!"羈ȹQ9s5=ƞH& \7L<rFeR`SjpN9&c, r^H{TSM.LrT8[|yC.;ϩ(TMr9Oob6FƜ+Z!EKsP9K!r*^gɭgYg1uz[Γj=gY,is!=UgZSc1B9xܔw0nT6[-.~vO5:}g[t_Kk הǬg'$_d=)@S]F_Dn4#N)?Zg[ agŤrZe:^tPq;cݪ/oz]\,vwHxcK~m?8᱘)1?EL@-.%o߷7%v`! <Ժ?'>!4oxڭUMkA~gff&I5փTV/JA@M<,[B\D xA?G?ГG=(b֙wgu-քٝg}/:(@24QBdPf6iƆy``(qkyh<2&s#E_?")R/^$oȖ p[ r=a z]v|^Ce:kK9qAl}*ZRӻFލl_P@V:ΊtֹUP n3L3y;!~z.*GV2HݷӝGNk{㸽'kVlM.b/a)ߙu'o]nM9Hg 5C̼J:tА}\۶]]}3J-Y;flWfO3vrWꝚ_sA pef0ЃQ?yndnf072A!d/ W&0;عp ^F'?zq%럍< `>~q. [ 3R Ep\Tzt1vR O7*󗲣eFgDg1@sAs8wB\q;+KRsA<.,#c Cf8}Dd P ` S AT? ""27ю⸧L瓌P [ v`! ю⸧L瓌P [A`\xڥKA{3^X1]@͑.UcYևlHȟ`a(H M@6uwš ~=`@0KDFBJDI9vyBAHD/8RKcaS+EU\Kou8TY9_zVWzV֡WXA)YzOh 5kQ| 4|ڼ?1*-¨*=qϵUOF[,wqukthʖp+S>5O9:Q{~N/ >V_L_:{uskn[FJ2|c5{%W=1g6ϥ?n9>E=,-VmOY͞y)`\Axڥ=hA߼ޙ穸X'^l,,40E nMXllDDQN0vZ (]d1yowL6uQ 13iYH*fhJl(YmUT! bX ^8SGGH dR$'gCsXżK|m^(_@xzŢLt [̻'w=,I;I]HK7yOGTKL^긚І/ᛘqy}cvǙwv7I<ÞL9z,}Q380OLJVyNZ}#7"yp@\Er׾]q=9.t:gKg Ǫn><^K}Nuz%g]8t_,?%Yɳy }ؘW{$Z~ڞsЇml}u 60lؘa=yȖ+>1i5{z(:T&[F($w1e%fFo Ddp6&P ^ S AR? "$2d#q UzDV@v`!8#q UzDV. ,@FZ xڥMLAV@(|`?Dz2^%!ɳbcl<#! 1pGc/Ơ&P$Zw>:bh{.b*PS47lV$|:O; nFKQ TC'CYgt9z_Cgș{8`B/%e.JoNgb%GG+6p~b9vse"<Öd`Ψ)"yέGsF??(V3;ƍLw D^81zwa?0gTCǥi&Cr9$~H^19e[MR~#U g=W3;jWHjn\䍟c>"MS~ d c2zO8Wߏ7/>1G,o>6Aoۅ8 ̳s?pP?آkv͏(vZ h]7^c8X曊 S=ǣh\K-9P*Tox* +҆n;5Hr⫘u HkDd(P ] S AQ? 
"%2G|'K/#v`!|'K/ @0J xڥ]HQ3knf~LfIaB>EEIBD!D$1IEЃKPHD/K`}Pef;stKCevoι{(!ˇAiGX0fe^ >c"b|hRg@(jn;њnGhӱ'Ho&?ܒN:z .w}a@YgC߮# 9@9O9xdl Wgga=q M4laȦwrvhJn q`Ov}m_uI!lB_ SgޯiΙc )ѻ@=onշv%:K)cx䓵^N)7k((0Y)x z*xp(K ƿR7xKaϋaGPrq1*/za>Oޯw=fG2.yZ[ysomW( y׮AD;>2g2nYוsgQ]r*+%`ϹjɯAL /y?(FOq0RVjVj#e5оJÙcUn1:iTS2k: )|.9(mµ]~ SGړh/YrDa Dd P [ S AO? "&2fڞ}/ q Bv`!:ڞ}/ q " @#`6xڥkAl&m7&5/Jz񠅂?DIzˡCliB j "T)ObUDPk/Z"/ЌW8)v~ȿM<=lU~mUJ+gVzh8: ;_pPYyHIY6/|hg0AβS)\s;ޤh 1wzo]c/X?c{a c[W:VS73~)ߊcz0>&+om q<]mП,8˓*k+8ΧQ/!qη"8 >U#j߈'t<+0%lþFik-#c(}*65ȝ%A$ٰ͙"dkf~arO@>(Zdԑ-G^Ť~Bڼ19 Ŝ@m}se2=-\.zs%$7bNꞲ>諣3U\6D)O c{Ѡtk~[}uvQ=UKMu $&!!srUo~dafMykVwݐ&;!ۮ?zُWys$p,Tnb2 r{`R9oe.*=4=[i?b:r=p8PG炶ds. Z[ϲ99BdWr{aykϗcɯ-xޘ9baÔJ>}O}ni?ypv< Y>%3~cݶŤͨ_;dwFDzqIs/d\v_&eI{AGr`7Sv|8mǟ1;e_ϽĞ{d0T|3 1B*a῀Ape+$37X/\!(?71~Gk|GfS48SOܫ9)9]'| Q-]&qg[NҜ^Ӕ!{r&M:ӌ~;xWF}W(޲_zsDZi#:iXC鱫<?o0_0s '{]Ϫ0U*wQί.CaD$5S7Tq6ͯj!; v%wjCDҹ u.^G̿Dd 0J W C AK? "+25Y~zUT*v`!5Y~zUT8\ kxcdd``vcb``baV d,FYzP1n:"B@?b u2@0&dT20|_&,eBܤr@8?cЯb@Vg$u mq&Hi%<n#7Y,܆;!2B]z%/b$'$37X/\!(?71X??O@30|MVÿӿ]%JB0t^@8 Title$<a$ 5CJKH.O. TRnumber$a$CJ&L@& Date$a$CJ2J"2 Subtitle $<a$CJ*O2* Author$a$CJ(B( Addressd>P@R> Body Text 2$7$8$H$a$CJ O Style1 1r Style2:: References$ & FPa$&@& TOC 1CJaJ.U@. Hyperlink >*B*ph.@. TOC 2 ^CJaJ.@. TOC 3 ^CJaJ8B@8 Body TextB*CJaJph8&@8 Footnote ReferenceH*.@. Footnote Text&)@& Page Number4 @4 Footer ! !CJaJ4@"4 Header " !CJaJ>Q@2> Body Text 3#$d xa$CJ'&R,vB!7PiL)Ge|2i . | I \uT>M\lw3fP+G_& $'+/37VZaemqx|!)2<KT\|v     7PiL)Ge|2i . 
| I \uT>M\lw3fP+G_& $'+/37VZaemqx|!)2<KT\|      !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abclmnvYUVhs   lv0M dq - ; P + ^+SXR^_N=\Hjlmn{/ #%%*.-//134C8;>eAE F.FH]K^K`KaKbKcKdKeKfKgKhKiKjKkKlKmKnKoKpKqKKPTTUUV-VVWW(X@XnXXXX [1[]]]]]]]`aaaaaaaaaahdeeg1hhijmoorlux*zRz} ؅P4ًÏߐZ ڝ"?O% gL.ȴ~Ϻ ,pCp iJHFO\n R]u}z"Q"R"S"""# '?'*=*>*?*z*{*|**++/(/0111 1I1w446688!9"9*9<<<==>>e??V@@pABfBCCCYDDDHEEEFF GmGHQHHIIJJKuKKPNaN-O6OkOOO"P:P[PsPPPPQQ%U.UBUZUUUUV'V?V`VxVVVVYZeZZZZ]]1`C`abb,f-fEfGfHfgg'hdhehhhhhhhipiiiAjjjjjjjjkk_kkkl0l2l3lm4mfmxmop?@BCEGIJLMNPQSTUWXZ\]_cdgiklnpqsuwz  {cO(\e O1MCR^ht}ܔ̚UDZֵY7mF:"%').16:=ADFHKORVY[^`abefhjmortvxy{v+-.0P,HJKMm *C_abd<g O k n o q ' * + - M _ { ~    5 8 9 ; [ m  . J M N P p % ( ) + K r <X[\^~ %()+K]y|}1MPQSs#6RUVXx0LOPRr<X[\^~=Y\]_7f,HKLNnz7:;=]%:VYZ\|5_{~&BEFHhzHdghj333UUUV(V*VV W W(XXXXXԅօpsˬ߬35Ymo{$&g{}"1"3"4"M"O"U"n"p"q"""*** *9*;*A*Z*\*]*v*x*000111888888899999BBCHHIkOOOOOOOOO"P6P8P[PoPqPPPPBUVUXUUUUUUUUVV'V;V=V`VtVvVVVV@ZTZVZZZZe]y]{]aaa-fAfCfgggh"h$hKh_hahhhhiiiiii(jjjjjGk[k]kkkkkkkl,l.lxxxE{Y{[{kՂvà  "ThjFZ\r13h|~g{}Mac{Ocev X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%X%̕::::::::::::::::::::::::::::::::::::::XX:::::::::::::::::::::::::::::::::::::::::::::::: !!7KMPdfi}L`b)=?G[]ey{|2FH i }  . B D | I ] _ !+-\pruThj::::::::::::::::::::::::::::::::::::::::::::8@ 10(  H  # B BH  # Y YT  C E E N    4  B2   tT$N2  3 DTL`  C L L`  C K K`  C J J`  C I I`  C H 0` H`  C G  G`  C F(ph F.b %)81  #"  T  # A%). A`  C @ /81 @d N   "  ZB B S DZ  3 d _! " d.` @  # ! 
'!Z   H2  # f`  C c@ cT  # b`  bZB  S D@Z  _   _H2  #  _T`  C a  aT  C @ `  C `H5 ` ` @  #  `  C _X4 _T  # ^ ` ^T  C @ YZ  `    ` H2  #  S `  C ] `8  ]ZB  S D _b`   #  Bh>4b   `  C \D \42  Dh0 s`  C [K [B2   7+`  C Z H X Z` X8 # pzZB B S DXZB  S D(8` 8ZB  S D T  C M Mbb 5@   #" B   10P@ B2   H2  #  *`  C X X`  C W5 W`  C V* V,Z (*@ R  (*@ RZ  3 U( h R U`  C TH *@  T2Z @  R  @  R`  C S@ 8 R S`  C R   R,Z r  rZ  3 Qr( Q`  C P] P2Z H  H`  C OH O`  C Nv NN  3 D DN  3 C CT  C 0 0T  C / /T  C . .N  3 F N  3 >  >N  3 <  <N  3 ?  ?T  C : :T  C 8 8H  # 7 7T  C 9 9N  3 =  =T  C ; ;H  # 6 6N  3 2 2N  3 3 3N  3 4 4N  3 5 5T  C - -T  C * *T  C , ,T  C + +T  C (! (T  C '" 'T  C &# &T  C %$ %T  C $% $T  C #& #T  C )  )T  C "' "T  C !( !T  C  )  T  C * N  3 + N  3 / T  C 0 N  3 1 T  C 2 T  C 3 T  C 4 T  C 5 T  C 6 T  C 7 T  C 8 N  3 : T  C ; T  C . N  3  <  N  3  =  T  C  >  N  3  @  N  3  ?  N  3 A T  C B N  3 C T   C  D N   3 E N   3 1 1T   C  , T   C  - T  C 9 B S  ?^K]aaߐ""S"T"**?*@*008888Efhj0low)y-y}~~426$(Iˆy}܌}v}֘ ȚvH jtH[&tH FQt  ^t/ttt)ts'ktH*&th`%`t@a " tJ ( th $ t  t H$zt pzt $Dt pdt  %tH tH $t t* "t* t  rSTm5 tE tQp tp)TTpSt(*8t(@& tpE) tW(Bt(% t(-(t((yt(@(t(:(t(G`'^t(" tp%'<tp(t( t plZ"2 t pP ;tpYZ% tp(>tA=) t' t(x'P tp8(pt((g t(X)tp @tpa)\tP)t t) t(tpa)t(it(0(ttp) t((g tp) tp>)tpE)h th t p t (8(et(t _Toc511732114 _Hlt511807255 _Hlt511807256 _Toc511726305 _Toc511726306 _Toc511732115 _Toc511726307 _Toc511732116 _Toc511726308 _Toc511732117 _Toc511726309 _Toc511732118 _Toc511726310 _Toc511732119 _Toc511726311 _Toc511732120 _Toc511726312 _Toc511732121 _Toc511726313 _Toc511732122 _Toc511726314 _Toc511732123 _Toc511726316 _Toc511732124 _Toc511726317 _Toc511732125 _Toc511726318 _Toc511732126 _Toc511726319 _Toc511732127 _Toc511726320 _Toc511732128 _Toc511726321 _Toc511732129 _Toc511726322 _Toc511732130 _Toc511726323 _Toc511732131 _Toc511726324 _Toc511732132 _Toc511726325 _Toc511732133 _Toc511726326 _Toc511732134 _Toc511726327 _Toc511732135 _Toc511726328 _Toc511732136 
_Toc511726329 _Toc511732137 _Toc511726330 _Toc511732138 _Toc511726331 _Toc511732139 _Toc511726332 _Toc511732140 _Toc511726333 _Toc511732141 _Toc511726334 _Toc511732142 _Toc511726335 _Toc511732143 _Toc511726336 _Toc511732144 _Toc511726337 _Toc511732145 _Toc511726338 _Toc511732146 _Toc511726339 _Toc511732147 _Toc511726340 _Toc511732148 _Toc511726341 _Toc511732149 _Toc511726342 _Toc511732150 _Toc511726343 _Toc511732151 _Toc511726344 _Toc511732152 _Toc511726345 _Toc511732153 _Toc511726346 _Toc511732154 _Toc511726347 _Toc511732155 _Toc511726348 _Toc511732156 _Toc511726349 _Toc511732157 _Toc511726350 _Toc511732158 _Toc511726351 _Toc511732159 _Toc511726352 _Toc511732160 _Toc511726353 _Toc511732161 _Toc511726354 _Toc511727737 _Toc511727825 _Toc511728868 _Toc511732033 _Toc511732162 _Toc511726355 _Toc511732163 _Toc511726356 _Toc511732164 _Toc511726357 _Toc511732165 _Hlt499270420 _Hlt499270421 _Toc511726358 _Toc511732166 _Toc511726359 _Toc511732167 _Toc511726360 _Toc511732168 _Toc511726361 _Toc511732169 _Toc511726362 _Toc511732170 _Toc511726363 _Toc511732171 _Toc511726364 _Toc511732172 _Toc511732173 _Toc511732174 _Toc511732175 _Toc511726365 _Toc511732176MMknn//EE F FqKqKTT [ []]aaee*z*z""??ppCCOORRuu}}zz ' '|*|*++// 1 1w4w466888888!9"9<<<<HHvKvKPNPNQQ]]1`1`bbmmfm\̛̛w@@  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmn@o@pqrstuvwxyz{|}~loz//FF-F-FKKTT0[0[]]aaegQzQzٝٝ>>MM--oo II[[[[>'>'**++&/&/G1G14466999999)9)9<<==IIKK`N`NQQ]]B`B`bb3m3mvml/w.,KCbgO o + _   9 m . N ) r <\ )]}1Q6V0P<\=]f,Lz;:Z_&FzHh34UUV+VV W(X?XXXׅpsˬ6Yp{'g~"P"U""* *A*y*1188899 9aa-fDfKhbhhhjjl/lE{\{kvărg~Ofi}~+BС,Lcby˨18OVmt,.EG^`wy:QSjl8O kI`tw.,KCbgO o + _   9 m . N ) r <\ )]}1Q6V0P<\=]f,Lz;:Z_&FzHh34UUV+VV W(X?XXXׅpsˬ6Yp{'g~"P"U""* *A*y*1188899 9aa-fDfKhbhhhjjl/lE{\{kvărg~OfgiSU}~+BС,Lcby˨18OVmt,.EG^`wy:QSjl8O kI`˴ Y^gi¸ºĺƺȺʺ̺κtw3333333333333.,KCbgO o + _   9 m . 
N ) r <\ )]}1Q6V0P<\=]f,Lz;:Z_&FzHh34UUV+VV W(X?XXXׅpsˬ6Yp{'g~"P"U""* *A*y*1188899 9aa-fDfKhbhhhjjl/lE{\{kvărg~Ofhik|~+BΡΡСXcgikmpjjl£٣~,.EJJLcϥby{8X[[\^˨ 18OVmtªĪǪު,.EG^`wyʭ٭ۭܭޭ߭88:QSjl/668O ZdffghkѰҰհ@-..I`ѲӲԲ77^^ɳɳ44rrOT[_/tw Gautam DasiC:\Documents and Settings\gautamd\Application Data\Microsoft\Word\AutoRecovery save of techrep-final5.asd Gautam DasiC:\Documents and Settings\gautamd\Application Data\Microsoft\Word\AutoRecovery save of techrep-final5.asd Gautam Das$C:\papers\samples\techrep-final6.doc Gautam Das$C:\papers\samples\techrep-final6.doc Gautam DasiC:\Documents and Settings\gautamd\Application Data\Microsoft\Word\AutoRecovery save of techrep-final6.asd Gautam Das$C:\papers\samples\techrep-final6.doc Gautam Das$C:\papers\samples\techrep-final6.doc Gautam Das$C:\papers\samples\techrep-final6.doc Gautam DasbC:\Documents and Settings\gautamd\Application Data\Microsoft\Word\AutoRecovery save of techrep.asd Gautam DasC:\papers\samples\techrep.doc%q w [(K\\g Bh_ x 4A+|#k ?\o% 89)'8f-Ye.Ry6 [7Y ?~B#@dЬxX2L}Lʝ@ M_#Mj@X:Uy$] VOa~/[a<6j?zkbT*el:<fewl@forf_sh !jot}Ydo2$.r;M7fxMm{ |z8ahh^h`.P^`P..^`...x^`x.... ^` .....  X ^ `X ......   ^ `....... 8^`8........ `^``.........^`o(.^`.pLp^p`L.@ @ ^@ `.^`.L^`L.^`.^`.PLP^P`L.hh^h`o(()88^8`o(()L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L.hh^h`o(()88^8`.L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L.P^`P56CJ OJGautam Daseautaut msrtr.dote Gautam Dase70tMicrosoft Word 9.0@Z7W@lb@i.@2f;U  FMicrosoft Word Document MSWordDocWord.Document.89q   microsoft2. DocumentSummaryInformation8 CompObjjQJo(.@@^@`56CJOJQJo(..0^`056CJOJQJo(...``^``56CJOJQJo(.... ^`o( ..... ^`o( ...... p^`o(....... `p^``o(....... 00^0`o(........z^`zo(()88^8`o()L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L.hh^h`OJQJ^Jo(.0^`0o(.0^`0o(..88^8`o(... `^``o( .... `^``o( ..... ^`o( ...... ^`o(....... pp^p`o(........0^`0o(()^`.                        ! " # $ % & ' ( ) *  L ^ `L.  ^ `.xx^x`.HLH^H`L.^`.^`.L^`L.P^`Po(@@^@`o(.0^`0o(..``^``o(... ^`o( .... ^`o( ..... ^`o( ...... `^``o(....... 
00^0`o(........hh^h`o()hh^h`o(.88^8`.L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L.88^8`o()^`. L ^ `L.  ^ `.xx^x`.HLH^H`L.՜.+,D՜.+,X px   microsoft2. "Type in the Title" Title4 8@ _PID_HLINKSAz|4http://www.tpc.org/{$#http://research.microsoft.com/dmx/6j_Toc5117321766d_Toc5117321756^_Toc5117321746X_Toc5117321736R_Toc5117321726L_Toc5117321716F_Toc5117321706@_Toc5117321696:_Toc51173216864_Toc5117321676._Toc5117321666(_Toc5117321656"_Toc5117321646_Toc5117321636_Toc5117321616_Toc5117321606 _Toc5117321596_Toc5117321586_Toc5117321576_Toc5117321566_Toc5117321556_Toc5117321546_Toc5117321536_Toc5117321526_Toc5117321516_Toc5117321506_Toc5117321496_Toc5117321486_Toc5117321476_Toc5117321466_Toc5117321456_Toc5117321446_Toc5117321436_Toc5117321426_Toc5117321416_Toc5117321406_Toc5117321396_Toc5117321386_Toc5117321376_Toc5117321366z_Toc5117321356t_Toc5117321346n_Toc5117321336h_Toc5117321326b_Toc5117321316\_Toc5117321306V_Toc5117321296P_Toc5117321286J_Toc5117321276D_Toc5117321266>_Toc51173212568_Toc51173212462_Toc5117321236,_Toc5117321226&_Toc5117321216 _Toc5117321206_Toc5117321196_Toc5117321186_Toc5117321176_Toc5117321166_Toc511732115^`.^`.L^`L. hh^h`OJQJo(nhh^h`o()h ^`OJQJo(h ^`OJQJo(oh pp^p`OJQJo(h @ @ ^@ `OJQJo(h ^`OJQJo(oh ^`OJQJo(h ^`OJQJo(h ^`OJQJo(oh PP^P`OJQJo(hh^h`o(.88^8`o(.L^`L.  ^ `.  ^ `.,,^,`o()HH^H`.^`.L^`L.hh^h`o(.88^8`.L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L.hh^h`o(.88^8`.L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L. hh^h`OJQJo(nhh^h`o(()88^8`.L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L.^`o()^`.pLp^p`L.@ @ ^@ `.^`.L^`L.^`.^`.PLP^P`L.^`o(()^`.pLp^p`L.@ @ ^@ `.^`.L^`L.^`.^`.PLP^P`L.hh^h`o()^`o(.^`.pLp^p`L.@ @ ^@ `.^`.L^`L.^`.^`.PLP^P`L.hh^h`o(()88^8`.L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L.^`o(()^`.pLp^p`L.@ @ ^@ `.^`.L^`L.^`.^`.PLP^P`L.hh^h`o(()88^8`.L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L.^`o()hh^h`o(.88^8`.L^`L.  ^ `.  ^ `.xLx^x`L.HH^H`.^`.L^`L.^`o(.^`.pLp^p`L.@ @ ^@ `.^`.L^`L.^`.^`.PLP^P`L.hh^h`o()hh^h`CJOJQJo([]^`o(.^`.pLp^p`L.@ @ ^@ `.^`.L^`L.^`.^`.PLP^P`L. 
hh^h`OJQJo(n^`o(()^`.pLp^p`L.@ @ ^@ `.^`.L^`L.^`.^`.PLP^P`L.hh^h`o()hh^h`56CJOJQJo(.^`.pLp^p`L.@ @ ^@ `.^`.L^`L.^`.^`.PLP^P`L.(q|#g /[a(K*ezkbX:U7fx ?$.r|Mm{|#o?\o%y6_shy$]fee.}L Bh_[7_#Mrf#@@ M!joX2L+8f-wVOa89)l@f|#oYdo|#o %%         &z                B                                  d                                                                       hi}+Сpl£.Lb{˨8VtǪ.G`y:Sl8k!0Iײ9`̳6t'Qyĵ5ĺȺ̺к #'*»˻ջ*;tw@ll`o llhi  cdwxӀӁӂӃӅ444444ʛʜv0@0l0@000@000$@000@008@00x@00@00@00<@0v0@00@00000@@00L@00T@00\@00l@00|@UnknownGz Times New Roman5Symbol3& z Arial;Wingdings?5 z Courier New"hZTfkTkTFo;U!0da2q:C:\Program Files\Microsoft Office\Templates\1033\msrtr.dot"Type in the Title" Gautam Das Gautam DasRoot Entrys F qi?%Data }H-WordDocumentrObjectPoolufܶf                   ! " # $ % & ' ( ) * 4 5 6 7 8 9 : ; < = . x#$511732115$yI{MSR-TR-2001-37 final versiongautamd@microsoft.com Gautam DasDocumentSummaryInformation83 CompObjj                        ! > " # $ & % ' ( * ) + , - . 0 / 1 3 2 5 4 7 6 8 9 : ; < = ? @ B A / D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { ~   C   ՜.+,D՜.+,X px   microsoft2. 
"Type in the Title" Title\@< _PID_HLINKS_AdHocReviewCycleID_EmailSubject _AuthorEmail_AuthorEmailDisplayNameAz|4http://www.tpc.org/{$#http://research.microsoft.com/dmx/6j_Toc5117321766d_Toc5117321756^_Toc5117321746X_Toc5117321736R_Toc5117321726L_Toc5117321716F_Toc5117321706@_Toc5117321696:_Toc51173216864_Toc5117321676._Toc5117321666(_Toc5117321656"_Toc5117321646_Toc5117321636_Toc5117321616_Toc5117321606 _Toc5117321596_Toc5117321586_Toc5117321576_Toc5117321566_Toc5117321556_Toc5117321546_Toc5117321536_Toc5117321526_Toc5117321516_Toc5117321506_Toc5117321496_Toc5117321486_Toc5117321476_Toc5117321466_Toc5117321456_Toc5117321446_Toc5117321436_Toc5117321426_Toc5117321416_Toc5117321406_Toc5117321396_Toc5117321386_Toc5117321376_Toc5117321366z_Toc5117321356t_Toc5117321346n_Toc5117321336h_Toc5117321326b_Toc5117321316\_Toc5117321306V_Toc5117321296P_Toc5117321286J_Toc5117321276D_Toc5117321266>_Toc51173212568_Toc51173212462_Toc5117321236,_Toc5117321226&_Toc5117321216 _Toc5117321206_Toc5117321196_Toc5117321186_Toc5117321176_Toc5117321166_Toc