begriffs open source - ai-pg/blob - full-docs/txt/sql-createaggregate.txt

   1
   2 CREATE AGGREGATE
   3
   4    CREATE AGGREGATE — define a new aggregate function
   5
   6 Synopsis
   7
   8 CREATE [ OR REPLACE ] AGGREGATE name ( [ argmode ] [ argname ] arg_data_type [ ,
   9  ... ] ) (
  10     SFUNC = sfunc,
  11     STYPE = state_data_type
  12     [ , SSPACE = state_data_size ]
  13     [ , FINALFUNC = ffunc ]
  14     [ , FINALFUNC_EXTRA ]
  15     [ , FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
  16     [ , COMBINEFUNC = combinefunc ]
  17     [ , SERIALFUNC = serialfunc ]
  18     [ , DESERIALFUNC = deserialfunc ]
  19     [ , INITCOND = initial_condition ]
  20     [ , MSFUNC = msfunc ]
  21     [ , MINVFUNC = minvfunc ]
  22     [ , MSTYPE = mstate_data_type ]
  23     [ , MSSPACE = mstate_data_size ]
  24     [ , MFINALFUNC = mffunc ]
  25     [ , MFINALFUNC_EXTRA ]
  26     [ , MFINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
  27     [ , MINITCOND = minitial_condition ]
  28     [ , SORTOP = sort_operator ]
  29     [ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ]
  30 )
  31
  32 CREATE [ OR REPLACE ] AGGREGATE name ( [ [ argmode ] [ argname ] arg_data_type [
  33  , ... ] ]
  34                         ORDER BY [ argmode ] [ argname ] arg_data_type [ , ... ]
  35  ) (
  36     SFUNC = sfunc,
  37     STYPE = state_data_type
  38     [ , SSPACE = state_data_size ]
  39     [ , FINALFUNC = ffunc ]
  40     [ , FINALFUNC_EXTRA ]
  41     [ , FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
  42     [ , INITCOND = initial_condition ]
  43     [ , PARALLEL = { SAFE | RESTRICTED | UNSAFE } ]
  44     [ , HYPOTHETICAL ]
  45 )
  46
  47 or the old syntax
  48
  49 CREATE [ OR REPLACE ] AGGREGATE name (
  50     BASETYPE = base_type,
  51     SFUNC = sfunc,
  52     STYPE = state_data_type
  53     [ , SSPACE = state_data_size ]
  54     [ , FINALFUNC = ffunc ]
  55     [ , FINALFUNC_EXTRA ]
  56     [ , FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
  57     [ , COMBINEFUNC = combinefunc ]
  58     [ , SERIALFUNC = serialfunc ]
  59     [ , DESERIALFUNC = deserialfunc ]
  60     [ , INITCOND = initial_condition ]
  61     [ , MSFUNC = msfunc ]
  62     [ , MINVFUNC = minvfunc ]
  63     [ , MSTYPE = mstate_data_type ]
  64     [ , MSSPACE = mstate_data_size ]
  65     [ , MFINALFUNC = mffunc ]
  66     [ , MFINALFUNC_EXTRA ]
  67     [ , MFINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE } ]
  68     [ , MINITCOND = minitial_condition ]
  69     [ , SORTOP = sort_operator ]
  70 )
  71
  72 Description
  73
  74    CREATE AGGREGATE defines a new aggregate function. CREATE OR REPLACE
  75    AGGREGATE will either define a new aggregate function or replace an
  76    existing definition. Some basic and commonly-used aggregate functions
  77    are included with the distribution; they are documented in
  78    Section 9.21. If one defines new types or needs an aggregate function
  79    not already provided, then CREATE AGGREGATE can be used to provide the
  80    desired features.
  81
  82    When replacing an existing definition, the argument types, result type,
  83    and number of direct arguments may not be changed. Also, the new
  84    definition must be of the same kind (ordinary aggregate, ordered-set
  85    aggregate, or hypothetical-set aggregate) as the old one.
  86
  87    If a schema name is given (for example, CREATE AGGREGATE myschema.myagg
  88    ...) then the aggregate function is created in the specified schema.
  89    Otherwise it is created in the current schema.
  90
  91    An aggregate function is identified by its name and input data type(s).
  92    Two aggregates in the same schema can have the same name if they
  93    operate on different input types. The name and input data type(s) of an
  94    aggregate must also be distinct from the name and input data type(s) of
  95    every ordinary function in the same schema. This behavior is identical
  96    to overloading of ordinary function names (see CREATE FUNCTION).
  97
  98    A simple aggregate function is made from one or two ordinary functions:
  99    a state transition function sfunc, and an optional final calculation
 100    function ffunc. These are used as follows:
 101 sfunc( internal-state, next-data-values ) ---> next-internal-state
 102 ffunc( internal-state ) ---> aggregate-value
 103
 104    PostgreSQL creates a temporary variable of data type stype to hold the
 105    current internal state of the aggregate. At each input row, the
 106    aggregate argument value(s) are calculated and the state transition
 107    function is invoked with the current state value and the new argument
 108    value(s) to calculate a new internal state value. After all the rows
 109    have been processed, the final function is invoked once to calculate
 110    the aggregate's return value. If there is no final function then the
 111    ending state value is returned as-is.
 112
 113    An aggregate function can provide an initial condition, that is, an
 114    initial value for the internal state value. This is specified and
 115    stored in the database as a value of type text, but it must be a valid
 116    external representation of a constant of the state value data type. If
 117    it is not supplied then the state value starts out null.
 118
 119    If the state transition function is declared “strict”, then it cannot
 120    be called with null inputs. With such a transition function, aggregate
 121    execution behaves as follows. Rows with any null input values are
 122    ignored (the function is not called and the previous state value is
 123    retained). If the initial state value is null, then at the first row
 124    with all-nonnull input values, the first argument value replaces the
 125    state value, and the transition function is invoked at each subsequent
 126    row with all-nonnull input values. This is handy for implementing
 127    aggregates like max. Note that this behavior is only available when
 128    state_data_type is the same as the first arg_data_type. When these
 129    types are different, you must supply a nonnull initial condition or use
 130    a nonstrict transition function.
 131
 132    If the state transition function is not strict, then it will be called
 133    unconditionally at each input row, and must deal with null inputs and
 134    null state values for itself. This allows the aggregate author to have
 135    full control over the aggregate's handling of null values.
 136
 137    If the final function is declared “strict”, then it will not be called
 138    when the ending state value is null; instead a null result will be
 139    returned automatically. (Of course this is just the normal behavior of
 140    strict functions.) In any case the final function has the option of
 141    returning a null value. For example, the final function for avg returns
 142    null when it sees there were zero input rows.
 143
 144    Sometimes it is useful to declare the final function as taking not just
 145    the state value, but extra parameters corresponding to the aggregate's
 146    input values. The main reason for doing this is if the final function
 147    is polymorphic and the state value's data type would be inadequate to
 148    pin down the result type. These extra parameters are always passed as
 149    NULL (and so the final function must not be strict when the
 150    FINALFUNC_EXTRA option is used), but nonetheless they are valid
 151    parameters. The final function could for example make use of
 152    get_fn_expr_argtype to identify the actual argument type in the current
 153    call.
 154
 155    An aggregate can optionally support moving-aggregate mode, as described
 156    in Section 36.12.1. This requires specifying the MSFUNC, MINVFUNC, and
 157    MSTYPE parameters, and optionally the MSSPACE, MFINALFUNC,
 158    MFINALFUNC_EXTRA, MFINALFUNC_MODIFY, and MINITCOND parameters. Except
 159    for MINVFUNC, these parameters work like the corresponding
 160    simple-aggregate parameters without M; they define a separate
 161    implementation of the aggregate that includes an inverse transition
 162    function.
 163
 164    The syntax with ORDER BY in the parameter list creates a special type
 165    of aggregate called an ordered-set aggregate; or if HYPOTHETICAL is
 166    specified, then a hypothetical-set aggregate is created. These
 167    aggregates operate over groups of sorted values in order-dependent
 168    ways, so that specification of an input sort order is an essential part
 169    of a call. Also, they can have direct arguments, which are arguments
 170    that are evaluated only once per aggregation rather than once per input
 171    row. Hypothetical-set aggregates are a subclass of ordered-set
 172    aggregates in which some of the direct arguments are required to match,
 173    in number and data types, the aggregated argument columns. This allows
 174    the values of those direct arguments to be added to the collection of
 175    aggregate-input rows as an additional “hypothetical” row.
 176
 177    An aggregate can optionally support partial aggregation, as described
 178    in Section 36.12.4. This requires specifying the COMBINEFUNC parameter.
 179    If the state_data_type is internal, it's usually also appropriate to
 180    provide the SERIALFUNC and DESERIALFUNC parameters so that parallel
 181    aggregation is possible. Note that the aggregate must also be marked
 182    PARALLEL SAFE to enable parallel aggregation.
 183
 184    Aggregates that behave like MIN or MAX can sometimes be optimized by
 185    looking into an index instead of scanning every input row. If this
 186    aggregate can be so optimized, indicate it by specifying a sort
 187    operator. The basic requirement is that the aggregate must yield the
 188    first element in the sort ordering induced by the operator; in other
 189    words:
 190 SELECT agg(col) FROM tab;
 191
 192    must be equivalent to:
 193 SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
 194
 195    Further assumptions are that the aggregate ignores null inputs, and
 196    that it delivers a null result if and only if there were no non-null
 197    inputs. Ordinarily, a data type's < operator is the proper sort
 198    operator for MIN, and > is the proper sort operator for MAX. Note that
 199    the optimization will never actually take effect unless the specified
 200    operator is the “less than” or “greater than” strategy member of a
 201    B-tree index operator class.
 202
 203    To be able to create an aggregate function, you must have USAGE
 204    privilege on the argument types, the state type(s), and the return
 205    type, as well as EXECUTE privilege on the supporting functions.
 206
 207 Parameters
 208
 209    name
 210           The name (optionally schema-qualified) of the aggregate function
 211           to create.
 212
 213    argmode
 214           The mode of an argument: IN or VARIADIC. (Aggregate functions do
 215           not support OUT arguments.) If omitted, the default is IN. Only
 216           the last argument can be marked VARIADIC.
 217
 218    argname
 219           The name of an argument. This is currently only useful for
 220           documentation purposes. If omitted, the argument has no name.
 221
 222    arg_data_type
 223           An input data type on which this aggregate function operates. To
 224           create a zero-argument aggregate function, write * in place of
 225           the list of argument specifications. (An example of such an
 226           aggregate is count(*).)
 227
 228    base_type
 229           In the old syntax for CREATE AGGREGATE, the input data type is
 230           specified by a basetype parameter rather than being written next
 231           to the aggregate name. Note that this syntax allows only one
 232           input parameter. To define a zero-argument aggregate function
 233           with this syntax, specify the basetype as "ANY" (not *).
 234           Ordered-set aggregates cannot be defined with the old syntax.
 235
 236    sfunc
 237           The name of the state transition function to be called for each
 238           input row. For a normal N-argument aggregate function, the sfunc
 239           must take N+1 arguments, the first being of type state_data_type
 240           and the rest matching the declared input data type(s) of the
 241           aggregate. The function must return a value of type
 242           state_data_type. This function takes the current state value and
 243           the current input data value(s), and returns the next state
 244           value.
 245
 246           For ordered-set (including hypothetical-set) aggregates, the
 247           state transition function receives only the current state value
 248           and the aggregated arguments, not the direct arguments.
 249           Otherwise it is the same.
 250
 251    state_data_type
 252           The data type for the aggregate's state value.
 253
 254    state_data_size
 255           The approximate average size (in bytes) of the aggregate's state
 256           value. If this parameter is omitted or is zero, a default
 257           estimate is used based on the state_data_type. The planner uses
 258           this value to estimate the memory required for a grouped
 259           aggregate query.
 260
 261    ffunc
 262           The name of the final function called to compute the aggregate's
 263           result after all input rows have been traversed. For a normal
 264           aggregate, this function must take a single argument of type
 265           state_data_type. The return data type of the aggregate is
 266           defined as the return type of this function. If ffunc is not
 267           specified, then the ending state value is used as the
 268           aggregate's result, and the return type is state_data_type.
 269
 270           For ordered-set (including hypothetical-set) aggregates, the
 271           final function receives not only the final state value, but also
 272           the values of all the direct arguments.
 273
 274           If FINALFUNC_EXTRA is specified, then in addition to the final
 275           state value and any direct arguments, the final function
 276           receives extra NULL values corresponding to the aggregate's
 277           regular (aggregated) arguments. This is mainly useful to allow
 278           correct resolution of the aggregate result type when a
 279           polymorphic aggregate is being defined.
 280
 281    FINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE }
 282           This option specifies whether the final function is a pure
 283           function that does not modify its arguments. READ_ONLY indicates
 284           it does not; the other two values indicate that it may change
 285           the transition state value. See Notes below for more detail. The
 286           default is READ_ONLY, except for ordered-set aggregates, for
 287           which the default is READ_WRITE.
 288
 289    combinefunc
 290           The combinefunc function may optionally be specified to allow
 291           the aggregate function to support partial aggregation. If
 292           provided, the combinefunc must combine two state_data_type
 293           values, each containing the result of aggregation over some
 294           subset of the input values, to produce a new state_data_type
 295           that represents the result of aggregating over both sets of
 296           inputs. This function can be thought of as an sfunc, where
 297           instead of acting upon an individual input row and adding it to
 298           the running aggregate state, it adds another aggregate state to
 299           the running state.
 300
 301           The combinefunc must be declared as taking two arguments of the
 302           state_data_type and returning a value of the state_data_type.
 303           Optionally this function may be “strict”. In this case the
 304           function will not be called when either of the input states are
 305           null; the other state will be taken as the correct result.
 306
 307           For aggregate functions whose state_data_type is internal, the
 308           combinefunc must not be strict. In this case the combinefunc
 309           must ensure that null states are handled correctly and that the
 310           state being returned is properly stored in the aggregate memory
 311           context.
 312
 313    serialfunc
 314           An aggregate function whose state_data_type is internal can
 315           participate in parallel aggregation only if it has a serialfunc
 316           function, which must serialize the aggregate state into a bytea
 317           value for transmission to another process. This function must
 318           take a single argument of type internal and return type bytea. A
 319           corresponding deserialfunc is also required.
 320
 321    deserialfunc
 322           Deserialize a previously serialized aggregate state back into
 323           state_data_type. This function must take two arguments of types
 324           bytea and internal, and produce a result of type internal.
 325           (Note: the second, internal argument is unused, but is required
 326           for type safety reasons.)
 327
 328    initial_condition
 329           The initial setting for the state value. This must be a string
 330           constant in the form accepted for the data type state_data_type.
 331           If not specified, the state value starts out null.
 332
 333    msfunc
 334           The name of the forward state transition function to be called
 335           for each input row in moving-aggregate mode. This is exactly
 336           like the regular transition function, except that its first
 337           argument and result are of type mstate_data_type, which might be
 338           different from state_data_type.
 339
 340    minvfunc
 341           The name of the inverse state transition function to be used in
 342           moving-aggregate mode. This function has the same argument and
 343           result types as msfunc, but it is used to remove a value from
 344           the current aggregate state, rather than add a value to it. The
 345           inverse transition function must have the same strictness
 346           attribute as the forward state transition function.
 347
 348    mstate_data_type
 349           The data type for the aggregate's state value, when using
 350           moving-aggregate mode.
 351
 352    mstate_data_size
 353           The approximate average size (in bytes) of the aggregate's state
 354           value, when using moving-aggregate mode. This works the same as
 355           state_data_size.
 356
 357    mffunc
 358           The name of the final function called to compute the aggregate's
 359           result after all input rows have been traversed, when using
 360           moving-aggregate mode. This works the same as ffunc, except that
 361           its first argument's type is mstate_data_type and extra dummy
 362           arguments are specified by writing MFINALFUNC_EXTRA. The
 363           aggregate result type determined by mffunc or mstate_data_type
 364           must match that determined by the aggregate's regular
 365           implementation.
 366
 367    MFINALFUNC_MODIFY = { READ_ONLY | SHAREABLE | READ_WRITE }
 368           This option is like FINALFUNC_MODIFY, but it describes the
 369           behavior of the moving-aggregate final function.
 370
 371    minitial_condition
 372           The initial setting for the state value, when using
 373           moving-aggregate mode. This works the same as initial_condition.
 374
 375    sort_operator
 376           The associated sort operator for a MIN- or MAX-like aggregate.
 377           This is just an operator name (possibly schema-qualified). The
 378           operator is assumed to have the same input data types as the
 379           aggregate (which must be a single-argument normal aggregate).
 380
 381    PARALLEL = { SAFE | RESTRICTED | UNSAFE }
 382           The meanings of PARALLEL SAFE, PARALLEL RESTRICTED, and PARALLEL
 383           UNSAFE are the same as in CREATE FUNCTION. An aggregate will not
 384           be considered for parallelization if it is marked PARALLEL
 385           UNSAFE (which is the default!) or PARALLEL RESTRICTED. Note that
 386           the parallel-safety markings of the aggregate's support
 387           functions are not consulted by the planner, only the marking of
 388           the aggregate itself.
 389
 390    HYPOTHETICAL
 391           For ordered-set aggregates only, this flag specifies that the
 392           aggregate arguments are to be processed according to the
 393           requirements for hypothetical-set aggregates: that is, the last
 394           few direct arguments must match the data types of the aggregated
 395           (WITHIN GROUP) arguments. The HYPOTHETICAL flag has no effect on
 396           run-time behavior, only on parse-time resolution of the data
 397           types and collations of the aggregate's arguments.
 398
 399    The parameters of CREATE AGGREGATE can be written in any order, not
 400    just the order illustrated above.
 401
 402 Notes
 403
 404    In parameters that specify support function names, you can write a
 405    schema name if needed, for example SFUNC = public.sum. Do not write
 406    argument types there, however — the argument types of the support
 407    functions are determined from other parameters.
 408
 409    Ordinarily, PostgreSQL functions are expected to be true functions that
 410    do not modify their input values. However, an aggregate transition
 411    function, when used in the context of an aggregate, is allowed to cheat
 412    and modify its transition-state argument in place. This can provide
 413    substantial performance benefits compared to making a fresh copy of the
 414    transition state each time.
 415
 416    Likewise, while an aggregate final function is normally expected not to
 417    modify its input values, sometimes it is impractical to avoid modifying
 418    the transition-state argument. Such behavior must be declared using the
 419    FINALFUNC_MODIFY parameter. The READ_WRITE value indicates that the
 420    final function modifies the transition state in unspecified ways. This
 421    value prevents use of the aggregate as a window function, and it also
 422    prevents merging of transition states for aggregate calls that share
 423    the same input values and transition functions. The SHAREABLE value
 424    indicates that the transition function cannot be applied after the
 425    final function, but multiple final-function calls can be performed on
 426    the ending transition state value. This value prevents use of the
 427    aggregate as a window function, but it allows merging of transition
 428    states. (That is, the optimization of interest here is not applying the
 429    same final function repeatedly, but applying different final functions
 430    to the same ending transition state value. This is allowed as long as
 431    none of the final functions are marked READ_WRITE.)
 432
 433    If an aggregate supports moving-aggregate mode, it will improve
 434    calculation efficiency when the aggregate is used as a window function
 435    for a window with moving frame start (that is, a frame start mode other
 436    than UNBOUNDED PRECEDING). Conceptually, the forward transition
 437    function adds input values to the aggregate's state when they enter the
 438    window frame from the bottom, and the inverse transition function
 439    removes them again when they leave the frame at the top. So, when
 440    values are removed, they are always removed in the same order they were
 441    added. Whenever the inverse transition function is invoked, it will
 442    thus receive the earliest added but not yet removed argument value(s).
 443    The inverse transition function can assume that at least one row will
 444    remain in the current state after it removes the oldest row. (When this
 445    would not be the case, the window function mechanism simply starts a
 446    fresh aggregation, rather than using the inverse transition function.)
 447
 448    The forward transition function for moving-aggregate mode is not
 449    allowed to return NULL as the new state value. If the inverse
 450    transition function returns NULL, this is taken as an indication that
 451    the inverse function cannot reverse the state calculation for this
 452    particular input, and so the aggregate calculation will be redone from
 453    scratch for the current frame starting position. This convention allows
 454    moving-aggregate mode to be used in situations where there are some
 455    infrequent cases that are impractical to reverse out of the running
 456    state value.
 457
 458    If no moving-aggregate implementation is supplied, the aggregate can
 459    still be used with moving frames, but PostgreSQL will recompute the
 460    whole aggregation whenever the start of the frame moves. Note that
 461    whether or not the aggregate supports moving-aggregate mode, PostgreSQL
 462    can handle a moving frame end without recalculation; this is done by
 463    continuing to add new values to the aggregate's state. This is why use
 464    of an aggregate as a window function requires that the final function
 465    be read-only: it must not damage the aggregate's state value, so that
 466    the aggregation can be continued even after an aggregate result value
 467    has been obtained for one set of frame boundaries.
 468
 469    The syntax for ordered-set aggregates allows VARIADIC to be specified
 470    for both the last direct parameter and the last aggregated (WITHIN
 471    GROUP) parameter. However, the current implementation restricts use of
 472    VARIADIC in two ways. First, ordered-set aggregates can only use
 473    VARIADIC "any", not other variadic array types. Second, if the last
 474    direct parameter is VARIADIC "any", then there can be only one
 475    aggregated parameter and it must also be VARIADIC "any". (In the
 476    representation used in the system catalogs, these two parameters are
 477    merged into a single VARIADIC "any" item, since pg_proc cannot
 478    represent functions with more than one VARIADIC parameter.) If the
 479    aggregate is a hypothetical-set aggregate, the direct arguments that
 480    match the VARIADIC "any" parameter are the hypothetical ones; any
 481    preceding parameters represent additional direct arguments that are not
 482    constrained to match the aggregated arguments.
 483
 484    Currently, ordered-set aggregates do not need to support
 485    moving-aggregate mode, since they cannot be used as window functions.
 486
 487    Partial (including parallel) aggregation is currently not supported for
 488    ordered-set aggregates. Also, it will never be used for aggregate calls
 489    that include DISTINCT or ORDER BY clauses, since those semantics cannot
 490    be supported during partial aggregation.
 491
 492 Examples
 493
 494    See Section 36.12.
 495
 496 Compatibility
 497
 498    CREATE AGGREGATE is a PostgreSQL language extension. The SQL standard
 499    does not provide for user-defined aggregate functions.
 500
 501 See Also
 502
 503    ALTER AGGREGATE, DROP AGGREGATE