last_day(date)
Описание
Функция last_day()
возвращает последний день месяца для указанной даты или временной метки.
Параметры
date
: Column - столбец с датами или временными метками
Возвращаемое значение
Date - дата последнего дня месяца
Пример использования
from pyspark.sql.functions import last_day
from pyspark.sql import SparkSession
from datetime import datetime
# Создаем SparkSession
spark = SparkSession.builder.appName("last_day_example").getOrCreate()
# Создаем DataFrame с датами
data = [
(datetime(2023, 1, 15),), # январь
(datetime(2023, 2, 10),), # февраль
(datetime(2023, 12, 1),) # декабрь
]
df = spark.createDataFrame(data, ["date"])
# Получаем последний день месяца для каждой даты
result = df.select(
"date",
last_day("date").alias("last_day_of_month")
)
result.show(truncate=False)
# Результат:
# +-------------------+-----------------+
# |date |last_day_of_month|
# +-------------------+-----------------+
# |2023-01-15 00:00:00|2023-01-31 |
# |2023-02-10 00:00:00|2023-02-28 |
# |2023-12-01 00:00:00|2023-12-31 |
# +-------------------+-----------------+
Примечания
- NULL значения возвращают NULL
- Функция учитывает количество дней в месяце, включая високосные годы
- Для работы с датами также используйте:
  - year() — для извлечения года
  - month() — для извлечения месяца
  - day() — для извлечения дня
  - hour() — для извлечения часа
  - minute() — для извлечения минут
  - second() — для извлечения секунд
  - quarter() — для извлечения квартала
  - dayofweek() — для извлечения дня недели
  - dayofyear() — для извлечения дня года
XML функции
aggregate functions
- any_value(col[, ignoreNulls])
- approxCountDistinct(col[, rsd])
- approx_percentile(col, percentage[, accuracy])
- array_agg(col)
- avg(col)
- bit_and(col)
- bit_or(col)
- bit_xor(col)
- bool_and(col)
- bool_or(col)
- bool_xor(col)
- collect_list(col)
- collect_set(col)
- corr(col1, col2)
- count(col)
- countDistinct(col, *cols)
- count_distinct(col, *cols)
- count_if(col)
- count_min_sketch(col, eps, confidence, seed)
- covar_pop(col1, col2)
- covar_samp(col1, col2)
- every(col)
- first(col[, ignorenulls])
- first_value(col[, ignoreNulls])
- grouping(col)
- grouping_id(*cols)
- histogram_numeric(col, nBins)
- hll_sketch_agg(col[, lgConfigK])
- hll_union_agg(col[, allowDifferentLgConfigK])
- kurtosis(col)
- last(col[, ignorenulls])
- last_value(col[, ignoreNulls])
- max(col)
- max_by(col, ord)
- mean(col)
- median(col)
- min(col)
- min_by(col, ord)
- mode(col)
- percentile(col, percentage)
- percentile_approx(col, percentage, accuracy=10000)
- product(col)
- regr_avgx(y, x)
- regr_avgy(y, x)
- regr_count()
- regr_intercept(y, x)
- regr_r2()
- regr_slope(y, x)
- regr_syy()
- skewness(col)
- some(col)
- stddev(col)
- stddev_pop(col)
- stddev_samp(col)
- sum(col)
- sum_distinct(col)
- var_pop(col)
- var_samp(col)
- variance(col)
array functions pyspark
- array(*cols)
- array_append(col, value)
- array_compact(col)
- array_contains(col, value)
- array_distinct(col)
- array_insert(arr, pos, value)
- array_intersect(col1, col2)
- array_join(col, delimiter[, null_replacement])
- array_max(col)
- array_min(col)
- array_position(col, value)
- array_prepend(col, value)
- array_remove(col, element)
- array_repeat(col, count)
- array_size(col)
- array_sort(col[, comparator])
- array_union(col1, col2)
- arrays_overlap(a1, a2)
- arrays_zip(*cols)
- cardinality(column)
- concat(*cols)
- element_at(col, extraction)
- exists(col, f)
- explode(column)
- explode_outer(column)
- flatten(col)
- forall(col, f)
- inline(column)
- inline_outer(column)
- json_array_length(json_array)
- json_object_keys(json_object)
- map_filter(map, f)
- map_zip_with(map1, map2, function)
- posexplode(column)
- posexplode_outer(column)
- reduce(col, initialValue, merge, finish=None)
- reverse(col)
- sequence(start, stop[, step])
- shuffle(col)
- size(col)
- slice(x, start, length)
- sort_array(column, asc=True)
- stack(n, expr1, ..., exprN)
- struct(*cols)
- transform(col, f)
- transform_keys(map, f)
- transform_values(map, f)
- try_element_at()
- zip_with(left, right, f)
basic functions
bitmap functions
- bitmap_construct_agg(col)
- bitmap_count(bitmap)
- bitmap_or_agg(bitmap)
- bitwise_not(col)
- bitwise_or(col1, col2)
- bitwise_xor(col1, col2)
bitwise functions
- bit_count()
- bit_get()
- bit_set(column, pos)
- bitwiseAND(column1, column2)
- bitwiseNOT(column)
- bitwiseOR(column1, column2)
- bitwiseXOR(column1, column2)
- getbit()
- setbit(column, pos)
- shiftleft(column, numBits)
- shiftright(column, numBits)
- shiftrightunsigned(column, numBits)
call functions
comparison functions
conditional functions
conversion functions
csv functions
datetime functions pyspark
- add_months(start_date, num_months)
- curdate()
- current_date()
- current_time()
- current_timezone()
- date_add()
- date_diff(endDate, startDate)
- date_part(field, source)
- date_sub()
- date_trunc()
- dateadd(unit, value, date)
- datediff(end_date, start_date)
- datepart(field, source)
- day(date)
- dayofmonth(col)
- dayofweek(date)
- dayofyear(date)
- extract(field FROM source)
- from_utc_timestamp(timestamp, timezone)
- hour(timestamp)
- localtimestamp()
- make_date(year, month, day)
- make_dt_interval(days, hours, minutes, seconds)
- make_timestamp(year, month, day, hour, min, sec)
- make_timestamp_ltz(year, month, day, hour, minute, second)
- make_ym_interval(years, months)
- minute(timestamp)
- month(date)
- months_between(end_date, start_date)
- next_day(date, day_of_week)
- now()
- quarter(date)
- second(timestamp)
- session_window(timeColumn, gapDuration)
- timestamp_micros(microseconds)
- timestamp_millis(milliseconds)
- timestamp_seconds(seconds)
- timestampadd(unit, interval, timestamp)
- to_date(col, format=None)
- to_timestamp(col, format=None)
- to_timestamp_ltz(col, format=None)
- to_timestamp_ntz(col, format=None)
- to_unix_timestamp(col, format=None)
- to_utc_timestamp(timestamp, timezone)
- trunc(date, format)
- try_to_timestamp(col, format=None)
- unix_date(col)
- unix_micros(col)
- unix_millis(col)
- unix_seconds(col)
- unix_timestamp(timestamp=None, format=None)
- weekday(date)
- weekofyear(date)
- window_time(col)
- year(date)
- years_between(end, start)
encryption functions
expression functions
hash functions
java functions
json functions
- from_json(col, schema, options={})
- get_json_object()
- json_tuple(col, *fields)
- to_json(col, options={})
map functions
- create_map(*cols)
- map_concat(map1, map2, ...)
- map_contains_key(col, value)
- map_entries(map)
- map_from_arrays(keys, values)
- map_from_entries(array)
- map_keys(map)
- map_values(map)
- str_to_map(text[, pairDelim, keyValueDelim])
mathematical functions pyspark
- abs(col)
- asin(col)
- atan(col)
- atan2(y, x)
- bin(col)
- bround(col, scale=0)
- cbrt(col)
- ceil(col)
- ceiling(col)
- conv(num, from_base, to_base)
- cos(col)
- cosh(col)
- cot(col)
- csc(col)
- e()
- exp()
- expm1()
- factorial()
- floor(col)
- hypot(col1, col2)
- log(col)
- log10(col)
- negative(col)
- pmod(dividend, divisor)
- positive(col)
- power(col1, col2)
- rand(seed=None)
- randn(seed=None)
- rint(col)
- round(col, scale=0)
- sec(col)
- sign(col)
- signum(col)
- tan(col)
- tanh(col)
- toDegrees(col)
- try_add()
- try_avg()
- try_divide()
- try_multiply()
- try_subtract()
- try_sum()
- try_to_number()
normal functions
null functions
- coalesce(expr1, expr2, ...)
- equal_null(expr1, expr2)
- ifnull(expr1, expr2)
- isnotnull(expr)
- isnull(expr)
- nanvl(col1, col2)
- nullif(expr1, expr2)
- nvl(expr1, expr2)
- nvl2(expr1, expr2, expr3)
sketch functions
sort functions
- asc(col)
- asc_nulls_first(col)
- asc_nulls_last(col)
- desc(col)
- desc_nulls_first(col)
- desc_nulls_last(col)
string functions pyspark
- char_length(col)
- eval(expr)
- left(col, len)
- levenshtein(col1, col2)
- mask(col, upperChar, lowerChar, digitChar, otherChar)
- regexp(col, pattern)
- regexp_extract_all(col, pattern)
- regexp_instr(col, pattern, pos, occurrence)
- regexp_like(col, pattern)
- regexp_substr(col, pattern, pos, occurrence)
- repeat(col, n)
- replace(col, search, replace)
- right(col, len)
- rlike(col, pattern)
- rpad(col, len, pad)
- rtrim(col)
- sentences(str, lang, country)
- soundex(col)
- split(str, pattern)
- split_part(str, delimiter, partNum)
- sql(query)
- startswith(col, prefix)
- substr(col, pos, len)
- substring_index(col, delim, count)
- to_varchar(col)
- translate(col, matchingString, replaceString)
- trim(col)
- ucase(col)
- unbase64(col)
- unhex(col)
- upper(col)
- url_decode(str)
- url_encode(str)
- uuid()
struct functions
system functions
- current_catalog()
- current_database()
- current_schema()
- current_user()
- input_file_block_length()
- input_file_block_start()
- input_file_name()
- spark_partition_id()
- user()
type functions
window functions
- cume_dist()
- dense_rank()
- lag(col, offset, default)
- lead(col, offset, default)
- monotonically_increasing_id()
- nth_value(col, n)
- ntile(n)
- percent_rank()
- rank()
- row_number()
- window(timeColumn, windowDuration, slideDuration, startTime)
xml functions
- xml_tuple(xml, *paths)
- xpath(xml, path)
- xpath_boolean(xml, path)
- xpath_double(xml, path)
- xpath_float(xml, path)
- xpath_int(xml, path)
- xpath_long(xml, path)
- xpath_number(xml, path)
- xpath_short(xml, path)
- xpath_string(xml, path)
агрегатные функции
битовые операции
оконные функции
- diff(col)
- session_window(timeColumn, gapDuration)
- sliding_window(timeColumn, windowDuration, slideDuration)
- tumbling_window(timeColumn, windowDuration)