利用python进行批量TIF转NC并进行像元尺度的MK检验

原创已于 2024-10-25 16:01:45 修改 · 469 阅读

0 ·

本内容遵循CC 4.0 BY-SA版权协议

标签

#python #数据分析 #水文气象环境

于 2024-06-06 10:47:38 首次发布

python 同时被 3 个专栏收录

22 篇文章

订阅专栏

科研

12 篇文章

订阅专栏

算法

5 篇文章

订阅专栏

批量TIF转NC并进行MK检验

这里主要记录一个批量进行tif文件转nc，并且将长序列数据进行mk检验的python代码。

主要用到的库有os/numpy/xarray/gdal/pymannkendall。

1.通过Search_File函数实现文件夹的搜索与遍历

dirname为文件夹路径，suffix为后缀名称，如果你的文件是nc的，那么可以写为".nc"。

import os
import numpy as np
import xarray as xr
from osgeo import gdal, osr

def Search_File(dirname,suffix):
    '''
    This function can search all files with the specified suffix in this dir.
    :param dirname: string, the path need to be searched
    :param suffix: string, the specified suffix need to be seached
    :return:
    filter_list: list, the path list need to be searched.
    '''
    filter = [suffix]  # 设置过滤后的文件类型 当然可以设置多个类型
    filter_list = []
    for maindir, subdir, file_name_list in os.walk(dirname):
        #print(maindir) #当前主目录
        for filename in file_name_list:
            apath = os.path.join(maindir, filename)#合并成一个完整路径
            portion = os.path.splitext(apath)
            ext = portion[1]  # 获取文件后缀 [0]获取的是除了文件名以外的内容

            if ext in filter:
                newname = portion[0] + suffix

                filter_list.append((newname,portion[0].split("\\")[-1]))
    # print(filter_list)
    return filter_list

2、利用combin_tif_to_nc函数和gdal库进行tif到nc的转换

主要需要更改input_dir（输入文件夹名称）
gdal.Warp(r"E:\530\VQI\vqi{}.nc".format(n),p,height=553,width=1638,format=‘NetCDF’)中的输出路径，以及height和width。


def combin_tif_to_nc():
    '''
    第一步，先进行tif到nc的转换
    :return:
    '''
    # 设置输入和输出文件路径
    input_dir = r'E:\530\VQI'  # 包含TIFF文件的目录
    input_list = Search_File(input_dir,".tif")

    for p,n in input_list:
        # 使用gdal.Warp()函数进行转换，这儿可以设定转换出的文件行列号
        gdal.Warp(r"E:\530\VQI\vqi\{}.nc".format(n),p,height=553,width=1638,format='NetCDF')
        print("转换完成！")

3.利用combin函数进行多个nc文件的时间轴合并，为了更好的进行mk检验。

data_folder 需要改成nc文件夹路径。
output_nc 需要改成输出文件夹路径。

def combin():
    '''
    数据合并，沿时间轴
    :return:
    '''
    import xarray as xr
    import pandas as pd
    import os

    # 假设你的NC文件存放在‘data_folder’目录下
    data_folder = r"E:\530\VQI\vqi"  # 请替换为你的NC文件所在的文件夹路径
    file_n = Search_File(data_folder,".nc")
    nc_files = [p for p,n in file_n]
    # n = [n for p,n in file_n]
    print(nc_files)
    # nc_files = [os.path.join(data_folder, f) for f in os.listdir(data_folder) if f.endswith('.nc')]

    first_file = xr.open_dataset(nc_files[0])
    x = first_file["lat"].values
    y = first_file["lon"].values
    change_file_p = r"{}\change".format(data_folder)
    if not os.path.exists(change_file_p):
        os.makedirs(change_file_p)

    for file,n in file_n:
        # file_path = os.path.join(folder_path, file)
        ds = xr.open_dataset(file)
        ds['lat'] = x
        ds['lon'] = y
        ds.to_netcdf(r"{}/{}.nc".format(change_file_p,n))
        ds.close()

    nc_files_new = [os.path.join(change_file_p, f) for f in os.listdir(change_file_p) if f.endswith('.nc')]
    # 排序文件保证时间顺序
    nc_files_new.sort()

    # 使用open_mfdataset来读取所有的.nc文件，假设所有文件都有相同的x和y坐标
    combined_ds = xr.open_mfdataset(nc_files_new, concat_dim='time', combine='nested')

    # 创建时间坐标（这里我们从文件名中提取时间信息，假设格式为YYYY.*）
    print(nc_files_new)
    time_coords = [pd.to_datetime(f.split('.')[0][-4:], format='%Y') for f in nc_files_new]

    # 将时间坐标添加到数据集中
    combined_ds = combined_ds.assign_coords(time=('time', time_coords))
    combined_ds = combined_ds.rename({'Band1': 'vqi'})
    #combined_ds["GPP"] = combined_ds["GPP"] * 0.001 * 0.001

    # 如果你想保存合并后的数据到新的NC文件中
    output_nc = r'E:\530\VQI\vqi\combined_data.nc'
    combined_ds.to_netcdf(output_nc)

    print(f"Combined dataset saved to {output_nc}")

4.进行像素级别的mk检验

r’E:\530\VQI\vqi\combined_data.nc’需要更改为你合并后的nc文件
r’E:\530\VQI\vqi\mk_results.nc’是你的结果输出
mk_trend为三个值，-1为显著下降，1为显著上升，0为变化不显著
mk_sen为sen监测的结果
mk_h为是否有趋势

def mk_test():
    import xarray as xr
    import numpy as np
    import pymannkendall as mk

    # 打开NetCDF文件
    ds = xr.open_dataset(r'E:\530\VQI\vqi\combined_data.nc')

    # 获取数据和坐标
    data = ds['vqi'].values
    times = ds['time'].values
    lats = ds['lat'].values
    lons = ds['lon'].values

    # 创建空的结果数组
    sen_value = np.zeros_like(data[0]) * np.nan  # 初始化为NaN
    trend_value = np.zeros_like(data[0]) * np.nan
    h_value = np.zeros_like(data[0]) * np.nan
    # 遍历每个网格点
    for i in range(data.shape[1]):
        for j in range(data.shape[2]):
            # 获取当前网格点的时间序列
            ts = data[:, i, j]
            print(ts)

            # 检查是否有缺失值
            if np.any(ts == -3.4e+38):
                continue  # 跳过当前网格点

            if np.isnan(ts).any():
                continue

            # 进行MK检验
            trend, h, p, z, Tau, s, var_s, slope, intercept = mk.original_test(ts)

            if trend == "decreasing":
                trend_id = -1
            elif trend == "increasing":
                trend_id = 1
            elif trend == "no trend":
                trend_id = 0

            # 存储结果
            sen_value[i, j] = slope
            trend_value[i, j] = trend_id
            h_value[i, j] = h

    # 创建NetCDF文件
    ds_out = xr.Dataset({'mk_trend': (['y', 'x'], trend_value),
                         'mk_sen' : (['y', 'x'], sen_value),
                         "mk_h" : (['y', 'x'], h_value)},
                        coords={'y': lats, 'x': lons})
    ds_out.to_netcdf(r'E:\530\VQI\vqi\mk_results.nc')
    print('Successfully created mk_results.nc')