Python 批量处理网格数据中点数据的去重等问题

``` python
import arcpy
import os

# Workspace configuration: geoprocessing outputs may overwrite existing
# datasets, and all inputs/outputs live in the two file geodatabases below.
arcpy.env.overwriteOutput = True
input_gdb = r"E:\zmx\2025\mxphdproject\tem_data\tem_data.gdb"
output_gdb = r"E:\zmx\2025\mxphdproject\tem_data\SpeciesAnalysis.gdb"

# Create the output geodatabase on first run.
if not arcpy.Exists(output_gdb):
    _gdb_folder, _gdb_name = os.path.split(output_gdb)
    arcpy.CreateFileGDB_management(_gdb_folder, _gdb_name)

# Grid configuration: maps a resolution label (e.g. "50km") to the name of the
# grid feature class for that resolution inside the input geodatabase.
grid_config = {
    f"{km}km": f"grid_{km}km_Project"
    for km in (50, 100, 150, 200)
}

# Name of the point feature class inside the input geodatabase.
point_layer = "medicial_plant_point"

# Fail fast when the point data is missing: nothing below can run without it.
_point_fc_path = os.path.join(input_gdb, point_layer)
if not arcpy.Exists(_point_fc_path):
    raise Exception(f"点数据 '{point_layer}' 在输入数据库中不存在！")

# Process every configured grid resolution in turn.
#
# For each grid the script:
#   1. spatially joins the points to the grid so each point carries a grid ID,
#   2. removes rows that share the same (wangid, NEW_ID) pair,
#   3. counts the remaining rows per grid ID,
#   4. copies the grid and writes the per-cell count into a new LONG field.
#
# A failure in one grid is reported with a traceback and the loop continues
# with the next grid. The joined points and the statistics table are left in
# the output geodatabase for inspection.

# The point feature class is the same for every grid, so resolve it once.
input_points = os.path.join(input_gdb, point_layer)

for grid_size, grid_name in grid_config.items():
    print(f"正在处理 {grid_size} 网格...")

    # Skip grid resolutions whose feature class is missing.
    input_grid = os.path.join(input_gdb, grid_name)
    if not arcpy.Exists(input_grid):
        print(f"警告: 网格数据 '{grid_name}' 不存在，跳过处理")
        continue

    # Assigned before the try block so the cleanup in `finally` always refers
    # to the layer of the current iteration, never to a stale earlier one.
    temp_points_name = f"temp_points_{grid_size}"

    try:
        # 1. Create a temporary feature layer on top of the point data.
        temp_points = arcpy.management.MakeFeatureLayer(input_points, temp_points_name)

        if not arcpy.Exists(temp_points_name):
            raise Exception(f"临时点图层创建失败: {temp_points_name}")

        # 2. Output location of the spatial join result.
        joined_points = os.path.join(output_gdb, f"temp_joined_{grid_size}")

        # 3. Spatial join: attach the grid ID (wangid) to every point.
        # The field mapping keeps only NEW_ID and wangid, both as 20-char text.
        # NOTE(review): the field map names the source feature class path for
        # NEW_ID while the tool's target is the layer created above — confirm
        # the tool resolves that mapping as intended.
        print("  正在执行空间连接...")
        arcpy.analysis.SpatialJoin(
            target_features=temp_points,
            join_features=input_grid,
            out_feature_class=joined_points,
            join_operation="JOIN_ONE_TO_ONE",
            join_type="KEEP_ALL",
            match_option="INTERSECT",
            field_mapping=f"NEW_ID \"NEW_ID\" true true false 20 Text 0 0,First,#,{input_points},NEW_ID,0,20;wangid \"wangid\" true true false 20 Text 0 0,First,#,{input_grid},wangid,0,20",
            distance_field_name=""
        )

        if not arcpy.Exists(joined_points):
            raise Exception(f"空间连接失败，无法创建临时数据集: {joined_points}")

        point_count = int(arcpy.GetCount_management(joined_points).getOutput(0))
        print(f"  空间连接完成，共连接 {point_count} 个点")

        # 4. Drop rows duplicating the same (wangid, NEW_ID) combination so
        #    each NEW_ID is counted at most once per grid cell.
        print("  正在删除重复记录...")
        arcpy.management.DeleteIdentical(
            in_dataset=joined_points,
            fields=["wangid", "NEW_ID"]
        )

        unique_count = int(arcpy.GetCount_management(joined_points).getOutput(0))
        print(f"  删除重复后，剩余 {unique_count} 条唯一记录")

        # 5. Count the remaining NEW_ID values per grid cell.
        print("  正在统计物种数量...")
        stats_table = os.path.join(output_gdb, f"stats_{grid_size}")
        arcpy.analysis.Statistics(
            in_table=joined_points,
            out_table=stats_table,
            statistics_fields=[["NEW_ID", "COUNT"]],
            case_field="wangid"
        )

        if not arcpy.Exists(stats_table):
            raise Exception(f"统计表创建失败: {stats_table}")

        stats_count = int(arcpy.GetCount_management(stats_table).getOutput(0))
        print(f"  统计完成，共 {stats_count} 个网格有物种数据")

        # 6. Copy the grid and add a field that will hold the per-cell count.
        print("  正在关联结果到网格...")
        output_grid = os.path.join(output_gdb, f"{grid_name}_SpeciesCount")
        arcpy.management.CopyFeatures(input_grid, output_grid)

        count_field = f"SP_COUNT_{grid_size}"
        arcpy.management.AddField(output_grid, count_field, "LONG")

        # Locate the count column in the statistics table. The name is probed
        # rather than assumed: known variants first, in priority order.
        fields = [f.name for f in arcpy.ListFields(stats_table)]
        possible_fields = ["COUNT_NEW_ID", "COUNT_NEW__ID", "FREQUENCY"]
        count_field_name = next(
            (name for name in possible_fields if name in fields), None
        )

        if not count_field_name:
            print(f"  警告: 未找到统计字段，可用字段: {', '.join(fields)}")
            # Fall back to the first field that looks like a count column.
            count_field_name = next(
                (
                    name for name in fields
                    if name.startswith("COUNT") or name == "FREQUENCY"
                ),
                None,
            )
            if count_field_name:
                print(f"  使用备选字段: {count_field_name}")

        if not count_field_name:
            raise Exception("在统计表中未找到合适的统计字段")

        print(f"  使用统计字段: {count_field_name}")

        # Read the statistics into a lookup keyed by the *string* form of the
        # grid ID, so IDs stored with different field types in the statistics
        # table and the grid still match.
        count_by_id = {}
        with arcpy.da.SearchCursor(stats_table, ["wangid", count_field_name]) as cursor:
            for wangid, count_value in cursor:
                if wangid is None:
                    # KEEP_ALL retains points that intersect no grid cell;
                    # they have no grid ID and must not be matched to a cell.
                    continue
                if count_value is None:
                    count_value = 0
                count_by_id[str(wangid)] = count_value
                print(f"    网格 {wangid}: 物种数量 = {count_value}")

        # Resolve the grid ID field in the copied grid: exact name first, then
        # known case variants, then any field whose name contains "wangid".
        grid_fields = [f.name for f in arcpy.ListFields(output_grid)]
        if "wangid" in grid_fields:
            wangid_field = "wangid"
        else:
            wangid_field = next(
                (v for v in ("WANGID", "WangID", "wangId") if v in grid_fields),
                None,
            )
            if wangid_field:
                print(f"  警告: 使用替代字段名 '{wangid_field}' 代替 'wangid'")
            else:
                wangid_field = next(
                    (name for name in grid_fields if "wangid" in name.lower()),
                    None,
                )
                if wangid_field:
                    print(f"  警告: 使用相似字段名 '{wangid_field}' 代替 'wangid'")

            if not wangid_field:
                raise Exception("在网格数据中找不到wangid字段")

        # Write the count into every grid cell; cells without any matching
        # statistics row get 0. One dictionary lookup per cell.
        updated_count = 0
        with arcpy.da.UpdateCursor(output_grid, [wangid_field, count_field]) as cursor:
            for row in cursor:
                species_count = count_by_id.get(str(row[0]))
                if species_count is None:
                    row[1] = 0
                else:
                    row[1] = species_count
                    updated_count += 1
                cursor.updateRow(row)

        print(f"  已更新 {updated_count} 个网格的物种数量")
        print(f"已完成 {grid_size} 网格处理，输出: {output_grid}")
        # Only the datasets written to the output geodatabase are listed here;
        # the temporary point layer is removed in the `finally` clause below.
        print("  中间数据已保留: ")
        print(f"    - 空间连接结果: {joined_points}")
        print(f"    - 统计表: {stats_table}")

    except Exception as e:
        # Report the failure and carry on with the next grid resolution.
        print(f"处理 {grid_size} 网格时出错: {str(e)}")
        import traceback
        print(traceback.format_exc())

    finally:
        # Datasets in the output geodatabase are kept for debugging; only the
        # temporary feature layer is removed.
        print("  保留所有中间数据用于调试")
        try:
            if arcpy.Exists(temp_points_name):
                arcpy.management.Delete(temp_points_name)
                print(f"  已删除内存中的临时点图层: {temp_points_name}")
        except Exception as cleanup_error:
            # Cleanup is best-effort: report the problem but never let it
            # abort the remaining grids.
            print(f"  警告: 删除临时点图层失败: {cleanup_error}")

print("所有网格处理完成！")
print(f"所有中间数据保留在: {output_gdb}")

```
python批量处理网格数据中点数据的去重等问题

发表回复取消回复

Z

Related Posts

python 进行空间自相关-聚类-异常值分析

[R] 生成随机数列

R处理wordclim未来数据

发表回复 取消回复

Z

发表回复取消回复