import pandas as pd
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor

# Excel workbook holding the yearly indicators, one row per (city, year).
_FEATURE_FILE_PATH = r"jupyter/浙江省经济和社会指标-汇总.xlsx"

# Observed years used to fit the per-feature trend line.
_HISTORY_YEARS = ["2022", "2023"]

# Saved XGBoost regressor for every supported land type.
_XGB_MODEL_PATHS = {
    "工业用地(万元/㎡)": "jupyter/industrial_land_xgb_regressor_boston.model",
    "住宅用地(万元/㎡)": "jupyter/residential_land_xgb_regressor_boston.model",
    "商服用地(万元/㎡)": "jupyter/business_land_xgb_regressor_boston.model",
    "其他用地(万元/㎡)": "jupyter/other_land_xgb_regressor_boston.model",
}

# Land type whose predicted price receives an extra policy adjustment.
_COMMERCIAL_LAND = "商服用地(万元/㎡)"


def _read_feature_names(land_type):
    """Return the ordered, comma-separated feature names stored for *land_type*."""
    # The file is named after the land type with the "(unit)" suffix removed.
    with open(f"jupyter/{land_type.split('(')[0]}", "r", encoding="utf-8") as f:
        return f.read().split(",")


def _forecast_feature(x_history, y_history, year):
    """Extrapolate one feature to *year* with a linear fit over the history."""
    # Work on copies: the 2025 branch appends a synthetic sample.
    x_data = list(x_history)
    y_data = list(y_history)
    model = LinearRegression()
    model.fit(x_data, y_data)
    # Negative extrapolations are clamped to zero.
    pred_data = max(round(model.predict([[year]])[0], 4), 0)
    if year == 2025:
        # NOTE(review): the value appended for 2024 is the prediction made
        # for `year` (2025), not for 2024. Kept exactly as in the original
        # because changing it would alter the model inputs; confirm intent.
        x_data.append([2024])
        y_data.append(pred_data)
        model.fit(x_data, y_data)
        # The refit prediction is not clamped, matching the original code.
        pred_data = round(model.predict([[year]])[0], 4)
    return pred_data


def pipeline(city_name, year, land_type, policy_factors):
    """Predict the land price of one land type for a city and year.

    Each input feature of the land type is extrapolated to ``year`` from the
    city's 2022/2023 indicator rows with a linear regression; the resulting
    feature vector is then fed to the saved XGBoost regressor of that land
    type.

    :param city_name: city to predict for; matched against the "城市" column.
    :param year: target year as a number (compared with 2023 and 2025).
    :param land_type: one of the keys of ``_XGB_MODEL_PATHS``.
    :param policy_factors: policy factor. When truthy it is used directly
        (as ``float``) for the "政策影响" feature instead of a forecast, and it
        scales the commercial-land adjustment (0.5 is used there when falsy).
    :return: tuple ``(price, features)`` where ``price`` is the first model
        prediction (presumably in the unit named in ``land_type``) and
        ``features`` maps each feature name to the value fed to the model.
    :raises KeyError: if ``land_type`` is not supported.
    :raises ValueError: if a feature must be forecast but the workbook has no
        2022/2023 rows for ``city_name``.
    """
    print(f"{city_name},{year},{land_type}")

    # Fail fast, before any file I/O, on an unknown land type.
    if land_type not in _XGB_MODEL_PATHS:
        raise KeyError(
            f"unsupported land_type {land_type!r}; "
            f"expected one of {list(_XGB_MODEL_PATHS)}"
        )

    # Only the requested land type's feature list is needed.
    feature_names = _read_feature_names(land_type)

    # ---- 1. Load the yearly indicator table ----
    feature_data = pd.read_excel(_FEATURE_FILE_PATH, dtype={"年度": str})
    feature_data = feature_data.bfill()
    feature_data = feature_data.set_index(["城市", "年度"])

    # The city/year filter is the same for every feature, so apply it once.
    cities = feature_data.index.get_level_values("城市")
    years = feature_data.index.get_level_values("年度")
    history = feature_data[(cities == city_name) & years.isin(_HISTORY_YEARS)]
    # Samples are used in reverse file order, as in the original code.
    x_history = [[int(y)] for y in history.index.get_level_values("年度")][::-1]

    # ---- 2. Forecast every feature of the land type ----
    feature_list = []
    for feature_name in feature_names:
        if feature_name == "政策影响" and policy_factors:
            # A caller-supplied policy factor replaces the forecast.
            feature_list.append(round(float(policy_factors), 4))
            continue
        if not x_history:
            raise ValueError(
                f"no {'/'.join(_HISTORY_YEARS)} data for city {city_name!r} "
                f"in {_FEATURE_FILE_PATH}"
            )
        y_history = history[feature_name].tolist()[::-1]
        feature_list.append(_forecast_feature(x_history, y_history, year))

    # ---- 3. Predict the land price for the requested year ----
    model = XGBRegressor()
    model.load_model(_XGB_MODEL_PATHS[land_type])
    y_pred = model.predict([feature_list])
    if land_type == _COMMERCIAL_LAND and year > 2023:
        # Commercial land after 2023 gets 3/5 of the policy factor added,
        # falling back to a factor of 0.5 when none was supplied.
        effective_policy = policy_factors if policy_factors else 0.5
        y_pred[0] += round(float(effective_policy) * (3 / 5), 4)
    return y_pred[0], dict(zip(feature_names, feature_list))