import pandas as pd
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor


# Supported land types mapped to the pre-trained XGBoost model file of each.
_LAND_TYPE_MODEL_PATHS = {
    "工业用地（万元/㎡）": 'jupyter/industrial_land_xgb_regressor_boston.model',
    "住宅用地（万元/㎡）": "jupyter/residential_land_xgb_regressor_boston.model",
    "商服用地（万元/㎡）": "jupyter/business_land_xgb_regressor_boston.model",
    "其他用地（万元/㎡）": "jupyter/other_land_xgb_regressor_boston.model",
}
# Workbook holding the yearly indicators per city.
_FEATURE_FILE_PATH = r"jupyter/浙江省经济和社会指标-汇总.xlsx"
# Years (as stored in the workbook) used as the base for the trend fit.
_BASE_YEARS = ["2022", "2023"]


def _load_feature_names(land_type):
    """Read the comma-separated feature names used by the model of *land_type*."""
    # The file is named after the land type without its unit suffix.
    with open(f"jupyter/{land_type.split('（')[0]}", "r", encoding="utf-8") as f:
        return f.read().split(",")


def _forecast_feature(years, values, year):
    """Extrapolate one feature to *year* with a linear fit over (years, values)."""
    x_data = [[y] for y in years]
    y_data = list(values)
    model = LinearRegression()
    model.fit(x_data, y_data)
    # Negative extrapolations are clamped to zero.
    pred_data = max(round(model.predict([[year]])[0], 4), 0)

    if year == 2025:
        # The first estimate is appended as the 2024 observation and the line
        # is fitted again before predicting 2025.
        # NOTE(review): the appended value is the forecast for `year` (2025),
        # not for 2024, and the refit result is not clamped at 0 like the
        # first pass. Both look unintended; behaviour is kept as-is because
        # changing it alters the model inputs. Please confirm.
        x_data.append([2024])
        y_data.append(pred_data)
        model.fit(x_data, y_data)
        pred_data = round(model.predict([[year]])[0], 4)

    return pred_data


def pipeline(city_name, year, land_type, policy_factors):
    """
    Predict the land price of one land type for a city and year.

    The features required by the land type's model are extrapolated from the
    city's 2022/2023 indicator rows with a linear regression, then fed to the
    pre-trained XGBoost model of that land type.

    :param city_name: city name, matched against the "城市" column
    :param year: target year; a string is converted to int
    :param land_type: land type, one of the keys of ``_LAND_TYPE_MODEL_PATHS``
    :param policy_factors: policy factor; when truthy it is used directly as
        the "政策影响" feature instead of extrapolating it, and for
        "商服用地（万元/㎡）" after 2023 it also shifts the prediction
        (0.5 is used there when it is falsy)
    :return: tuple of (predicted price, dict mapping feature name to the
        feature value passed to the model)
    :raises KeyError: if ``land_type`` is not supported or a feature column is
        missing from the workbook
    :raises ValueError: if the workbook has no 2022/2023 rows for the city
    """
    print(f"{city_name},{year},{land_type}")

    # Fail fast, before any file is touched.
    if land_type not in _LAND_TYPE_MODEL_PATHS:
        raise KeyError(
            f"unknown land_type {land_type!r}; expected one of {list(_LAND_TYPE_MODEL_PATHS)}"
        )
    # The comparisons below (== 2025, > 2023) need a number.
    if isinstance(year, str):
        year = int(year)

    feature_names = _load_feature_names(land_type)

    # --------------------------------  1. Load the yearly indicators --------------------
    feature_data = pd.read_excel(_FEATURE_FILE_PATH, dtype={"年度": str})
    # NOTE(review): the backfill runs over the whole sheet before indexing, so
    # a gap can be filled from a following row of a different city.
    feature_data.bfill(inplace=True)
    feature_data.set_index(['城市', '年度'], inplace=True)

    # Select the city's base-year rows once instead of rescanning per feature.
    index = feature_data.index
    in_scope = (index.get_level_values('城市') == city_name) & index.get_level_values('年度').isin(_BASE_YEARS)
    # Reversed to keep the sample order the regression has always been fed.
    history = feature_data.loc[in_scope].iloc[::-1]
    history_years = [int(y) for y in history.index.get_level_values('年度')]

    # --------------------------------  2. Forecast the features of the land type --------------------
    feature_list = []
    for feature_name in feature_names:
        if feature_name == "政策影响" and policy_factors:
            feature_list.append(round(float(policy_factors), 4))
            continue

        if history.empty:
            raise ValueError(
                f"no {'/'.join(_BASE_YEARS)} data for city {city_name!r} in {_FEATURE_FILE_PATH}"
            )
        feature_list.append(
            _forecast_feature(history_years, history[feature_name].tolist(), year)
        )

    # --------------------------------  3. Predict the land price for the year --------------------
    model = XGBRegressor()
    model.load_model(_LAND_TYPE_MODEL_PATHS[land_type])
    y_pred = model.predict([feature_list])
    if land_type == "商服用地（万元/㎡）" and year > 2023:
        # Commercial land after 2023 gets an additive policy adjustment;
        # a missing factor falls back to 0.5.
        effective_factor = policy_factors or 0.5
        y_pred[0] += round(float(effective_factor) * (3 / 5), 4)
    return y_pred[0], dict(zip(feature_names, feature_list))