使用Python绘制Beta分布图

lx0bsm1f  于 2022-10-30  发布在  Python
关注(0)|答案(1)|浏览(505)

给定一些测量值，我想创建一个 Beta 分布。已知最大值、最小值、平均值，以及 alpha 和 beta 两个形状参数，我应该如何调用 beta.ppf 或 beta.pdf 来生成合适的数据集？
可正常运行的示例：https://www.kaggle.com/iancoetzer/betaworking
存在问题的示例：https://www.kaggle.com/iancoetzer/betaproblem

import matplotlib.pyplot as plt
from scipy.stats import beta

# NOTE(review): `np` is used further down, but this snippet never imports
# numpy, so the `np.linspace` call raises NameError as written.

# Set the shape parameters of the beta distribution

#

a = 2.8754
b = 3.0300

# Smallest, largest and average measured value. `mean` is not used anywhere
# else in this snippet.
minv = 82.292
maxv = 129.871
mean = 105.46

#

# Generate the x values at which the density is evaluated

# NOTE(review): scipy documents the first argument of `ppf` as a probability
# q in [0, 1]; minv and maxv are raw measurements (82.292 and 129.871), so
# both calls are expected to return nan. Presumably the intent is to rescale
# the distribution onto [minv, maxv] with `loc=minv, scale=maxv - minv`
# and pass probabilities to `ppf` — confirm.

x = np.linspace(beta.ppf(minv, a, b),beta.ppf(maxv, a, b), 100)

#

# Plot the beta distribution

# NOTE(review): the x axis is clipped to [0.7, 1], which matches neither the
# (0, 1) range named in the x label nor the measured range [minv, maxv].

plt.figure(figsize=(7,7))
plt.xlim(0.7, 1)
plt.plot(x, beta.pdf(x, a, b), 'r-')
plt.title('Beta Distribution', fontsize='15')
plt.xlabel('Values of Random Variable X (0, 1)', fontsize='15')
plt.ylabel('Probability', fontsize='15')
plt.show()
zc0qhyus

zc0qhyus1#

我们编写了一个简单的方案来计算并绘制 Beta 分布，代码如下（请看图中红色的 Beta 曲线）。接下来我们还打算绘制威布尔（Weibull）分布……


# import libraries

import pandas as pd, numpy as np, gc, time, os, uuid, math, datetime
from joblib import Parallel, delayed
from numpy.random import default_rng
from scipy.stats import beta
from scipy import special
from scipy.stats import exponweib

import matplotlib.pyplot as plt

# Sample parameters: measured range and mean, the two beta shape parameters,
# and the number of values generated per distribution.
low = 82.292
high = 129.871
mean = 105.46
a = 2.8754
b = 3.0300
trials = 10000

# Spread shared by the normal and lognormal samples: one sixth of the range.
scale = (high - low) / 6

# Normal sample centred on the mean.
normal_arr = np.random.normal(mean, scale, trials)

# Triangular sample over [low, high] with its peak at the mean.
triangular_arr = np.random.triangular(low, mean, high, trials)

# Lognormal sample: mu and sigma of the underlying normal are derived from
# the mean and spread above by moment matching.
mean_sq = math.pow(mean, 2)
scale_sq = math.pow(scale, 2)
mu = math.log(mean_sq / math.sqrt(scale_sq + mean_sq))
sigma = math.sqrt(math.log(scale_sq / mean_sq + 1))
lognorm_arr = np.random.lognormal(mu, sigma, trials)

# Beta "sample": not random. The inverse CDF is evaluated on an evenly spaced
# grid running from ppf(0) to ppf(1), then shifted by `low` and stretched by
# `high - low`.
grid_start = beta.ppf(0.0, a, b)
grid_stop = beta.ppf(1, a, b)
beta_x = np.linspace(grid_start, grid_stop, trials)
beta_arr = beta.ppf(beta_x, a, b, loc=low, scale=high - low)

# define binning(arr) method:

def binning(arr, bin_count=100):
    """Summarise a sample as per-bin means and counts over equal-width bins.

    The observed range ``[min, max]`` of the values is split into
    ``bin_count`` equal-width intervals. Every value is assigned to one
    interval (the lowest edge is inclusive), and the mean and the number of
    values in each interval are reported.

    Args:
        arr: One-dimensional sequence or array of numeric values.
        bin_count: Number of equal-width bins. Defaults to 100, the value
            that used to be hard-coded.

    Returns:
        A DataFrame with one row per bin, indexed by the bin label
        (``1..bin_count``), with the columns ``"Mean"`` (mean of the values
        in the bin, ``0.0`` for a bin without values), ``"bin"`` (1-based
        bin number) and ``"Trials"`` (number of values in the bin).

    Raises:
        ValueError: If ``bin_count`` is smaller than 1.
    """
    if bin_count < 1:
        raise ValueError("bin_count must be at least 1")

    values = pd.DataFrame(arr)[0].astype(float).rename("Result")

    edges = np.linspace(values.min(), values.max(), bin_count + 1)
    labels = np.arange(1, bin_count + 1)
    bins = pd.cut(values, bins=edges, labels=labels, include_lowest=True)
    bins = bins.rename("bins")

    # observed=False is spelled out on purpose: the grouping key is
    # categorical, and empty bins must stay in the result so that every bin
    # keeps its row and the "bin" numbering below lines up with the labels.
    # Relying on the pandas default is deprecated and would drop them.
    per_bin = values.groupby(bins, observed=False)
    summary = per_bin.agg(Mean="mean", Trials="count")

    # The mean of an empty bin is NaN; report it as 0.0 instead.
    summary["Mean"] = summary["Mean"].fillna(0.0).astype(float)
    summary.insert(1, "bin", range(1, len(summary) + 1))

    return summary

# Reduce every sample to per-bin means and counts with binning().
# NOTE(review): `wei_arr` is not defined anywhere in the visible code, so the
# Weibull lines below raise NameError unless it is created elsewhere — confirm.
dfNormal = binning(normal_arr)
dfLog = binning(lognorm_arr)
dfTriangular = binning(triangular_arr)
dfBeta = binning(beta_arr)
dfWeibull = binning(wei_arr)

# binning() replaces NaN bin means with 0.0 (presumably the bins that received
# no values); drop those rows so they are not drawn.
dfNormal.drop(dfNormal[dfNormal["Mean"] == 0].index, inplace=True)
dfLog.drop(dfLog[dfLog["Mean"] == 0].index, inplace=True)
dfTriangular.drop(dfTriangular[dfTriangular["Mean"] == 0].index, inplace=True)
dfBeta.drop(dfBeta[dfBeta["Mean"] == 0].index, inplace=True)
dfWeibull.drop(dfWeibull[dfWeibull["Mean"] == 0].index, inplace=True)

# One line per distribution: x is the mean of the values in a bin, y is the
# number of values that fell into that bin.
plt.plot(dfNormal["Mean"], dfNormal["Trials"], label="Normal")
plt.plot(dfLog["Mean"], dfLog["Trials"], label="Lognormal")
plt.plot(dfTriangular["Mean"], dfTriangular["Trials"], label="Triangular")
plt.plot(dfBeta["Mean"], dfBeta["Trials"], label="Beta")
plt.plot(dfWeibull["Mean"], dfWeibull["Trials"], label="Weibull")

plt.legend(loc='upper right')

plt.xlabel("R amount")
plt.ylabel("# Trials")

# Disabled: would clip the x axis to the measured range.
# plt.xlim(low, high)

plt.show()

相关问题