统计学习方法 —— 决策树模型
import math
from collections import Counter


def createDataSet():
    """Return the loan-application toy dataset and its feature names.

    Each row holds four categorical feature values followed by the class
    label in the last column.

    :return: tuple ``(dataset, labels)`` where ``dataset`` is a list of rows
        and ``labels`` names the feature columns (the class column is unnamed).
    """
    dataset = [
        ['青年', '否', '否', '一般', '否'],
        ['青年', '否', '否', '好', '否'],
        ['青年', '是', '否', '好', '是'],
        ['青年', '是', '是', '一般', '是'],
        ['青年', '否', '否', '一般', '否'],
        ['中年', '否', '否', '一般', '否'],
        ['中年', '否', '否', '好', '否'],
        ['中年', '是', '是', '好', '是'],
        ['中年', '否', '是', '非常好', '是'],
        ['中年', '否', '是', '非常好', '是'],
        ['老年', '否', '是', '非常好', '是'],
        ['老年', '否', '是', '好', '是'],
        ['老年', '是', '否', '好', '是'],
        ['老年', '是', '否', '非常好', '是'],
        ['老年', '否', '否', '一般', '否'],
    ]
    labels = ['年龄', '有工作', '有自己的房子', '信贷情况']
    return dataset, labels


def calcEntropy(dataset):
    """Compute the empirical (Shannon, base-2) entropy of the class column.

    :param dataset: list of rows; the last element of each row is the class.
    :return: entropy in bits; ``0`` for an empty dataset.
    """
    num = len(dataset)
    labelCounts = Counter(row[-1] for row in dataset)
    entropy = 0
    for count in labelCounts.values():
        prob = float(count) / num
        entropy += -1 * prob * math.log(prob, 2)
    return entropy


def splitDataSet(dataset, axis, value):
    """Select rows whose feature ``axis`` equals ``value`` and drop that column.

    :param dataset: list of rows (each row a list).
    :param axis: index of the feature column to test and remove.
    :param value: feature value a row must have to be kept.
    :return: new list of new rows, each one column shorter than the input;
        the input rows are not modified.
    """
    return [
        row[:axis] + row[axis + 1:]
        for row in dataset
        if row[axis] == value
    ]


def chooseBestFeature(dataset):
    """Return the index of the feature with the largest information gain.

    :param dataset: non-empty list of rows; last column is the class label.
    :return: index of the best feature; ``0`` when no feature yields a
        strictly positive gain.
    """
    numFeatures = len(dataset[0]) - 1
    baseEntropy = calcEntropy(dataset)
    total = float(len(dataset))
    bestInfoGain = 0
    bestFeature = 0
    for i in range(numFeatures):
        uniqueVals = set(example[i] for example in dataset)
        # Conditional entropy H(D | feature i): size-weighted sum of the
        # entropies of the partitions induced by each feature value.
        newEntropy = 0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataset, i, value)
            newEntropy += len(subDataSet) / total * calcEntropy(subDataSet)
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i
    return bestFeature


def _majorityClass(classList):
    """Return the most frequent class label (first-seen wins on ties)."""
    return Counter(classList).most_common(1)[0][0]


def createTree(dataset, labels):
    """Recursively build an ID3 decision tree as nested dicts.

    :param dataset: non-empty list of rows; last column is the class label.
    :param labels: feature names aligned with the feature columns of
        ``dataset``. The list is not modified.
    :return: a class label for a leaf, otherwise
        ``{feature_name: {feature_value: subtree, ...}}``.
    """
    classList = [example[-1] for example in dataset]
    # All samples share one class: pure leaf.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # No features left to split on: fall back to majority vote so every
    # leaf is a single class label rather than a raw list of labels.
    if len(dataset[0]) == 1:
        return _majorityClass(classList)

    bestFeat = chooseBestFeature(dataset)
    bestFeatLabel = labels[bestFeat]
    # Build the reduced label list without touching the caller's list.
    subLabels = labels[:bestFeat] + labels[bestFeat + 1:]

    branches = {}
    for value in set(example[bestFeat] for example in dataset):
        branches[value] = createTree(
            splitDataSet(dataset, bestFeat, value), subLabels
        )
    return {bestFeatLabel: branches}


if __name__ == '__main__':
    dataset, labels = createDataSet()
    print(createTree(dataset, labels))
相关知识
西瓜书+花书圣经+统计学习方法+南瓜书|四大人工智能名著分享
周志华西瓜书+花书圣经+李航统计学习方法+南瓜书|四大人工智能名著分享
决策树的过拟合问题及解决方案
分类算法3:决策树及R语言实现
基于决策树构建鸢尾花数据的分类模型并绘制决策树模型
基于决策树的水稻病虫害发生程度预测模型——以芜湖市为例
《统计学习方法》第 2 章“感知机”学习笔记
决策树模型——鸢尾花分类 剪枝前后正确率
决策树模型
python利用c4.5决策树对鸢尾花卉数据集进行分类(iris)
原文链接: 统计学习方法 —— 决策树模型 https://www.huajiangbk.com/newsview1911817.html
| 上一篇: 提升树算法详解与应用 | 下一篇: 鸢尾花数据集的决策树模型构建与优... |
推荐分享

- 1明日花キララ:明日花绮罗年度... 4661
- 2君子兰什么品种最名贵 十大名... 4470
- 3世界上最名贵的10种兰花图片... 3778
- 4花圈挽联怎么写? 3716
- 5鲜花养护:帝王花的养殖方法以... 3640
- 6明日花キララ(明日花绮罗)经... 2439
- 7迷信说家里不能放假花 家里摆... 2256
- 8香山红叶什么时候红 1873
- 9十大致癌花卉排行榜,哪些花卉... 1787
- 10花的意思,花的解释,花的拼音... 1582




