本文介绍如何使用 Python 实现经典的机器学习算法之一——ID3 决策树。通过详细的讲解和代码示例，帮助读者理解并应用这一强大的分类模型。
# -*- coding:utf-8 -*-
from numpy import *
import numpy as np
import pandas as pd
from math import log
def calcShannonEnt(dataSet):
    """Return the Shannon entropy (in bits) of the class labels in dataSet.

    Each record in ``dataSet`` is indexed with ``[-1]`` to obtain its class
    label; the remaining elements of the record are ignored here.

    Args:
        dataSet: A sized iterable of records (it must support ``len()``).
            The last element of each record is used as a dictionary key,
            so it must be hashable.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the observed label
        frequencies, as a float. An empty ``dataSet`` yields ``0.0``.
    """
    numEntries = len(dataSet)

    # Tally how many records carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    # Accumulate the entropy one label at a time.
    shannonEnt = 0.0
    for count in labelCounts.values():
        # float() keeps this a true division even under Python 2 semantics.
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt