基于SVM的多标签数据集分类

cazzi123 成员 帖子数:3 贡献者 I
编辑于2019年11月,发表于“帮助”版块
我尝试将 SVM 应用于 20 Newsgroups(20 个新闻组)数据集,但没有成功。我已经做了一些预处理,例如分词、词干提取和大小写转换。该流程将 SVM 算子嵌套在“Polynominal by Binominal Classification”(通过二项分类实现多项分类)算子中。它运行了几个小时,最后因内存不足而中止。

我已经应用了朴素贝叶斯和K-NN,没有任何问题,而且两者都很快完成。

你能看一下下面的流程,并就如何加快 SVM 的分类速度给我一些建议吗?

数据集有20个标签。

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.015">
<context>
<input>
<location>//RM_Repository/项目/Newsgroups_TrainVector</location>
</input>
<output>
<location>Newsgroups_TrainVectorTH</location>
<location>Newsgroups_TrainVector</location>
</output>
<macros/>
</context>

<process expanded="true">

<list key="text_directories">
<parameter key="alt.atheism" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\alt.atheism"/>
<parameter key="comp.graphics" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\comp.graphics"/>
<parameter key="misc.forsale" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\misc.forsale"/>
<parameter key="rec.autos" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\rec.autos"/>
<parameter key="comp.os.ms-windows.misc" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\comp.os.ms-windows.misc"/>
<parameter key="comp.sys.ibm.pc.hardware" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\comp.sys.ibm.pc.hardware"/>
<parameter key="comp.windows.x" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\comp.windows.x"/>
<parameter key="rec.motorcycles" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\rec.motorcycles"/>
<parameter key="rec.sport.baseball" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\rec.sport.baseball"/>
<parameter key="rec.sport.hockey" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\rec.sport.hockey"/>
<parameter key="sci.crypt" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\sci.crypt"/>
<parameter key="sci.electronics" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\sci.electronics"/>
<parameter key="sci.med" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\sci.med"/>
<parameter key="sci.space" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\sci.space"/>
<parameter key="soc.religion.christian" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\soc.religion.christian"/>
<parameter key="talk.politics.guns" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\talk.politics.guns"/>
<parameter key="talk.politics.mideast" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\talk.politics.mideast"/>
<parameter key="talk.politics.misc" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\talk.politics.misc"/>
<parameter key="talk.religion.misc" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\talk.religion.misc"/>
<parameter key="comp.sys.mac.hardware" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\comp.sys.mac.hardware"/>
</list>



<process expanded="true">






</operator>









</process>
</operator>


<process expanded="true">


</operator>



<process expanded="true">





</process>
</operator>

<list key="class_weights"/>
</operator>





</process>
<process expanded="true">

<list key="application_parameters"/>
</operator>


<list key="class_weights"/>
</operator>









</process>
</operator>

<list key="text_directories">
<parameter key="alt.atheism" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\alt.atheism"/>
<parameter key="comp.graphics" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\comp.graphics"/>
<parameter key="misc.forsale" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\misc.forsale"/>
<parameter key="rec.autos" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\rec.autos"/>
<parameter key="comp.os.ms-windows.misc" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\comp.os.ms-windows.misc"/>
<parameter key="comp.sys.ibm.pc.hardware" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\comp.sys.ibm.pc.hardware"/>
<parameter key="comp.windows.x" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-train\comp.windows.x"/>
<parameter key="rec.motorcycles" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\rec.motorcycles"/>
<parameter key="rec.sport.baseball" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\rec.sport.baseball"/>
<parameter key="rec.sport.hockey" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\rec.sport.hockey"/>
<parameter key="sci.crypt" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\sci.crypt"/>
<parameter key="sci.electronics" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\sci.electronics"/>
<parameter key="sci.med" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\sci.med"/>
<parameter key="sci.space" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\sci.space"/>
<parameter key="soc.religion.christian" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\soc.religion.christian"/>
<parameter key="talk.politics.guns" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\talk.politics.guns"/>
<parameter key="talk.politics.mideast" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\talk.politics.mideast"/>
<parameter key="talk.politics.misc" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\talk.politics.misc"/>
<parameter key="talk.religion.misc" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\talk.religion.misc"/>
<parameter key="comp.sys.mac.hardware" value="E:\Year 2\Text Mining\Practical\20news-bydate\20news-bydate-test\comp.sys.mac.hardware"/>
</list>



<process expanded="true">






</operator>









</process>
</operator>

<list key="application_parameters"/>
</operator>









</process>
</operator>
</process>
    登录或注册后发表评论。