在java代码中用weka
?
import weka.core.Instances; import java.io.BufferedReader; import java.io.FileReader; ... BufferedReader reader = new BufferedReader( new FileReader("/some/where/data.arff")); Instances data = new Instances(reader); reader.close(); // setting class attribute data.setClassIndex(data.numAttributes() - 1);
import weka.core.converters.ConverterUtils.DataSource; ... DataSource source = new DataSource("/some/where/data.arff"); Instances data = source.getDataSet(); // setting class attribute if the data format does not provide this information // For example, the XRFF format saves the class attribute information as well if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1);
jdbcDriver=org.gjt.mm.mysql.Driver jdbcURL=jdbc:mysql://localhost:3306/some_databaseSecondly, your Java code needs to look like this to load the data from the database:
import weka.core.Instances; import weka.experiment.InstanceQuery; ... InstanceQuery query = new InstanceQuery(); query.setUsername("nobody"); query.setPassword(""); query.setQuery("select * from whatsoever"); // You can declare that your data set is sparse // query.setSparseData(true); Instances data = query.retrieveInstances();
String[] options = new String[2]; options[0] = "-R"; options[1] = "1";
String[] options = weka.core.Utils.splitOptions("-R 1");
classifiers.functions.SMO
// create new instance of scheme weka.classifiers.functions.SMO scheme = new weka.classifiers.functions.SMO(); // set options scheme.setOptions(weka.core.Utils.splitOptions("-C 1.0 -L 0.0010 -P 1.0E-12 -N 0 -V -1 -W 1 -K "weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0""));Also, the?
filters.unsupervised.attribute.Removewith this option
-R 1If you have an?Instances?object, called?data, you can create and apply the filter like this:
import weka.core.Instances; import weka.filters.Filter; import weka.filters.unsupervised.attribute.Remove; ... String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "1"; // first attribute Remove remove = new Remove(); // new instance of filter remove.setOptions(options); // set options remove.setInputFormat(data); // inform filter about dataset **AFTER** setting options Instances newData = Filter.useFilter(data, remove); // apply filter
import weka.classifiers.meta.FilteredClassifier; import weka.classifiers.trees.J48; import weka.filters.unsupervised.attribute.Remove; ... Instances train = ... // from somewhere Instances test = ... // from somewhere // filter Remove rm = new Remove(); rm.setAttributeIndices("1"); // remove 1st attribute // classifier J48 j48 = new J48(); j48.setUnpruned(true); // using an unpruned J48 // meta-classifier FilteredClassifier fc = new FilteredClassifier(); fc.setFilter(rm); fc.setClassifier(j48); // train and make predictions fc.buildClassifier(train); for (int i = 0; i < test.numInstances(); i++) { double pred = fc.classifyInstance(test.instance(i)); System.out.print("ID: " + test.instance(i).value(0)); System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue())); System.out.println(", predicted: " + test.classAttribute().value((int) pred)); }
= ... // from somewhere Instances test = ... // from somewhere Standardize filter = new Standardize(); filter.setInputFormat(train); // initializing the filter once with training set Instances newTrain = Filter.useFilter(train, filter); // configures the Filter based on train instances and returns filtered instances Instances newTest = Filter.useFilter(test, filter); // create new test set
classifiers
import weka.classifiers.trees.J48; ... String[] options = new String[1]; options[0] = "-U"; // unpruned tree J48 tree = new J48(); // new instance of tree tree.setOptions(options); // set the options tree.buildClassifier(data); // build classifier
// load data ArffLoader loader = new ArffLoader(); loader.setFile(new File("/some/where/data.arff")); Instances structure = loader.getStructure(); structure.setClassIndex(structure.numAttributes() - 1);? // train NaiveBayes NaiveBayesUpdateable nb = new NaiveBayesUpdateable(); nb.buildClassifier(structure); Instance current; while ((current = loader.getNextInstance(structure)) != null) nb.updateClassifier(current);
import weka.classifiers.Evaluation; import java.util.Random; ... Evaluation eval = new Evaluation(newData); eval.crossValidateModel(tree, newData, 10, new Random(1));
import weka.core.Instances; import weka.classifiers.Evaluation; import weka.classifiers.trees.J48; ... Instances train = ... // from somewhere Instances test = ... // from somewhere // train classifier Classifier cls = new J48(); cls.buildClassifier(train); // evaluate classifier and print some statistics Evaluation eval = new Evaluation(train); eval.evaluateModel(cls, test); System.out.println(eval.toSummaryString("\nResults\n======\n", false));
import weka.classifiers.trees.J48; import weka.classifiers.Evaluation; ... String[] options = new String[2]; options[0] = "-t"; options[1] = "/some/where/somefile.arff"; System.out.println(Evaluation.evaluateModel(new J48(), options));
import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileReader; import java.io.FileWriter; import weka.core.Instances; ... // load unlabeled data Instances unlabeled = new Instances( new BufferedReader( new FileReader("/some/where/unlabeled.arff")));? // set class attribute unlabeled.setClassIndex(unlabeled.numAttributes() - 1);? // create copy Instances labeled = new Instances(unlabeled);? // label instances for (int i = 0; i < unlabeled.numInstances(); i++) { double clsLabel = tree.classifyInstance(unlabeled.instance(i)); labeled.instance(i).setClassValue(clsLabel); } // save labeled data BufferedWriter writer = new BufferedWriter( new FileWriter("/some/where/labeled.arff")); writer.write(labeled.toString()); writer.newLine(); writer.flush(); writer.close();
System.out.println(clsLabel + " -> " + unlabeled.classAttribute().value((int) clsLabel));
clusterers
import weka.clusterers.EM; ... String[] options = new String[2]; options[0] = "-I"; // max. iterations options[1] = "100"; EM clusterer = new EM(); // new instance of clusterer clusterer.setOptions(options); // set the options clusterer.buildClusterer(data); // build the clusterer
// load data ArffLoader loader = new ArffLoader(); loader.setFile(new File("/some/where/data.arff")); Instances structure = loader.getStructure();? // train Cobweb Cobweb cw = new Cobweb(); cw.buildClusterer(structure); Instance current; while ((current = loader.getNextInstance(structure)) != null) cw.updateClusterer(current); cw.updateFinished();
import weka.clusterers.ClusterEvaluation; import weka.clusterers.Clusterer; ... ClusterEvaluation eval = new ClusterEvaluation(); Clusterer clusterer = new EM(); // new clusterer instance, default options clusterer.buildClusterer(data); // build clusterer eval.setClusterer(clusterer); // the cluster to evaluate eval.evaluateClusterer(newData); // data to evaluate the clusterer on System.out.println("# of clusters: " + eval.getNumClusters()); // output # of clusters
import weka.clusterers.ClusterEvaluation; import weka.clusterers.DensityBasedClusterer; import weka.core.Instances; import java.util.Random; ... Instances data = ... // from somewhere DensityBasedClusterer clusterer = new ... // the clusterer to evaluate double logLikelyhood = ClusterEvaluation.crossValidateModel( // cross-validate clusterer, data, 10, // with 10 folds new Random(1)); // and random number generator with seed 1
import weka.clusterers.EM; import weka.clusterers.ClusterEvaluation; ... String[] options = new String[2]; options[0] = "-t"; options[1] = "/some/where/somefile.arff"; System.out.println(ClusterEvaluation.evaluateClusterer(new EM(), options));
= new Instances(new BufferedReader(new FileReader("/some/where/file.arff"))); data.setClassIndex(data.numAttributes() - 1);
filters.unsupervised.attribute.Remove filter = new weka.filters.unsupervised.attribute.Remove(); filter.setAttributeIndices("" + (data.classIndex() + 1)); filter.setInputFormat(data); Instances dataClusterer = Filter.useFilter(data, filter);
= new EM(); // set further options for EM, if necessary... clusterer.buildClusterer(dataClusterer);
= new ClusterEvaluation(); eval.setClusterer(clusterer); eval.evaluateClusterer(data);
System.out.println(eval.clusterResultsToString());
= ... // from somewhere AttributeSelectedClassifier classifier = new AttributeSelectedClassifier(); CfsSubsetEval eval = new CfsSubsetEval(); GreedyStepwise search = new GreedyStepwise(); search.setSearchBackwards(true); J48 base = new J48(); classifier.setClassifier(base); classifier.setEvaluator(eval); classifier.setSearch(search); // 10-fold cross-validation Evaluation evaluation = new Evaluation(data); evaluation.crossValidateModel(classifier, data, 10, new Random(1)); System.out.println(evaluation.toSummaryString());
= ... // from somewhere AttributeSelection filter = new AttributeSelection(); // package weka.filters.supervised.attribute! CfsSubsetEval eval = new CfsSubsetEval(); GreedyStepwise search = new GreedyStepwise(); search.setSearchBackwards(true); filter.setEvaluator(eval); filter.setSearch(search); filter.setInputFormat(data); // generate new data Instances newData = Filter.useFilter(data, filter); System.out.println(newData);
= ... // from somewhere AttributeSelection attsel = new AttributeSelection(); // package weka.attributeSelection! CfsSubsetEval eval = new CfsSubsetEval(); GreedyStepwise search = new GreedyStepwise(); search.setSearchBackwards(true); attsel.setEvaluator(eval); attsel.setSearch(search); attsel.SelectAttributes(data); // obtain the attribute indices that were selected int[] indices = attsel.selectedAttributes(); System.out.println(Utils.arrayToString(indices));
?