<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" xmlns="http://www.loc.gov/MARC21/slim">
 <record>
  <leader>00000ctm a22000003a 4500</leader>
  <controlfield tag="001">UP-8027390931316196214</controlfield>
  <controlfield tag="003">Buklod</controlfield>
  <controlfield tag="005">20090825110100.0</controlfield>
  <controlfield tag="006">a     r    |||| u|</controlfield>
  <controlfield tag="007">ta</controlfield>
  <controlfield tag="008">090825s        xx     d     r    |||| u|</controlfield>
  <datafield tag="035" ind1=" " ind2=" ">
   <subfield code="a">(iLib)UPMIN-00003211650</subfield>
  </datafield>
  <datafield tag="040" ind1=" " ind2=" ">
   <subfield code="a">DLC</subfield>
   <subfield code="d">LC</subfield>
   <subfield code="d">upmin</subfield>
  </datafield>
  <datafield tag="041" ind1=" " ind2=" ">
   <subfield code="a">eng</subfield>
  </datafield>
  <datafield tag="090" ind1=" " ind2="0">
   <subfield code="a">LG993.5 2008</subfield>
   <subfield code="b">A64 P44</subfield>
  </datafield>
  <datafield tag="100" ind1="1" ind2=" ">
   <subfield code="a">Pelpinosas, Frank B.</subfield>
  </datafield>
  <datafield tag="245" ind1="0" ind2="0">
   <subfield code="a">Clustering of datasets with missing values using principal feature analysis as a feature selection tool</subfield>
   <subfield code="c">Frank B. Pelpinosas.</subfield>
  </datafield>
  <datafield tag="264" ind1=" " ind2="1">
   <subfield code="c">2008</subfield>
  </datafield>
  <datafield tag="300" ind1=" " ind2=" ">
   <subfield code="a">51 leaves.</subfield>
  </datafield>
  <datafield tag="502" ind1=" " ind2=" ">
   <subfield code="a">Thesis (BS Applied Mathematics) -- University of the Philippines Mindanao, 2008</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
   <subfield code="a">One of the most prevalent problems</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
   <subfield code="a">One of the most prevalent problems in clustering is the presence of redundant and irrelevant features, which could damage and misguide the clustering results of the data. Principal Feature Analysis is used as a filter feature selection tool to reduce highly dimensional datasets into smaller dimensions yet preserving the original structure of the data. The problem is worsened with the presence of missing values in the data. The study provides a comparison of the clustering results of the complete (base) datasets and imputed datasets using K-NN and mean imputation across three levels of degradation. The features retained by PFA were used to cluster the samples and were assessed using the Adjusted Rand Index. Results showed that PFA indeed had reduced the dimensions of the data. Principal Feature Analysis also can hardly drop some features even when changes in the levels of degradation appear. Both feature retention and cluster recovery were negatively affected by the number of missing values in the data in all the comparison</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
   <subfield code="a">Clustering.</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
   <subfield code="a">Feature selections.</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
   <subfield code="a">Missing values.</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
   <subfield code="a">PFA Principal feature analysis.</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
   <subfield code="a">Datasets.</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
   <subfield code="a">Adjusted Rand Index.</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
   <subfield code="a">MCAR (Missing completely at random)</subfield>
  </datafield>
  <datafield tag="658" ind1=" " ind2=" ">
   <subfield code="a">Undergraduate Thesis</subfield>
   <subfield code="c">AMAT200</subfield>
   <subfield code="2">BSAM.</subfield>
  </datafield>
  <datafield tag="905" ind1=" " ind2=" ">
   <subfield code="a">FI</subfield>
  </datafield>
  <datafield tag="905" ind1=" " ind2=" ">
   <subfield code="a">UP</subfield>
  </datafield>
  <datafield tag="852" ind1="0" ind2=" ">
   <subfield code="a">UPMIN</subfield>
   <subfield code="b">UPMIN-MAIN</subfield>
   <subfield code="h">LG993.5 2008</subfield>
   <subfield code="i">A64 P44</subfield>
  </datafield>
  <datafield tag="942" ind1=" " ind2=" ">
   <subfield code="a">Thesis</subfield>
  </datafield>
 </record>
</collection>
