<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Dive into Data Science</title>
    <link>https://dive-into-ds.tistory.com/</link>
    <description></description>
    <language>ko</language>
    <pubDate>Wed, 8 Apr 2026 14:43:59 +0900</pubDate>
    <generator>TISTORY</generator>
    <ttl>100</ttl>
    <managingEditor>고슴군</managingEditor>
    <item>
      <title>RNN Auto-Encoder (RAE)</title>
      <link>https://dive-into-ds.tistory.com/122</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;RNN Auto-Encoder (RAE) : 순차 데이터를 복원하는 오토 인코더&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;978&quot; data-origin-height=&quot;485&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/MRvir/btrN5GloWQA/4dRFWtnVVfkOHKlf0cYGX0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/MRvir/btrN5GloWQA/4dRFWtnVVfkOHKlf0cYGX0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/MRvir/btrN5GloWQA/4dRFWtnVVfkOHKlf0cYGX0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMRvir%2FbtrN5GloWQA%2F4dRFWtnVVfkOHKlf0cYGX0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;719&quot; height=&quot;357&quot; data-origin-width=&quot;978&quot; data-origin-height=&quot;485&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1023&quot; data-origin-height=&quot;567&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/YCCyV/btrN5jxdezb/OrGpAwWxU485A0p0nTObOk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/YCCyV/btrN5jxdezb/OrGpAwWxU485A0p0nTObOk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/YCCyV/btrN5jxdezb/OrGpAwWxU485A0p0nTObOk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FYCCyV%2FbtrN5jxdezb%2FOrGpAwWxU485A0p0nTObOk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1023&quot; height=&quot;567&quot; data-origin-width=&quot;1023&quot; data-origin-height=&quot;567&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;950&quot; data-origin-height=&quot;567&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/oiRzt/btrN78V3c4F/6VsdsRxP69oRcpAhuVOiUk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/oiRzt/btrN78V3c4F/6VsdsRxP69oRcpAhuVOiUk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/oiRzt/btrN78V3c4F/6VsdsRxP69oRcpAhuVOiUk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FoiRzt%2FbtrN78V3c4F%2F6VsdsRxP69oRcpAhuVOiUk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;803&quot; height=&quot;479&quot; data-origin-width=&quot;950&quot; data-origin-height=&quot;567&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;</description>
      <category>Deep Learning</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/122</guid>
      <comments>https://dive-into-ds.tistory.com/122#entry122comment</comments>
      <pubDate>Sat, 8 Oct 2022 14:22:52 +0900</pubDate>
    </item>
    <item>
      <title>Attention in RNN (Bahdanau, Luong)</title>
      <link>https://dive-into-ds.tistory.com/121</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Attention
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;어느 시점 정보가 RNN의 최종 출력 값에 영향을 미치는지를 알려줄 수 있는 메커니즘&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;960&quot; data-origin-height=&quot;654&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/tKqAQ/btrN3ngazOS/dB1G5XkBebqhtIdQM0lph1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/tKqAQ/btrN3ngazOS/dB1G5XkBebqhtIdQM0lph1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/tKqAQ/btrN3ngazOS/dB1G5XkBebqhtIdQM0lph1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FtKqAQ%2FbtrN3ngazOS%2FdB1G5XkBebqhtIdQM0lph1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;616&quot; height=&quot;420&quot; data-origin-width=&quot;960&quot; data-origin-height=&quot;654&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;①&amp;nbsp;각각의 hidden state가 어느 정도의 중요도를 갖는지(=attention score) 산출하는 NN&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;② &lt;span style=&quot;background-color: #ffffff; color: #4d5156;&quot;&gt;&amp;alpha;&lt;/span&gt; = attention score&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;③ 기존 hidden state를 그대로 가져오는 게 아니라, 현재 시점의 output을 만드는 데 중요한 역할을 하는 시점이 어느 시점인지를 scalar 값으로 산출하고, 이렇게 산출된 scalar 값(&lt;span style=&quot;background-color: #ffffff; color: #4d5156;&quot;&gt;&amp;alpha;&lt;/span&gt;)과 hidden state들을 선형 결합해서 하나의 vector로 표현함 &amp;rarr;&amp;nbsp; context vector&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1508&quot; data-origin-height=&quot;794&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bbjxI6/btrN4Enpx0P/zldWXjecE0BVVgUnOfHPu1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bbjxI6/btrN4Enpx0P/zldWXjecE0BVVgUnOfHPu1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bbjxI6/btrN4Enpx0P/zldWXjecE0BVVgUnOfHPu1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbbjxI6%2FbtrN4Enpx0P%2FzldWXjecE0BVVgUnOfHPu1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1508&quot; height=&quot;794&quot; data-origin-width=&quot;1508&quot; data-origin-height=&quot;794&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Luong attention은 attention score를 따로 학습하지는 않지만, Bahdanau attention 의 성능과 크게 차이가 나지 않기 때문에, Luong attention을 조금 더 효율적으로 사용할 수 있다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Vanilla RNN (LSTM, GRU)&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1017&quot; data-origin-height=&quot;506&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bnzfhC/btrN23bgLsQ/nimow8dDvpenzy8gl1edL0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bnzfhC/btrN23bgLsQ/nimow8dDvpenzy8gl1edL0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bnzfhC/btrN23bgLsQ/nimow8dDvpenzy8gl1edL0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbnzfhC%2FbtrN23bgLsQ%2Fnimow8dDvpenzy8gl1edL0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;667&quot; height=&quot;332&quot; data-origin-width=&quot;1017&quot; data-origin-height=&quot;506&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Luong attention
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Attention 이 있으면 context vector를 만들어낸다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1019&quot; data-origin-height=&quot;820&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dk1TWa/btrN3ntI5tc/b99bacvlMLZaUPn0WKVtO0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dk1TWa/btrN3ntI5tc/b99bacvlMLZaUPn0WKVtO0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dk1TWa/btrN3ntI5tc/b99bacvlMLZaUPn0WKVtO0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fdk1TWa%2FbtrN3ntI5tc%2Fb99bacvlMLZaUPn0WKVtO0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;570&quot; height=&quot;459&quot; data-origin-width=&quot;1019&quot; data-origin-height=&quot;820&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;1. Attention score (&lt;span style=&quot;background-color: #ffffff; color: #4d5156;&quot;&gt;&amp;alpha;&lt;/span&gt;)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- score(h*, hi) 의 계산을, Bahdanau attention에서는 학습하지만, Luong attention 에서는 logic으로 계산한다.&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 가장 간단한 logic은 벡터의 내적. 벡터의 내적이 높다는 것은 두 벡터의 유사성이 높다는 것.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1419&quot; data-origin-height=&quot;843&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/MUwb6/btrN3eKsFJI/fDpK7aIe7X50FalksdJvFk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/MUwb6/btrN3eKsFJI/fDpK7aIe7X50FalksdJvFk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/MUwb6/btrN3eKsFJI/fDpK7aIe7X50FalksdJvFk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMUwb6%2FbtrN3eKsFJI%2FfDpK7aIe7X50FalksdJvFk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1419&quot; height=&quot;843&quot; data-origin-width=&quot;1419&quot; data-origin-height=&quot;843&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;2. Context vector&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1356&quot; data-origin-height=&quot;821&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bFmVPW/btrN38P0GHo/1bNt7kVK5t1mlASB83hq80/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bFmVPW/btrN38P0GHo/1bNt7kVK5t1mlASB83hq80/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bFmVPW/btrN38P0GHo/1bNt7kVK5t1mlASB83hq80/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbFmVPW%2FbtrN38P0GHo%2F1bNt7kVK5t1mlASB83hq80%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1356&quot; height=&quot;821&quot; data-origin-width=&quot;1356&quot; data-origin-height=&quot;821&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;3. Attention이 고려된 hidden state&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- h* : decoder 단에서의 hidden state가 지금 하나라고 가정한 것&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1410&quot; data-origin-height=&quot;809&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/CtpN9/btrN40qiCJp/qSNE4rVPEM7gEcg6QY6e51/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/CtpN9/btrN40qiCJp/qSNE4rVPEM7gEcg6QY6e51/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/CtpN9/btrN40qiCJp/qSNE4rVPEM7gEcg6QY6e51/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FCtpN9%2FbtrN40qiCJp%2FqSNE4rVPEM7gEcg6QY6e51%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;766&quot; height=&quot;439&quot; data-origin-width=&quot;1410&quot; data-origin-height=&quot;809&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;4. output 계산&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 분류 문제일 경우 softmax 취함&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1386&quot; data-origin-height=&quot;818&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/s12nR/btrN399dMIZ/AcLWG3gOPz67AtAAKpnA6K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/s12nR/btrN399dMIZ/AcLWG3gOPz67AtAAKpnA6K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/s12nR/btrN399dMIZ/AcLWG3gOPz67AtAAKpnA6K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fs12nR%2FbtrN399dMIZ%2FAcLWG3gOPz67AtAAKpnA6K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;747&quot; height=&quot;441&quot; data-origin-width=&quot;1386&quot; data-origin-height=&quot;818&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Decoder의 단계마다, &lt;span style=&quot;background-color: #ffffff; color: #4d5156;&quot;&gt;&amp;alpha; 값이 다르게 산출된다.&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1470&quot; data-origin-height=&quot;609&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/v9GqR/btrN4ZdTa1t/wnsUquL6LUoKcbSpKG4kA0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/v9GqR/btrN4ZdTa1t/wnsUquL6LUoKcbSpKG4kA0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/v9GqR/btrN4ZdTa1t/wnsUquL6LUoKcbSpKG4kA0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fv9GqR%2FbtrN4ZdTa1t%2FwnsUquL6LUoKcbSpKG4kA0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1470&quot; height=&quot;609&quot; data-origin-width=&quot;1470&quot; data-origin-height=&quot;609&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1417&quot; data-origin-height=&quot;769&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ciM0pT/btrN41bJuAl/U8HIi8g3lWOkVnZJ71kKoK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ciM0pT/btrN41bJuAl/U8HIi8g3lWOkVnZJ71kKoK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ciM0pT/btrN41bJuAl/U8HIi8g3lWOkVnZJ71kKoK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FciM0pT%2FbtrN41bJuAl%2FU8HIi8g3lWOkVnZJ71kKoK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1417&quot; height=&quot;769&quot; data-origin-width=&quot;1417&quot; data-origin-height=&quot;769&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;예시 : Blood pressure 예측&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- 3개의 센서값을 이용(multivariate)해서 5초씩 window로 묶어서 예측한다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- input data는 3개의 feature를 갖고 있는 것이고, 길이는 5초일 것&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1185&quot; data-origin-height=&quot;679&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/xgLDR/btrN4af6hZF/eyuG8t6oJKwzc6Aa7moxc0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/xgLDR/btrN4af6hZF/eyuG8t6oJKwzc6Aa7moxc0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/xgLDR/btrN4af6hZF/eyuG8t6oJKwzc6Aa7moxc0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FxgLDR%2FbtrN4af6hZF%2FeyuG8t6oJKwzc6Aa7moxc0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;788&quot; height=&quot;452&quot; data-origin-width=&quot;1185&quot; data-origin-height=&quot;679&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;- blood pressure가 갑자기 높아졌을 때, attention score로 shading을 했을 때 영역이 다른 것을 볼 수 있다. 즉, 이러한 영역 안에서의 파동이나 패턴이 일반적인 상황과 어떻게 다른지 해석해볼 수 있는 것&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1354&quot; data-origin-height=&quot;763&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/db5OsJ/btrN3VpTRkx/T970OSt4G7xSTFH38M0zQk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/db5OsJ/btrN3VpTRkx/T970OSt4G7xSTFH38M0zQk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/db5OsJ/btrN3VpTRkx/T970OSt4G7xSTFH38M0zQk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fdb5OsJ%2FbtrN3VpTRkx%2FT970OSt4G7xSTFH38M0zQk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1354&quot; height=&quot;763&quot; data-origin-width=&quot;1354&quot; data-origin-height=&quot;763&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;</description>
      <category>Deep Learning/논문 리뷰</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/121</guid>
      <comments>https://dive-into-ds.tistory.com/121#entry121comment</comments>
      <pubDate>Fri, 7 Oct 2022 19:17:40 +0900</pubDate>
    </item>
    <item>
      <title>Ensemble model - CatBoost</title>
      <link>https://dive-into-ds.tistory.com/120</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Gradient Boosting Model (GBM)의 Background&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;마지막 행 : negative gradient는 잔차를 의미한다. 잔차를 target value로 두고 squared loss를 최소화하는 함수를 찾아내는 것.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1607&quot; data-origin-height=&quot;789&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/F1Bcw/btrHZC3rgoP/SVuSKzok2xnmoT8KAdmbgk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/F1Bcw/btrHZC3rgoP/SVuSKzok2xnmoT8KAdmbgk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/F1Bcw/btrHZC3rgoP/SVuSKzok2xnmoT8KAdmbgk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FF1Bcw%2FbtrHZC3rgoP%2FSVuSKzok2xnmoT8KAdmbgk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1607&quot; height=&quot;789&quot; data-origin-width=&quot;1607&quot; data-origin-height=&quot;789&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1131&quot; data-origin-height=&quot;808&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ceCuRp/btrHZePErGh/FFznGl1EI41wgA377u2tA0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ceCuRp/btrHZePErGh/FFznGl1EI41wgA377u2tA0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ceCuRp/btrHZePErGh/FFznGl1EI41wgA377u2tA0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FceCuRp%2FbtrHZePErGh%2FFFznGl1EI41wgA377u2tA0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1131&quot; height=&quot;808&quot; data-origin-width=&quot;1131&quot; data-origin-height=&quot;808&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;train data에 대한 t-1 시점의 함수값과 test data에 대한 t-1 시점의 함수값이 shift된다, 즉 서로 다르다는 것.
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;같아야 통계적으로 좋은 모델이다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1340&quot; data-origin-height=&quot;718&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bAGqyN/btrHWu6QFyZ/OwRJHmnXKoXjHdNrPMkfy1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bAGqyN/btrHWu6QFyZ/OwRJHmnXKoXjHdNrPMkfy1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bAGqyN/btrHWu6QFyZ/OwRJHmnXKoXjHdNrPMkfy1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbAGqyN%2FbtrHWu6QFyZ%2FOwRJHmnXKoXjHdNrPMkfy1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1340&quot; height=&quot;718&quot; data-origin-width=&quot;1340&quot; data-origin-height=&quot;718&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Gradient Boosting의 두 번째 issue&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1560&quot; data-origin-height=&quot;807&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/Si6JU/btrHYYM7jRU/8WMVka7hR1W4lCngQUEC5K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/Si6JU/btrHYYM7jRU/8WMVka7hR1W4lCngQUEC5K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/Si6JU/btrHYYM7jRU/8WMVka7hR1W4lCngQUEC5K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FSi6JU%2FbtrHYYM7jRU%2F8WMVka7hR1W4lCngQUEC5K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1560&quot; height=&quot;807&quot; data-origin-width=&quot;1560&quot; data-origin-height=&quot;807&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1383&quot; data-origin-height=&quot;754&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bEZYX4/btrHYKhfKBI/Kkog6NsXfaMdlGLFBKs7f1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bEZYX4/btrHYKhfKBI/Kkog6NsXfaMdlGLFBKs7f1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bEZYX4/btrHYKhfKBI/Kkog6NsXfaMdlGLFBKs7f1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbEZYX4%2FbtrHYKhfKBI%2FKkog6NsXfaMdlGLFBKs7f1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1383&quot; height=&quot;754&quot; data-origin-width=&quot;1383&quot; data-origin-height=&quot;754&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Ordered TS&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1312&quot; data-origin-height=&quot;764&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/A5kCN/btrHVBkV3CM/04KADLs2E3IHMOtT6RYVYK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/A5kCN/btrHVBkV3CM/04KADLs2E3IHMOtT6RYVYK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/A5kCN/btrHVBkV3CM/04KADLs2E3IHMOtT6RYVYK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FA5kCN%2FbtrHVBkV3CM%2F04KADLs2E3IHMOtT6RYVYK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1312&quot; height=&quot;764&quot; data-origin-width=&quot;1312&quot; data-origin-height=&quot;764&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1368&quot; data-origin-height=&quot;735&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bMIVAn/btrHZDVMdIF/obVFkTjBRI2sgKO7IYXrxK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bMIVAn/btrHZDVMdIF/obVFkTjBRI2sgKO7IYXrxK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bMIVAn/btrHZDVMdIF/obVFkTjBRI2sgKO7IYXrxK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbMIVAn%2FbtrHZDVMdIF%2FobVFkTjBRI2sgKO7IYXrxK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1368&quot; height=&quot;735&quot; data-origin-width=&quot;1368&quot; data-origin-height=&quot;735&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;만약, categorical 변수의 값이 c 인게 1개 있으면, TS로 계산하면 1이 된다
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;이 경우를 우려하는 거다. outlier로 c가 1개 들어왔는데, TS로 계산하면 1의 값을 가지게 된다.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;그래서 a와 p라는 파라미터를 사용하여 smoothing 시키게 된다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1603&quot; data-origin-height=&quot;598&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/Mly6c/btrHXUEvxJT/EJRtCHPk03KJiafkjHuY9k/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/Mly6c/btrHXUEvxJT/EJRtCHPk03KJiafkjHuY9k/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/Mly6c/btrHXUEvxJT/EJRtCHPk03KJiafkjHuY9k/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMly6c%2FbtrHXUEvxJT%2FEJRtCHPk03KJiafkjHuY9k%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1603&quot; height=&quot;598&quot; data-origin-width=&quot;1603&quot; data-origin-height=&quot;598&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1381&quot; data-origin-height=&quot;768&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/k2t75/btrH1atgygf/3kH0wLMASmeFXUV9ccd5Xk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/k2t75/btrH1atgygf/3kH0wLMASmeFXUV9ccd5Xk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/k2t75/btrH1atgygf/3kH0wLMASmeFXUV9ccd5Xk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fk2t75%2FbtrH1atgygf%2F3kH0wLMASmeFXUV9ccd5Xk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1381&quot; height=&quot;768&quot; data-origin-width=&quot;1381&quot; data-origin-height=&quot;768&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1344&quot; data-origin-height=&quot;765&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bip65x/btrH1A6jsTN/2oAVdEDIIfJrR2TdDfKjxK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bip65x/btrH1A6jsTN/2oAVdEDIIfJrR2TdDfKjxK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bip65x/btrH1A6jsTN/2oAVdEDIIfJrR2TdDfKjxK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fbip65x%2FbtrH1A6jsTN%2F2oAVdEDIIfJrR2TdDfKjxK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1344&quot; height=&quot;765&quot; data-origin-width=&quot;1344&quot; data-origin-height=&quot;765&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1313&quot; data-origin-height=&quot;692&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bciHbX/btrH5e2gGCK/TKKeIRbC2qfntGQmjCdhe0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bciHbX/btrH5e2gGCK/TKKeIRbC2qfntGQmjCdhe0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bciHbX/btrH5e2gGCK/TKKeIRbC2qfntGQmjCdhe0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbciHbX%2FbtrH5e2gGCK%2FTKKeIRbC2qfntGQmjCdhe0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1313&quot; height=&quot;692&quot; data-origin-width=&quot;1313&quot; data-origin-height=&quot;692&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1256&quot; data-origin-height=&quot;668&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/MCWAI/btrH0cSVWfE/k9PYxsP92tC4uD4h2uztDk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/MCWAI/btrH0cSVWfE/k9PYxsP92tC4uD4h2uztDk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/MCWAI/btrH0cSVWfE/k9PYxsP92tC4uD4h2uztDk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMCWAI%2FbtrH0cSVWfE%2Fk9PYxsP92tC4uD4h2uztDk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1256&quot; height=&quot;668&quot; data-origin-width=&quot;1256&quot; data-origin-height=&quot;668&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;이렇게 계산한 값 조차 target leakage를 피할 수는 없다.&amp;nbsp;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;이 외에도 target leakage를 피할 수 있는 기법에 대해 여러 제안이 있었지만 (강의에서는 설명함), 결국 target leakage를 피할 수 없었다.&lt;/li&gt;
&lt;li&gt;이러한 약점을 보완하기 위해, catboost는 ordered TS를 사용한다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1497&quot; data-origin-height=&quot;659&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bJKDBO/btrH5dChMot/FVAOdBzi7KCQCXjeUxaLW0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bJKDBO/btrH5dChMot/FVAOdBzi7KCQCXjeUxaLW0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bJKDBO/btrH5dChMot/FVAOdBzi7KCQCXjeUxaLW0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbJKDBO%2FbtrH5dChMot%2FFVAOdBzi7KCQCXjeUxaLW0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1497&quot; height=&quot;659&quot; data-origin-width=&quot;1497&quot; data-origin-height=&quot;659&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Ordered TS
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;인공적인 시간을 도입해서, 인공적인 시간 순서를 가지고 와서 이것을 바탕으로 앞서서 존재하는 객체들의 값을 활용하여 계산한다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1338&quot; data-origin-height=&quot;783&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dP1uId/btrH0bmg6xF/FcLvkEQ3S0MI2KFsscdn3K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dP1uId/btrH0bmg6xF/FcLvkEQ3S0MI2KFsscdn3K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dP1uId/btrH0bmg6xF/FcLvkEQ3S0MI2KFsscdn3K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdP1uId%2FbtrH0bmg6xF%2FFcLvkEQ3S0MI2KFsscdn3K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1338&quot; height=&quot;783&quot; data-origin-width=&quot;1338&quot; data-origin-height=&quot;783&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Conditional shift를 방지하기 위해 ordered boosting을 사용한다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1574&quot; data-origin-height=&quot;723&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bbOrC0/btrH1CiL4cf/yXmh95kEqcIYUr3lzZxIo1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bbOrC0/btrH1CiL4cf/yXmh95kEqcIYUr3lzZxIo1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bbOrC0/btrH1CiL4cf/yXmh95kEqcIYUr3lzZxIo1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbbOrC0%2FbtrH1CiL4cf%2FyXmh95kEqcIYUr3lzZxIo1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1574&quot; height=&quot;723&quot; data-origin-width=&quot;1574&quot; data-origin-height=&quot;723&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1143&quot; data-origin-height=&quot;785&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dIUdu6/btrHZ6ZbJzK/acArmQ7c6rgyqE7tdBvAZ0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dIUdu6/btrHZ6ZbJzK/acArmQ7c6rgyqE7tdBvAZ0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dIUdu6/btrHZ6ZbJzK/acArmQ7c6rgyqE7tdBvAZ0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdIUdu6%2FbtrHZ6ZbJzK%2FacArmQ7c6rgyqE7tdBvAZ0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1143&quot; height=&quot;785&quot; data-origin-width=&quot;1143&quot; data-origin-height=&quot;785&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1354&quot; data-origin-height=&quot;788&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cBzi3w/btrH0RgskNU/mjV07qasePplcxYQ5LKbxK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cBzi3w/btrH0RgskNU/mjV07qasePplcxYQ5LKbxK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cBzi3w/btrH0RgskNU/mjV07qasePplcxYQ5LKbxK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcBzi3w%2FbtrH0RgskNU%2FmjV07qasePplcxYQ5LKbxK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1354&quot; height=&quot;788&quot; data-origin-width=&quot;1354&quot; data-origin-height=&quot;788&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;XGBoost, LGBM, Catboost 중 상황에 따라서 최적의 모델이 다르기 때문에, 이 세 가지를 모두 자유롭게 활용할 수 있는 상태로 본인을 준비시키는게 좋은 전략이다.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Catboost 장단점&amp;nbsp;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;a href=&quot;https://velog.io/@tjddls321/CatBoost&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot;&gt;https://velog.io/@tjddls321/CatBoost&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&quot;catboost란&quot; data-ke-size=&quot;size26&quot;&gt;[CatBoost란?]&lt;/h2&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;GBM의 치명적인 문제점 중 하나로 과적합 문제가 존재합니다. 이 과적합 문제를 해결하면서 동시에 기존 GBM계열의 알고리즘인 XGBoost, LightGBM 알고리즘보다 학습 속도를 개선하는 장점을 앞세워 개발되었습니다.
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;GBM은 최초의 데이터를 학습할 때만 예측값을 기반으로 하고 그 이후의 데이터를 학습할 때는 예측값을 활용해서 계산한 잔차에만 포커스를 맞추어서 학습하게 됩니다.&lt;/li&gt;
&lt;li&gt;하지만 이렇게 잔차에만 포커스를 맞추어 학습하는 것이 매우 이상적이라고 보일지도 모르겠지만 모델이 본적 없는 데이터에는 예측을 잘 하지 못하는 과적합(Overfitting) 문제를 유발할 가능성이 매우 높다는 것이 치명적인 단점입니다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;범주형 변수의 예측모델에 최적화된 모델입니다.&lt;/li&gt;
&lt;li&gt;기존의 그래디언트 부스팅 알고리즘을 조작하여 타겟 누수(target leakage)를 개선합니다.
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;target leakage는 예측 시점에서 사용할 수 없는 데이터가 데이터셋에 포함되는 오류를 말합니다.&lt;/li&gt;
&lt;li&gt;즉, 모델이 독립변수들인 x만을 활용하여 종속변수인 y를 예측해야 하는데, y에 대한 정보가 x에 포함되어 있는 경우를 말합니다.&lt;/li&gt;
&lt;li&gt;기존의 그래디언트 부스팅 방법들은 손실함수를 target value에 대해 편미분한 그래디언트 값을 활용하기에 좋은 아이디어 같지만, target value를 활용하여 생기는 target leakage로 인해 training/test 데이터 셋의 output의 분포에 차이가 생기게 되고 오버피팅을 발생시킵니다.&lt;/li&gt;
&lt;li&gt;즉, 다음 스텝의 트리를 만들 때, 이전에 사용했던 데이터를 다시 재사용하여 과적합이 쉽게 되었습니다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;XGBoost, LightGBM이 Hyper-parameter에 따라 성능이 달라지는 민감한 문제를 해결하는 것에도 초점을 맞추었습니다.&lt;/li&gt;
&lt;/ul&gt;
&lt;h2 id=&quot;catboost의-장단점&quot; data-ke-size=&quot;size26&quot;&gt;[Catboost의 장단점]&lt;/h2&gt;
&lt;h3 id=&quot;1-장점&quot; data-ke-size=&quot;size23&quot;&gt;1. 장점&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;다른 GBM에 비해 overfitting이 적다.&lt;/li&gt;
&lt;li&gt;범주형 변수에 대해 특정 인코딩 방식으로 인하여 모델의 정확도와 속도가 높습니다.&lt;/li&gt;
&lt;li&gt;One-hot Encoding, Label Encoding 등 encoding 작업을 하지 않고도 그대로 모델의 input으로 사용할 수 있습니다.&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 id=&quot;2-단점&quot; data-ke-size=&quot;size23&quot;&gt;2. 단점&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;missing data를 처리해주지 않습니다.&lt;b&gt; (xgboost는 default로 missing value 처리해준다)&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;Sparse Matrix 즉, 결측치가 매우 많은 데이터셋에는 부적합한 모델입니다.&amp;nbsp;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;예를 들어, 추천시스템에 자주 사용되는 사용자-아이템 행렬 데이터를 살펴보면 보통 Sparse한 형태로 이루어져 있습니다. 만약 이러한 데이터를 활용하려면 Sparse한 특성이 없도록 Embedding을 적용한다던지 등 데이터를 변형한 후 CatBoost에 활용하는 것이 적합할 것입니다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;데이터 대부분이 수치형 변수인 경우, LightGBM보다 학습 속도가 느립니다. (즉 대부분이 범주형 변수인 경우 사용)&lt;/li&gt;
&lt;/ul&gt;</description>
      <category>Machine Learning/Algorithm</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/120</guid>
      <comments>https://dive-into-ds.tistory.com/120#entry120comment</comments>
      <pubDate>Fri, 22 Jul 2022 16:47:29 +0900</pubDate>
    </item>
    <item>
      <title>Ensemble model - LightGBM</title>
      <link>https://dive-into-ds.tistory.com/119</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Microsoft 에서 개발 되었고, XGBoost 보다 나중에 제안된 모델&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Category 변수가 많은 데이터에 대해서 상당한 효과를 보는 알고리즘이다. 데이터의 특성을 잘 살펴서, 범주형 변수가 많을 경우 LightGBM을 우선순위로 염두에 두는 것도 좋다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1569&quot; data-origin-height=&quot;765&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/20vXp/btrHVhsZZwY/JQ304aQ0WFZfnIUqf10KWK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/20vXp/btrHVhsZZwY/JQ304aQ0WFZfnIUqf10KWK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/20vXp/btrHVhsZZwY/JQ304aQ0WFZfnIUqf10KWK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F20vXp%2FbtrHVhsZZwY%2FJQ304aQ0WFZfnIUqf10KWK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1569&quot; height=&quot;765&quot; data-origin-width=&quot;1569&quot; data-origin-height=&quot;765&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;그래서 XGBoost에서도 모든 데이터를 스캔하지 않고, 부분부분으로 나누어서 스캔했었다.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Gradient-based One-Side Sampling (GOSS)&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;상대적으로 gradient가 클 수록, 많이 틀렸다는 의미이니까 걔네들에 대한 정보를 많이 반영해야함&lt;/li&gt;
&lt;li&gt;gradient가 작을수록 틀린 정도가 크지 않기 때문에 해당 데이터 포인트들은 다음 모델을 구축할 때 덜 중요하게 생각되어도 된다.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;따라서, Large gradient를 가진 데이터는 keep하고, small gradient를 가지면 randomly drop하자.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;즉, 스캔을 해야하는 데이터의 총량을 좀 줄이자!&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Exclusive Feature Bundling (EFB)&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;변수 길이를 줄이자&lt;/li&gt;
&lt;li&gt;&lt;b&gt;모든 변수에 대해서 모든 개체를 scan해서 split point를 찾아야 하니, GOSS는 관측치 수 줄이는거고, EFB는 변수의 수 줄이는거다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;sparse한 변수는 좀 줄이자!&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;GOSS&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1285&quot; data-origin-height=&quot;753&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/XH7y6/btrHXVbDLYk/3jEGozA0L1zeMBTuwF9N0K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/XH7y6/btrHXVbDLYk/3jEGozA0L1zeMBTuwF9N0K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/XH7y6/btrHXVbDLYk/3jEGozA0L1zeMBTuwF9N0K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FXH7y6%2FbtrHXVbDLYk%2F3jEGozA0L1zeMBTuwF9N0K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1285&quot; height=&quot;753&quot; data-origin-width=&quot;1285&quot; data-origin-height=&quot;753&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1438&quot; data-origin-height=&quot;757&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/KR3rt/btrHVAlq7qR/0tSyarwEkNaXgJg3dJIyvk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/KR3rt/btrHVAlq7qR/0tSyarwEkNaXgJg3dJIyvk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/KR3rt/btrHVAlq7qR/0tSyarwEkNaXgJg3dJIyvk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKR3rt%2FbtrHVAlq7qR%2F0tSyarwEkNaXgJg3dJIyvk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1438&quot; height=&quot;757&quot; data-origin-width=&quot;1438&quot; data-origin-height=&quot;757&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;알파(a)와 베타(b) 하이퍼파라미터 사용한다.
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;(1-a)/b 값이 1보다 크도록 조절한다.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;a가 작을수록, b가 작을수록 실제 탐색하는 데이터의 수가 줄어들기 때문에 효율성은 증가하지만 정확도에 대해서 손실을 볼 수 있는 잠재적인 risk 가 있다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&amp;nbsp;Top a * 100% 데이터는 모두 남겨놓는다.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;a=0.2, b=0.5, (1-a)/b=1.6&amp;gt;1&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Gradient 상위 20%는 split 포인트 찾는데에 모두 사용하겠다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Gradient 하위 80%에 대해서는, 50%만 sampling 해서 얘네들만 사용해서 split 포인트 찾는데에 사용하겠다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;EFB&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1322&quot; data-origin-height=&quot;562&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/IGpHA/btrHYKHMJ0G/9tJ4nKGpv7fUST0nk1EvN1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/IGpHA/btrHYKHMJ0G/9tJ4nKGpv7fUST0nk1EvN1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/IGpHA/btrHYKHMJ0G/9tJ4nKGpv7fUST0nk1EvN1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FIGpHA%2FbtrHYKHMJ0G%2F9tJ4nKGpv7fUST0nk1EvN1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1322&quot; height=&quot;562&quot; data-origin-width=&quot;1322&quot; data-origin-height=&quot;562&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1346&quot; data-origin-height=&quot;710&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/NQlws/btrHZj36pQ1/32AqrMR2PvvUijvd0jdcP1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/NQlws/btrHZj36pQ1/32AqrMR2PvvUijvd0jdcP1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/NQlws/btrHZj36pQ1/32AqrMR2PvvUijvd0jdcP1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FNQlws%2FbtrHZj36pQ1%2F32AqrMR2PvvUijvd0jdcP1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1346&quot; height=&quot;710&quot; data-origin-width=&quot;1346&quot; data-origin-height=&quot;710&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&amp;nbsp;차례로, 4가지 색깔만 있으면 인접한 노드끼리 다른 색이면서 연결시킬 수 있다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1368&quot; data-origin-height=&quot;663&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/825h9/btrHY5SzfOx/mHehdDYTsFuPq2Ff6UBfk0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/825h9/btrHY5SzfOx/mHehdDYTsFuPq2Ff6UBfk0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/825h9/btrHY5SzfOx/mHehdDYTsFuPq2Ff6UBfk0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F825h9%2FbtrHY5SzfOx%2FmHehdDYTsFuPq2Ff6UBfk0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1368&quot; height=&quot;663&quot; data-origin-width=&quot;1368&quot; data-origin-height=&quot;663&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;둘 다 0이 아니었던 데이터의 수를 계산한다.&lt;/li&gt;
&lt;li&gt;그러고나서, 각 변수마다 그 값을 더해서, 각 변수의 conflict를 계산한다.&amp;nbsp;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;즉, conflict가 작다는 것은, 둘다 0이거나 하나의 값이 0인 데이터가 많다는 뜻이므로 묶을 수 있다.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;conflict가 크면 그 변수는 묶으면 안될 것이다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1165&quot; data-origin-height=&quot;685&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cp9IcB/btrHZDHLoj5/iUUESrCHXO9MDWtpI3btR0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cp9IcB/btrHZDHLoj5/iUUESrCHXO9MDWtpI3btR0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cp9IcB/btrHZDHLoj5/iUUESrCHXO9MDWtpI3btR0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fcp9IcB%2FbtrHZDHLoj5%2FiUUESrCHXO9MDWtpI3btR0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;637&quot; height=&quot;375&quot; data-origin-width=&quot;1165&quot; data-origin-height=&quot;685&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;그래프를 coloring을 하기 위해서 edge를 끊어내는 작업을 한다.&amp;nbsp;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;cut-off=0.2 라면, 2번 이상 conflict가 있다면 잘라내는 것이다.&lt;/li&gt;
&lt;li&gt;즉 conflict가 크면 묶지 말고 잘라내자.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1184&quot; data-origin-height=&quot;681&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/broZHm/btrHY7bOls8/11TGYF3frKc74D6q8WTMh0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/broZHm/btrHY7bOls8/11TGYF3frKc74D6q8WTMh0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/broZHm/btrHY7bOls8/11TGYF3frKc74D6q8WTMh0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbroZHm%2FbtrHY7bOls8%2F11TGYF3frKc74D6q8WTMh0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;686&quot; height=&quot;395&quot; data-origin-width=&quot;1184&quot; data-origin-height=&quot;681&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1174&quot; data-origin-height=&quot;733&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/vOiCj/btrHY6Yf8Pn/rBOVsQBT8jnAqc8rPOag00/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/vOiCj/btrHY6Yf8Pn/rBOVsQBT8jnAqc8rPOag00/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/vOiCj/btrHY6Yf8Pn/rBOVsQBT8jnAqc8rPOag00/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FvOiCj%2FbtrHY6Yf8Pn%2FrBOVsQBT8jnAqc8rPOag00%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;653&quot; height=&quot;408&quot; data-origin-width=&quot;1174&quot; data-origin-height=&quot;733&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;3개 색을 칠했기 때문에, 3개 변수로 bundling된 것&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1258&quot; data-origin-height=&quot;670&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bwUT02/btrHWGTBLOQ/3xl4P8tQKqOuIZYVdU9ZP0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bwUT02/btrHWGTBLOQ/3xl4P8tQKqOuIZYVdU9ZP0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bwUT02/btrHWGTBLOQ/3xl4P8tQKqOuIZYVdU9ZP0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbwUT02%2FbtrHWGTBLOQ%2F3xl4P8tQKqOuIZYVdU9ZP0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1258&quot; height=&quot;670&quot; data-origin-width=&quot;1258&quot; data-origin-height=&quot;670&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;기준이 되는 변수가 가지고 있는 최대값을 두 번째 변수 값에 더해준다.&amp;nbsp;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;이렇게 하다보니, exclusive 하지 않는 경우(둘다 0이거나, 둘다 값 가지지 않는 경우)는 약간의 왜곡이 발생하기 때문에, 성능이 저하될 가능성도 있다. 실제적으로 해봤을 때는 상황에 따라 다르지만 category 데이터가 많을 경우 현실에서 작동 잘하는게 lightgbm이다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1346&quot; data-origin-height=&quot;726&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bmGUte/btrHVJC4tws/3ns1JC9qikzxkrLxq8Z4G0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bmGUte/btrHVJC4tws/3ns1JC9qikzxkrLxq8Z4G0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bmGUte/btrHVJC4tws/3ns1JC9qikzxkrLxq8Z4G0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbmGUte%2FbtrHVJC4tws%2F3ns1JC9qikzxkrLxq8Z4G0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1346&quot; height=&quot;726&quot; data-origin-width=&quot;1346&quot; data-origin-height=&quot;726&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;LightGBM 도 &lt;span&gt;XGBoost 처럼 GBM의 대표적인 variant 중 하나이다.&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span&gt;데이터에 따라서 속도나 성능 비등비등하다.&amp;nbsp;&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;&lt;span&gt;LightGBM이 더 효과적인 상황은 categorical 변수가 많아서 bundling이 잘 될 수 있는 환경이다.&lt;/span&gt;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Machine Learning/Algorithm</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/119</guid>
      <comments>https://dive-into-ds.tistory.com/119#entry119comment</comments>
      <pubDate>Fri, 22 Jul 2022 14:56:39 +0900</pubDate>
    </item>
    <item>
      <title>Ensemble model - XGBoost</title>
      <link>https://dive-into-ds.tistory.com/118</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;XGBoost : A Scalable Tree Boosting System&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;XGBoost는 Gradient Boosting Machine 인데, 어떻게 하면 제한된 computing resource 하에서 좀더 빠르게 더 많은 데이터를 활용할 수 있을까 고민해낸 technique 이라고 보면 된다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;GBM이 가지고 있는 장점을 다 흡수했기 때문에, XGBoost를 더 많이 사용한다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1390&quot; data-origin-height=&quot;720&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/2zzTb/btrGRvYGpR1/OIXvrwLTlXVJAMVBtge6u1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/2zzTb/btrGRvYGpR1/OIXvrwLTlXVJAMVBtge6u1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/2zzTb/btrGRvYGpR1/OIXvrwLTlXVJAMVBtge6u1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F2zzTb%2FbtrGRvYGpR1%2FOIXvrwLTlXVJAMVBtge6u1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1390&quot; height=&quot;720&quot; data-origin-width=&quot;1390&quot; data-origin-height=&quot;720&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;XGBoost : An optimized version of GBM enabling&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;GBM에 대한 optimized version. 소프트웨어적인 부분 뿐만 아니라 하드웨어적인 부분도 최적화.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1310&quot; data-origin-height=&quot;686&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/IVhS2/btrGRiS4LrC/gXRBbnomCBrCzubntMGBx0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/IVhS2/btrGRiS4LrC/gXRBbnomCBrCzubntMGBx0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/IVhS2/btrGRiS4LrC/gXRBbnomCBrCzubntMGBx0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FIVhS2%2FbtrGRiS4LrC%2FgXRBbnomCBrCzubntMGBx0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1310&quot; height=&quot;686&quot; data-origin-width=&quot;1310&quot; data-origin-height=&quot;686&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Split Finding Algorithm&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1522&quot; data-origin-height=&quot;603&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bYgIEw/btrGRkpRfni/TRYy2YiuTp0F6nQyPKVYNK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bYgIEw/btrGRkpRfni/TRYy2YiuTp0F6nQyPKVYNK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bYgIEw/btrGRkpRfni/TRYy2YiuTp0F6nQyPKVYNK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbYgIEw%2FbtrGRkpRfni%2FTRYy2YiuTp0F6nQyPKVYNK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1522&quot; height=&quot;603&quot; data-origin-width=&quot;1522&quot; data-origin-height=&quot;603&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Decision tree는 위와 같이 각 변수를 오름차순 정렬한 후에, 하나씩 분기를 지정해보면서 Information gain을 계산한다. 그래서 가장 information gain이 큰 분기를 찾는다.
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;항상 최적의 분기점 찾아냄. 하지만 계산복잡도 매우 높음&lt;/li&gt;
&lt;li&gt;위의 단점들을 극복하기 위해 XGBoost는 Approximate algorithm 사용&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1277&quot; data-origin-height=&quot;769&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cRxWIt/btrGQvymMge/7e5iYma2kkkfQiX3Erhlf1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cRxWIt/btrGQvymMge/7e5iYma2kkkfQiX3Erhlf1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cRxWIt/btrGQvymMge/7e5iYma2kkkfQiX3Erhlf1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcRxWIt%2FbtrGQvymMge%2F7e5iYma2kkkfQiX3Erhlf1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;692&quot; height=&quot;417&quot; data-origin-width=&quot;1277&quot; data-origin-height=&quot;769&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1647&quot; data-origin-height=&quot;782&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dfSvlf/btrGPDKGMmR/iJy97yo7bVedLER1tUfjO1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dfSvlf/btrGPDKGMmR/iJy97yo7bVedLER1tUfjO1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dfSvlf/btrGPDKGMmR/iJy97yo7bVedLER1tUfjO1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdfSvlf%2FbtrGPDKGMmR%2FiJy97yo7bVedLER1tUfjO1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1647&quot; height=&quot;782&quot; data-origin-width=&quot;1647&quot; data-origin-height=&quot;782&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;즉, 전체를 한번에 쭉 훑는 것이 아니라, 부분부분 잘라서 그 부분마다 훑어간다. 그렇기 때문에 병렬처리 가능하다. 하지만 무조건 최적의 해를 찾는다는 보장은 사라진다. 이러한 보장을 희생하는 대신에 빠르게 근사해를 찾는다.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1112&quot; data-origin-height=&quot;509&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/qVuO5/btrGQbfPJj3/wzXS9S4ChOySEublaAENq0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/qVuO5/btrGQbfPJj3/wzXS9S4ChOySEublaAENq0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/qVuO5/btrGQbfPJj3/wzXS9S4ChOySEublaAENq0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FqVuO5%2FbtrGQbfPJj3%2FwzXS9S4ChOySEublaAENq0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1112&quot; height=&quot;509&quot; data-origin-width=&quot;1112&quot; data-origin-height=&quot;509&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1087&quot; data-origin-height=&quot;494&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cDdnSI/btrGSxI2sGW/E9G70vkroYhLWG4bPmWdBk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cDdnSI/btrGSxI2sGW/E9G70vkroYhLWG4bPmWdBk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cDdnSI/btrGSxI2sGW/E9G70vkroYhLWG4bPmWdBk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcDdnSI%2FbtrGSxI2sGW%2FE9G70vkroYhLWG4bPmWdBk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1087&quot; height=&quot;494&quot; data-origin-width=&quot;1087&quot; data-origin-height=&quot;494&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Global variant : 기존의 부분집합을 최대한 같은 개수를 유지하면서 depth가 깊어진다.&amp;nbsp;&lt;/li&gt;
&lt;li&gt;Local variant : split이 진행되면 진행될수록, 전체 부분집합 개수를 유지한다. 그렇기 때문에 depth가 깊어질수록 조금더 촘촘하게 부분집합이 구분된다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;854&quot; data-origin-height=&quot;547&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/AJ8PR/btrGRi6FXIl/GYo21bJwDGumLBtkYZkkAk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/AJ8PR/btrGRi6FXIl/GYo21bJwDGumLBtkYZkkAk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/AJ8PR/btrGRi6FXIl/GYo21bJwDGumLBtkYZkkAk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FAJ8PR%2FbtrGRi6FXIl%2FGYo21bJwDGumLBtkYZkkAk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;660&quot; height=&quot;423&quot; data-origin-width=&quot;854&quot; data-origin-height=&quot;547&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;exact greedy는 best case이고, global eps=0.3은, 30%씩 듬성듬성 부분집합 잡은 것 --&amp;gt; 성능 상당히 낮아짐&lt;/li&gt;
&lt;li&gt;local eps=0.3 은 정확도가 높음.&lt;/li&gt;
&lt;li&gt;global eps=0.05 는 애초에 bucket을 20개로 한다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;정리하면, 분산 처리를 통해 빠르게 split을 해갈 수 있는 장치가 위의 장치였다.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Sparsity-Aware Split Finding&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;분기의 기본방향인 default direction을 설정. 즉 결측치가 있는 그런 상황에서는 무조건 처음부터 그 데이터에 대해서는 어느 쪽으로 가라 하는 것을 학습 과정에서 설정해준다.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;914&quot; data-origin-height=&quot;522&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/mzibK/btrGQbfSpaO/ZRfTdy1BTNTAplHq2fHnf0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/mzibK/btrGQbfSpaO/ZRfTdy1BTNTAplHq2fHnf0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/mzibK/btrGQbfSpaO/ZRfTdy1BTNTAplHq2fHnf0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FmzibK%2FbtrGQbfSpaO%2FZRfTdy1BTNTAplHq2fHnf0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;765&quot; height=&quot;437&quot; data-origin-width=&quot;914&quot; data-origin-height=&quot;522&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;missing value를 왼쪽으로 몰았을 때 최적의 split을 찾아냈다
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;이렇게 학습이 되고 나면, 나중에 이 변수에 대해서 missing value가 나타났을 때는 missing value를 무조건 왼쪽으로 넘긴다. 즉, 변수마다 missing value일 때 어느 분기로 갈지가 결정되는 것.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1642&quot; data-origin-height=&quot;832&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cU7ADd/btrGS4mn2cJ/3fMGwPOY01OK9w8Ys1trxK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cU7ADd/btrGS4mn2cJ/3fMGwPOY01OK9w8Ys1trxK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cU7ADd/btrGS4mn2cJ/3fMGwPOY01OK9w8Ys1trxK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcU7ADd%2FbtrGS4mn2cJ%2F3fMGwPOY01OK9w8Ys1trxK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1642&quot; height=&quot;832&quot; data-origin-width=&quot;1642&quot; data-origin-height=&quot;832&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1534&quot; data-origin-height=&quot;655&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/eypN47/btrGQoNomA3/5qEWcx3opLnIKlMh1NHrYK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/eypN47/btrGQoNomA3/5qEWcx3opLnIKlMh1NHrYK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/eypN47/btrGQoNomA3/5qEWcx3opLnIKlMh1NHrYK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FeypN47%2FbtrGQoNomA3%2F5qEWcx3opLnIKlMh1NHrYK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1534&quot; height=&quot;655&quot; data-origin-width=&quot;1534&quot; data-origin-height=&quot;655&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;시간이 줄어듦. 시간을 효율적으로 사용할 수 있다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1527&quot; data-origin-height=&quot;817&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cD7CHE/btrGS4GGZhc/Mv4eKAJidIlAY0fTjUETeK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cD7CHE/btrGS4GGZhc/Mv4eKAJidIlAY0fTjUETeK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cD7CHE/btrGS4GGZhc/Mv4eKAJidIlAY0fTjUETeK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcD7CHE%2FbtrGS4GGZhc%2FMv4eKAJidIlAY0fTjUETeK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1527&quot; height=&quot;817&quot; data-origin-width=&quot;1527&quot; data-origin-height=&quot;817&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;데이터를 각 변수값으로 정렬한 후에, 분기값을 정하고 split을 한다. 정렬하는 과정에 가장 오랜 시간이 소요되기 때문에, 처음에 정렬한 데이터를 미리 저장해놓고, 필요할 때마다 불러와서 쓴다.&amp;nbsp; --&amp;gt; 시간 훨씬 줄어듦&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1553&quot; data-origin-height=&quot;799&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bMGsW7/btrGQDpYQBz/16lQbn738w4OE02K8VZLH1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bMGsW7/btrGQDpYQBz/16lQbn738w4OE02K8VZLH1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bMGsW7/btrGQDpYQBz/16lQbn738w4OE02K8VZLH1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbMGsW7%2FbtrGQDpYQBz%2F16lQbn738w4OE02K8VZLH1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1553&quot; height=&quot;799&quot; data-origin-width=&quot;1553&quot; data-origin-height=&quot;799&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;GBM보다 일반적으로 XGBoost를 더 많이 사용한다. 알고리즘에 대한 개념은 GBM으로 이해하면 좋고, 실질적으로 사용할 때는 XGBoost 사용하면 된다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;</description>
      <category>Machine Learning/Algorithm</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/118</guid>
      <comments>https://dive-into-ds.tistory.com/118#entry118comment</comments>
      <pubDate>Sat, 9 Jul 2022 10:25:41 +0900</pubDate>
    </item>
    <item>
      <title>Ensemble model - GBM (Gradient Boosting Machine)</title>
      <link>https://dive-into-ds.tistory.com/117</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Random Forest와 함께 굉장히 널리 사용되는 기법 중 하나&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;GBM에서 파생된 중요 알고리즘 세 가지 : XGBoost, LightGBM, CatBoost&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Gradient Boosting = Gradient Descent + Boosting&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Motivation (for regression problem) (classification도 가능)
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;만일 회귀 모형의 잔차를 다음 단계에서 학습하는 모델을 구축한다면?&lt;/li&gt;
&lt;li&gt;즉, 현재 모델이 못 맞춘 것 만큼만 맞추는 모델을 만들어서 결합하면 맞출 수 있는것 아닌가?&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;834&quot; data-origin-height=&quot;445&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/KXODl/btrGIZUT3Mr/jjxzphWjbEHFhoqr2yYDY0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/KXODl/btrGIZUT3Mr/jjxzphWjbEHFhoqr2yYDY0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/KXODl/btrGIZUT3Mr/jjxzphWjbEHFhoqr2yYDY0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKXODl%2FbtrGIZUT3Mr%2FjjxzphWjbEHFhoqr2yYDY0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;738&quot; height=&quot;394&quot; data-origin-width=&quot;834&quot; data-origin-height=&quot;445&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;핵심 아이디어&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1360&quot; data-origin-height=&quot;730&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/duYqiv/btrGMp41QDN/t1HHM4jAmoaKVAWpbc1VA1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/duYqiv/btrGMp41QDN/t1HHM4jAmoaKVAWpbc1VA1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/duYqiv/btrGMp41QDN/t1HHM4jAmoaKVAWpbc1VA1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FduYqiv%2FbtrGMp41QDN%2Ft1HHM4jAmoaKVAWpbc1VA1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;776&quot; height=&quot;417&quot; data-origin-width=&quot;1360&quot; data-origin-height=&quot;730&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;즉, 충분히 에러값이 작아질 때까지 모델을 충분히 만들어서 합하면 되지 않나? (y=f1(x)+f2(x)+...)&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Adaboost는 못 맞추는 객체의 선택 확률값을 조정해서, 즉 선택 확률 분포를 조정했는데, GBM은 y값을 조정해서 맞추어야 하는 값이 무엇인지를 정해주자, 그렇게 guide 해주자는게 GBM.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Gradient Boosting
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;개별 모델을 forward 방식으로 학습 (Adaboost와 동일)&lt;/li&gt;
&lt;li&gt;각 단계에서 새로운 Base Learner를 학습하여 이전 단계의 Base Learner의 단점을 보완&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Gradient Boosting에서는 단점이 손실 함수(loss function)의 그래디언트(gradient)에 반영됨&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;757&quot; data-origin-height=&quot;386&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cF23t4/btrGKpdRCYc/Po0EgJF4wwSZ61kVQZ5Lik/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cF23t4/btrGKpdRCYc/Po0EgJF4wwSZ61kVQZ5Lik/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cF23t4/btrGKpdRCYc/Po0EgJF4wwSZ61kVQZ5Lik/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcF23t4%2FbtrGKpdRCYc%2FPo0EgJF4wwSZ61kVQZ5Lik%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;530&quot; height=&quot;270&quot; data-origin-width=&quot;757&quot; data-origin-height=&quot;386&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1439&quot; data-origin-height=&quot;797&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/8lbeg/btrGJGt2nmq/jkD89fNseAWojrh1zmUOs0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/8lbeg/btrGJGt2nmq/jkD89fNseAWojrh1zmUOs0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/8lbeg/btrGJGt2nmq/jkD89fNseAWojrh1zmUOs0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F8lbeg%2FbtrGJGt2nmq%2FjkD89fNseAWojrh1zmUOs0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1439&quot; height=&quot;797&quot; data-origin-width=&quot;1439&quot; data-origin-height=&quot;797&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;loss function을 찾고자 하는 함수 f(x)로 미분한 값이 잔차의 음수값이 됨&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;DL에서도 loss를 최소화 하기 위한 weight를 찾는데, 이 때 찾고자 하는 weight로 loss를 미분하여 gradient 계산한다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;DL에서는 negative gradient 방향으로 weight를 조정해서 loss를 최소화하는 w 값을 찾아간다. negative gradient는 이런 의미를 가진다.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;즉, 잔차는 gradient의 음수와 같다&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;gradient의 음수의 의미는, gradient descent 를 할 때 gradient 정보를 갖고 있으면 0이 아닐 경우에는 역방향으로 움직여서 최소화된 함수를 찾아갈 수 있다는 것이다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;실제값에서 함수값을 뺀 잔차를 활용하는 것이 negative gradient를 활용하는 것과 같아지는 것이다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;그래서 gradient를 이용한 boosting이다. gradient boosting machine 이라고 표현한다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;예시 (learner : stump tree)&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1477&quot; data-origin-height=&quot;813&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dzm8KF/btrGL9QFnoz/Mlffn4WNtn4SKMe63rUTR0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dzm8KF/btrGL9QFnoz/Mlffn4WNtn4SKMe63rUTR0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dzm8KF/btrGL9QFnoz/Mlffn4WNtn4SKMe63rUTR0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fdzm8KF%2FbtrGL9QFnoz%2FMlffn4WNtn4SKMe63rUTR0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1477&quot; height=&quot;813&quot; data-origin-width=&quot;1477&quot; data-origin-height=&quot;813&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1361&quot; data-origin-height=&quot;797&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/mMOEX/btrGM3CBQzF/feznXAZ78ZRQzsf5tXC3VK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/mMOEX/btrGM3CBQzF/feznXAZ78ZRQzsf5tXC3VK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/mMOEX/btrGM3CBQzF/feznXAZ78ZRQzsf5tXC3VK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FmMOEX%2FbtrGM3CBQzF%2FfeznXAZ78ZRQzsf5tXC3VK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1361&quot; height=&quot;797&quot; data-origin-width=&quot;1361&quot; data-origin-height=&quot;797&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Gradient Boosting Algorithm&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1037&quot; data-origin-height=&quot;706&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/d2CUyc/btrGM4IkVPq/ThR107LmcxpOBU5kOVSa6K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/d2CUyc/btrGM4IkVPq/ThR107LmcxpOBU5kOVSa6K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/d2CUyc/btrGM4IkVPq/ThR107LmcxpOBU5kOVSa6K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fd2CUyc%2FbtrGM4IkVPq%2FThR107LmcxpOBU5kOVSa6K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;620&quot; height=&quot;422&quot; data-origin-width=&quot;1037&quot; data-origin-height=&quot;706&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;손실함수에 대한 gradient를 사용하기 때문에, 손실함수에 따라서 GBM의 성능이 매우 달라질 수 있다.&amp;nbsp;&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;회귀모형의 경우 이렇게 네 가지 loss가 가장 많이 사용된다. &lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;처음부터 loss까지 건드린다고 해서 성능이 dramatic하게 변하는 것은 아니기 때문에, 가장 기본적인 squared loss를 먼저 사용해보고, 성능이 예상과 다르면 loss에 대한 설계를 바꿔가면서 tuning 해보는 것을 추천한다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1440&quot; data-origin-height=&quot;735&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b2Xbmi/btrGL76pW8C/riQRYqJKARPSXe3kjBJw6K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b2Xbmi/btrGL76pW8C/riQRYqJKARPSXe3kjBJw6K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b2Xbmi/btrGL76pW8C/riQRYqJKARPSXe3kjBJw6K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb2Xbmi%2FbtrGL76pW8C%2FriQRYqJKARPSXe3kjBJw6K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1440&quot; height=&quot;735&quot; data-origin-width=&quot;1440&quot; data-origin-height=&quot;735&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;분류 모형에 대한 손실함수&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;베르누이 loss는 log를 취한 값이기 때문에 adaptive loss보다 더 작다. 그래서 adaptive loss가 오분류에 더 민감하게 반응하는 특징이 있다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1436&quot; data-origin-height=&quot;812&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/1V758/btrGPKnON4M/Gj2XuWymxdKctzEsikDwU0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/1V758/btrGPKnON4M/Gj2XuWymxdKctzEsikDwU0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/1V758/btrGPKnON4M/Gj2XuWymxdKctzEsikDwU0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F1V758%2FbtrGPKnON4M%2FGj2XuWymxdKctzEsikDwU0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1436&quot; height=&quot;812&quot; data-origin-width=&quot;1436&quot; data-origin-height=&quot;812&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;GBM의 로직을 이해했다면 한 가지 의문점이 들 수 있다.&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;모든 데이터는 y=f(x)+e, 로 함수에 의해서 만들어지는 f(x) 와 함께 자연 발생적인 노이즈(e) 가 반드시 포함되어 있다. 무조건 들어있다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;만약 틀린 것 만큼, 즉 y-f1(x)를 목적으로 하는 f2(x) 모델을 만든다는 것은 노이즈도 학습하겠다는 것이다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;노이즈는 학습하면 안된다. 자연발생적인 변동성이라서 노이즈까지 학습하게 되면 과적합의 위험이 굉장히 크다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;따라서, 위의 설명대로 original 버전을 사용하면 과적합의 위험이 매우 높다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;과적합 방지를 위한 여러가지 장치를 사용한다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;A 그래프의 빨간선으로 추정하면 좋지만, 과적합 방지 장치가 없으면 검정선과 같이 spiky하게 학습된다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;B 그래프를 봐도 과적합된 것을 볼 수 있다. &lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1414&quot; data-origin-height=&quot;875&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/4cCVh/btrGPa8GNIW/l7ZrlMFIBMMuUBji9aZAkk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/4cCVh/btrGPa8GNIW/l7ZrlMFIBMMuUBji9aZAkk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/4cCVh/btrGPa8GNIW/l7ZrlMFIBMMuUBji9aZAkk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F4cCVh%2FbtrGPa8GNIW%2Fl7ZrlMFIBMMuUBji9aZAkk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1414&quot; height=&quot;875&quot; data-origin-width=&quot;1414&quot; data-origin-height=&quot;875&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;첫 번째 장치 : subsampling&amp;nbsp;&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;DL의 dropout 개념과 비슷하다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;일부러 학습 데이터의 일부분만 사용하는 것. 예를 들어 80%만 사용. 단, 비복원 추출한다. 80% 데이터의 gradient만 계산해서 학습한다. 일부를 억지로 놀림으로써, 모든 데이터에 과적합되는 것을 방지한다.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;두 번째 장치 : shrinkage&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;뒷 모델의 경우에는 영향력을 줄여가도록 하는 것. 관례적으로 0.9 내외의 값을 사용한다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1395&quot; data-origin-height=&quot;658&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cbPESe/btrGO9aP04h/XhSkfBCEmhiNArSYAQ7Ox1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cbPESe/btrGO9aP04h/XhSkfBCEmhiNArSYAQ7Ox1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cbPESe/btrGO9aP04h/XhSkfBCEmhiNArSYAQ7Ox1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcbPESe%2FbtrGO9aP04h%2FXhSkfBCEmhiNArSYAQ7Ox1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1395&quot; height=&quot;658&quot; data-origin-width=&quot;1395&quot; data-origin-height=&quot;658&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;과적합 방지 장치까지 된 gradient boosting machine은 RF에 필적할 정도로 성능이 상당히 좋다.
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;RF처럼 변수 중요도도 추출해준다.&lt;/li&gt;
&lt;li&gt;IG : information gain&lt;/li&gt;
&lt;li&gt;stump tree를 사용하기 때문에 L=2 이다.&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Influence_j(T) : 결국 이 변수가 사용되었을 때, 그 상황에서의 information gain이 이 정도라는 것을 나타낸다. 이 방식으로 tree 하나하나에 대해 개별 변수의 중요도를 계산한다.&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Influence_j : 모든 tree에 대한 j번째 변수의 중요도를 더한 뒤 평균내서 최종 변수 중요도를 계산한다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;RF와 쌍벽을 이룬다.&amp;nbsp;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;성능 둘 다 좋다&lt;/li&gt;
&lt;li&gt;두 알고리즘 모두 변수 중요도를 산출해줄 수 있다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Machine Learning/Algorithm</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/117</guid>
      <comments>https://dive-into-ds.tistory.com/117#entry117comment</comments>
      <pubDate>Thu, 7 Jul 2022 22:46:59 +0900</pubDate>
    </item>
    <item>
      <title>Ensemble model - Adaboost (Adaptive Boosting)</title>
      <link>https://dive-into-ds.tistory.com/116</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Strong models v.s. Weak models&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Weak model이란 랜덤 모델에 비해 약간의 성능 개선이 있는 모델을 의미하며, 이 모델은 &lt;b&gt;적절한 조치를 취함으로 인해 성능이 우수한 모델로 boosted 될 수 있음&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Adaboost (Adaptive boosting)는 weak learner을 base learner로 사용함&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Adaboost는 weak model 중에서도 stump tree를 사용함
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;stump tree : split을 1회만 하는 tree&lt;/li&gt;
&lt;li&gt;그냥 DT 사용해도 된다. 어떤 알고리즘을 사용해도 가능하다. 예측하지 못한 샘플의 선택 확률만 증가시키면 되는 것.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;bagging은 병렬적으로 bootstrap 만들고 학습 가능했다. &lt;b&gt;boost 기법은 sequential하게 간다. 현재 모델이 잘 해결하지 못하는 어려운 케이스에 집중해서 학습한다.&amp;nbsp;&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;1단계 : 현재 데이터셋에 대해서 단순한 모델을 이용하여 학습&lt;/li&gt;
&lt;li&gt;2단계 : &lt;b&gt;학습 오류가 큰 개체의 선택 확률을 증가시키고 학습 오류가 작은 개체의 선택 확률을 감소시킴&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;3단계 : 앞 단계에서 조정된 확률을 기반으로 다음 단계에서 사용될 학습 데이터셋을 구성&lt;/li&gt;
&lt;li&gt;1단계로 되돌아감&lt;/li&gt;
&lt;li&gt;최종 결과물은 각 모델의 성능 지표를 가중치로 사용하여 결합&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;bagging은 bootstrap 만들 때 각 객체마다 선택될 확률 동일하다 (동일한 확률로 복원추출). 하지만 boosting은 앞선 모델들이 맞추지 못했던 객체들은 더 높은 확률로 선택한다.&lt;/b&gt;&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;단일 모델 vs. bagging vs. boosting&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;970&quot; data-origin-height=&quot;380&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/crSepa/btrGFGHAdUD/BWNmxtzygBhA3lFRionDK0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/crSepa/btrGFGHAdUD/BWNmxtzygBhA3lFRionDK0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/crSepa/btrGFGHAdUD/BWNmxtzygBhA3lFRionDK0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcrSepa%2FbtrGFGHAdUD%2FBWNmxtzygBhA3lFRionDK0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;970&quot; height=&quot;380&quot; data-origin-width=&quot;970&quot; data-origin-height=&quot;380&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Adaboost 알고리즘&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;977&quot; data-origin-height=&quot;510&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cT8LZQ/btrGKcyrP01/hpJB6PfiXnanMRpUUmfho0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cT8LZQ/btrGKcyrP01/hpJB6PfiXnanMRpUUmfho0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cT8LZQ/btrGKcyrP01/hpJB6PfiXnanMRpUUmfho0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcT8LZQ%2FbtrGKcyrP01%2FhpJB6PfiXnanMRpUUmfho0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;977&quot; height=&quot;510&quot; data-origin-width=&quot;977&quot; data-origin-height=&quot;510&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Adaboost 예시&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1269&quot; data-origin-height=&quot;828&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/9uGBJ/btrGGa9xyHu/SXpfsOFsWcEqrbPrUU7Rok/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/9uGBJ/btrGGa9xyHu/SXpfsOFsWcEqrbPrUU7Rok/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/9uGBJ/btrGGa9xyHu/SXpfsOFsWcEqrbPrUU7Rok/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F9uGBJ%2FbtrGGa9xyHu%2FSXpfsOFsWcEqrbPrUU7Rok%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;787&quot; height=&quot;514&quot; data-origin-width=&quot;1269&quot; data-origin-height=&quot;828&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1456&quot; data-origin-height=&quot;863&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/wdasm/btrGJ2Qsb3I/KVqJtIWyruBCpp2vhkeoYk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/wdasm/btrGJ2Qsb3I/KVqJtIWyruBCpp2vhkeoYk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/wdasm/btrGJ2Qsb3I/KVqJtIWyruBCpp2vhkeoYk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fwdasm%2FbtrGJ2Qsb3I%2FKVqJtIWyruBCpp2vhkeoYk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;852&quot; height=&quot;505&quot; data-origin-width=&quot;1456&quot; data-origin-height=&quot;863&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;공부 잘하는 놈이 틀리면, 더 가중치(알파) 주어서 선택확률 증가시키고, 공부 못하는 놈이 틀리면, 뭐 그럴 수도 있으니 선택확률을 증가시키되 적은 가중치(알파) 로 선택확률 증가시킴&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1478&quot; data-origin-height=&quot;731&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/c9e1sm/btrGJGtk5rA/nbKpr9kV3yu9QsxUol7nZ1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/c9e1sm/btrGJGtk5rA/nbKpr9kV3yu9QsxUol7nZ1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/c9e1sm/btrGJGtk5rA/nbKpr9kV3yu9QsxUol7nZ1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fc9e1sm%2FbtrGJGtk5rA%2FnbKpr9kV3yu9QsxUol7nZ1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1478&quot; height=&quot;731&quot; data-origin-width=&quot;1478&quot; data-origin-height=&quot;731&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1456&quot; data-origin-height=&quot;716&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dPMVMJ/btrGJ1KKSzg/KOK7LW5tdAJKQ1Az6V5Gi1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dPMVMJ/btrGJ1KKSzg/KOK7LW5tdAJKQ1Az6V5Gi1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dPMVMJ/btrGJ1KKSzg/KOK7LW5tdAJKQ1Az6V5Gi1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdPMVMJ%2FbtrGJ1KKSzg%2FKOK7LW5tdAJKQ1Az6V5Gi1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1456&quot; height=&quot;716&quot; data-origin-width=&quot;1456&quot; data-origin-height=&quot;716&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1276&quot; data-origin-height=&quot;694&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/KhZSd/btrGGQpvfIP/y6h4iRwqDu5rBGPJV8fZrk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/KhZSd/btrGGQpvfIP/y6h4iRwqDu5rBGPJV8fZrk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/KhZSd/btrGGQpvfIP/y6h4iRwqDu5rBGPJV8fZrk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKhZSd%2FbtrGGQpvfIP%2Fy6h4iRwqDu5rBGPJV8fZrk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1276&quot; height=&quot;694&quot; data-origin-width=&quot;1276&quot; data-origin-height=&quot;694&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Adaboost 예시 2&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1417&quot; data-origin-height=&quot;786&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cabc3m/btrGKdKX1xq/SjiY06RmeOOtCzTo2BXBB1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cabc3m/btrGKdKX1xq/SjiY06RmeOOtCzTo2BXBB1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cabc3m/btrGKdKX1xq/SjiY06RmeOOtCzTo2BXBB1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fcabc3m%2FbtrGKdKX1xq%2FSjiY06RmeOOtCzTo2BXBB1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1417&quot; height=&quot;786&quot; data-origin-width=&quot;1417&quot; data-origin-height=&quot;786&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Adaboost 예시 3&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1451&quot; data-origin-height=&quot;827&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cAKPOR/btrGIZzXedx/RhQRXyGczKdEjKWc1x8NK0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cAKPOR/btrGIZzXedx/RhQRXyGczKdEjKWc1x8NK0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cAKPOR/btrGIZzXedx/RhQRXyGczKdEjKWc1x8NK0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcAKPOR%2FbtrGIZzXedx%2FRhQRXyGczKdEjKWc1x8NK0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1451&quot; height=&quot;827&quot; data-origin-width=&quot;1451&quot; data-origin-height=&quot;827&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;배깅과 부스팅에서의 개체 선택의 차이&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1184&quot; data-origin-height=&quot;712&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/Prjun/btrGKceeraL/PYqzTysAZlW86ZDWitp5d1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/Prjun/btrGKceeraL/PYqzTysAZlW86ZDWitp5d1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/Prjun/btrGKceeraL/PYqzTysAZlW86ZDWitp5d1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FPrjun%2FbtrGKceeraL%2FPYqzTysAZlW86ZDWitp5d1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;719&quot; height=&quot;432&quot; data-origin-width=&quot;1184&quot; data-origin-height=&quot;712&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Machine Learning/Algorithm</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/116</guid>
      <comments>https://dive-into-ds.tistory.com/116#entry116comment</comments>
      <pubDate>Thu, 7 Jul 2022 16:59:20 +0900</pubDate>
    </item>
    <item>
      <title>Ensemble model - Bagging</title>
      <link>https://dive-into-ds.tistory.com/115</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Bagging : Bootstrap Aggregating&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;앙상블의 각 멤버(모델)는 서로 다른 학습 데이터셋을 이용&lt;/li&gt;
&lt;li&gt;각 데이터셋은 &lt;b&gt;복원 추출(sampling with replacement)&amp;nbsp;&lt;/b&gt;을 통해 원래 데이터의 수 만큼의 크기를 갖도록 샘플링&lt;/li&gt;
&lt;li&gt;개별 데이터셋을 붓스트랩(bootstrap)이라 부름&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;이론적으로 한 개체가 하나의 붓스트랩에 한번도 선택되지 않을 확률&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;606&quot; data-origin-height=&quot;112&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/l6Ffh/btrGzvGfp7a/hoOmjmL58W16vxmyvuCML0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/l6Ffh/btrGzvGfp7a/hoOmjmL58W16vxmyvuCML0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/l6Ffh/btrGzvGfp7a/hoOmjmL58W16vxmyvuCML0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fl6Ffh%2FbtrGzvGfp7a%2FhoOmjmL58W16vxmyvuCML0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;606&quot; height=&quot;112&quot; data-origin-width=&quot;606&quot; data-origin-height=&quot;112&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;즉, 하나의 붓스트랩에 약 2/3 샘플이 선택되어 들어가 있음
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;선택되지 않은 샘플 : Out Of Bag (OOB)&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;개별 모델의 분산은 높고 편향이 낮은 알고리즘 (복잡도가 높은 알고리즘)에 적절함 (인공신경망, SVM(RBF, kernel width가 작은) 등) *****&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;배깅은 학습 데이터의 분포를 왜곡해서 noise를 주기 때문에, noise에 민감하게 반응하는 모델에 적용&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;복잡도가 높은 모델은 학습 데이터대로 과적합될 가능성이 높다. 그래서 bagging을 통해 학습 데이터 분포를 왜곡시키는 것이 효과가 좋다. &lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;로지스틱 회귀를 가지고 배깅을 하는것은?&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;가능은 하지만, 크게 효과는 보지 못할 것이다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Bagging과 합이 잘 맞는 알고리즘 들은 모델의 복잡도가 높은 기법들이고, 그러한 기법들은 개별 모델의 분산이 높고 noise의 변화에 민감하게 반응하고 편향이 낮다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Result Aggregating&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;For classification problem
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Majority voting : 다수결의 원칙에 따라&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1068&quot; data-origin-height=&quot;587&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cwG9hr/btrGBg2F2eF/RuAcpRNRXXCCKSFbHeK0A0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cwG9hr/btrGBg2F2eF/RuAcpRNRXXCCKSFbHeK0A0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cwG9hr/btrGBg2F2eF/RuAcpRNRXXCCKSFbHeK0A0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcwG9hr%2FbtrGBg2F2eF%2FRuAcpRNRXXCCKSFbHeK0A0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;673&quot; height=&quot;370&quot; data-origin-width=&quot;1068&quot; data-origin-height=&quot;587&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Weighted voting (weight = validation accuracy of individual models)&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;majority의 경우에는 4, 6은 0.4, 0.6의 확률이라고 할 수 있음&lt;/li&gt;
&lt;li&gt;분모 : 전체 validation accuracy의 총합&lt;/li&gt;
&lt;li&gt;분자 : 각 클래스의 validation accuracy 합&lt;/li&gt;
&lt;li&gt;즉 P(y) 값은 안 쓴다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;996&quot; data-origin-height=&quot;490&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/baP75a/btrGB4H8wbZ/otKVsMExhJkO3lCsFdZK00/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/baP75a/btrGB4H8wbZ/otKVsMExhJkO3lCsFdZK00/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/baP75a/btrGB4H8wbZ/otKVsMExhJkO3lCsFdZK00/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbaP75a%2FbtrGB4H8wbZ%2FotKVsMExhJkO3lCsFdZK00%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;685&quot; height=&quot;337&quot; data-origin-width=&quot;996&quot; data-origin-height=&quot;490&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Weighted voting (weight = predicted probability for each class)&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1093&quot; data-origin-height=&quot;583&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/baAv3W/btrGASnV1vm/gekZE7GIcnzCk1aQMcjOtK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/baAv3W/btrGASnV1vm/gekZE7GIcnzCk1aQMcjOtK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/baAv3W/btrGASnV1vm/gekZE7GIcnzCk1aQMcjOtK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbaAv3W%2FbtrGASnV1vm%2FgekZE7GIcnzCk1aQMcjOtK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;701&quot; height=&quot;374&quot; data-origin-width=&quot;1093&quot; data-origin-height=&quot;583&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;validation accuracy와 P(y=1)을 동시에 고려해서 결과를 aggregate 해도 된다. &lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;논리적으로 설득이 가능하다면 가능한 것이다. survey paper를 봐도, 수십가지가 된다. 뭐가 좋다고 할 수 없고, majority voting부터 시작하되 조금 더 성능 향상을 취하고 싶을 때는 가중합 방식을 고려해보면 좋다.*****&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;&lt;b&gt;Result Aggregating&lt;/b&gt;&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Stacking&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;결과값을 다른 모델의 input으로 넣는 것&lt;/li&gt;
&lt;li&gt;bagging의 결과로 여러 붓스트랩마다 결과 나옴&amp;nbsp; &lt;b&gt;&amp;rarr;&lt;span&gt;&amp;nbsp;&amp;nbsp;&lt;/span&gt;&lt;/b&gt;&lt;span&gt;그 결과들을 input으로 모델 생성&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;새로운 함수(모델)을 이용하여 aggregate 하는 것이다&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;실제로 competition에서도, 현업의 데이터를 다루는 많은 현실 문제에서도 앙상블 기법이 매우 유용하게 쓰인다. 이 중에서 성능을 극한으로 끌어올리는 기술로서 stacking이 사용된다.&amp;nbsp;&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;964&quot; data-origin-height=&quot;783&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/o4dls/btrGE40ZfeB/65PNrapMbOZPphGcSO7mCk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/o4dls/btrGE40ZfeB/65PNrapMbOZPphGcSO7mCk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/o4dls/btrGE40ZfeB/65PNrapMbOZPphGcSO7mCk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fo4dls%2FbtrGE40ZfeB%2F65PNrapMbOZPphGcSO7mCk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;592&quot; height=&quot;481&quot; data-origin-width=&quot;964&quot; data-origin-height=&quot;783&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;알고리즘&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1229&quot; data-origin-height=&quot;449&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cXLY6b/btrGzEiViRd/nqnvakSH8bJu1WyqGQg4mK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cXLY6b/btrGzEiViRd/nqnvakSH8bJu1WyqGQg4mK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cXLY6b/btrGzEiViRd/nqnvakSH8bJu1WyqGQg4mK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcXLY6b%2FbtrGzEiViRd%2FnqnvakSH8bJu1WyqGQg4mK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;805&quot; height=&quot;294&quot; data-origin-width=&quot;1229&quot; data-origin-height=&quot;449&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Bagging: Example
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;이래서 복잡도가 높은 모델과 합이 잘 맞는구나..!!&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;975&quot; data-origin-height=&quot;702&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bbEh1H/btrGEntuLIp/02kxfiDGMFJ5JKsVb6oK40/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bbEh1H/btrGEntuLIp/02kxfiDGMFJ5JKsVb6oK40/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bbEh1H/btrGEntuLIp/02kxfiDGMFJ5JKsVb6oK40/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbbEh1H%2FbtrGEntuLIp%2F02kxfiDGMFJ5JKsVb6oK40%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;672&quot; height=&quot;484&quot; data-origin-width=&quot;975&quot; data-origin-height=&quot;702&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;결과물 취합하는 방식은, logical 하다면 무엇이든 사용될 수 있다.&amp;nbsp;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;703&quot; data-origin-height=&quot;474&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/eGaDgi/btrGF6Tkisp/uSBX0nujfXJcIcdHHmTNMk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/eGaDgi/btrGF6Tkisp/uSBX0nujfXJcIcdHHmTNMk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/eGaDgi/btrGF6Tkisp/uSBX0nujfXJcIcdHHmTNMk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FeGaDgi%2FbtrGF6Tkisp%2FuSBX0nujfXJcIcdHHmTNMk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;653&quot; height=&quot;440&quot; data-origin-width=&quot;703&quot; data-origin-height=&quot;474&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Out of bag error (OOB error)&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;배깅을 사용할 경우, 학습/검증 집합을 사전에 나누지 않고 붓스트랩에 포함되지 않는 데이터들을 검증 집합으로 사용함&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;즉, train과 test만 나눠놓고, train data에서 OOB 데이터셋들로 검증할 수 있음&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Machine Learning/Algorithm</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/115</guid>
      <comments>https://dive-into-ds.tistory.com/115#entry115comment</comments>
      <pubDate>Tue, 5 Jul 2022 22:18:25 +0900</pubDate>
    </item>
    <item>
      <title>Ensemble model - 핵심 아이디어/효과, 배깅-부스팅 비교</title>
      <link>https://dive-into-ds.tistory.com/114</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;앙상블의 목적 : 다수의 모델을 학습하여 오류의 감소를 추구&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;분산의 감소에 의한 오류 감소 : 배깅(Bagging), 배깅의 special case인 랜덤 포레스트(Random Forest)&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;편향의 감소에 의한 오류 감소 : 부스팅(Boosting) - AdaBoost, GBM, XGBoost, LightGBM, CatBoost 등&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;분산과 편향의 동시 감소 : Mixture of Experts - 존재하기는 하나, 실제로 모델링할 때 control이 상당히 어려움&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;앙상블 구성의 두 가지 핵심 아이디어&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;다양성(diversity)을 어떻게 확보할 것인가?&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;최종 결과물을 어떻게 결합(combine, aggregate)할 것인가?&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;앙상블의 효과&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1517&quot; data-origin-height=&quot;835&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cDc5Pa/btrGzOK0AqS/D6LdUOgXOIDaF1yzrnxDTk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cDc5Pa/btrGzOK0AqS/D6LdUOgXOIDaF1yzrnxDTk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cDc5Pa/btrGzOK0AqS/D6LdUOgXOIDaF1yzrnxDTk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcDc5Pa%2FbtrGzOK0AqS%2FD6LdUOgXOIDaF1yzrnxDTk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1517&quot; height=&quot;835&quot; data-origin-width=&quot;1517&quot; data-origin-height=&quot;835&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;앙상블의 다양성&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1492&quot; data-origin-height=&quot;701&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/deY84W/btrGAqQFBnE/HK4INGGkL9fGbtiihKpdbk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/deY84W/btrGAqQFBnE/HK4INGGkL9fGbtiihKpdbk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/deY84W/btrGAqQFBnE/HK4INGGkL9fGbtiihKpdbk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FdeY84W%2FbtrGAqQFBnE%2FHK4INGGkL9fGbtiihKpdbk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1492&quot; height=&quot;701&quot; data-origin-width=&quot;1492&quot; data-origin-height=&quot;701&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;letter-spacing: 0px;&quot;&gt;Implicit : train data를 다르게 subset 구성하여 학습시키는 방식&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;letter-spacing: 0px;&quot;&gt;Explicit : 모델에게 &quot;이전 모델과는 다른 이런 것을 학습해야 한다&quot;라고 명시적으로 알려주는 방식&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1484&quot; data-origin-height=&quot;807&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cH5air/btrGAqwoxjF/GTYY2KvWIFVKE3W0wOtkMk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cH5air/btrGAqwoxjF/GTYY2KvWIFVKE3W0wOtkMk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cH5air/btrGAqwoxjF/GTYY2KvWIFVKE3W0wOtkMk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcH5air%2FbtrGAqwoxjF%2FGTYY2KvWIFVKE3W0wOtkMk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1484&quot; height=&quot;807&quot; data-origin-width=&quot;1484&quot; data-origin-height=&quot;807&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Bagging이 병렬처리 가능해서 학습속도가 더 빠를 것 같지만 그렇지 않다.
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;base learner가 계산 복잡도가 높은 Neural Net과 같은 무거운 모델인 경우가 많다. 하나 돌아가는데도 시간이 매우 걸린다.&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Boosting은 tree와 같은 단순한 모델을 사용하기 때문에, 순차적으로 학습함에도 불구하고 학습속도가 더 빠르다.&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Machine Learning/Algorithm</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/114</guid>
      <comments>https://dive-into-ds.tistory.com/114#entry114comment</comments>
      <pubDate>Tue, 5 Jul 2022 16:55:05 +0900</pubDate>
    </item>
    <item>
      <title>Anomaly Detection - Extended Isolation Forest</title>
      <link>https://dive-into-ds.tistory.com/113</link>
      <description>&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Isolation Forest&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;첫번째 그림 : 구 형태로 등고선이 그려지는게 맞을 것&lt;/li&gt;
&lt;li&gt;오른쪽 그림 : 정상 데이터가 없는데도 불구하고, 정상으로 판단할 수 있는 영역이 생김&lt;/li&gt;
&lt;li&gt;마지막 그림 : sin 함수 곡선의 사이사이 모두 정상으로 판단&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;918&quot; data-origin-height=&quot;497&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/rxnq4/btrGuSVKS9w/t4sRJjAs5scA3Qzu0LMvYK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/rxnq4/btrGuSVKS9w/t4sRJjAs5scA3Qzu0LMvYK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/rxnq4/btrGuSVKS9w/t4sRJjAs5scA3Qzu0LMvYK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Frxnq4%2FbtrGuSVKS9w%2Ft4sRJjAs5scA3Qzu0LMvYK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;696&quot; height=&quot;377&quot; data-origin-width=&quot;918&quot; data-origin-height=&quot;497&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Extended IF&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;기존의 IF는 변수의 값으로 split 하기 때문에, 각 축에 대해서 수직 혹은 수평인 방향으로 split을 하게 됨&lt;/li&gt;
&lt;li&gt;&lt;b&gt;기울기를 적용하자. 기울기도 random하게 적용할 것이다.&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1434&quot; data-origin-height=&quot;752&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/wupfo/btrGw2ckbFO/JmKpIqUnTMnCS7oFtESiL0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/wupfo/btrGw2ckbFO/JmKpIqUnTMnCS7oFtESiL0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/wupfo/btrGw2ckbFO/JmKpIqUnTMnCS7oFtESiL0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fwupfo%2FbtrGw2ckbFO%2FJmKpIqUnTMnCS7oFtESiL0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;702&quot; height=&quot;368&quot; data-origin-width=&quot;1434&quot; data-origin-height=&quot;752&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Example&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;Standard IF : 항상 축에 수직인 직선을 이용해서 영역을 분할함&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;Extended IF : 분할하는 직선의 slope가 random하게 적용되어 분할&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;872&quot; data-origin-height=&quot;444&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/wE4fv/btrGzDPZB6a/2U5ExwskU00wqkZJlZY221/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/wE4fv/btrGzDPZB6a/2U5ExwskU00wqkZJlZY221/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/wE4fv/btrGzDPZB6a/2U5ExwskU00wqkZJlZY221/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FwE4fv%2FbtrGzDPZB6a%2F2U5ExwskU00wqkZJlZY221%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;677&quot; height=&quot;345&quot; data-origin-width=&quot;872&quot; data-origin-height=&quot;444&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;동그라미친 부분은 상대적으로 split을 많이 필요로 하는 구간이기 때문에 anomaly score가 낮음&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1342&quot; data-origin-height=&quot;758&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/XrAUK/btrGupsPKQY/I0dnnITivxUvDOT9ee03Wk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/XrAUK/btrGupsPKQY/I0dnnITivxUvDOT9ee03Wk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/XrAUK/btrGupsPKQY/I0dnnITivxUvDOT9ee03Wk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FXrAUK%2FbtrGupsPKQY%2FI0dnnITivxUvDOT9ee03Wk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;660&quot; height=&quot;373&quot; data-origin-width=&quot;1342&quot; data-origin-height=&quot;758&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;알고리즘
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;random 하게 intercept와 slope를 할당함&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;685&quot; data-origin-height=&quot;787&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bYxJfv/btrGzjYxIuX/0gr0xPkkx6uZkj9Zk2LF0K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bYxJfv/btrGzjYxIuX/0gr0xPkkx6uZkj9Zk2LF0K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bYxJfv/btrGzjYxIuX/0gr0xPkkx6uZkj9Zk2LF0K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbYxJfv%2FbtrGzjYxIuX%2F0gr0xPkkx6uZkj9Zk2LF0K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;468&quot; height=&quot;538&quot; data-origin-width=&quot;685&quot; data-origin-height=&quot;787&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1226&quot; data-origin-height=&quot;788&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/1qifO/btrGuUeZXFi/sle5gvwLB8ByCyGwDkwQl0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/1qifO/btrGuUeZXFi/sle5gvwLB8ByCyGwDkwQl0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/1qifO/btrGuUeZXFi/sle5gvwLB8ByCyGwDkwQl0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F1qifO%2FbtrGuUeZXFi%2Fsle5gvwLB8ByCyGwDkwQl0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;571&quot; height=&quot;367&quot; data-origin-width=&quot;1226&quot; data-origin-height=&quot;788&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;실제 데이터에서 IF가 잘 작동하는 데이터가 자주 발생하느냐? 그렇지는 않다&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;교수님 개인적으로는, Standard IF 먼저 해보고 성능이 안 나오면 그 다음 단계로 Extended IF를 시도하는 것을 추천&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;</description>
      <category>Machine Learning/Algorithm</category>
      <author>고슴군</author>
      <guid isPermaLink="true">https://dive-into-ds.tistory.com/113</guid>
      <comments>https://dive-into-ds.tistory.com/113#entry113comment</comments>
      <pubDate>Tue, 5 Jul 2022 14:35:03 +0900</pubDate>
    </item>
  </channel>
</rss>