<?php
//
// Clipping program for the National Institute of Korean Language's Standard Korean Language Dictionary
//
// Anyone may modify this program, but distributing it is prohibited.
// No questions of any kind about this program will be answered.
// So if you run this program and it works well, smile; if it does not, please delete it right away.
//
// There will be no version upgrades and no bug fixes.
// All responsibility arising from the use of this program rests with the user.
//
// Set the clipping range with $min and $max.
// The data is clipped into a file of the form korea.$max.txt in the same folder as the program.
// If an entry has images, they are downloaded automatically.
// Not only pictures but every image embedded in the body text (Hanja, archaic words, formulas, etc.) is processed.
//
// Lift the execution time limit; the loop at the bottom of the file iterates from $min to $max.
set_time_limit(0);

// HTML parser library; presumably the source of str_get_html() used below.
include_once 'simple_html_dom.php';

// Inclusive range of entry numbers to fetch.
$min = 1;
$max = 518063;

/**
 * Fetch a resource with a plain HTTP/1.0 GET over a raw socket and return the response body.
 *
 * The Host header is derived from the global $nuke_url when that is set (as before);
 * when it is empty or undefined, $host_ip is used instead so the request still
 * carries a usable Host header.
 *
 * @param string $host_ip Host name or IP address to connect to.
 * @param int    $port    TCP port to connect to.
 * @param string $query   Request target (path plus optional query string).
 *
 * @return string Response body. An empty string is returned when the connection
 *                fails or the response contains no header/body separator.
 */
function getQuery($host_ip, $port, $query) {
    global $nuke_url;

    if (!empty($nuke_url)) {
        // preg_replace() stands in for ereg_replace(), which no longer exists in PHP 7+.
        $hostname = preg_replace('#^http://([^/]*)[/]*#', '\\1', $nuke_url);
    } else {
        $hostname = $host_ip;
    }

    // $_SERVER['HTTP_HOST'] / ['REQUEST_URI'] are not set when run from the command line.
    $referer = '';
    if (isset($_SERVER['HTTP_HOST'], $_SERVER['REQUEST_URI'])) {
        $referer = 'http://' . $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];
    }

    $errno = 0;
    $errstr = '';
    // fsockopen() takes $errno/$errstr by reference on its own; call-time "&" is a fatal error.
    // The @ only hides the connect warning - the failure itself is reported just below.
    $fp = @fsockopen($host_ip, $port, $errno, $errstr, 10);
    if (!$fp) {
        echo "$errstr: $errno <br>\n";
        return '';
    }

    // Every header line is terminated with CRLF.
    $request = "GET $query HTTP/1.0\r\n"
             . "Host: $hostname\r\n"
             . "User-Agent: DoA/1.1\r\n";
    if ($referer !== '') {
        $request .= "Referer: $referer\r\n";
    }
    $request .= "Connection: Close\r\n\r\n";
    fwrite($fp, $request);

    $response = '';
    while (!feof($fp)) {
        $chunk = fgets($fp, 1024);
        if ($chunk === false) {
            // Read error or closed stream: stop instead of spinning on feof().
            break;
        }
        $response .= $chunk;
    }
    fclose($fp);

    // Everything after the first blank line is the body.
    $parts = explode("\r\n\r\n", $response, 2);
    return isset($parts[1]) ? $parts[1] : '';
}

/**
 * Read one property of the $index-th node matching $selector below $node.
 *
 * @param object $node     Parsed node exposing find($selector, $index).
 * @param string $selector Selector handed to find().
 * @param int    $index    Zero-based index of the wanted match.
 * @param string $property Property to read from the match (e.g. 'plaintext', 'innertext').
 *
 * @return string Trimmed property value, or '' when find() yields nothing.
 */
function getDictNodeValue($node, $selector, $index, $property) {
    $match = $node->find($selector, $index);
    if (!$match) {
        return '';
    }
    return trim((string) $match->$property);
}

/**
 * Extract the fields of one dictionary entry from a fetched HTML page.
 *
 * Steps: escape quoted angle brackets, strip all but a few tags, swap known class
 * names for inline CSS, parse the result and read the fields out of
 * span[id=print_area].
 *
 * @param string $str_html Raw HTML of the entry page.
 *
 * @return array Keys 'title1', 'title2', 'use', 'list1' .. 'list6'. All values are
 *               '' when the page cannot be parsed or has no print_area span.
 */
function getDict($str_html) {
    $item = array(
        'title1' => '', 'title2' => '', 'use' => '',
        'list1' => '', 'list2' => '', 'list3' => '',
        'list4' => '', 'list5' => '', 'list6' => '',
    );

    // Escape quoted literal angle brackets so strip_tags() does not swallow them as markup.
    // NOTE(review): in the reviewed copy the replacement text was identical to the search
    // text (a no-op), presumably because the entities had been decoded - confirm.
    $str_html = str_replace('¡®<¡¯', '¡®&lt;¡¯', (string) $str_html);
    $str_html = str_replace('¡®>¡¯', '¡®&gt;¡¯', $str_html);
    $str_html = strip_tags($str_html, '<div><ul><li><img><span><td><br>');

    // Class name => inline CSS. strtr() tries the longest key first and never rescans
    // its own output, so 'NumRG2' is no longer shadowed by 'NumRG' (with a sequential
    // str_replace() the 'NumRG2' entry could never match).
    $styles = array(
        'sword'      => 'font-family:AppleGothic, Sans-serif ; font-size:14px; font-weight:bold; color:#336699',
        'provtitle'  => '',
        'exp'        => 'font-family:AppleGothic, Sans-serif;color:#000000; line-height:1.5; padding:5px;list-style: none;',
        'sdblue'     => 'font-family:AppleGothic, Sans-serif; font-size:13px;color:#336699;',
        'NumRG'      => 'font-family:tahoma; font-weight:bold;color:#549606; padding-top:-10px; margin-bottom:-0.1;vertical-align:top',
        'NumRG2'     => 'font-family:tahoma;  font-weight:bold;color:#336699;padding-left:8px; margin-top:-10;vertical-align:top;',
        'NumNO'      => 'width:45px; font-weight:bold;color:#cb4a00;vertical-align:top; text-align:right',
        'Use_icon'   => 'color:#444444; padding-left:20px;font-size: 13px',
        'idiom_list' => 'list-style: none;padding:5px;',
        'prov'       => 'background:url(icon_prov.gif) no-repeat; background-position:0px 5px; padding:5px 0px 5px 30px;line-height:1.3',
        'idiom'      => 'background:url(icon_idiom.gif) no-repeat;background-position:0px 5px;  padding:5px 0px 5px 30px; line-height:1.3',
    );
    $str_html = strtr($str_html, $styles);

    $html = str_get_html($str_html);
    if (!$html) {
        // Nothing parseable (e.g. the fetch failed and the page is empty).
        return $item;
    }

    $titleNoise   = array("^", "-", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
    $allWhite     = array("\r\n", "\n", "\r", "\t", " ");
    $lineAndTab   = array("\r\n", "\n", "\r", "\t");

    foreach ($html->find('span[id=print_area]') as $article) {
        // Headword with '^', '-' and digits removed.
        $item['title1'] = str_replace($titleNoise, '', getDictNodeValue($article, 'span.word_title', 0, 'plaintext'));
        $item['title2'] = str_replace($allWhite, '', getDictNodeValue($article, 'td.pb10', 0, 'plaintext'));

        // Keep the second ':'-separated piece of the use_title cell; explode() replaces the removed split().
        $use = str_replace($allWhite, '', getDictNodeValue($article, 'td[id=use_title]', 0, 'plaintext'));
        $useParts = explode(':', $use);
        $item['use'] = isset($useParts[1]) ? $useParts[1] : '';

        // list1 = div.list plus up to two div.list2 blocks. $diff counts the div.list2
        // blocks found and shifts the plain 'div' indexes used for list2..list6.
        $diff = 0;
        $item['list1'] = getDictNodeValue($article, 'div.list', 0, 'innertext');
        for ($n = 0; $n < 2; $n++) {
            if ($article->find('div.list2', $n)) {
                $item['list1'] .= "<br>" . getDictNodeValue($article, 'div.list2', $n, 'innertext');
                $diff++;
            }
        }
        $item['list1'] = str_replace($lineAndTab, '', $item['list1']);

        for ($n = 2; $n <= 6; $n++) {
            $item['list' . $n] = str_replace(
                $lineAndTab,
                '',
                getDictNodeValue($article, 'div', $n - 1 + $diff, 'innertext')
            );
        }
    }

    $html->clear();
    unset($html);
    return $item;
}

/**
 * Fetch the page for one entry number from stdweb2.korean.go.kr.
 *
 * @param int|string $num Entry number, appended to the idx query parameter.
 *
 * @return string Whatever getQuery() returns for /search/View.jsp?idx=$num.
 */
function getHtml($num) {
    $query = '/search/View.jsp?idx=' . $num;
    return getQuery('stdweb2.korean.go.kr', 80, $query);
}

/**
 * Download the image at $url into the current directory.
 *
 * The local file name is the last '/'-separated segment of the URL path.
 *
 * @param string $url Absolute http URL of the image.
 *
 * @return string The local file name (last path segment), whether or not the
 *                download succeeded; '' when the URL has no usable path segment.
 */
function saveImg($url) {
    $parts = parse_url($url);
    $host = isset($parts['host']) ? $parts['host'] : '';
    $port = isset($parts['port']) ? $parts['port'] : 80;
    $path = isset($parts['path']) ? $parts['path'] : '';

    // explode() replaces split(), which no longer exists in PHP 7+.
    $segments = explode('/', $path);
    $filename = (string) array_pop($segments);

    // No host to contact, or no name that can be a file in this directory: skip the download.
    if ($host === '' || $filename === '' || $filename === '.' || $filename === '..') {
        return $filename;
    }

    $img = getQuery($host, $port, $path);
    if ((string) $img === '') {
        // Failed or empty download: do not truncate a file that may already be there.
        return $filename;
    }

    $fp = fopen($filename, 'w');
    if ($fp !== false) {
        fwrite($fp, $img);
        fclose($fp);
    }
    return $filename;
}

/**
 * Read the src and alt attributes of the first <img> in an HTML fragment.
 *
 * @param string $img HTML fragment expected to contain an <img> tag.
 *
 * @return array array($url, $alt); both are '' when the fragment cannot be parsed
 *               or contains no <img>.
 */
function getImgInfo($img) {
    $html = str_get_html($img);
    if (!$html) {
        return array('', '');
    }

    $url = '';
    $alt = '';
    $node = $html->find('img', 0);
    if ($node) {
        $url = $node->src;
        $alt = $node->alt;
    }

    // Release the parsed DOM, as getDict() does.
    $html->clear();
    unset($html);

    return array($url, $alt);
}

/**
 * Append $cont to the file $filename, creating the file if it does not exist.
 *
 * @param string $filename Path of the output file.
 * @param string $cont     Text to append.
 *
 * @return void
 */
function saveDict($filename, $cont) {
    // One call opens in append mode, writes and closes; on failure it emits a warning
    // instead of passing a false handle on to fwrite()/fclose().
    file_put_contents($filename, $cont, FILE_APPEND);
}

header("Content-Type: text/html; charset=utf-8");

/**
 * Turn one body section of an entry into the HTML that goes into the output line.
 *
 * Without an <img> the section is returned as is, preceded by $plainPrefix.
 * With an <img>, the first image is downloaded: when the saved file name contains
 * ".gif" the image URL inside the section is replaced by the quoted local file name;
 * otherwise the whole section is replaced by a centered 300px image and its alt text.
 *
 * @param string $section     Section HTML (one of the list1..list6 fields).
 * @param string $plainPrefix Text put in front of a section that has no image.
 *
 * @return string HTML fragment to append to the output line.
 */
function renderDictSection($section, $plainPrefix) {
    if (!preg_match("/<img.*/", $section)) {
        return $plainPrefix . $section;
    }

    list($url, $alt) = getImgInfo($section);
    $filename = saveImg($url);
    if (preg_match("/\.gif/is", $filename)) {
        return str_replace($url, "\"$filename\"", $section);
    }
    return "<center><img src='" . $filename . "' width='300'><br>" . $alt . "</center>";
}

// Number of entry numbers skipped because no headword was found.
$count = 0;

for ($i = $min; $i <= $max; $i++) {
    $str_html = getHtml($i);
    $dict = getDict($str_html);

    // Make sure every field read below exists, whatever getDict() returned.
    if (!is_array($dict)) {
        $dict = array();
    }
    $dict += array(
        'img' => '', 'title1' => '', 'title2' => '', 'use' => '',
        'list1' => '', 'list2' => '', 'list3' => '',
        'list4' => '', 'list5' => '', 'list6' => '',
    );

    if ($dict['img'] != '') {
        $dict['img'] = saveImg($dict['img']);
    }

    if ($dict['title1'] == '') {
        $count++;
        continue;
    }

    // Output line: headword, a tab, then the HTML body.
    $saveDict = $dict['title1'] . "\t<div style='font-weight: bold; font-size: 20px; color: #0000ff'>" . $dict['title2'] . "</div>";
    if ($dict['use'] != '') {
        $saveDict .= "<br><b>¢Â È°¿ë</b>:" . $dict['use'] . "<br>";
    }

    // list1 is appended directly; list2..list6 get a leading <br> when they carry no image.
    for ($n = 1; $n <= 6; $n++) {
        $section = $dict['list' . $n];
        if ($section != '') {
            $saveDict .= renderDictSection($section, $n === 1 ? '' : "<br>");
        }
    }

    // Turn the remaining class/id attributes into style attributes and rewrite a few known spans.
    $saveDict = str_replace('class=', 'style=', $saveDict);
    $saveDict = str_replace('id="idiom_list"', 'style="list-style: none;"', $saveDict);
    $saveDict = str_replace('id="', 'style="', $saveDict);
    $saveDict = preg_replace("/<span style=\"Definition\">(.*?)<\/span>/is", "$1", $saveDict);
    $saveDict = preg_replace("/<ul style=\"\">(.*?)<\/ul>/is", "<span style='font-size: 20px; color: #CC00FF; font-weight: bold;'>$1</span>", $saveDict);
    $saveDict = preg_replace("/<span style=\"color:#444444; padding-left:20px;font-size: 13px\">¢Ò<\/span> *<span style=\"Use\">(.*?)<\/span>/is", "<div style='color:#444444; padding-left: 20px; font-size: 12px; line-height: 120%'>¢Ò $1</div>", $saveDict);

    saveDict('korea' . $max . '.txt', $saveDict . "\n");

    // Progress line: entry number, skipped count, wall-clock time, then the saved text.
    $time = date("H:i:s");
    echo "<br>$i : $count : $time -" . $saveDict;
}
?>