| 
 public static void initMRJob(Path crawlDb, Path linkDb,  
                           Collection segments,  
                           JobConf job) {  
 
    final String DIR_CACHE = "content";  
 
    LOG.info("IndexerMapReduce: crawldb: " + crawlDb);  
    LOG.info("IndexerMapReduce: linkdb: " + linkDb);  
 
    for (final Path segment : segments) {  
      LOG.info("IndexerMapReduces: adding segment: " + segment);  
      FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.FETCH_DIR_NAME));  
      FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.PARSE_DIR_NAME));  
      FileInputFormat.addInputPath(job, new Path(segment, ParseData.DIR_NAME));  
      FileInputFormat.addInputPath(job, new Path(segment, ParseText.DIR_NAME));  
 
      FileInputFormat.addInputPath(job, new Path(segment, DIR_CACHE));  
    }  
 
    FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));  
    FileInputFormat.addInputPath(job, new Path(linkDb, LinkDb.CURRENT_NAME));  
    job.setInputFormat(SequenceFileInputFormat.class);  
 
    job.setMapperClass(IndexerMapReduce.class);  
    job.setReducerClass(IndexerMapReduce.class);  
 
    job.setOutputFormat(IndexerOutputFormat.class);  
    job.setOutputKeyClass(Text.class);  
    job.setMapOutputValueClass(NutchWritable.class);  
    job.setOutputValueClass(NutchWritable.class);  
  }  
 
 |