$ curl http://elasticmapreduce.s3.amazonaws.com/bootstrap-actions/configurations/latest/memory-intensive
#!/usr/bin/ruby
## Copyright 2010-2010 Amazon.com, Inc. or its affiliates. All Rights Reserved.
## Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License.
## A copy of the License is located at http://aws.amazon.com/apache2.0/
## or in the "license" file accompanying this file.
## This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and limitations under the License.
require 'json'
require 'hpricot'
require 'tempfile'
CONFIG_HEADER = "<?xml version=\"1.0\"?>\n<?xml-stylesheet type=\"text/xsl\" href=\"configuration.xsl\"?>"
conf_fields = [
{ :field => "mapred.child.java.opts={VAR}", :roles => [:slave] },
{ :field => "mapred.tasktracker.map.tasks.maximum={VAR}", :roles => [:master, :slave] },
{ :field => "mapred.tasktracker.reduce.tasks.maximum={VAR}", :roles => [:master, :slave] }
]
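# Values per instance type, in the same order as conf_fields above:
# [mapred.child.java.opts, max map task slots, max reduce task slots]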
configs = {
"m1.small" => ["-Xmx512m", "2", "1"],
"m1.large" => ["-Xmx1024m", "3", "1"],
"m1.xlarge" => ["-Xmx1024m", "8", "3"],
"c1.medium" => ["-Xmx512m", "2", "1"],
"c1.xlarge" => ["-Xmx512m", "7", "2"],
"m2.xlarge" => ["-Xmx3072m", "3", "1"],
"m2.2xlarge" => ["-Xmx4096m", "6", "2"],
"m2.4xlarge" => ["-Xmx4096m", "14", "4"]
}
heap_fields = [
{ :field => "HADOOP_NAMENODE_HEAPSIZE={VAR}", :roles => [:master, :slave] },
{ :field => "HADOOP_JOBTRACKER_HEAPSIZE={VAR}", :roles => [:master, :slave] },
{ :field => "HADOOP_TASKTRACKER_HEAPSIZE={VAR}", :roles => [:master, :slave] },
{ :field => "HADOOP_DATANODE_HEAPSIZE={VAR}", :roles => [:master, :slave] }
]
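# Daemon heap sizes in MB per instance type, in the same order as heap_fields above:
# [namenode, jobtracker, tasktracker, datanode]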
heaps = {
"m1.small" => ["512", "512", "256", "128"],
"m1.large" => ["1024", "3072", "512", "512"],
"m1.xlarge" => ["3072", "9216", "512", "512"],
"c1.medium" => ["512", "768", "256", "128"],
"c1.xlarge" => ["1024", "2048", "512", "512"],
"m2.xlarge" => ["2048", "4096", "512", "512"],
"m2.2xlarge" => ["2048", "8192", "1024", "1024"],
"m2.4xlarge" => ["8192", "8192", "1024", "1024"]
}
def parse_config_file(config_file_path)
ret = []
if File.exist?(config_file_path) then
doc = open(config_file_path) { |f| Hpricot(f) }
(doc/"configuration"/"property").each do |property|
val = {:name => (property/"name").inner_html, :value => (property/"value").inner_html }
if (property/"final").inner_html != "" then
val[:final] = (property/"final").inner_html
end
ret << val
end
else
puts "#{config_file_path} does not exist, assuming empty configuration"
end
return ret
end
def dump_config_file(file_name, config)
open(file_name, 'w') do |f|
f.puts CONFIG_HEADER
f.puts '<configuration>'
for entry in config
f.print " <property><name>#{entry[:name]}</name><value>#{entry[:value]}</value>"
if entry[:final] then
f.print "<final>#{entry[:final]}</final>"
end
f.puts '</property>'
end
f.puts '</configuration>'
end
end
def merge_config(default, overwrite)
for entry in overwrite
cells = default.select { |x| x[:name] == entry[:name]}
if cells.size == 0 then
puts "'#{entry[:name]}': default does not have key, appending value '#{entry[:value]}'"
default << entry
elsif cells.size == 1 then
puts "'#{entry[:name]}': new value '#{entry[:value]}' overwriting '#{cells[0][:value]}'"
cells[0].replace(entry)
else
raise "'#{entry[:name]}': default has #{cells.size} keys"
end
end
end
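# Merge the chosen mapred.* overrides into /home/hadoop/conf/mapred-site.xml (keeping a .old backup)
# and rewrite /home/hadoop/conf/hadoop-user-env.sh with the daemon heap size settings.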
def do_overwrites(conf_list, heap_list)
file = "/home/hadoop/conf/mapred-site.xml"
default = parse_config_file(file)
for arg in conf_list
puts "Processing default file #{file} with overwrite #{arg}"
key = arg.split('=', 2)[0]
value = arg.split('=', 2)[1]
overwrite = [{:name => key, :value => value }]
merge_config(default,overwrite)
end
dump_config_file(file + ".new", default)
if File.exist?(file) then
File.rename(file, file + ".old")
end
File.rename(file + ".new", file)
puts "Saved #{file} with overwrites. Original saved to #{file}.old"
file = "/home/hadoop/conf/hadoop-user-env.sh"
if File.exist?(file) then
File.delete(file)
end
open(file, 'w') do |f|
f.puts "#!/bin/bash"
for arg in heap_list
f.puts arg
end
end
end
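# Thin reader for the EMR metadata JSON files under /mnt/var/lib/info/ (instance.json, job-flow.json),
# with lookup by dotted JSON path.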
class JsonInfoFile
INFO_DIR = "/mnt/var/lib/info/"
def initialize(file_type)
@json = JSON.parse(File.read(File.join(INFO_DIR, file_type + ".json")))
end
def [](json_path)
json = @json
begin
path = json_path.split('.')
visited = []
for item in path
if !json.kind_of? Hash then
raise "#{visited.join('.')} not of type object, got '#{json.inspect}' from #{@json.inspect}"
end
visited << item
json = json[item]
end
if json == nil then
raise "#{visited.join('.')} does not exist"
end
return json
rescue
puts "Unable to process path '#{json_path}', #{$!}"
exit -1
end
end
end
def warn(msg)
STDERR.puts "#{Time.now.utc} WARN " + msg
end
def substitute_in(row, fields, instance_role)
if row.size != fields.size then
raise RuntimeError, "Incompatible row and field list row=#{row}, fields=#{fields}"
end
result = []
for index in 0 ... row.size do
if fields[index][:roles].include?(instance_role) then
result << fields[index][:field].sub('{VAR}', row[index])
end
end
return result
end
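# Determine this node's instance type and role (master or slave) from the EMR metadata,
# then apply the matching memory settings. HPC instance types are not supported.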
HPC_INSTANCE_TYPES = [ "cc1.4xlarge", "cg1.4xlarge" ]
jobflow_info = JsonInfoFile.new("job-flow")
instance_info = JsonInfoFile.new("instance")
if instance_info['isMaster'].to_s == 'true' then
instance_type = jobflow_info["masterInstanceType"]
instance_role = :master
else
instance_group_id = instance_info['instanceGroupId']
instance_groups = jobflow_info['instanceGroups']
index = instance_groups.index { |g| g['instanceGroupId'] == instance_group_id }
instance_group = instance_groups[index]
instance_type = instance_group['instanceType']
instance_role = :slave
end
if HPC_INSTANCE_TYPES.include?(instance_type) then
warn "This bootstrap action is not supported for the HPC instances (cc1.4xlarge and cg1.4xlarge)"
else
conf_list = substitute_in(configs[instance_type], conf_fields, instance_role)
heap_list = substitute_in(heaps[instance_type], heap_fields, instance_role)
do_overwrites(conf_list, heap_list)
end
2013-03-11T11:31:41.005Z INFO Fetching jar file.
2013-03-11T11:31:48.146Z INFO Working dir /mnt/var/lib/hadoop/steps/3
2013-03-11T11:31:48.146Z INFO Executing /usr/lib/jvm/java-6-sun/bin/java -cp /home/hadoop/conf:/usr/lib/jvm/java-6-sun/lib/tools.jar:/home/hadoop:/home/hadoop/hadoop-core.jar:/home/hadoop/hadoop-tools.jar:/home/hadoop/hadoop-core-0.20.205.jar:/home/hadoop/hadoop-tools-0.20.205.jar:/home/hadoop/lib/*:/home/hadoop/lib/jetty-ext/* -Xmx1000m -Dhadoop.log.dir=/mnt/var/log/hadoop/steps/3 -Dhadoop.log.file=syslog -Dhadoop.home.dir=/home/hadoop -Dhadoop.id.str=hadoop -Dhadoop.root.logger=INFO,DRFA -Djava.io.tmpdir=/mnt/var/lib/hadoop/steps/3/tmp -Djava.library.path=/home/hadoop/native/Linux-i386-32 org.apache.hadoop.util.RunJar /mnt/var/lib/hadoop/steps/3/script-runner.jar s3://ap-northeast-1.elasticmapreduce/libs/hive/hive-script --run-hive-script --base-path s3://ap-northeast-1.elasticmapreduce/libs/hive/ --hive-versions latest --args -f s3n://memorycraft-archive/script/s3.hql -d INPUT_BUCKET_LOCATION=s3://memorycraft-log/logs/ -d OUTPUT_BUCKET_LOCATION=s3://memorycraft-archive/rslt/ -d YYYY=2013 -d MM=03 -d DD=04
2013-03-11T11:34:20.383Z INFO Execution ended with ret val 255
2013-03-11T11:34:20.384Z WARN Step failed with bad retval
2013-03-11T11:34:26.454Z INFO Step created jobs: job_201303111128_0001
stderr
Logging initialized using configuration in file:/home/hadoop/.versions/hive-0.8.1/conf/hive-log4j.properties
Hive history file=/mnt/var/lib/hive_081/tmp/history/hive_job_log_hadoop_201303111132_1431275973.txt
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/hadoop/lib/slf4j-log4j12-1.6.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/hadoop/.versions/hive-0.8.1/lib/slf4j-log4j12-1.6.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
OK
Time taken: 13.475 seconds
OK
Time taken: 0.765 seconds
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Starting Job = job_201303111128_0001, Tracking URL = http://ip-10-121-17-157.ap-northeast-1.compute.internal:9100/jobdetails.jsp?jobid=job_201303111128_0001
Kill Command = /home/hadoop/bin/hadoop job -Dmapred.job.tracker=10.121.17.157:9001 -kill job_201303111128_0001
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2013-03-11 11:33:11,849 Stage-1 map = 0%, reduce = 0%
2013-03-11 11:34:12,380 Stage-1 map = 0%, reduce = 0%
2013-03-11 11:34:19,431 Stage-1 map = 100%, reduce = 100%
Ended Job = job_201303111128_0001 with errors
Error during job, obtaining debugging information...
Examining task ID: task_201303111128_0001_m_000002 (and more) from job job_201303111128_0001
Exception in thread "Thread-47" java.lang.RuntimeException: Error while reading from task log url
at org.apache.hadoop.hive.ql.exec.errors.TaskLogProcessor.getErrors(TaskLogProcessor.java:130)
at org.apache.hadoop.hive.ql.exec.JobDebugger.showJobFailDebugInfo(JobDebugger.java:211)
at org.apache.hadoop.hive.ql.exec.JobDebugger.run(JobDebugger.java:81)
at java.lang.Thread.run(Thread.java:662)
Caused by: java.io.IOException: Server returned HTTP response code: 400 for URL: http://10.122.24.232:9103/tasklog?taskid=attempt_201303111128_0001_m_000000_1&start=-8193
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1436)
at java.net.URL.openStream(URL.java:1010)
at org.apache.hadoop.hive.ql.exec.errors.TaskLogProcessor.getErrors(TaskLogProcessor.java:120)
... 3 more
Counters:
FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask
MapReduce Jobs Launched:
Job 0: Map: 1 Reduce: 1 HDFS Read: 0 HDFS Write: 0 FAIL
Total MapReduce CPU Time Spent: 0 msec
Command exiting with ret '255'
2013-03-11 11:33:13,112 INFO org.apache.hadoop.util.NativeCodeLoader (main): Loaded the native-hadoop library
2013-03-11 11:33:13,268 INFO org.apache.hadoop.mapred.TaskRunner (main): Creating symlink: /mnt/var/lib/hadoop/mapred/taskTracker/distcache/-5343292120509370400_-2048022790_1496912286/10.121.17.157/mnt/var/lib/hive_081/tmp/scratch/hive_2013-03-11_11-32-20_837_75945303502865249/-mr-10001/3d096d97-37e9-47c7-a184-8152010d592a <- /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/attempt_201303111128_0001_m_000000_0/work/HIVE_PLAN3d096d97-37e9-47c7-a184-8152010d592a
2013-03-11 11:33:13,354 INFO org.apache.hadoop.filecache.TrackerDistributedCacheManager (main): Creating symlink: /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/jars/job.jar <- /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/attempt_201303111128_0001_m_000000_0/work/job.jar
2013-03-11 11:33:13,362 INFO org.apache.hadoop.filecache.TrackerDistributedCacheManager (main): Creating symlink: /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/jars/.job.jar.crc <- /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/attempt_201303111128_0001_m_000000_0/work/.job.jar.crc
2013-03-11 11:33:13,370 INFO org.apache.hadoop.filecache.TrackerDistributedCacheManager (main): Creating symlink: /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/jars/META-INF <- /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/attempt_201303111128_0001_m_000000_0/work/META-INF
2013-03-11 11:33:13,378 INFO org.apache.hadoop.filecache.TrackerDistributedCacheManager (main): Creating symlink: /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/jars/org <- /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/attempt_201303111128_0001_m_000000_0/work/org
2013-03-11 11:33:13,386 INFO org.apache.hadoop.filecache.TrackerDistributedCacheManager (main): Creating symlink: /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/jars/hive-exec-log4j.properties <- /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/attempt_201303111128_0001_m_000000_0/work/hive-exec-log4j.properties
2013-03-11 11:33:13,430 INFO org.apache.hadoop.filecache.TrackerDistributedCacheManager (main): Creating symlink: /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/jars/javaewah <- /mnt/var/lib/hadoop/mapred/taskTracker/hadoop/jobcache/job_201303111128_0001/attempt_201303111128_0001_m_000000_0/work/javaewah
2013-03-11 11:33:13,838 WARN org.apache.hadoop.metrics2.impl.MetricsSystemImpl (main): Source name ugi already exists!
2013-03-11 11:33:14,037 INFO org.apache.hadoop.mapred.MapTask (main): Host name: ip-10-122-12-88.ap-northeast-1.compute.internal
2013-03-11 11:33:14,088 INFO org.apache.hadoop.util.ProcessTree (main): setsid exited with exit code 0
2013-03-11 11:33:14,106 INFO org.apache.hadoop.mapred.Task (main): Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@1dfd868
2013-03-11 11:33:14,342 INFO com.hadoop.compression.lzo.GPLNativeCodeLoader (main): Loaded native gpl library
2013-03-11 11:33:14,358 WARN com.hadoop.compression.lzo.LzoCodec (main): Could not find build properties file with revision hash
2013-03-11 11:33:14,358 INFO com.hadoop.compression.lzo.LzoCodec (main): Successfully loaded & initialized native-lzo library [hadoop-lzo rev UNKNOWN]
2013-03-11 11:33:14,367 WARN org.apache.hadoop.io.compress.snappy.LoadSnappy (main): Snappy native library is available
2013-03-11 11:33:14,367 INFO org.apache.hadoop.io.compress.snappy.LoadSnappy (main): Snappy native library loaded
2013-03-11 11:33:14,452 WARN org.apache.hadoop.hive.conf.HiveConf (main): hive-site.xml not found on CLASSPATH
2013-03-11 11:33:16,876 INFO org.apache.hadoop.fs.s3native.NativeS3FileSystem (main): Opening 's3://memorycraft-log/logs/2013-03-04-09-16-51-45565F6169B3E2F2' for reading
2013-03-11 11:33:17,238 INFO org.apache.hadoop.io.retry.RetryInvocationHandler (main): Exception while invoking retrievePair of class org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore. Not retrying.Status Code: 403, AWS Service: Amazon S3, AWS Request ID: 2981A88871482E0B, AWS Error Code: InvalidObjectState, AWS Error Message: The operation is not valid for the object's storage class, S3 Extended Request ID: 7yHCAsfLHZKbf+5ojWMS9q2B41bBwizoqW4zldSdfEpLoLrKvI9X1WI0brHGsC49
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:548)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:288)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:170)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:2619)
at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:809)
at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.retrievePair(Jets3tNativeFileSystemStore.java:266)
at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.retrievePair(Jets3tNativeFileSystemStore.java:252)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
at org.apache.hadoop.fs.s3native.$Proxy7.retrievePair(Unknown Source)
at org.apache.hadoop.fs.s3native.NativeS3FileSystem.open(NativeS3FileSystem.java:1002)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:420)
at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:78)
at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:51)
at org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.<init>(CombineHiveRecordReader.java:65)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.initNextRecordReader(HadoopShimsSecure.java:331)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.<init>(HadoopShimsSecure.java:292)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getRecordReader(HadoopShimsSecure.java:406)
at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:549)
at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:199)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:423)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:377)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1059)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
2013-03-11 11:33:17,315 INFO org.apache.hadoop.mapred.TaskLogsTruncater (main): Initializing logs' truncater with mapRetainSize=-1 and reduceRetainSize=-1
2013-03-11 11:33:17,452 INFO org.apache.hadoop.io.nativeio.NativeIO (main): Initialized cache for UID to User mapping with a cache timeout of 14400 seconds.
2013-03-11 11:33:17,453 INFO org.apache.hadoop.io.nativeio.NativeIO (main): Got UserName hadoop for UID 106 from the native implementation
2013-03-11 11:33:17,456 WARN org.apache.hadoop.mapred.Child (main): Error running child
java.io.IOException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.initNextRecordReader(HadoopShimsSecure.java:345)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.<init>(HadoopShimsSecure.java:292)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getRecordReader(HadoopShimsSecure.java:406)
at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:549)
at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:199)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:423)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:377)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1059)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileRecordReader.initNextRecordReader(HadoopShimsSecure.java:331)
... 11 more
Caused by: Status Code: 403, AWS Service: Amazon S3, AWS Request ID: 2981A88871482E0B, AWS Error Code: InvalidObjectState, AWS Error Message: The operation is not valid for the object's storage class, S3 Extended Request ID: 7yHCAsfLHZKbf+5ojWMS9q2B41bBwizoqW4zldSdfEpLoLrKvI9X1WI0brHGsC49
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:548)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:288)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:170)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:2619)
at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:809)
at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.retrievePair(Jets3tNativeFileSystemStore.java:266)
at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.retrievePair(Jets3tNativeFileSystemStore.java:252)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
at org.apache.hadoop.fs.s3native.$Proxy7.retrievePair(Unknown Source)
at org.apache.hadoop.fs.s3native.NativeS3FileSystem.open(NativeS3FileSystem.java:1002)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:420)
at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:78)
at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:51)
at org.apache.hadoop.hive.ql.io.CombineHiveRecordReader.<init>(CombineHiveRecordReader.java:65)
... 16 more
2013-03-11 11:33:17,500 INFO org.apache.hadoop.mapred.Task (main): Runnning cleanup for the task
Looking at the output, the key part is "Error Message: The operation is not valid for the object's storage class", i.e. S3 is complaining that the object's storage class does not allow this operation.
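This InvalidObjectState error typically means the log object has been transitioned to the GLACIER storage class, so it cannot be read directly; it first has to be restored back into S3. As a rough sketch of one way to do that (not part of the original post; assumes boto3 and uses the bucket/key from the log above purely as an example):
import boto3

s3 = boto3.client("s3")

# Ask S3 to restore the archived object for 7 days (the restore itself can take several hours).
s3.restore_object(
    Bucket="memorycraft-log",
    Key="logs/2013-03-04-09-16-51-45565F6169B3E2F2",
    RestoreRequest={"Days": 7},
)

# Once the Restore header reports ongoing-request="false", the Hive job can read the object again.
print(s3.head_object(
    Bucket="memorycraft-log",
    Key="logs/2013-03-04-09-16-51-45565F6169B3E2F2",
).get("Restore"))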
# /usr/local/cassandra/bin/cqlsh --help
Usage: cqlsh [options] [host [port]]
CQL Shell for Apache Cassandra
Options:
--version show program's version number and exit
-h, --help show this help message and exit
-C, --color Always use color output
--no-color Never use color output
-u USERNAME, --username=USERNAME
Authenticate as user.
-p PASSWORD, --password=PASSWORD
Authenticate using password.
-k KEYSPACE, --keyspace=KEYSPACE
Authenticate to the given keyspace.
-f FILE, --file=FILE Execute commands from FILE, then exit
-t TRANSPORT_FACTORY, --transport-factory=TRANSPORT_FACTORY
Use the provided Thrift transport factory function.
--debug Show additional debugging information
--cqlversion=CQLVERSION
Specify a particular CQL version (default: 3).
Examples: "2", "3.0.0-beta1"
-2, --cql2 Shortcut notation for --cqlversion=2
-3, --cql3 Shortcut notation for --cqlversion=3
Connects to localhost:9160 by default. These defaults can be changed by
setting $CQLSH_HOST and/or $CQLSH_PORT. When a host (and optional port number)
are given on the command line, they take precedence over any defaults.
cqlsh:Hogebook> help
Documented shell commands:
===========================
ASSUME CONSISTENCY DESC EXIT SHOW TRACING
CAPTURE COPY DESCRIBE HELP SOURCE
CQL help topics:
================
ALTER CREATE_KEYSPACE SELECT_EXPR
ALTER_ADD CREATE_TABLE SELECT_LIMIT
ALTER_ALTER CREATE_TABLE_OPTIONS SELECT_TABLE
ALTER_DROP CREATE_TABLE_TYPES SELECT_WHERE
ALTER_WITH DELETE TEXT_OUTPUT
APPLY DELETE_COLUMNS TIMESTAMP_INPUT
ASCII_OUTPUT DELETE_USING TIMESTAMP_OUTPUT
BEGIN DELETE_WHERE TRUNCATE
BLOB_INPUT DROP TYPES
BOOLEAN_INPUT DROP_COLUMNFAMILY UPDATE
CONSISTENCYLEVEL DROP_INDEX UPDATE_COUNTERS
CREATE DROP_KEYSPACE UPDATE_SET
CREATE_COLUMNFAMILY DROP_TABLE UPDATE_USING
CREATE_COLUMNFAMILY_OPTIONS INSERT UPDATE_WHERE
CREATE_COLUMNFAMILY_TYPES SELECT USE
CREATE_INDEX SELECT_COLUMNFAMILY UUID_INPUT
Running help on each command shows further details.
cqlsh:Hogebook> help SELECT
SELECT [FIRST n] [REVERSED] <selectExpr>
FROM [<keyspace>.]<table>
[USING CONSISTENCY <consistencylevel>]
[WHERE <clause>]
[ORDER BY <colname> [DESC]]
[LIMIT m];
SELECT is used to read one or more records from a CQL table. It returns
a set of rows matching the selection criteria specified.
Note that FIRST and REVERSED are only supported in CQL 2, and ORDER BY
is only supported in CQL 3 and higher.
For more information, see one of the following:
HELP SELECT_EXPR
HELP SELECT_TABLE
HELP SELECT_WHERE
HELP SELECT_LIMIT
HELP CONSISTENCYLEVEL
It looks like it can be used much like SQL.
Let's try a few commands.
DESC TABLE
cqlsh:Hogebook> desc table User;
CREATE TABLE User (
KEY text PRIMARY KEY,
email text,
gender text
) WITH
comment='' AND
comparator=text AND
read_repair_chance=0.100000 AND
gc_grace_seconds=864000 AND
default_validation=text AND
min_compaction_threshold=4 AND
max_compaction_threshold=32 AND
replicate_on_write='true' AND
compaction_strategy_class='SizeTieredCompactionStrategy' AND
compression_parameters:sstable_compression='SnappyCompressor';
CREATE INDEX User_gender_idx ON User (gender);
# For RandomPartitioner
def tokens(nodes):
    for x in range(nodes):
        print(2 ** 127 // nodes * x)

# For Murmur3Partitioner
def tokens(num_nodes):
    for i in range(num_nodes):
        print((2 ** 64 // num_nodes) * i - 2 ** 63)
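For example (not from the original post), a hypothetical 4-node cluster gets these evenly spaced initial_token values from the Murmur3 version:
>>> tokens(4)
-9223372036854775808
-4611686018427387904
0
4611686018427387904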
# /usr/local/cassandra/bin/nodetool ring
Datacenter: ap-northeast
==========
Replicas: 3
Address Rack Status State Load Owns Token
-4611686018427387905
10.0.1.146 1a Up Normal 75.43 KB 66.67% -1537228672809129303
10.0.1.10 1a Up Normal 61.65 KB 50.00% 0
10.0.1.176 1a Up Normal 58.33 KB 58.33% -9223372036854775808
10.0.1.232 1a Up Normal 58.45 KB 50.00% 4611686018427387901
10.0.1.134 1a Up Normal 58.48 KB 75.00% -4611686018427387905
That was the ring before; after the new node starts up, it looks like this:
# /usr/local/cassandra/bin/nodetool ring
Datacenter: ap-northeast
==========
Replicas: 3
Address Rack Status State Load Owns Token
-4611686018427387905
10.0.1.213 1a Up Normal 49.55 KB 33.33% 1537228672809129299
10.0.1.146 1a Up Normal 63.7 KB 66.67% -1537228672809129303
10.0.1.10 1a Up Normal 66.59 KB 50.00% 0
10.0.1.176 1a Up Normal 63.28 KB 50.00% -9223372036854775808
10.0.1.232 1a Up Normal 63.4 KB 33.33% 4611686018427387901
10.0.1.134 1a Up Normal 63.43 KB 66.67% -4611686018427387905
"Table does not support optimize"と表示されます。 ドキュメントを見ると、InnoDBではOPTIMIZE TABLEはALTER TABLEにマップされると書いてあります。
そのため、ALTER TABLEを行なっても最適化されるようです。
再びいろいろデータを削除してみます。